1984
@article{AlR1984,
vgclass = {refpap},
vgproject = {invariance},
author = {Altmann, J{\"u}rgen and Reitb{\"o}ck, Herbert J. P.},
title = {A Fast Correlation Method for Scale- and
Translation-Invariant Pattern Recognition},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
volume = {6},
number = {1},
pages = {46--57},
month = jan,
year = {1984},
abstract = {A size- and position-invariant description of an image
function can be obtained via the absolute value of the Mellin transform
of its Fourier amplitude spectrum. If the transform is implemented on a
digital computer via a discrete Fourier-Mellin transform, these exact
invariances are not preserved due to sampling- and border-effects. In
this paper these effects are discussed, and an alternative correlation
method is proposed. The method consists of calculating the normalized
absolute magnitude of the discrete Fourier transform (DFT) of the image
function (which gives invariance to translation and multiplicative
amplitude changes) and a subsequent logarithmic distortion in $x$- and
$y$-direction, which converts scaling to translation. Two such
transforms are compared by calculating the normalized Euclidean
distances between both for all possible relative shifts along the main
diagonal. If, for some shift, the distance has a minimum below a
similarity threshold, the underlying image functions will probably
differ only by translation and scaling. The magnitude of this shift is
related to the scale factor between the objects. Good separation
between similar and nonsimilar objects is possible if two size criteria
imposed by the DFT are met: the total object size must not exceed
$\frac{N}{4}$ ($N$ is the number of image points in each dimension),
and object details have to be larger than about 4 image points. As a
consequence, $N$ increases with object complexity and desired scale
range. With $N = 64$, only a limited object manifold can be handled;
with $N = 256$, useful results are found for quite complicated forms.
The method described has the additional advantage that the
magnification factor is accessible. Its disadvantage, the requirement
that two transforms have to be compared at many relative positions, can
be avoided by an independent evaluation of the scale factor via the
normalized central second moments of the image functions. These moments
can be calculated from the normalized absolute magnitude of the
discrete Fourier transform. If the size ratio of image and sample is
thus known, the relative position can be estimated, where the
logarithmically distorted spectra would coincide in case of similarity.
Correct scale factors are obtained if the object size is between 4 and
about $\frac{N}{2}$ image points. Since one comparison suffices for
every stored sample, the proposed method is even faster than a discrete
Fourier-Mellin transform.},
}