2003
@article{HaS2003,
vgclass = {refpap},
author = {G\'{\i}sli R. Hjaltason and Hanan Samet},
title = {Properties of embedding methods for similarity searching
in metric spaces},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
volume = {25},
number = {5},
pages = {530--549},
month = {May},
year = {2003},
url = {http://ieeexplore.ieee.org/iel5/34/26906/01195989.pdf},
abstract = {Complex data types-such as images, documents, DNA
sequences, etc.-are becoming increasingly important in modern database
applications. A typical query in many of these applications seeks to
find objects that are similar to some target object, where
(dis)similarity is defined by some distance function. Often, the cost
of evaluating the distance between two objects is very high. Thus, the
number of distance evaluations should be kept at a minimum, while
(ideally) maintaining the quality of the result. One way to approach
this goal is to embed the data objects in a vector space so that the
distances of the embedded objects approximates the actual distances.
Thus, queries can be performed (for the most part) on the embedded
objects. We are especially interested in examining the issue of whether
or not the embedding methods will ensure that no relevant objects are
left out. Particular attention is paid to the SparseMap, FastMap, and
MetricMap embedding methods. SparseMap is a variant of Lipschitz
embeddings, while FastMap and MetricMap are inspired by dimension
reduction methods for Euclidean spaces. We show that, in general, none
of these embedding methods guarantee that queries on the embedded
objects have no false dismissals, while also demonstrating the limited
cases in which the guarantee does hold. Moreover, we describe a variant
of SparseMap that allows queries with no false dismissals. In addition,
we show that with FastMap and MetricMap, the distances of the embedded
objects can be much greater than the actual distances. This makes it
impossible (or at least impractical) to modify FastMap and MetricMap to
guarantee no false dismissals.},
}