1998
@inproceedings{WSB1998,
vgclass = {refpap},
vgproject = {cbir},
author = {Roger Weber and Hans-J. Schek and Stephen Blott},
title = {A Quantitative Analysis and Performance Study for
Similarity-Search Methods in High-Dimensional Spaces},
editor = {Ashish Gupta and Oded Shmueli and Jennifer Widom},
booktitle = {Proceedings of 24th International Conference on Very Large Databases (VLDB'98)},
address = {New York, NY, USA},
month = {24--27~August},
year = {1998},
url = {http://www.vldb.org/dblp/db/conf/vldb/WeberSB98.html},
url1 = {http://www.vldb.org/conf/1998/p194.pdf},
abstract = {For similarity search in high-dimensional vector spaces
(or ``HDVSs''), researchers have proposed a number of new methods (or
adaptations of existing methods) based, in the main, on data-space
partitioning. However, the performance of these methods generally
degrades as dimensionality increases. Although this phenomenon-known as
the ``dimensional curse''-is well known, little or no quantitative
analysis of the phenomenon is available. In this paper, we provide a
detailed analysis of partitioning and clustering techniques for
similarity search in HDVSs. We show formally that these methods
exhibit linear complexity at high dimensionality, and that existing
methods are outperformed on average by a simple sequential scan if the
number of dimensions exceeds around 10. Consequently, we come up with
an alternative organization based on approximations to make the
unavoidable sequential scan as fast as possible. We describe a simple
vector approximation scheme, called VA-file, and report on an
experimental evaluation of this and of two tree-based index methods (an
$R^*$-tree and an X-tree).},
}