Search results for key=BGR1999 : 1 match found.

Refereed full papers (journals, book chapters, international conferences)

1999

@inproceedings{BGR1999,
	vgclass =	{refpap},
	author =	{Kevin Beyer and Jonathan Goldstein and Raghu Ramakrishnan
	and Uri Shaft},
	title =	{When Is ``Nearest Neighbor'' Meaningful?},
	editor =	{C. Beeri and P. Bruneman},
	booktitle =	{Proceeding of the 7th International Conference on Database
	Theory (ICDT'99), Jerusalem, Israel},
	number =	{2432},
	series =	{Lecture Notes in Computer Science},
	pages =	{217--235},
	publisher =	{Springer-Verlag},
	month =	{10--12~January},
	year =	{1999},
	url =	{http://www.springerlink.com/link.asp?id=04p94cqnbge862kh},
	abstract =	{We explore the effect of dimensionality on the ``nearest
	neighbor'' problem. We show that under a broad set of conditions (much
	broader than independent and identically distributed dimensions), as
	dimensionality increases, the distance to the nearest data point
	approaches the distance to the farthest data point. To provide a
	practical perspective, we present empirical results on both real and
	synthetic data sets that demonstrate that this effect can occur for as
	few as 10-15 dimensions.

	These results should not be interpreted to mean that high-dimensional
	indexing is never meaningful; we illustrate this point by identifying
	some high-dimensional workloads for which this effect does not occur.
	However, our results do emphasize that the methodology used almost
	universally in the database literature to evaluate high-dimensional
	indexing techniques is flawed, and should be modified. In particular,
	most such techniques proposed in the literature are not evaluated
	versus simple linear scan, and are evaluated over workloads for which
	nearest neighbor is not meaningful.  Often, even the reported
	experiments, when analyzed carefully, show that linear scan would
	outperform the techniques being proposed on the workloads studied in
	high (10-15) dimensionality!},
}