Search results for key=WhJ1996 : 1 match found.

Technical Reports

1996

@techreport{WhJ1996,
	vgclass =	{report},
	vgproject =	{cbir},
	author =	{David A. White and Ramesh Jain},
	title =	{Algorithms and Strategies for Similarity Retrieval},
	number =	{VCL-96-101},
	institution =	{Visual Computing Laboratory, University of California,
	San Diego},
	address =	{9500 Gilman Drive, Mail Code 0407, La Jolla, CA
	92093-0407},
	month =	{July},
	year =	{1996},
	abstract =	{In the database and multimedia community there has been
	increasing interest in retrieval of objects based on similarity. We
	summarize the common strategies used for similarity retrieval and
	survey related work on nearest neighbor searching, including relevant
	complexity results. We focus our effort on the Vector Space Model where
	each object is represented as a vector of fixed dimension. Further, we
	develop structures that are efficient for medium and high dimensional
	vectors (4-100 dimensions) because these higher dimensions are commonly
	used in similarity retrieval applications. Since we believe that query
	performance is of primary importance in most applications that require
	similarity retrieval, we focus on methods that create optimized R-trees
	and \emph{k-d} trees given a dataset, rather than on dynamic algorithms
	that we argue will almost certainly provide decreased query
	performance. We present algorithms for similarity retrieval that
	include support for approximate queries, including an optimal algorithm
	for similarity retrieval (similarity selection) in the case where an
	inexpensive similarity measure is used as a filter for a more expensive
	similarity measure in an indexing structure.

	In addition, we present the results of extensive empirical tests on
	synthetic and real datasets that show that our optimized structures,
	the VAMSplit \emph{k-d} tree and VAMSplit R-tree, are superior to the
	R*-tree and SS-tree in terms of query performance, time to create an
	index, and space utilization. Since many of our empirical results on
	query performance are independent of the size of the dataset, they
	should help researchers and practitioners predict the speedups possible
	in a given application.  Included are empirical results that show the
	performance improvement possible when different degrees of
	approximation are used. We also discuss the advantages and
	disadvantages of approximation, and suggest the idea of an approximate
	incremental similarity query that should be useful in interactive
	applications such as visual information systems.},
}