1987
% Journal article: Jones and Furnas (1987), a geometric analysis of similarity measures used in information retrieval.
@article{JoF1987,
  vgclass  = {refpap},
  author   = {Jones, William P. and Furnas, George W.},
  title    = {Pictures of relevance: {A} geometric analysis of similarity measures},
  journal  = {Journal of the American Society for Information Science},
  volume   = {38},
  number   = {6},
  pages    = {420--442},
  year     = {1987},
  url      = {http://www3.interscience.wiley.com/cgi-bin/abstract/10017079/},
  abstract = {We want computer systems that can help us assess the
              similarity or relevance of existing objects (e.g., documents,
              functions, commands, etc.) to a statement of our current needs (e.g.,
              the query). Towards this end, a variety of similarity measures have
              been proposed. However, the relationship between a measure's formula
              and its performance is not always obvious. A geometric analysis is
              advanced and its utility demonstrated through its application to six
              conventional information retrieval similarity measures and a seventh
              spreading activation measure. All seven similarity measures work with a
              representational scheme wherein a query and the database objects are
              represented as vectors of term weights. A geometric analysis
              characterizes each similarity measure by the nature of its
              iso-similarity contours in an n-space containing query and object
              vectors. This analysis reveals important differences among the
              similarity measures and suggests conditions in which these differences
              will affect retrieval performance. The cosine coefficient, for example,
              is shown to be insensitive to between-document differences in the
              magnitude of term weights while the inner product measure is sometimes
              overly affected by such differences. The context-sensitive spreading
              activation measure may overcome both of these limitations and deserves
              further study. The geometric analysis is intended to complement, and
              perhaps to guide, the empirical analysis of similarity measures.},
}