2003
% Journal article: Egghe and Michel, Information Processing and Management 39(5), 2003.
% vgclass is a non-standard field (presumably a local classification tag); standard styles ignore it.
% NOTE(review): the symbol after SPIRIT in the abstract was mis-encoded in the source file;
% it is restored here as a copyright sign -- confirm against the publisher's abstract.
@article{EgM2003,
  vgclass  = {refpap},
  author   = {Egghe, L. and Michel, C.},
  title    = {Construction of weak and strong similarity measures for
              ordered sets of documents using fuzzy set techniques},
  journal  = {Information Processing and Management},
  volume   = {39},
  number   = {5},
  pages    = {771--807},
  month    = sep,
  year     = {2003},
  doi      = {10.1016/S0306-4573(02)00027-4},
  url      = {http://dx.doi.org/10.1016/S0306-4573(02)00027-4},
  abstract = {Ordered sets of documents are encountered more and more in
              information distribution systems, such as information retrieval
              systems. Classical similarity measures for ordinary sets of documents
              hence need to be extended to these ordered sets. This is done in this
              paper using fuzzy set techniques. First a general similarity measure is
              developed which contains the classical strong similarity measures such
              as Jaccard, Dice, Cosine and which contains the classical weak
              similarity measures such as Recall and Precision.
              Then these measures are extended to comparing fuzzy sets of documents.
              Measuring the similarity for ordered sets of documents is a special
              case of this, where, the higher the rank of a document, the lower its
              weight is in the fuzzy set. Concrete forms of these similarity measures
              are presented. All these measures are new and the ones for the weak
              similarity measures are the first of this kind (other strong similarity
              measures have been given in a previous paper by Egghe and Michel).
              Some of these measures are then tested in the IR-system Profil-Doc. The
              engine SPIRIT{\copyright} extracts ranked documents sets in three different
              contexts, each for 600 request. The practical useability of the
              OS-measures is then discussed based on these experiments.},
}