2000
@inproceedings{MMM2000e,
vgclass = {fullconf},
vgproject = {cbir,viper},
author = {Wolfgang M\"{u}ller and St\'ephane Marchand-Maillet and Henning M\"{u}ller and Thierry Pun},
title = {Towards a fair benchmark for image browsers},
editor = {John R. Smith and Chinh Le and Sethuraman Panchanathan and
C.-C. Jay Kuo},
booktitle = {Internet Multimedia Management Systems},
address = {Boston, Massachusetts, USA},
volume = {4210},
series = {SPIE Proceedings},
month = {November~6--7},
year = {2000},
note = {(IT 2000, SPIE Conference on Information Technologies)},
abstract = {The recent literature has shown that the principal
difficulty in multimedia retrieval is bridging the ``semantic
gap'' between the user's wishes and his ability to formulate queries.
This insight has spawned two main directions of research: Query By
Example (QBE) with relevance feedback (i.e.\ learning to improve the
result of a previously formulated query) and research into query
formulation techniques, such as browsing or query by sketch. Browsing
techniques try to help the user find his target image, or an
image sufficiently close to the desired result that it can be
used in a subsequent QBE query.
From the feature-space viewpoint, each browsing system tries to let
the user move consciously through feature space and eventually reach the
target image. How to provide this functionality to the user is
presently an open question. In fact, even objective
performance evaluation and comparison of these browsing paradigms is
difficult.
We distinguish here between deterministic browsers, which try to
make it as easy as possible for the user to learn how the system behaves,
and stochastic browsers, which are based on more sophisticated Monte-Carlo
algorithms and thus sacrifice reproducibility for better performance.
At present, these two browsing paradigms are practically incomparable
except through large-scale user studies. This makes it infeasible for
research groups to evaluate incremental improvements of browsing
schemes. Moreover, automated benchmarks in the current literature
simulate the user with a model derived directly from the distance measures
used within the tested systems. Such a circular reference cannot
provide a serious alternative to real user tests.
In this paper, we present an automatic benchmark which uses
user-annotated collections to simulate the semantic gap, thus
providing a means for automatic evaluation and comparison of the
different browsing paradigms. We use a very precise annotation of a few
words together with a thesaurus to provide sufficiently smooth
behaviour of the annotation-based user model. We discuss the design and
evaluation of this annotation as well as the implementation of the
benchmark as an MRML-compliant script with pluggable modules which
allow testing of new interaction schemes (see http://www.mrml.net).},
}