2007
% Journal article recorded while still in press (see note); the entry carries
% no volume/number/pages. The doi field holds the bare DOI; url is kept for
% styles that only read url.
@article{GoH2007,
vgclass = {refpap},
author = {Gondra, Iker and Heisterkamp, Douglas R.},
title = {Content-based image retrieval with the normalized
information distance},
journal = {Computer Vision and Image Understanding},
year = {2007},
note = {(in press)},
doi = {10.1016/j.cviu.2007.11.001},
url = {http://dx.doi.org/10.1016/j.cviu.2007.11.001},
abstract = {The main idea of content-based image retrieval (CBIR) is
to search on an image's visual content directly. Typically, features
(e.g., color, shape, texture) are extracted from each image and
organized into a feature vector. Retrieval is performed by image
example where a query image is given as input by the user and an
appropriate metric is used to find the best matches in the
corresponding feature space. We attempt to bypass the feature selection
step (and the metric in the corresponding feature space) by following
what we believe is the logical continuation of the CBIR idea of
searching visual content directly. It is based on the observation that,
since ultimately, the entire visual content of an image is encoded into
its raw data (i.e., the raw pixel values), in theory, it should be
possible to determine image similarity based on the raw data alone. The
main advantage of this approach is its simplicity in that explicit
selection, extraction, and weighting of features is not needed. This
work is an investigation into an image dissimilarity measure following
from the theoretical foundation of the recently proposed normalized
information distance (NID) [M. Li, X. Chen, X. Li, B. Ma, P. Vitanyi,
The similarity metric, in: Proceedings of the 14th ACM-SIAM Symposium
on Discrete Algorithms, 2003, pp. 863-872]. Approximations of the
Kolmogorov complexity of an image are created by using different
compression methods. Using those approximations, the NID between images
is calculated and used as a metric for CBIR. The compression-based
approximations to Kolmogorov complexity are shown to be valid by
proving that they create statistically significant dissimilarity
measures by testing them against a null hypothesis of random retrieval.
Furthermore, when compared against several feature-based methods, the
NID approach performed surprisingly well.},
}