2004
% CIVR 2004 paper on interactive annotation for content-based image retrieval.
% The month field combines the predefined jul macro with the conference days.
@inproceedings{NgS2004,
  vgclass   = {refpap},
  author    = {Nguyen, Hieu T. and Smeulders, Arnold},
  title     = {Everything Gets Better All the Time, Apart from the Amount
               of Data},
  booktitle = {Proceedings of the Third International Conference on Image
               and Video Retrieval ({CIVR} 2004)},
  address   = {Dublin, Ireland},
  number    = {3115},
  series    = {Lecture Notes in Computer Science},
  pages     = {33--41},
  publisher = {Springer-Verlag},
  month     = jul # {~21--23},
  year      = {2004},
  url       = {http://www.springerlink.com/link.asp?id=xn67e0vtfc8a5mx9},
  abstract  = {The paper first addresses the main issues in current
content-based image retrieval to conclude that the largest factors of
innovations are found in the large size of the datasets, the ability to
segment an image softly, the interactive specification of the users
wish, the sharpness and invariant capabilities of features, and the
machine learning of concepts. Among these everything gets better every
year apart from the need for annotation which gets worse with every
increase in the dataset size. Therefore, we direct our attention to the
question what fraction of images needs to be labeled to get an almost
similar result compared to the case when all images would have been
labeled by annotation? And, how can we design an interactive annotation
scheme where we put up for annotation those images which are most
informative in the definition of the concept (boundaries)? It appears
that we have developed a random followed by a sequential annotation
scheme which requires annotating 1\% equal to 25 items in a dataset of
2500 faces and non-faces to yield an almost identical boundary of the
face-concept compared to the situation where all images would have been
labeled. This approach for this dataset has reduced the effort of
annotation by 99\%.},
}