2004
@article{LFJ2004,
vgclass = {refpap},
author = {Martin H. C. Law and Mario A. T. Figueiredo and Anil K. Jain},
title = {Simultaneous Feature Selection and Clustering Using
Mixture Models},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
volume = {26},
number = {9},
pages = {1154--1166},
month = sep,
year = {2004},
doi = {10.1109/TPAMI.2004.71},
url = {http://dx.doi.org/10.1109/TPAMI.2004.71},
abstract = {Clustering is a common unsupervised learning technique
used to discover group structure in a set of data. While there exist
many algorithms for clustering, the important issue of feature
selection, that is, what attributes of the data should be used by the
clustering algorithms, is rarely touched upon. Feature selection for
clustering is difficult because, unlike in supervised learning, there
are no class labels for the data and, thus, no obvious criteria to
guide the search. Another important problem in clustering is the
determination of the number of clusters, which clearly impacts and is
influenced by the feature selection issue. In this paper, we propose
the concept of feature saliency and introduce an
expectation-maximization (EM) algorithm to estimate it, in the context
of mixture-based clustering. Due to the introduction of a minimum
message length model selection criterion, the saliency of irrelevant
features is driven toward zero, which corresponds to performing feature
selection. The criterion and algorithm are then extended to
simultaneously estimate the feature saliencies and the number of
clusters.},
}