2001
@inproceedings{CoB2001,
vgclass = {refpap},
author = {Adrian Corduneanu and Christopher M. Bishop},
title = {Variational {B}ayesian Model Selection for Mixture
Distributions},
editor = {T. Richardson and T. Jaakkola},
booktitle = {Proceedings of the Eighth International Conference on
Artificial Intelligence and Statistics},
pages = {27--34},
publisher = {Morgan Kaufmann},
year = {2001},
url = {http://research.microsoft.com/~cmbishop/downloads/Bishop-AIStats01.ps},
abstract = {Mixture models, in which a probability distribution is
represented as a linear superposition of component distributions, are
widely used in statistical modelling and pattern recognition. One of
the key tasks in the application of mixture models is the determination
of a suitable number of components. Conventional approaches based on
cross-validation are computationally expensive, are wasteful of data,
and give noisy estimates for the optimal number of components. A fully
Bayesian treatment, based for instance on Markov chain Monte Carlo
methods, will return a posterior distribution over the number of
components. However, in practical applications it is generally
convenient, or even computationally essential, to select a single, most
appropriate model. Recently it has been shown, in the context of linear
latent variable models, that hierarchical priors governed by continuous
hyperparameters, whose values are set by type-II maximum likelihood, can
be used to optimize model complexity. In this paper we
extend this framework to mixture distributions by considering the
classical task of density estimation using mixtures of Gaussians. We
show that, by setting the mixing coefficients to maximize the marginal
log-likelihood, unwanted components can be suppressed, and the
appropriate number of components for the mixture can be determined in a
single training run without recourse to cross-validation. Our approach
uses a variational treatment based on a factorized approximation to the
posterior distribution.},
}
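The abstract's central claim, that a variational treatment of an over-specified mixture drives the surplus mixing coefficients toward zero in a single training run, can be demonstrated with off-the-shelf tools. The sketch below is not the paper's implementation: it uses scikit-learn's `BayesianGaussianMixture`, which places a Dirichlet prior on the mixing coefficients rather than setting them by type-II maximum likelihood as the paper does, but the factorized variational posterior and the component-suppression effect are analogous.

```python
# Minimal sketch (assumed setup, not the paper's code): fit a variational
# Bayesian mixture of Gaussians with deliberately too many components and
# observe that redundant components receive near-zero mixing weights.
import numpy as np
from sklearn.mixture import BayesianGaussianMixture

rng = np.random.default_rng(0)
# Synthetic data: three well-separated 2-D Gaussian clusters.
X = np.vstack([
    rng.normal(loc=[0.0, 0.0], scale=0.5, size=(200, 2)),
    rng.normal(loc=[5.0, 5.0], scale=0.5, size=(200, 2)),
    rng.normal(loc=[0.0, 5.0], scale=0.5, size=(200, 2)),
])

# Over-specify the mixture with 10 components; the variational
# treatment should suppress the unwanted ones in one training run,
# with no recourse to cross-validation.
vb = BayesianGaussianMixture(
    n_components=10,
    weight_concentration_prior=1e-3,  # small value favours sparse weights
    max_iter=500,
    random_state=0,
).fit(X)

print(np.round(vb.weights_, 3))
print("effective components:", int(np.sum(vb.weights_ > 0.01)))
```

On data like this, the fit typically concentrates nearly all of the weight on three components, illustrating the abstract's point that a suitable number of components can be determined in a single run.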