Search results for key=FrR1998 : 1 match found.

Technical Reports

1998

@techreport{FrR1998,
	vgclass =	{report},
	author =	{Fraley, Chris and Raftery, Adrian E.},
	title =	{How many clusters? {Which} clustering method? -- {Answers}
	via Model-Based Cluster Analysis},
	number =	{329},
	institution =	{Department of Statistics, University of Washington},
	address =	{Box 354322, Seattle, WA 98195-4322 USA},
	month =	feb,
	year =	{1998},
	url =	{http://www.stat.washington.edu/tech.reports/tr329.ps},
	abstract =	{We consider the problem of determining the structure of
	clustered data, without prior knowledge of the number of clusters or
	any other information about their composition. Data are represented by
	a mixture model in which each component corresponds to a different
	cluster. Models with varying geometric properties are obtained through
	Gaussian components with different parameterizations and cross-cluster
	constraints. Noise and outliers can be modeled by adding a Poisson
	process component. Partitions are determined by the EM
	(expectation-maximization) algorithm for maximum likelihood, with
	initial values from agglomerative hierarchical clustering.

	Models are compared using an approximation to the Bayes factor based on
	the Bayesian Information Criterion (BIC): unlike significance tests,
	this allows comparison of more than two models at the same time, and
	removes the restriction that the models compared be nested. The
	problems of determining the number of clusters and the clustering
	method are solved simultaneously by choosing the best model. Moreover,
	the EM result provides a measure of uncertainty about the associated
	classification of each data point.

	Examples are given, showing that this approach can give performance
	that is much better than standard procedures, which often fail to
	identify groups that are either overlapping or of varying sizes and
	shapes.},
}