Search results for key=Dom2001 : 1 match found.

Technical Reports

2001

% IBM Research technical report RJ 10219 (October 2001). The author is given
% in unambiguous "Last, First" form, and the month uses the standard BibTeX
% macro so the bibliography style controls its rendering.
@techreport{Dom2001,
	vgclass =	{report},
	author =	{Dom, Byron E.},
	title =	{An Information-Theoretic External Cluster-Validity
	Measure},
	number =	{RJ 10219},
	institution =	{IBM Research Division},
	address =	{IBM Almaden Research Center, 650 Harry Rd., San Jose,
	California 95120-6099, USA},
	month =	oct,
	year =	{2001},
	url =	{http://www.almaden.ibm.com/cs/people/dom/rj10219.ps},
	abstract =	{In this paper we propose a measure of
	similarity/association between two partitions of a set of objects. Our
	motivation is the desire to use the measure to characterize the quality
	or accuracy of clustering algorithms by somehow comparing the clusters
	they produce with ``ground truth'' consisting of classes assigned to
	the patterns by manual means or some other means in whose veracity
	there is confidence. Such measures are referred to as ``external''.
	Our measure also allows clusterings with different numbers of clusters
	to be compared in a quantitative and principled way. Our evaluation
	scheme quantitatively measures how useful the cluster labels of the
	patterns are as predictors of their class labels. When all clusterings
	to be compared have the same number of clusters, the measure is
	equivalent to the mutual information between the cluster labels and the
	class labels. In cases where the numbers of clusters are different,
	however, it computes the reduction in the number of bits that would be
	required to encode (compress) the class labels if both the encoder and
	decoder have free access to the cluster labels. To achieve this
	encoding the estimated conditional probabilities of the class labels
	given the cluster labels must also be encoded. These estimated
	probabilities can be seen as a ``model'' for the class labels and their
	associated code length as a ``model cost''. In addition to defining the
	measure we compare it to other commonly used external measures and
	demonstrate its superiority as judged by certain criteria.},
}