Search results for key=PaL2002 : 1 match found.

Refereed full papers (journals, book chapters, international conferences)

2002

@comment{PaL2002: SIGIR 2002 conference paper by Pantel and Lin.
  - doi holds the bare identifier that also appears inside url.
  - month concatenates the standard aug macro with the conference days,
    so each style decides how the month name is rendered.
  - address holds the conference venue, as in the original record.
  - vgclass is a non-standard field; standard styles ignore it.}
@inproceedings{PaL2002,
	vgclass =	{refpap},
	author =	{Pantel, Patrick and Lin, Dekang},
	title =	{Document Clustering with Committees},
	booktitle =	{Proceedings of the 25th Annual International {ACM} {SIGIR} Conference on Research and Development in Information Retrieval ({SIGIR} 2002)},
	address =	{Tampere, Finland},
	pages =	{199--206},
	month =	aug # "~11--15",
	year =	{2002},
	doi =	{10.1145/564376.564412},
	url =	{http://doi.acm.org/10.1145/564376.564412},
	abstract =	{Document clustering is useful in many information
	retrieval tasks: document browsing, organization and viewing of
	retrieval results, generation of Yahoo-like hierarchies of documents,
	etc. The general goal of clustering is to group data elements such that
	the intra-group similarities are high and the inter-group similarities
	are low. We present a clustering algorithm called CBC (Clustering By
	Committee) that is shown to produce higher quality clusters in document
	clustering tasks as compared to several well known clustering
	algorithms. It initially discovers a set of tight clusters (high
	intra-group similarity), called committees, that are well scattered in
	the similarity space (low inter-group similarity). The union of the
	committees is but a subset of all elements. The algorithm proceeds by
	assigning elements to their most similar committee. Evaluating cluster
	quality has always been a difficult task. We present a new evaluation
	methodology that is based on the editing distance between output
	clusters and manually constructed classes (the answer key). This
	evaluation measure is more intuitive and easier to interpret than
	previous evaluation measures.},
}