Search results for key=YpH2002: 1 match found.

Refereed full papers (journals, book chapters, international conferences)

2002

% Workshop paper. vgclass and url1 are not standard BibTeX fields; styles that
% do not know them ignore them. month is the jul macro concatenated with the day.
@inproceedings{YpH2002,
	vgclass =	{refpap},
	author =	{Ypma, Alexander and Heskes, Tom},
	title =	{Categorization of web pages and user clustering with
	mixtures of hidden {Markov} models},
	booktitle =	{Proceedings of the International Workshop on Web Knowledge
	Discovery and Data Mining ({WEBKDD'02})},
	address =	{Edmonton, Canada},
	month =	jul # "~17",
	year =	{2002},
	url =	{ftp://ftp.mbfys.kun.nl/pub/snn/pub/ypma/Pdf/webkdd02.pdf},
	url1 =	{ftp://ftp.mbfys.kun.nl/pub/snn/pub/ypma/PostScript/webkdd02.ps},
	abstract =	{We propose mixtures of hidden Markov models for modelling
	clickstreams of web surfers. Hence, the page categorization is learned
	from the data without the need for a (possibly cumbersome) manual
	categorization. We provide an EM algorithm for training a mixture of
	HMMs and show that additional static user data can be incorporated
	easily to possibly enhance the labelling of users. Furthermore, we use
	prior knowledge to enhance generalization and avoid numerical problems.
	We use parameter tying to decrease the danger of overfitting and to
	reduce computational overhead. We put a flat prior on the parameters to
	deal with the problem that certain transitions between page categories
	occur very seldom or not at all, in order to ensure that a nonzero
	transition probability between these categories nonetheless remains. In
	applications to artificial data and real-world web logs we demonstrate
	the usefulness of our approach. We train a mixture of HMMs on artificial
	navigation patterns, and show that the correct model is being learned.
	Moreover, we show that the use of static ``satellite data'' may enhance
	the labeling of shorter navigation patterns. When applying a mixture of
	HMMs to real-world web logs from a large Dutch commercial web site, we
	demonstrate that sensible page categorizations are being learned.},
}