1998
@inproceedings{SDH1998,
vgclass = {refpap},
author = {Sahami, Mehran and Dumais, Susan and Heckerman, David and Horvitz, Eric},
title = {A {Bayesian} Approach to Filtering Junk E-mail},
booktitle = {{AAAI} Workshop on Learning for Text Categorization},
address = {Madison, Wisconsin},
month = jul,
year = {1998},
url = {http://research.microsoft.com/~horvitz/junkfilter.htm},
abstract = {In addressing the growing problem of junk email on the
Internet, we examine methods for the automated construction of filters
to eliminate such unwanted messages from a user's mail stream. By
casting this problem in a decision theoretic framework, we are able to
make use of probabilistic learning methods in conjunction with a notion
of differential misclassification cost to produce filters which are
especially appropriate for the nuances of this task. While this may
appear, at first, to be a straightforward text classification problem,
we show that by considering domain-specific features of this problem, in
addition to the raw text of E-mail messages, we can produce much more
accurate filters. Finally, we show the efficacy of such filters in a
real world usage scenario, arguing that this technology is mature
enough for deployment.},
}