2003
% Conference paper entry (Spam Conference 2003). The abstract is a quoted
% excerpt that ends in \ldots, i.e. it is truncated rather than complete.
% The day of the month is appended to the standard jan macro by concatenation
% so that styles can still abbreviate or localise the month name.
% {Bayesian} is braced so sentence-casing styles keep its capital letter.
@inproceedings{Gra2003,
vgclass = {refpap},
author = {Graham, Paul},
title = {Better {Bayesian} Filtering},
booktitle = {Proceedings of the 2003 Spam Conference},
address = {Cambridge, MA, USA},
month = jan # "~17",
year = {2003},
url = {http://www.paulgraham.com/better.html},
abstract = {The first discovery I'd like to present here is an
algorithm for lazy evaluation of research papers. Just write whatever
you want and don't cite any previous work, and indignant readers will
send you references to all the papers you should have cited. I
discovered this algorithm after ``A Plan for Spam'' [1] was on
Slashdot.
Spam filtering is a subset of text classification, which is a well
established field, but the first papers about Bayesian spam filtering
per se seem to have been two given at the same conference in 1998, one
by Pantel and Lin [2], and another by a group from Microsoft Research
[3].
When I heard about this work I was a bit surprised. If people had been
onto Bayesian filtering four years ago, why wasn't everyone using it?
When I read the papers I found out why. Pantel and Lin's filter was the
more effective of the two, but it only caught 92\% of spam, with 1.16\%
false positives \ldots},
}