% Journal article on classifying log entries with spam-filter software.
% Authors are separated by " and " so each name parses individually; no DOI is recorded for this entry.
@article{527,
  author   = {Havens, Russel W. and Lunt, Barry and Teng, Chia-Chi},
  title    = {Log File Filtering with Off-the-shelf Na{\"\i}ve {Bayesian} Content Filters},
  journal  = {International Journal of Web Applications},
  year     = {2011},
  volume   = {3},
  number   = {3},
  url      = {http://www.dline.info/ijwa/fulltext/v3n3/3.pdf},
  abstract = {As computer systems become more complex, the state of their inner workings become more and more important to the system administrators working to keep them running. Log files provide much needed visibility into these systems, whether they are hardware, operating systems or applications. Unfortunately, systems can easily create overwhelming amounts of data for administrators to comb through. This research tests the effectiveness of three off-the-shelf Bayesian spam email filters (SpamAssassin, SpamBayes and Bogofilter) for effectiveness as log entry classifiers. A simple scoring system, the Filter Effectiveness Scale (FES), is proposed and used to compare these three filters. The filters are tested in three stages: 1) the filters were tested with the SpamAssassin corpus, with various manipulations made to the messages, 2) the filters were tested for their ability to differentiate log entries from two services, with logs taken from production systems, and 3) the filters were trained with problem-related log entries from system outages and then tested for effectiveness in finding similar outages through similar log files.},
}