@article{411,
  author   = {Maeda, Akira and Hayashi, Yukinori},
  title    = {Classifying {Web} Content Using Discriminant Efficiencies},
  journal  = {Journal of Information Technology Review},
  year     = {2010},
  volume   = {1},
  number   = {4},
  url      = {http://www.dline.info/jitr/fulltext/v1n4/2.pdf},
  abstract = {In this paper, we propose a method to classify Web documents by genre (not by topic) based on features of terms and HTML tags. For the classifier, we use SVM (Support Vector Machine) and Naïve Bayes. In order to improve the accuracy of classification, we calculate discriminant efficiencies of each pair of a term and a HTML tag to find out HTML tags which are effective for genre classification. We conducted experiments of genre classification of Japanese Web documents using the proposed method. The experimental results show that our method using discriminant efficiencies achieves 8% increase in classification accuracy.},
}