% Entry 1697: journal article on n-gram / gappy-n-gram string kernels for text categorization.
% Authors are separated with " and " as BibTeX requires (commas between people are parsed as
% parts of a single name). Each name is braced as one literal unit so it prints exactly as
% supplied; NOTE(review): the given/family split of the trailing initials is not known from
% this record -- confirm and convert to "Family, Given" form if appropriate.
% Empty doi/url placeholders were dropped; add them back once real values are known.
@article{1697,
  author   = {{Varsha K. V.} and {Santhosh Kumar C.} and {Reghu Raj P. C.}},
  title    = {Improving the Performance of Text Categorization using N-gram Kernels},
  journal  = {International Journal of Computational Linguistics Research},
  year     = {2015},
  volume   = {6},
  number   = {1},
  abstract = {Kernel Methods are known for their robustness in handling large feature space and are widely used as an alternative to external feature extraction based methods in tasks such as classification and regression. This work follows the approach of using different string kernels such as n-gram kernels and gappy-n-gram kernels on text classification. It studies how kernel concatenation and feature combination affects the classification accuracy of the system. It also explores how the kernel combination algorithms work on the system. The kernels are implemented as rational kernels, which satisfies the Mercer’s Theorem ensuring the kernel matrices to be positive definite symmetric. The rational kernels are computed with a general algorithm of composition of weighted transducers which help in dealing with variable length sequences. These kernels are then used with SVM formulating efficient classifier for text categorization. Both one-stage and two stage algorithms are applied for kernel combination which were successful in achieving better system performance compared to that given by individual kernels.},
}