% Journal article entry. No DOI is recorded for this work, so the doi field is
% omitted (an empty doi field only produces warnings); the paper is located via url.
@article{729,
  author   = {Zaman, A. N. K.},
  title    = {Stop Word Lists in Document Retrieval Using Latent Semantic Indexing: An Evaluation},
  journal  = {Journal of E-Technology},
  year     = {2012},
  volume   = {3},
  number   = {1},
  url      = {http://www.dline.info/jet/fulltext/v3n1/3.pdf},
  abstract = {Removing stop words is very useful for many text processing applications e.g. text/document retrieval, cross language translation, text categorization, text summarization etc. In this world, different language has different stop word lists, and those are useful for text processing applications. Literature claims that the use of such lists improves retrieval performance. The goal of this research is to evaluate the effect of using English stop word lists in Latent Semantic Indexing (LSI)-based information retrieval (IR) systems with large text dataset. Here, three different lists are compared: two were compiled by IR groups at the University of Glasgow, and the University of Tennessee, and the third one is our own list developed at the University of Northern British Columbia. We also examined the case where stop words were not removed from the input dataset. Our research finds that using tailored stop word lists improves retrieval performance. On the other hand, using arbitrary (non-tailored) lists or not using any list reduces the retrieval performance of LSI-based IR systems with large text dataset.},
}