% Wint, Ducros, Aritsugi (2018): non-word spell corrector for social media data, Journal of Digital Information Management 16(2).
@article{2469,
  author   = {Zar Zar Wint and Theo Ducros and Masayoshi Aritsugi},
  title    = {Non-words Spell Corrector of Social Media Data in Message Filtering Systems},
  journal  = {Journal of Digital Information Management},
  year     = {2018},
  volume   = {16},
  number   = {2},
  doi      = {10.6025/jdim/2018/16/2/64-75},
  url      = {http://dline.info/fpaper/jdim/v16i2/jdimv16i2_2.pdf},
  abstract = {We develop an extended version of spell checker and corrector to check non-word errors in social media datasets, which will be used in message filtering systems especially for cyberbullying detection. We use the dictionary techniques to check words, twelve-word spell error checking and correction approaches to correct the non-word errors, and n-gram and Levenshtein distance to select the most suitable word among corrected words. If there is more than one corrected word we get from each approach, we use n-gram techniques to choose the corrected and reasonable word from the words in n-gram database. When we used the Levenshtein distance in our previous work, we found that it selected the first corrected word and it was not a reasonable one in some sentences. Therefore, we use the n-gram database in this paper.},
}