@comment{Entry 2279: journal article by Tetsuya Suzuki (2017) comparing two ASCII art extraction methods. No DOI is recorded for this entry; the url field is the only locator.}
@article{2279,
  author   = {Suzuki, Tetsuya},
  title    = {Experimental Comparison of {ASCII} Art Extraction Methods: a Run-Length Encoding based Method and a Byte Pattern based Method},
  journal  = {International Journal of Computational Linguistics Research},
  year     = {2017},
  volume   = {8},
  number   = {2},
  url      = {http://www.dline.info/jcl/fulltext/v8n2/jclv8n2_2.pdf},
  abstract = {Pictures consisting of computer characters are called ASCII art. They are widely used in computer texts because they enrich expression in texts. To deal with texts including ASCII art objects, ASCII art extraction methods, which detect ASCII art objects in a given text, are now demanded. For example, such methods are needed to find ASCII art objects in texts for sentiment analysis based on ASCII art objects. Such methods are also needed to remove ASCII art objects from texts in preprocess for natural language processing because non-verbal texts of ASCII art makes texts noisy. Our research group and another research group independently proposed two different ASCII art extraction methods, which are a run-length encoding based method and a byte pattern based method respectively. Both of the methods use ASCII art recognizers constructed by machine learning algorithms, but they use different attributes of texts. In this paper, we compare the two methods by ASCII art extraction experiments where training texts and testing texts are in English and Japanese. According to our experimental results, the two methods are competitive if training texts and testing texts are in a same set of languages, but the run-length encoding based method works better than the byte pattern based method if training texts and testing texts are in different sets of languages.},
}