% Entry 153: journal article by Tetsuya Suzuki (2010) on decision-tree-based
% extraction of text art (ASCII art) using language-independent attributes.
% NOTE(review): no DOI or page range is recorded for this entry; add `doi`
% and `pages` if they can be confirmed from the publisher, and add an access
% date for the URL if the target style supports it.
@article{153,
  author   = {Suzuki, Tetsuya},
  title    = {A Decision Tree-based Text Art Extraction Method without any Language-Dependent Text Attribute},
  journal  = {International Journal of Computational Linguistics Research},
  year     = {2010},
  volume   = {1},
  number   = {1},
  url      = {http://www.dline.info/jcl/fulltext/v1n1/2.pdf},
  abstract = {Text based pictures called text art or ASCII art are often used in Web pages, email text and so on. They enrich expression in text data, but they can be noise for text processing and display of text. For example, they can be obstacle for text-to-speech software and natural language processing, and some of them lose their shape in small display devices. With Text art extraction methods, which detects the area of text art in a given text data, we can ignore text arts in text data or replace them with other strings. Because a text data may include one or more natural languages, it is desirable that text art extraction methods are language-independent. In this paper, we propose a decision tree-based text art extraction method without any language-dependent text attribute. Our method uses attributes of a given text data which represent how the text data looks like text art while previously proposed methods use attributes of a given text data which represent how the text data looks like a specific language text. We tested 63 combinations of 7 text attributes including language-dependent attributes and language-independent attributes for text art recognition. The results shows that a combination of language-independent attributes is the best for text art recognition. The attributes are an attribute based on data compression ratio by Run Length Encoding and two text attributes based on text size. We also evaluated the performance of our text art extraction method with the language-independent attributes by an experiment.},
}