@comment{Entry 1501: journal article. Author names are stored in "Last, First" form and joined with "and" so that BibTeX parses three separate authors. No DOI is recorded for this entry; the url field links to the paper PDF. The abstract is kept verbatim as quoted text.}
@article{1501,
  author   = {Xu, Gui-xian and Qiu, Li-rong and Yang, Lu},
  title    = {{Tibetan} Text Clustering Based on Machine Learning},
  journal  = {Journal of Digital Information Management},
  year     = {2014},
  volume   = {12},
  number   = {3},
  url      = {http://dline.info/fpaper/jdim/v12i3/2.pdf},
  abstract = {Tibetan information processing technology has been obtained some achievements. But it falls behind Chinese and English information processing. It still needs to be paid more attention. Text clustering has the potential to accelerate the development of Tibetan information processing. In this paper, we propose an approach of Tibetan text clustering based on machine learning. Firstly, the approach is to execute Tibetan word segmentation with Tibetan texts. Then feature selection and text representation are conducted. Finally, K-means and DBSCAN are adopted to deal with the text clustering. The experimental results present that DBSCAN has better performance for Tibetan text clustering. Text clustering systems are designed based on proposed approach. The study is meaningful for the Tibetan text classification, information retrieval as well as construction of high-quality Tibetan corpus.},
}