@comment{Entry 1781: author list uses "Last, First" names joined by " and "; proper nouns in the title are brace-protected against style recasing; the empty doi and url fields were dropped and should be re-added once the values are known.}
@article{1781,
  author   = {Xu, G. X. and Sun, W. and Peng, X. P.},
  title    = {Clustering Research across {Tibetan} and {Chinese} Texts},
  journal  = {Journal of Digital Information Management},
  year     = {2015},
  volume   = {13},
  number   = {3},
  abstract = {Tibetan text clustering has potential in Tibetan information processing domain. In this paper, clustering research across Chinese and Tibetan texts is proposed to benefit Chinese and Tibetan machine translation and sentence alignment. A Tibetan and Chinese keyword table is the main way to implement the text clustering across these two languages. Improved Kmeans and improved density-based spatial clustering of applications with noise (DBSCAN) algorithm are proposed. Experiments show that improved K-means algorithm gains stable text clustering result and performs better than traditional K-means after eliminating the limitation of random selection of initial k data. The improved DBSCAN algorithm obtains good performance through reasonable parameter setting. Improved DBSCAN performs better than improved K-means. The study is helpful and meaningful for the parallel corpus construction of Chinese and Tibetan texts.},
}