% Journal article on sentiment classification of Arabic tweets (JDIM 14(1), 2016).
% The citation key is kept as-is so existing \cite commands keep resolving.
% No DOI is recorded for this entry; the url field is the only locator.
@article{1958,
  author   = {Brahimi, Belgacem and Touahria, Mohamed and Tari, Abdelkamel},
  title    = {Data and Text Mining Techniques for Classifying {Arabic} Tweet Polarity},
  journal  = {Journal of Digital Information Management},
  year     = {2016},
  volume   = {14},
  number   = {1},
  url      = {http://dline.info/fpaper/jdim/v14i1/v14i1_3.pdf},
  abstract = {Sentiment analysis is a new task related to text mining that extracts opinions from textual data and classifies them into positive, negative or neutral. The goal of this paper is to determine the effect of applying stemming and n-gram techniques for Arabic texts (tweets) on sentiment classification. This study also aims at investigating the impact of feature selection on the performance of the classifier. For this reason, three classifiers Support Vector Machines (SVM), Naïve Bayes (NB), and K-nearest neighbor (KNN) are used. The obtained results showed that the best results of performance are obtained when applying a hybrid representation which includes tokens with character 3-grams. The experiment results also revealed that the use of feature selection technique improves significantly the accuracy of the three classifiers for the task of opinion classification. Regarding the classifiers, SVM outperforms the other classifiers when using all the features, while when selecting the most relevant features by the SVM feature selection technique, SVM and NB provided the best results.},
}