% Journal article proposing the Modified Balanced Random Forest (MBRF) classifier.
% The numeric citation key 2997 is the identifier other documents cite; do not rename it.
@article{2997,
  author   = {Agusta, Zahra Putri and {Adiwijaya}},
  title    = {Modified Balanced Random Forest ({MBRF}) Algorithm for Classifying Imbalanced Data},
  journal  = {Journal of Intelligent Computing},
  year     = {2020},
  volume   = {11},
  number   = {2},
  pages    = {41--51},
  doi      = {10.6025/jic/2020/11/2/41-51},
  url      = {http://www.dline.info/jic/fulltext/v11n2/jicv11n2_1.pdf},
  abstract = {Customer churn prediction is a method that companies use to anticipate loss in revenue. Some data mining classification techniques can be used to predict customer churn. However, these techniques could become less optimal when faced with imbalanced data conditions. Customer churn data has imbalanced data characteristics, so a process that can handle imbalanced data is required. There are two approaches that can solve these problems, namely sampling method (distribution of training data is modified so that two classes of data can be balanced) and algorithm approach (algorithm process is modified to handle imbalanced data). This paper used the algorithm approach because the consistency of original data distribution will be kept the same as the training data. This will provide more valid data and prediction results that can better represent real conditions. In line with this, we proposed a Modified Balanced Random Forest (MBRF) algorithm as a classification technique to address imbalanced data. The MBRF process changes the process in a Balanced Random Forest by applying an undersampling strategy based on clustering techniques for each data bootstrap decision tree in the Random Forest algorithm. The proposed MBRF method yielded better performance compared to the Balanced Random Forest (BRF) and Random Forest (RF) algorithms, with a sensitivity value or true positive rate (TPR) of 88\%, a specificity or true negative rate (TNR) of 94\%, and the best AUC accuracy value of 91.65\%. Moreover, MBRF also reduced process running time.},
}