% Journal article on rebalancing intrusion-detection training data via sample selection.
% Authors are listed in "Last, First" form and joined with " and " so each name is parsed separately.
% No DOI is recorded for this entry; the url field carries the link to the PDF.
@article{993,
  author   = {Chaïri, Ikram and Alaoui, Souad and Lyhyaoui, Abdelouahid},
  title    = {Balancing Distribution of Intrusion Detection Data Using Sample Selection},
  journal  = {Journal of Information Security Research},
  year     = {2012},
  volume   = {3},
  number   = {4},
  url      = {http://www.dline.info/jisr/fulltext/v3n4/1.pdf},
  abstract = {The majority of learning systems usually assume that training sets are balanced, however, in real world data this hypothesis is not always true. The problem of between-class imbalance is a challenge that has attracted growing attention from both academia and industry, because of its critical influence on the performance of learning systems. Many solutions were proposed to resolve this problem: Generally, the common practice for dealing with imbalanced data sets is to rebalance them artificially by using sampling methods. Unfortunately, these methods can’t give a high performance of learning. In this paper, we propose a new method based on Sample Selection (SS), to deal with the problem of between class imbalance. We consider that creating balance between classes by maintaining those examples located near the border line improves the performance of the classifier. To reduce the computational cost of selecting all samples, we propose a clustering method as a first step in order to determine the critical centers, and then select samples from those critical clusters. Experimental results with Multi-Layer Perceptron (MLP) architecture, on well known Intrusion Detection data set, show that our approach allows to attend the precision of Boosting methods, that we will explain how it can be considered like a SS method.},
}