@comment{Journal article, Journal of Digital Information Management 11(3), 2013.
  No DOI or page range is recorded for this entry; add them if they become known.}
@article{1188,
  author   = {Yang, Yang and Long, Xiang and Shi, Biaobiao},
  title    = {Spanning Tree Method for Minimum Communication Costs In Grouped Virtual {MapReduce} Cluster},
  journal  = {Journal of Digital Information Management},
  year     = {2013},
  volume   = {11},
  number   = {3},
  url      = {http://dline.info/fpaper/jdim/v11i3/9.pdf},
  abstract = {Today, MapReduce and virtual cluster are sharp swords for this big data and cloud computing era. To combine these two emerging technologies, it brings feasible-scalability, easy-management, fast-deployment and high-efficiency with the system. As every sword has two sides, the I/O bottleneck of virtualization technologies may seriously impacts on the performance of MapReduce cluster which deals with I/O-intensive applications. In this paper, we analyze the combination advantages and disadvantages of virtualization technology of MapReduce cluster. We also analyze the communication model for both of them and build a communication costs model. Then, we propose a novel algorithm of minimum-weight spanning tree to construct a lower communication costs virtual MapReduce cluster. With the help of constructing minimum-weight spanning tree, we find out a method to select local-master and group the cluster. Theoretical simulation and experiment results demonstrate that our method can greatly reduce communication costs. The performance improvement is up to $\sim$40.4\% respectively},
}