% Journal article (International Journal of Web Applications, vol. 4, no. 3, 2012).
% No DOI is recorded for this entry, so the url field is its only locator.
@article{920,
  author   = {Moussa, Rim},
  title    = {Massive Data Analytics in the Cloud: {TPC-H} Experience on {Hadoop} Clusters},
  journal  = {International Journal of Web Applications},
  year     = {2012},
  volume   = {4},
  number   = {3},
  url      = {http://dline.info/ijwa/fulltext/v4n3/1.pdf},
  abstract = {NoSQL systems rose alongside internet companies, which have different challenges in dealing with data that the traditional RDBMS solutions could not cope with. Indeed, in order to handle efficiently the continuous growth of data, NoSQL technologies feature dynamic horizontal scaling rather than vertical scaling. To date few studies address On-Line Analytical Processing challenges and solutions using NoSQL systems. In this paper, we first overview NoSQL and adjacent technologies, then discuss analytics challenges in the cloud, and propose a taxonomy for a decision-support system workload, as well as specific analytics scenarios. The proposed scenarios aim at allowing best performances, best availability and tradeoff between space, bandwidth and computing overheads. Finally, we evaluate Hadoop/Pig using TPC-H benchmark, under different analytics scenarios, and report thorough performance tests on Hadoop for various data volumes, workloads, and cluster sizes.},
}