@comment{Entry 2984: journal article describing the DejaVu demo, which caches MapReduce tuning setups for the SQL-on-Hadoop engine Hive. Authors are stored in "von Last, First" form and joined with " and "; the doi field holds the bare DOI without a resolver prefix.}
@article{2984,
  author   = {Lucas Filho, Edson Ramiro and de Almeida, Eduardo Cunha and Scherzinger, Stefanie},
  title    = {{SQL} {Hadoop} Processing Engineers using {MapReduce}},
  journal  = {Progress in Computing Applications},
  year     = {2020},
  volume   = {9},
  number   = {1},
  doi      = {10.6025/pca/2020/9/1/1-5},
  url      = {http://www.dline.info/pca/fulltext/v9n1/pcav9n1_1.pdf},
  abstract = {SQL-on-Hadoop processing engines have become state-of-the-art, yet the skills required to tune these systems are rare in the job market. Automated tuning advisers can profile the low-level MapReduce jobs and propose appropriate tuning setups, but up-front tuning is time consuming and costly. In this demo, we present DejaVu. DejaVu integrates with Hive and effectively reduces the tuning costs by caching tuning setups for partial query plans: When the SQL-on-Hadoop engine Hive compiles SQL queries into physical query plans, single MapReduce jobs tend to be similar between query plans. By recycling the tuning setups for similar low-level MapReduce jobs, DejaVu can effectively cut down the time spent profiling the TPC-H query workload in half, achieving similar impact on the performance of the jobs. While we employ Starfish in this demo, DejaVu can leverage any third-party MapReduce tuning adviser.},
}