% Journal article introducing the ER4ML tool. The page range is taken from the DOI suffix (49-53).
@article{3021,
  author   = {Lanasri, Dihia and Ordonez, Carlos and Bellatreche, Ladjel and Khouri, Selma},
  title    = {Visualization-based Machine Learning Model for Relational Databases},
  journal  = {Journal of E-Technology},
  year     = {2020},
  volume   = {11},
  number   = {2},
  pages    = {49--53},
  doi      = {10.6025/jet/2020/11/2/49-53},
  url      = {http://www.dline.info/jet/fulltext/v11n2/jetv11n2_1.pdf},
  abstract = {Transforming several relational tables into a data set to be used as input to a Machine Learning (ML) model is a complex task since the data scientist has to derive many intermediate tables, queries and views in a disorganized manner. This process creates many SQL queries to facilitate the exploration task of the data scientist. Because the provenance of the intermediate results is not reflected, similar SQL queries tend to be written multiple times causing repeated manual work. In this paper, we propose a tool ``ER4ML'' assisting data scientists in modeling and visualizing the transformations applied to the relational database before obtaining a dataset feeding the ML models. ER4ML is a diagram flow based on a conceptual view of the database schema and transformations based on powerful extensions of ER diagram in UML notations. In addition, ER4ML tracks data provenance, improving query and data set reuse.},
}