% Entry 2123: journal article on dynamic indexing for incremental entity resolution.
% Authors are separated with " and " (BibTeX's only name separator) and written
% in "Last, First" form, taking the final word of each name as the surname.
% No doi/url fields are given: the source record had an empty DOI and a
% dash-only placeholder URL. Add the real identifiers when known.
% NOTE(review): pages are not recorded for this article - confirm and add.
@article{2123,
  author   = {Vieira, Priscilla and Salgado, Ana Carolina and Lóscio, Bernadette Farias},
  title    = {A Dynamic Indexing for Incremental Entity Resolution over Query Results},
  journal  = {International Journal of Computational Linguistics Research},
  year     = {2016},
  volume   = {7},
  number   = {3},
  abstract = {Entity Resolution (ER) is the problem of identifying groups of tuples from one or multiple data sources that represent the same real-world entity. This is a crucial stage of data integration processes, which often need to integrate data at query time. This task becomes more challenging in scenarios with dynamic data sources or with a large volume of data. As most ER techniques deal with all tuples at once, new solutions have been proposed to deal with large volumes of data. One possible approach consists in performing the ER process on query results rather than the whole data. In this case, previous results of ER tasks are reused in order to reduce the number of comparisons between pairs of tuples at query time. In a similar way, indexing techniques can also be employed to help the identification of equivalent tuples and to reduce the number of comparisons between pairs of tuples. In this context, this work proposes an indexing technique for incremental Entity Resolution processes. The expected contributions of this work are the specification, the implementation and the evaluation of the proposed indexes. We evaluated the reuse of previous results of the ER process and highlighted its impact. The time spent for storing, accessing and updating the indexes was measured. We concluded that the reuse is more efficient than the reprocessing of tuples comparison.},
}