% Journal article record. Authors are separated with " and " (BibTeX's only
% name separator) and written "Last, Initials"; {gSpan} is braced so that
% sentence-casing styles keep its capitalisation. The abstract is kept verbatim.
@article{1244,
  author   = {Azaouzi, M. and Romdhane, L. B.},
  title    = {A Modified {gSpan} for Computing Rare Substructures in Graph Databases},
  journal  = {Journal of Data Processing},
  year     = {2013},
  volume   = {3},
  number   = {2},
  url      = {http://www.dline.info/jdp/fulltext/v3n2/1.pdf},
  abstract = {Graphs can represent any kind of data, as e.g. biology or chemistry, biological networks or chemical compounds. A graph database is a frequently used means to efficiently implementation of these data. With the increasing usage of graph databases, it has become more and more demanding to efficiently process graph queries. Querying graph databases is costly since it involves a test of structure matching of graphs, which is an NP-complete problem. Thus, to improve the performance of querying, must be reduced the number of subgraph isomorphism tests. Therefore efficient methods have been proposed to avoid most of these tests but still allow to identify all graphs containing the query pattern. In this paper, we propose a novel indexing feature, called RAre subGraphs (RGs). The rare subgraphs are candidates occurs in only a small number of graphs in the database. Since discovering patterns is an important problem in data mining, to discover these substructures must have an efficient algorithm. Classics mining systems provide a restricted mechanism on patterns with frequency higher to a minimum support. A major challenge, there are no efficient algorithms for the extraction of the patterns with an exact frequency. For this, we have proposed an adaptation of gSpan to compute the set of subgraphs for a given frequency. By adopting this model, we can extract the rare substructures.},
}