% Journal article: Khan et al. (2011), refinement of KEA++ keyphrase output, JDIM 9(2).
% No DOI is recorded for this entry; the publisher PDF link is kept in `url`.
@article{439,
  author   = {Khan, Sharifullah and Fatima, Iram and Irfan, Rabia and Latif, Khalid},
  title    = {A Refined Methodology for Automatic Keyphrase Assignment to Digital Documents},
  journal  = {Journal of Digital Information Management},
  year     = {2011},
  volume   = {9},
  number   = {2},
  url      = {http://www.dline.info/fpaper/jdim/v9i2/2.pdf},
  abstract = {Keyphrases precisely express the primary topics and themes of documents and are valuable for cataloging and classification. Manually assigning keyphrases to existing documents is a tedious task; therefore, automatic keyphrase generation has been extensively used to classify digital documents. Existing automatic keyphrase generation algorithms are limited in assigning semantically relevant keyphrases to documents. In this paper we have proposed a methodology to refine the result set of automatically generated keyphrases by Keyphrase Extraction Algorithm (KEA++), so that the keyphrases accurately and precisely represent the content of the document. Our approach is an additional layer at the top of KEA++ and exploits semantic relationships and hierarchical structure of the controlled vocabulary to filter out irrelevant keyphrases from the result set generated by KEA++. The methodology was applied on different sets of academic publications for evaluation. Evaluation demonstrates that the proposed refinement methodology improves the quality of generated keyphrases.},
}