% Journal article on clustering CQA-site posts with an improved spectral
% clustering technique (see the abstract field for the full summary).
% NOTE(review): the DOI suffix "76-84" suggests pages = {76--84}; the page
% range is not stated explicitly in this record -- confirm against the
% publisher before adding a pages field.
@article{2470,
  author   = {Singh, Abhishek Kumar and Nagwani, Naresh Kumar and Pandey, Sudhakar},
  title    = {Efficient Management of Community Question Answering Sites using Improved Spectral Clustering},
  journal  = {Journal of Digital Information Management},
  year     = {2018},
  volume   = {16},
  number   = {2},
  doi      = {10.6025/jdim/2018/16/2/76-84},
  url      = {http://dline.info/fpaper/jdim/v16i2/jdimv16i2_3.pdf},
  abstract = {Community Question-Answering (CQA) sites are the major platform where posts are generated by peers in the form of questions and answers for information seeking in online environments. In general, multiple posts are created by different users on a particular topic or subject. Large number of posts raises the difficulties in information management of these sites. A number of approaches are suggested in recent research work for efficient management of data for CQA sites. Many of the existing approaches have suggested use of clustering techniques for managing the CQA sites, but ignored the tagging data (user tags) of the posts. In this paper, an improved spectral clustering technique is derived based on similarity measures for text processing (SMTP) and utilized for clustering the posts considering the tagging data available on CQA sites. A specialized data structure, namely, folksonomy is developed for clustering using the relationship between tags, posts and users. The proposed method is developed in two stages. In first stage, the folksonomy relation is created and post similarity graph is built with the help of tag frequency-inverse post frequency. In the second stage, the spectral clustering algorithm is applied to the post similarity matrix to group the similar posts. The post clusters are generated as the output of the proposed algorithm where the post information management can be made with the help of user tags. The experimental results show that the improved spectral clustering algorithm outperforms the other considered clustering algorithms with a huge margin and it will be helpful for information management of the CQA sites. The improved spectral clustering algorithm will be useful for post pre-processing, faster information retrieval, duplicate posts identification and posts management from clusters.},
}