% Journal article: Ishida (2010), "Detecting Social Periodicities on Massive
% Online Media", Journal of E-Technology 1(3).
% The numeric citation key is kept as-is so existing citations still resolve.
% No DOI is recorded for this entry; the field is omitted rather than left empty.
% TODO(review): page range is not recorded here -- add `pages` if known.
% The abstract is reproduced verbatim from the original record.
@article{325,
  author   = {Ishida, Kazunari},
  title    = {Detecting Social Periodicities on Massive Online Media},
  journal  = {Journal of E-Technology},
  year     = {2010},
  volume   = {1},
  number   = {3},
  url      = {http://www.dline.info/jet/fulltext/v1n3/5.pdf},
  abstract = {This paper proposes a detection method for social periodicities from time series data and implementation using Hadoop to handle massive data collected from online media, such as news articles and blogs including spams. The data is collected from Japanese blogs and news sites. Spam blogs are then separated from legitimate blogs using a spam filtering system. To find differences among the three sources, an algorithm is developed to detect periodicities based on autocorrelation. To analyze the vast amount of data, this detection method employs Hadoop, a distributed storage and processing system. To analyze information sources in terms of quantitative aspects, the information sources are characterized by keyword distributions of periodicities. In addition, in order to analyze the three information sources qualitatively, representative keywords of each source are identified in terms of the sum of multiplied adjacent differences in autocorrelation. The results obtained using this detection method indicate that periodic blog topics is apt to be TV programs, hobbies, and social events; periodic news topics tend to be political and economic events; and periodic topics in spam is likely to be automatically copied-and-pasted e-mail newsletters and affiliate offers.},
}