% Journal article: Reddit misinformation engagement study (IJCLR vol. 17, no. 2, 2026).
% NOTE(review): the page range 78--99 is taken from the trailing component of the DOI,
% which mirrors year/volume/issue/pages -- confirm against the published issue.
@article{4753,
  author   = {Ketmaneechairat, Hathairat},
  title    = {Modeling and Analyzing Engagement Dynamics of Misleading and Authentic Content on {Reddit} Using Linguistic and Machine Learning Approaches},
  journal  = {International Journal of Computational Linguistics Research},
  year     = {2026},
  volume   = {17},
  number   = {2},
  pages    = {78--99},
  doi      = {10.6025/ijclr/2026/17/2/78-99},
  url      = {https://www.dline.info/jcl/fulltext/v17n2/jclv17n2_2.pdf},
  abstract = {The rapid proliferation of misleading information on social media poses significant challenges to digital
ecosystems, driven by sensational narratives, emotional framing, and strategic engagement tactics. Existing
research often examines linguistic patterns, user engagement, and propagation dynamics in isolation,
limiting comprehensive understanding. This study introduces a unified analytical framework that integrates
linguistic, behavioral, and contextual features to model and classify misleading versus authentic content on
Reddit. Analyzing a dataset of 2,344 posts across twelve subreddits, we engineered 86 multidimensional
features encompassing linguistic structure, sentiment, stylistic markers, clickbait indicators, and TF-IDF
representations. A Linear Support Vector Machine (LinearSVC) was employed for multi-class classification
across true, satire, imposter, and misleading categories. Descriptive and statistical analyses revealed that
authentic content attracts the highest average engagement, while misleading posts exhibit significantly
greater verbosity and clickbait prevalence, strategically mimicking credible narratives. The baseline classifier
achieved 88.7\% weighted accuracy, effectively distinguishing satire and imposter content, yet struggled
with misleading posts due to their deliberate lexical overlap with authentic news. Inferential tests confirmed
statistically significant differences in engagement metrics, content length, and clickbait usage across categories.
These findings demonstrate that virality is decoupled from factual accuracy and highlight the limitations
of traditional machine learning in detecting nuanced deception. Future research should incorporate
transformer-based architectures, temporal engagement modeling, and network propagation analysis to
enhance robust misinformation detection in dynamic online communities.},
}