% Journal article on label-aware attention for multi-label multi-instance text
% classification. The citation key is kept as exported so existing \cite{4695}
% calls keep resolving.
% NOTE(review): `pages` is inferred from the DOI suffix (.../16/1/26-39, which
% mirrors year/volume/number) -- TODO confirm against the published PDF.
@article{4695,
  author   = {Pichappan, Pit},
  title    = {{LA-MIL}: Label-Aware Attention Networks for Multi-Label Multi-Instance Text Classification},
  journal  = {Journal of Information Organization},
  year     = {2026},
  volume   = {16},
  number   = {1},
  pages    = {26--39},
  doi      = {10.6025/jio/2026/16/1/26-39},
  url      = {https://www.dline.info/jio/fulltext/v16n1/jiov16n1_3.pdf},
  abstract = {Multilabel multi instance text classification presents unique challenges due to the weak supervision setting where documents (bags) are labeled but constituent sentences (instances) are not, coupled with severe label imbalance where infrequent ``tail'' labels dominate real world distributions. Existing approaches typically employ label agnostic aggregation strategies such as max or mean pooling that implicitly assume uniform instance relevance across all labels, an assumption that is frequently violated in social tagging data, where individual sentences often signal specific labels while remaining irrelevant to others. To address this limitation, we propose LA-MIL (Label-Aware Attention Multi Instance Learning), a novel framework that employs dedicated attention heads for each label to enable fine grained, label specific instance selection. This architecture allows different labels to attend to distinct textual evidence within the same document, relaxing the restrictive assumption of uniform instance relevance. Evaluated on the DeliciousMIL benchmark dataset comprising 12,234 web documents annotated with 20 semantic tags, LA-MIL consistently outperforms traditional multi label classifiers, standard MIL models with global pooling, and attention based baselines with shared aggregation mechanisms. Notably, the model achieves significant improvements in macro-F1 scores, demonstrating superior handling of long tailed label distributions. Beyond quantitative gains, LA-MIL provides inherent interpretability through learned attention weights that transparently identify label-discriminative sentences. Our results establish label aware attention as an essential architectural principle for multi label multi instance learning, particularly in applications requiring both accuracy on imbalanced distributions and human interpretable predictions.},
}