@comment{Entry 4748: journal article. The pages value is inferred from the
DOI suffix (97-112) -- confirm against the published PDF. The author's given
name is only available as an initial in the source record.}
@article{4748,
  author   = {Krishnamurthy, M.},
  title    = {A Quantitative and Semantic Clustering Framework for High-Risk {AI} Systems under the {EU} {AI} {Act}},
  journal  = {Journal of Data Processing},
  year     = {2026},
  volume   = {16},
  number   = {2},
  pages    = {97--112},
  doi      = {10.6025/jdp/2026/16/2/97-112},
  url      = {https://www.dline.info/jdp/fulltext/v16n2/jdpv16n2_3.pdf},
  abstract = {The rapid integration of artificial intelligence into critical societal domains necessitates robust regulatory
    frameworks, yet the EU AI Act's high-risk classifications remain primarily descriptive, lacking quantitative
    and structural analysis. This study addresses this gap by introducing an integrated analytical framework
    that combines semantic representation with machine learning and multi-dimensional risk modeling. By
    transforming the eight high-risk AI categories defined in Annex III of the EU AI Act into numerical
    representations using TF-IDF vectorization, the framework applies K-means clustering and Principal
    Component Analysis (PCA) to uncover latent structural relationships. The results reveal a low-dimensional
    semantic space, yielding three coherent clusters: governance and state authority systems, socio-economic
    decision systems, and technical and safety systems. To capture risk complexity beyond semantic similarity,
    a composite quantitative risk-scoring model is developed that integrates impact domain, risk type, decision
    criticality, and degree of human impact. This multi-dimensional approach demonstrates that semantic
    proximity does not equate to equivalent risk severity, with governance systems exhibiting the highest
    composite risk scores. Rigorous validation through silhouette analysis, inter-cluster separation metrics,
    and stability testing confirms the framework's reliability. By bridging regulatory classification with
    quantitative risk evaluation, this study provides a scalable, interpretable tool for policymakers and
    practitioners. The findings advocate for differentiated, data-driven regulatory strategies that align oversight
    mechanisms with the distinct structural and risk profiles of high-risk AI systems.},
}