% Journal article: Krishnamurthy (2026), Journal of Data Processing 16(2).
% The citation key is left as exported so existing \cite{4748} calls keep working.
% NOTE(review): `pages` was not present in the exported record; it is taken from
% the page-range suffix of the DOI (".../97-112") -- confirm against the PDF.
@article{4748,
  author   = {Krishnamurthy, M.},
  title    = {A Quantitative and Semantic Clustering Framework for High-Risk {AI} Systems under the {EU AI Act}},
  journal  = {Journal of Data Processing},
  year     = {2026},
  volume   = {16},
  number   = {2},
  pages    = {97--112},
  doi      = {10.6025/jdp/2026/16/2/97-112},
  url      = {https://www.dline.info/jdp/fulltext/v16n2/jdpv16n2_3.pdf},
  abstract = {The rapid integration of artificial intelligence into critical societal domains necessitates robust regulatory frameworks, yet the EU AI Act's high-risk classifications remain primarily descriptive, lacking quantitative and structural analysis. This study addresses this gap by introducing an integrated analytical framework that combines semantic representation with machine learning and multi-dimensional risk modeling. By transforming the eight high-risk AI categories defined in Annex III of the EU AI Act into numerical representations using TF-IDF vectorization, the framework applies K-means clustering and Principal Component Analysis (PCA) to uncover latent structural relationships. The results reveal a low-dimensional semantic space, yielding three coherent clusters: governance and state authority systems, socio-economic decision systems, and technical and safety systems. To capture risk complexity beyond semantic similarity, a composite quantitative risk-scoring model is developed that integrates impact domain, risk type, decision criticality, and degree of human impact. This multi-dimensional approach demonstrates that semantic proximity does not equate to equivalent risk severity, with governance systems exhibiting the highest composite risk scores. Rigorous validation through silhouette analysis, inter-cluster separation metrics, and stability testing confirms the framework's reliability. By bridging regulatory classification with quantitative risk evaluation, this study provides a scalable, interpretable tool for policymakers and practitioners. The findings advocate for differentiated, data-driven regulatory strategies that align oversight mechanisms with the distinct structural and risk profiles of high-risk AI systems.},
}