@comment{Entry 4659: the pages value is inferred from the page range encoded in the
DOI suffix (year/volume/number/pages); confirm against the publisher PDF.
In the abstract, the correlation symbol was restored from a mis-encoded
Symbol-font glyph (rho).}
@article{4659,
  author   = {Suthiwong, Dit},
  title    = {Empowering Reliable {GenAI} with {LLM}-Assisted Metadata Enrichment: An Empirical Study on Climate Policy Data},
  journal  = {Journal of Data Processing},
  year     = {2026},
  volume   = {16},
  number   = {1},
  pages    = {16--30},
  doi      = {10.6025/jdp/2026/16/1/16-30},
  url      = {https://www.dline.info/jdp/fulltext/v16n1/jdpv16n1_2.pdf},
  abstract = {This paper presents an empirical study on the impact of LLM-assisted metadata enrichment on the reliability
and performance of enterprise-grade Generative AI (GenAI) systems, using the OECD IFCMA Climate Policy
Dashboard as a real-world testbed. The dataset, comprising over 1,600 heterogeneous climate policy instruments
across 43 approaches and multiple countries, exhibits significant semantic inconsistency and incomplete
metadata, reflecting common challenges in multinational data environments. The authors implement a
three-stage GenAI-enabled pipeline: (1) definition of a structured metadata schema, (2) LLM-driven semantic
enrichment to infer missing fields and harmonize terminology, and (3) a metadata-aware Retrieval-Augmented
Generation (RAG) system that leverages enriched context for grounded responses. Quantitative evaluation
demonstrates a statistically significant improvement in metadata completeness from a mean of 0.41 to 0.83
($p < 0.001$) and a marked increase in cross-country semantic consistency, with cosine similarity rising from
0.64 to 0.85 for carbon pricing policies. These enhancements directly translate into tangible RAG performance
gains: retrieval precision improves by 25.8\%, answer faithfulness by 23.5\%, and hallucination rates decline
by 41.9\%. Crucially, correlation analysis confirms a strong positive relationship between metadata quality
and GenAI reliability ($\rho > 0.7$). The study positions high-quality metadata not as auxiliary documentation
but as a foundational architectural component that enables observability, explainability, and trust in
high-stakes GenAI applications. By bridging a critical gap between conceptual frameworks and empirical
validation, this work establishes metadata-centric design as essential for scalable, governance-aligned, and
reliable enterprise AI systems.},
}