% Review note for entry 4658: the page range is inferred from the DOI suffix (1-15); confirm against the published article.
@article{4658,
  author   = {Maliyaem, Maleerat},
  title    = {Generative {AI}-Enabled Semantic Music Search: Empirical Evaluation of Embeddings, Vector Databases, and Cross-Instrument Retrieval Performance},
  journal  = {Journal of Data Processing},
  year     = {2026},
  volume   = {16},
  number   = {1},
  pages    = {1--15},
  doi      = {10.6025/jdp/2026/16/1/1-15},
  url      = {https://www.dline.info/jdp/fulltext/v16n1/jdpv16n1_1.pdf},
  abstract = {The integration of Generative Artificial Intelligence (GenAI) with modern database systems is transforming how we store, query, and interpret multimodal data. This paper presents an empirical investigation into GenAI-enabled semantic music search, combining generative audio embeddings with vector database technologies to support instrument-specific retrieval. Using a controlled dataset of aligned multitrack recordings---including saxophone, piano, percussion, and mixed audio---we evaluate the performance of generative embedding models (e.g., AudioMAE) against traditional signal-based features such as MFCCs and spectral centroids. Results demonstrate that generative embeddings significantly outperform classical methods, achieving a Recall@1 of 0.67 compared to 0.44 for MFCCs, and exhibit strong cross-instrument generalization, with top-1 retrieval accuracy exceeding 84\% across all instrument categories. We further assess vector indexing strategies---Flat, IVF, and HNSW---and find that HNSW offers the best trade-off between latency (2.1 ms), memory efficiency (32 MB), and recall (0.95), making it ideal for real-time applications. The proposed architecture is not only effective for saxophone-centric queries but also readily extensible to other instruments, including the violin, as evidenced by its compatibility with datasets such as TRIOS. Beyond music retrieval, our findings reflect broader trends in GenAI-database integration, including probabilistic querying (e.g., GenSQL), distributed cloud scalability, and educational implications. We conclude that the fusion of generative representation learning and purpose-built vector databases constitutes a scalable, accurate, and deployable framework suitable for both research prototypes and industrial systems in creative, educational, and enterprise contexts. This work underscores the need to co-design AI models and database infrastructure to unlock intelligent, uncertainty-aware, and user-accessible data ecosystems.},
}