% Journal article: Frisby and Langmead, Progress in Computing Applications 14(1), 2025.
% The doi field holds the bare identifier; styles prepend the resolver themselves.
% NOTE(review): the page range is inferred from the DOI suffix (.../14/1/38-55);
% confirm against the published PDF.
@article{4413,
  author   = {Frisby, Trevor S. and Langmead, Christopher J.},
  title    = {Integrating a Fold-specific Regularization Component in Protein Engineering},
  journal  = {Progress in Computing Applications},
  year     = {2025},
  volume   = {14},
  number   = {1},
  pages    = {38--55},
  doi      = {10.6025/pca/2025/14/1/38-55},
  url      = {https://www.dline.info/pca/fulltext/v14n1/pcav14n1_3.pdf},
  abstract = {Directed Evolution (DE) is a method used in protein engineering that entails multiple cycles of mutagenesis and screening to identify sequences that enhance a specific characteristic (e.g., binding strength to a designated target). However, the fundamental optimization challenge is not fully determined, meaning that alterations to boost the chosen property may negatively impact unmeasured yet significant attributes (e.g., subcellular localization). We aim to tackle this challenge by integrating a fold-specific regularization component into optimisation. This regularization component steers the search towards designs similar to sequences within the fold family of the protein. We implemented our approach on an extensive collection of protein GB1 mutants, measuring their binding affinities to IgG-Fc. Our findings reveal that the regularized optimization process yields more native-like GB1 sequences while only slightly compromising binding affinity. Specifically, the log-odds of our designs, assessed under a generative model of the GB1 fold family, are approximately 41-45\% greater than those achieved without regularization, with merely a 7\% reduction in binding affinity. Therefore, our technique successfully balances competing characteristics. Additionally, we show that our active-learning-based method lowers the experimental workload needed to pinpoint optimal GB1 designs by 67\%, compared to recent findings from the Arnold lab using the same dataset.},
}