Large-scale biobanks, with rich phenotypic and genomic data across hundreds of thousands of samples, provide ample opportunities to elucidate the genetics of complex traits and diseases. Consequently, there is growing demand for robust and scalable methods for disease risk prediction from genotype data. Inference in this setting is challenging due to the high-dimensionality of genomic data, especially when coupled with smaller sample sizes. Popular Polygenic Risk Score (PRS) inference methods address this challenge by adopting sparse Bayesian priors or penalized regression techniques, such as the Least Absolute Shrinkage and Selection Operator (LASSO). However, the former class of methods are not as scalable and do not produce exact sparsity, while the latter tends to over-shrink large coefficients. In this study, we present SSLPRS, a novel PRS method based on the Spike-and-Slab LASSO (SSL) prior, which offers a theoretical bridge between the two frameworks. We extend previous work to derive a coordinate-ascent inference algorithm that operates on GWAS summary statistics, which is orders-of-magnitude more efficient than corresponding individual-level-based implementations. To illustrate the statistical properties of the proposed model, we conducted experiments involving nine simulation configurations and nine quantitative phenotypes from the UK Biobank. Our results demonstrate that SSLPRS is competitive with state-of-the-art methods in terms of prediction accuracy and exhibits superior variable selection performance, especially in sparse genetic architectures. In simulations, this translates to upwards of 50% improvement in positive predictive value. In analysis of real phenotypes, we show that selected variants are highly enriched for meaningful genomic annotations and have better replication rates in larger meta-analyses. SSLPRS is available in the open-source package https://github.com/li-lab-mcgill/penprs.
@article{Song2025sparse,
  author    = {Song, Junyi and Zabad, Shadi and Yang, Archer and Gravel, Simon and Li, Yue},
  title     = {Sparse polygenic risk score inference with the spike-and-slab {LASSO}},
  journal   = {Bioinformatics},
  volume    = {41},
  number    = {11},
  pages     = {btaf578},
  year      = {2025},
  month     = oct,
  issn      = {1367-4811},
  doi       = {10.1093/bioinformatics/btaf578},
  url       = {https://doi.org/10.1093/bioinformatics/btaf578},
  keywords  = {J},
  author+an = {2=highlight},
}
@article{zabad2025personalized,
  author    = {Zabad, Shadi and Li, Yue and Gravel, Simon},
  title     = {Personalized polygenic risk prediction and assessment with a {Mixture-of-Experts} framework},
  journal   = {bioRxiv},
  year      = {2025},
  month     = sep,
  publisher = {Cold Spring Harbor Laboratory},
  doi       = {10.1101/2025.09.15.676165},
  keywords  = {R},
  author+an = {1=highlight},
}
@article{bercovich2025ld,
  author    = {Bercovich Szulmajster, Ulises and Zabad, Shadi and Gravel, Simon},
  title     = {{LD} Matrix Approximations for Scalable Analysis of High-dimensional Genetic Data},
  journal   = {bioRxiv},
  year      = {2025},
  month     = sep,
  publisher = {Cold Spring Harbor Laboratory},
  doi       = {10.1101/2025.09.16.676478},
  keywords  = {R},
  author+an = {2=highlight},
}
@article{10.1093/gigascience/giaf049,
  author    = {Czech, Eric and Tyler, Will and White, Tom and Jeffery, Ben and Millar, Timothy R and Elsworth, Benjamin and Guez, J{\'e}r{\'e}my and Hancox, Jonny and Karczewski, Konrad J and Miles, Alistair and Tallman, Sam and Unneberg, Per and Wojdyla, Rafal and Zabad, Shadi and Hammerbacher, Jeff and Kelleher, Jerome},
  title     = {Analysis-ready {VCF} at Biobank scale using {Zarr}},
  journal   = {GigaScience},
  volume    = {14},
  pages     = {giaf049},
  year      = {2025},
  month     = jun,
  issn      = {2047-217X},
  doi       = {10.1093/gigascience/giaf049},
  keywords  = {J},
  author+an = {14=highlight},
}
@article{zabad2025toward,
  author    = {Zabad, Shadi and Haryan, Chirayu Anant and Gravel, Simon and Misra, Sanchit and Li, Yue},
  title     = {Toward whole-genome inference of polygenic scores with fast and memory-efficient algorithms},
  journal   = {The American Journal of Human Genetics},
  year      = {2025},
  month     = may,
  publisher = {Elsevier},
  doi       = {10.1016/j.ajhg.2025.05.002},
  keywords  = {J},
  author+an = {1=highlight},
}
@article{yang2025phecode,
  author    = {Yang, Ziqi and Song, Ziyang and Zabad, Shadi and Legault, Marc-Andr{\'e} and Li, Yue},
  title     = {{PheCode}-guided multi-modal topic modeling of electronic health records improves disease incidence prediction and {GWAS} discovery from {UK Biobank}},
  journal   = {medRxiv},
  year      = {2025},
  month     = may,
  publisher = {Cold Spring Harbor Laboratory Press},
  doi       = {10.1101/2025.05.28.25328511},
  keywords  = {R},
  author+an = {3=highlight},
}
@article{layne2024multi,
  author    = {Layne, Elliot and Zabad, Shadi and Li, Yue and Blanchette, Mathieu},
  title     = {Multi-ancestry polygenic risk scores using phylogenetic regularization},
  journal   = {bioRxiv},
  year      = {2024},
  month     = feb,
  publisher = {Cold Spring Harbor Laboratory},
  doi       = {10.1101/2024.02.14.580313},
  keywords  = {R},
  author+an = {2=highlight},
}
@article{diaz2023topological,
  author    = {Diaz-Papkovich, Alex and Zabad, Shadi and Ben-Eghan, Chief and Anderson-Trocm{\'e}, Luke and Femerling, Georgette and Nathan, Vikram and Patel, Jenisha and Gravel, Simon},
  title     = {Topological stratification of continuous genetic variation in large biobanks},
  journal   = {bioRxiv},
  year      = {2023},
  month     = jul,
  publisher = {Cold Spring Harbor Laboratory},
  doi       = {10.1101/2023.07.06.548007},
  keywords  = {R},
  author+an = {2=highlight},
}
@article{zabad2023fast,
  author    = {Zabad, Shadi and Gravel, Simon and Li, Yue},
  title     = {Fast and accurate {Bayesian} polygenic risk modeling with variational inference},
  journal   = {The American Journal of Human Genetics},
  volume    = {110},
  number    = {5},
  pages     = {741--761},
  year      = {2023},
  month     = may,
  publisher = {Elsevier},
  doi       = {10.1016/j.ajhg.2023.03.009},
  keywords  = {J},
  author+an = {1=highlight},
}
@article{anderson2023genes,
  author    = {Anderson-Trocm{\'e}, Luke and Nelson, Dominic and Zabad, Shadi and Diaz-Papkovich, Alex and Kryukov, Ivan and Baya, Nikolas and Touvier, Mathilde and Jeffery, Ben and Dina, Christian and V{\'e}zina, H{\'e}l{\`e}ne and others},
  title     = {On the genes, genealogies, and geographies of {Quebec}},
  journal   = {Science},
  volume    = {380},
  number    = {6647},
  pages     = {849--855},
  year      = {2023},
  month     = may,
  publisher = {American Association for the Advancement of Science},
  doi       = {10.1126/science.add5300},
  keywords  = {J},
  author+an = {3=highlight},
}
@article{zabad2021assumptions,
  author    = {Zabad, Shadi and Ragsdale, Aaron P and Sun, Rosie and Li, Yue and Gravel, Simon},
  title     = {Assumptions about frequency-dependent architectures of complex traits bias measures of functional enrichment},
  journal   = {Genetic Epidemiology},
  volume    = {45},
  number    = {6},
  pages     = {621--632},
  year      = {2021},
  month     = jun,
  doi       = {10.1002/gepi.22388},
  keywords  = {J},
  author+an = {1=highlight},
}
@article{chen2021moonprot,
  author    = {Chen, Chang and Liu, Haipeng and Zabad, Shadi and Rivera, Nina and Rowin, Emily and Hassan, Maheen and Gomez De Jesus, Stephanie M and Llin{\'a}s Santos, Paola S and Kravchenko, Karyna and Mikhova, Mariia and others},
  title     = {{MoonProt} 3.0: an update of the moonlighting proteins database},
  journal   = {Nucleic Acids Research},
  volume    = {49},
  number    = {D1},
  pages     = {D368--D372},
  year      = {2021},
  month     = jan,
  publisher = {Oxford University Press},
  doi       = {10.1093/nar/gkaa1101},
  keywords  = {J},
  author+an = {3=highlight},
}
@article{zabad2020evolutionary,
  author    = {Zabad, Shadi and Moses, Alan M},
  title     = {Evolutionary dynamics of neutral phenotypes under {DNA} substitution models},
  journal   = {bioRxiv},
  year      = {2020},
  month     = oct,
  publisher = {Cold Spring Harbor Laboratory},
  doi       = {10.1101/2020.10.26.355438},
  keywords  = {R},
  author+an = {1=highlight},
}
@article{chen2018moonprot,
  author    = {Chen, Chang and Zabad, Shadi and Liu, Haipeng and Wang, Wangfei and Jeffery, Constance},
  title     = {{MoonProt} 2.0: an expansion and update of the moonlighting proteins database},
  journal   = {Nucleic Acids Research},
  volume    = {46},
  number    = {D1},
  pages     = {D640--D644},
  year      = {2018},
  month     = jan,
  publisher = {Oxford University Press},
  doi       = {10.1093/nar/gkx1043},
  keywords  = {J},
  author+an = {2=highlight},
}
@article{mani2015moonprot,
  author    = {Mani, Mathew and Chen, Chang and Amblee, Vaishak and Liu, Haipeng and Mathur, Tanu and Zwicke, Grant and Zabad, Shadi and Patel, Bansi and Thakkar, Jagravi and Jeffery, Constance J},
  title     = {{MoonProt}: a database for proteins that are known to moonlight},
  journal   = {Nucleic Acids Research},
  volume    = {43},
  number    = {D1},
  pages     = {D277--D282},
  year      = {2015},
  month     = jan,
  publisher = {Oxford University Press},
  doi       = {10.1093/nar/gku954},
  keywords  = {J},
  author+an = {7=highlight},
}