From ede360e847e586e0f6604f94db5cc0ff087c16f2 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 10 Jun 2024 18:50:59 +0000 Subject: [PATCH] Deployed 24e9eb5 with MkDocs version: 1.6.0 --- .nojekyll | 0 404.html | 647 ++ api/eval/binary_metrics/index.html | 2034 +++++ api/eval/continuous_metrics/index.html | 1451 ++++ api/eval/pseudo_metrics/index.html | 977 +++ api/model/BayesPRSModel/index.html | 3303 ++++++++ api/model/LDPredInf/index.html | 1437 ++++ api/model/VIPRS/index.html | 5932 +++++++++++++++ api/model/VIPRSMix/index.html | 2727 +++++++ .../gridsearch/HyperparameterGrid/index.html | 2048 +++++ .../HyperparameterSearch/index.html | 3478 +++++++++ api/model/gridsearch/VIPRSBMA/index.html | 1239 +++ api/model/gridsearch/VIPRSGrid/index.html | 3043 ++++++++ .../gridsearch/VIPRSGridSearch/index.html | 1357 ++++ api/overview/index.html | 866 +++ api/plot/diagnostics/index.html | 876 +++ api/utils/OptimizeResult/index.html | 1154 +++ api/utils/compute_utils/index.html | 1675 ++++ api/utils/data_utils/index.html | 865 +++ api/utils/exceptions/index.html | 772 ++ assets/_mkdocstrings.css | 119 + assets/images/favicon.png | Bin 0 -> 1870 bytes assets/javascripts/bundle.ad660dcc.min.js | 29 + assets/javascripts/bundle.ad660dcc.min.js.map | 7 + assets/javascripts/lunr/min/lunr.ar.min.js | 1 + assets/javascripts/lunr/min/lunr.da.min.js | 18 + assets/javascripts/lunr/min/lunr.de.min.js | 18 + assets/javascripts/lunr/min/lunr.du.min.js | 18 + assets/javascripts/lunr/min/lunr.el.min.js | 1 + assets/javascripts/lunr/min/lunr.es.min.js | 18 + assets/javascripts/lunr/min/lunr.fi.min.js | 18 + assets/javascripts/lunr/min/lunr.fr.min.js | 18 + assets/javascripts/lunr/min/lunr.he.min.js | 1 + assets/javascripts/lunr/min/lunr.hi.min.js | 1 + assets/javascripts/lunr/min/lunr.hu.min.js | 18 + assets/javascripts/lunr/min/lunr.hy.min.js | 1 + assets/javascripts/lunr/min/lunr.it.min.js | 18 + 
assets/javascripts/lunr/min/lunr.ja.min.js | 1 + assets/javascripts/lunr/min/lunr.jp.min.js | 1 + assets/javascripts/lunr/min/lunr.kn.min.js | 1 + assets/javascripts/lunr/min/lunr.ko.min.js | 1 + assets/javascripts/lunr/min/lunr.multi.min.js | 1 + assets/javascripts/lunr/min/lunr.nl.min.js | 18 + assets/javascripts/lunr/min/lunr.no.min.js | 18 + assets/javascripts/lunr/min/lunr.pt.min.js | 18 + assets/javascripts/lunr/min/lunr.ro.min.js | 18 + assets/javascripts/lunr/min/lunr.ru.min.js | 18 + assets/javascripts/lunr/min/lunr.sa.min.js | 1 + .../lunr/min/lunr.stemmer.support.min.js | 1 + assets/javascripts/lunr/min/lunr.sv.min.js | 18 + assets/javascripts/lunr/min/lunr.ta.min.js | 1 + assets/javascripts/lunr/min/lunr.te.min.js | 1 + assets/javascripts/lunr/min/lunr.th.min.js | 1 + assets/javascripts/lunr/min/lunr.tr.min.js | 18 + assets/javascripts/lunr/min/lunr.vi.min.js | 1 + assets/javascripts/lunr/min/lunr.zh.min.js | 1 + assets/javascripts/lunr/tinyseg.js | 206 + assets/javascripts/lunr/wordcut.js | 6708 +++++++++++++++++ .../workers/search.b8dbb3d2.min.js | 42 + .../workers/search.b8dbb3d2.min.js.map | 7 + assets/stylesheets/main.6543a935.min.css | 1 + assets/stylesheets/main.6543a935.min.css.map | 1 + assets/stylesheets/palette.06af60db.min.css | 1 + .../stylesheets/palette.06af60db.min.css.map | 1 + citation/index.html | 743 ++ commandline/overview/index.html | 818 ++ commandline/viprs_evaluate/index.html | 834 ++ commandline/viprs_fit/index.html | 888 +++ commandline/viprs_score/index.html | 832 ++ download_ld/index.html | 730 ++ faq/index.html | 730 ++ getting_started/index.html | 829 ++ index.html | 855 +++ installation/index.html | 985 +++ objects.inv | Bin 0 -> 1913 bytes search/search_index.json | 1 + sitemap.xml | 3 + sitemap.xml.gz | Bin 0 -> 127 bytes tutorials/overview/index.html | 730 ++ 79 files changed, 52268 insertions(+) create mode 100644 .nojekyll create mode 100644 404.html create mode 100644 api/eval/binary_metrics/index.html create mode 
100644 api/eval/continuous_metrics/index.html create mode 100644 api/eval/pseudo_metrics/index.html create mode 100644 api/model/BayesPRSModel/index.html create mode 100644 api/model/LDPredInf/index.html create mode 100644 api/model/VIPRS/index.html create mode 100644 api/model/VIPRSMix/index.html create mode 100644 api/model/gridsearch/HyperparameterGrid/index.html create mode 100644 api/model/gridsearch/HyperparameterSearch/index.html create mode 100644 api/model/gridsearch/VIPRSBMA/index.html create mode 100644 api/model/gridsearch/VIPRSGrid/index.html create mode 100644 api/model/gridsearch/VIPRSGridSearch/index.html create mode 100644 api/overview/index.html create mode 100644 api/plot/diagnostics/index.html create mode 100644 api/utils/OptimizeResult/index.html create mode 100644 api/utils/compute_utils/index.html create mode 100644 api/utils/data_utils/index.html create mode 100644 api/utils/exceptions/index.html create mode 100644 assets/_mkdocstrings.css create mode 100644 assets/images/favicon.png create mode 100644 assets/javascripts/bundle.ad660dcc.min.js create mode 100644 assets/javascripts/bundle.ad660dcc.min.js.map create mode 100644 assets/javascripts/lunr/min/lunr.ar.min.js create mode 100644 assets/javascripts/lunr/min/lunr.da.min.js create mode 100644 assets/javascripts/lunr/min/lunr.de.min.js create mode 100644 assets/javascripts/lunr/min/lunr.du.min.js create mode 100644 assets/javascripts/lunr/min/lunr.el.min.js create mode 100644 assets/javascripts/lunr/min/lunr.es.min.js create mode 100644 assets/javascripts/lunr/min/lunr.fi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.fr.min.js create mode 100644 assets/javascripts/lunr/min/lunr.he.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hu.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hy.min.js create mode 100644 assets/javascripts/lunr/min/lunr.it.min.js create mode 100644 
assets/javascripts/lunr/min/lunr.ja.min.js create mode 100644 assets/javascripts/lunr/min/lunr.jp.min.js create mode 100644 assets/javascripts/lunr/min/lunr.kn.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ko.min.js create mode 100644 assets/javascripts/lunr/min/lunr.multi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.nl.min.js create mode 100644 assets/javascripts/lunr/min/lunr.no.min.js create mode 100644 assets/javascripts/lunr/min/lunr.pt.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ro.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ru.min.js create mode 100644 assets/javascripts/lunr/min/lunr.sa.min.js create mode 100644 assets/javascripts/lunr/min/lunr.stemmer.support.min.js create mode 100644 assets/javascripts/lunr/min/lunr.sv.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ta.min.js create mode 100644 assets/javascripts/lunr/min/lunr.te.min.js create mode 100644 assets/javascripts/lunr/min/lunr.th.min.js create mode 100644 assets/javascripts/lunr/min/lunr.tr.min.js create mode 100644 assets/javascripts/lunr/min/lunr.vi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.zh.min.js create mode 100644 assets/javascripts/lunr/tinyseg.js create mode 100644 assets/javascripts/lunr/wordcut.js create mode 100644 assets/javascripts/workers/search.b8dbb3d2.min.js create mode 100644 assets/javascripts/workers/search.b8dbb3d2.min.js.map create mode 100644 assets/stylesheets/main.6543a935.min.css create mode 100644 assets/stylesheets/main.6543a935.min.css.map create mode 100644 assets/stylesheets/palette.06af60db.min.css create mode 100644 assets/stylesheets/palette.06af60db.min.css.map create mode 100644 citation/index.html create mode 100644 commandline/overview/index.html create mode 100644 commandline/viprs_evaluate/index.html create mode 100644 commandline/viprs_fit/index.html create mode 100644 commandline/viprs_score/index.html create mode 100644 download_ld/index.html create mode 100644 
faq/index.html create mode 100644 getting_started/index.html create mode 100644 index.html create mode 100644 installation/index.html create mode 100644 objects.inv create mode 100644 search/search_index.json create mode 100644 sitemap.xml create mode 100644 sitemap.xml.gz create mode 100644 tutorials/overview/index.html diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 0000000..e69de29 diff --git a/404.html b/404.html new file mode 100644 index 0000000..e1ee58b --- /dev/null +++ b/404.html @@ -0,0 +1,647 @@ + + + +
+ + + + + + + + + + + + + + +avg_precision(true_val, pred_val)
+
+¶Compute the average precision between the PRS predictions and a binary phenotype.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
true_val |
+ + | +
+
+
+ The response value or phenotype (a binary numpy vector with 0s and 1s) + |
+ + required + | +
pred_val |
+ + | +
+
+
+ The predicted value or PRS (a numpy vector) + |
+ + required + | +
viprs/eval/binary_metrics.py
cox_snell_r2(true_val, pred_val, covariates=None)
+
+¶Compute the Cox-Snell pseudo-R^2 between the PRS predictions and a binary phenotype. +If covariates are provided, we compute the incremental pseudo-R^2 by conditioning +on the covariates.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
true_val |
+ + | +
+
+
+ The response value or phenotype (a binary numpy vector with 0s and 1s) + |
+ + required + | +
pred_val |
+ + | +
+
+
+ The predicted value or PRS (a numpy vector) + |
+ + required + | +
covariates |
+ + | +
+
+
+ A pandas table of covariates where the rows are ordered the same way as the predictions and response. + |
+
+ None
+ |
+
viprs/eval/binary_metrics.py
f1(true_val, pred_val)
+
+¶Compute the F1 score between the PRS predictions and a binary phenotype.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
true_val |
+ + | +
+
+
+ The response value or phenotype (a binary numpy vector with 0s and 1s) + |
+ + required + | +
pred_val |
+ + | +
+
+
+ The predicted value or PRS (a numpy vector) + |
+ + required + | +
viprs/eval/binary_metrics.py
liability_logit_r2(true_val, pred_val, covariates=None, return_all_r2=False)
+
+¶Compute the R^2 between the PRS predictions and a binary phenotype on the liability +scale using the logit likelihood as outlined in Lee et al. (2012) Gene. Epi. +https://pubmed.ncbi.nlm.nih.gov/22714935/
+The R^2 is defined as: +R2_{logit} = Var(pred) / (Var(pred) + pi^2 / 3)
+Where Var(pred) is the variance of the predicted liability.
+If covariates are provided, we compute the incremental pseudo-R^2 by conditioning +on the covariates.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
true_val |
+ + | +
+
+
+ The response value or phenotype (a binary numpy vector with 0s and 1s) + |
+ + required + | +
pred_val |
+ + | +
+
+
+ The predicted value or PRS (a numpy vector) + |
+ + required + | +
covariates |
+ + | +
+
+
+ A pandas table of covariates where the rows are ordered the same way as the predictions and response. + |
+
+ None
+ |
+
return_all_r2 |
+ + | +
+
+
+ If True, return the null, full and incremental R2 values. + |
+
+ False
+ |
+
viprs/eval/binary_metrics.py
liability_probit_r2(true_val, pred_val, covariates=None, return_all_r2=False)
+
+¶Compute the R^2 between the PRS predictions and a binary phenotype on the liability +scale using the probit likelihood as outlined in Lee et al. (2012) Gene. Epi. +https://pubmed.ncbi.nlm.nih.gov/22714935/
+The R^2 is defined as: +R2_{probit} = Var(pred) / (Var(pred) + 1)
+Where Var(pred) is the variance of the predicted liability.
+If covariates are provided, we compute the incremental pseudo-R^2 by conditioning +on the covariates.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
true_val |
+ + | +
+
+
+ The response value or phenotype (a binary numpy vector with 0s and 1s) + |
+ + required + | +
pred_val |
+ + | +
+
+
+ The predicted value or PRS (a numpy vector) + |
+ + required + | +
covariates |
+ + | +
+
+
+ A pandas table of covariates where the rows are ordered the same way as the predictions and response. + |
+
+ None
+ |
+
return_all_r2 |
+ + | +
+
+
+ If True, return the null, full and incremental R2 values. + |
+
+ False
+ |
+
viprs/eval/binary_metrics.py
liability_r2(true_val, pred_val, covariates=None, return_all_r2=False)
+
+¶Compute the coefficient of determination (R^2) on the liability scale +according to Lee et al. (2012) Gene. Epi. +https://pubmed.ncbi.nlm.nih.gov/22714935/
+The R^2 liability is defined as: +R_{liability}^2 = R_{observed}^2 * K(1-K) / z^2
+where R_{observed}^2 is the R^2 on the observed scale and K is the sample prevalence +and z is the "height of the normal density at the quantile for K".
+If covariates are provided, we compute the incremental pseudo-R^2 by conditioning +on the covariates.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
true_val |
+ + | +
+
+
+ The response value or phenotype (a binary numpy vector with 0s and 1s) + |
+ + required + | +
pred_val |
+ + | +
+
+
+ The predicted value or PRS (a numpy vector) + |
+ + required + | +
covariates |
+ + | +
+
+
+ A pandas table of covariates where the rows are ordered the same way as the predictions and response. + |
+
+ None
+ |
+
return_all_r2 |
+ + | +
+
+
+ If True, return the null, full and incremental R2 values. + |
+
+ False
+ |
+
viprs/eval/binary_metrics.py
mcfadden_r2(true_val, pred_val, covariates=None)
+
+¶Compute the McFadden pseudo-R^2 between the PRS predictions and a binary phenotype. +If covariates are provided, we compute the incremental pseudo-R^2 by conditioning +on the covariates.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
true_val |
+ + | +
+
+
+ The response value or phenotype (a binary numpy vector with 0s and 1s) + |
+ + required + | +
pred_val |
+ + | +
+
+
+ The predicted value or PRS (a numpy vector) + |
+ + required + | +
covariates |
+ + | +
+
+
+ A pandas table of covariates where the rows are ordered the same way as the predictions and response. + |
+
+ None
+ |
+
viprs/eval/binary_metrics.py
nagelkerke_r2(true_val, pred_val, covariates=None)
+
+¶Compute the Nagelkerke pseudo-R^2 between the PRS predictions and a binary phenotype. +If covariates are provided, we compute the incremental pseudo-R^2 by conditioning +on the covariates.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
true_val |
+ + | +
+
+
+ The response value or phenotype (a binary numpy vector with 0s and 1s) + |
+ + required + | +
pred_val |
+ + | +
+
+
+ The predicted value or PRS (a numpy vector) + |
+ + required + | +
covariates |
+ + | +
+
+
+ A pandas table of covariates where the rows are ordered the same way as the predictions and response. + |
+
+ None
+ |
+
viprs/eval/binary_metrics.py
pr_auc(true_val, pred_val)
+
+¶Compute the area under the Precision-Recall curve for a model +that maps from the PRS predictions to the binary phenotype.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
true_val |
+ + | +
+
+
+ The response value or phenotype (a binary numpy vector with 0s and 1s) + |
+ + required + | +
pred_val |
+ + | +
+
+
+ The predicted value or PRS (a numpy vector) + |
+ + required + | +
viprs/eval/binary_metrics.py
roc_auc(true_val, pred_val)
+
+¶Compute the area under the ROC (AUROC) for a model + that maps from the PRS predictions to the binary phenotype.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
true_val |
+ + | +
+
+
+ The response value or phenotype (a numpy binary vector with 0s and 1s) + |
+ + required + | +
pred_val |
+ + | +
+
+
+ The predicted value or PRS (a numpy vector) + |
+ + required + | +
viprs/eval/binary_metrics.py
incremental_r2(true_val, pred_val, covariates=None, return_all_r2=False)
+
+¶Compute the incremental prediction R^2 (proportion of phenotypic variance explained by the PRS). +This metric is computed by taking the R^2 of a model with covariates+PRS and subtracting from it +the R^2 of a model with covariates alone.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
true_val |
+ + | +
+
+
+ The response value or phenotype (a numpy vector) + |
+ + required + | +
pred_val |
+ + | +
+
+
+ The predicted value or PRS (a numpy vector) + |
+ + required + | +
covariates |
+ + | +
+
+
+ A pandas table of covariates where the rows are ordered the same way as the predictions and response. + |
+
+ None
+ |
+
return_all_r2 |
+ + | +
+
+
+ If True, return the R^2 values for the null and full models as well. + |
+
+ False
+ |
+
viprs/eval/continuous_metrics.py
mse(true_val, pred_val)
+
+¶Compute the mean squared error (MSE) between
+the predictions or PRS pred_val
and the phenotype true_val
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
true_val |
+ + | +
+
+
+ The response value or phenotype (a numpy vector) + |
+ + required + | +
pred_val |
+ + | +
+
+
+ The predicted value or PRS (a numpy vector) + |
+ + required + | +
viprs/eval/continuous_metrics.py
partial_correlation(true_val, pred_val, covariates)
+
+¶Compute the partial correlation between the phenotype true_val
and the PRS pred_val
+by conditioning on a set of covariates. This metric is computed by first residualizing the
+phenotype and the PRS on a set of covariates and then computing the correlation coefficient
+between the residuals.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
true_val |
+ + | +
+
+
+ The response value or phenotype (a numpy vector) + |
+ + required + | +
pred_val |
+ + | +
+
+
+ The predicted value or PRS (a numpy vector) + |
+ + required + | +
covariates |
+ + | +
+
+
+ A pandas table of covariates where the rows are ordered the same way as the predictions and response. + |
+ + required + | +
viprs/eval/continuous_metrics.py
pearson_r(true_val, pred_val)
+
+¶Compute the Pearson correlation coefficient between
+the predictions or PRS pred_val
and the phenotype true_val
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
true_val |
+ + | +
+
+
+ The response value or phenotype (a numpy vector) + |
+ + required + | +
pred_val |
+ + | +
+
+
+ The predicted value or PRS (a numpy vector) + |
+ + required + | +
viprs/eval/continuous_metrics.py
r2(true_val, pred_val)
+
+¶Compute the R^2 (proportion of variance explained) between
+the predictions or PRS pred_val
and the phenotype true_val
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
true_val |
+ + | +
+
+
+ The response value or phenotype (a numpy vector) + |
+ + required + | +
pred_val |
+ + | +
+
+
+ The predicted value or PRS (a numpy vector) + |
+ + required + | +
viprs/eval/continuous_metrics.py
r2_stats(r2_val, n)
+
+¶Compute the confidence interval and p-value for a given R-squared (proportion of variance + explained) value.
+This function and the formulas therein are based on the following paper
+by Momin et al. 2023: https://doi.org/10.1016/j.ajhg.2023.01.004 as well as
+the implementation in the R package PRSmix
:
+https://github.com/buutrg/PRSmix/blob/main/R/get_PRS_acc.R#L63
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
r2_val |
+ + | +
+
+
+ The R^2 value to compute the confidence interval/p-value for. + |
+ + required + | +
n |
+ + | +
+
+
+ The sample size used to compute the R^2 value + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ | +
+
+
+ A dictionary with the R^2 value, the lower and upper values of the confidence interval, the p-value, and the standard error of the R^2 metric. + |
+
viprs/eval/continuous_metrics.py
pseudo_pearson_r(test_gdl, prs_beta_table)
+
+¶Perform pseudo-validation of the inferred effect sizes by comparing them to +standardized marginal betas from an independent validation set. Here, we follow the pseudo-validation +procedures outlined in Mak et al. (2017) and Yang and Zhou (2020), where +the correlation between the PRS and the phenotype in an independent validation +cohort can be approximated with:
+Corr(PRS, y) ~= r'b / sqrt(b'Sb)
+Where r
is the standardized marginal beta from a validation set,
+b
is the posterior mean for the effect size of each variant and S
is the LD matrix.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
test_gdl |
+ + | +
+
+
+ An instance of |
+ + required + | +
prs_beta_table |
+ + | +
+
+
+ A pandas DataFrame with the PRS effect sizes. Must contain the columns: CHR, SNP, A1, A2, BETA. + |
+ + required + | +
viprs/eval/pseudo_metrics.py
pseudo_r2(test_gdl, prs_beta_table)
+
+¶Compute the R-Squared metric (proportion of variance explained) for a given +PRS using standardized marginal betas from an independent test set. +Here, we follow the pseudo-validation procedures outlined in Mak et al. (2017) and +Yang and Zhou (2020), where the proportion of phenotypic variance explained by the PRS +in an independent validation cohort can be approximated with:
+R2(PRS, y) ~= 2*r'b - b'Sb
+Where r
is the standardized marginal beta from a validation/test set,
+b
is the posterior mean for the effect size of each variant and S
is the LD matrix.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
test_gdl |
+ + | +
+
+
+ An instance of |
+ + required + | +
prs_beta_table |
+ + | +
+
+
+ A pandas DataFrame with the PRS effect sizes. Must contain the columns: CHR, SNP, A1, A2, BETA. + |
+ + required + | +
viprs/eval/pseudo_metrics.py
BayesPRSModel
+
+
+¶A base class for Bayesian PRS models. This class defines the basic structure and methods +that are common to most Bayesian PRS models. Specifically, this class provides methods and interfaces +for initialization, harmonization, prediction, and fitting of Bayesian PRS models.
+The class is generic and is designed to be inherited and extended by
+specific Bayesian PRS models, such as LDPred
and VIPRS
.
Attributes:
+Name | +Type | +Description | +
---|---|---|
gdl |
+ + | +
+
+
+ A GWADataLoader object containing harmonized GWAS summary statistics and Linkage-Disequilibrium (LD) matrices. + |
+
Nj |
+ + | +
+
+
+ A dictionary where keys are chromosomes and values are the sample sizes per variant. + |
+
shapes |
+ + | +
+
+
+ A dictionary where keys are chromosomes and values are the shapes of the variant arrays (e.g. the number of variants per chromosome). + |
+
_sample_size |
+ + | +
+
+
+ The average per-SNP sample size. + |
+
pip |
+ + | +
+
+
+ The posterior inclusion probability. + |
+
post_mean_beta |
+ + | +
+
+
+ The posterior mean for the effect sizes. + |
+
post_var_beta |
+ + | +
+
+
+ The posterior variance for the effect sizes. + |
+
viprs/model/BayesPRSModel.py
9 + 10 + 11 + 12 + 13 + 14 + 15 + 16 + 17 + 18 + 19 + 20 + 21 + 22 + 23 + 24 + 25 + 26 + 27 + 28 + 29 + 30 + 31 + 32 + 33 + 34 + 35 + 36 + 37 + 38 + 39 + 40 + 41 + 42 + 43 + 44 + 45 + 46 + 47 + 48 + 49 + 50 + 51 + 52 + 53 + 54 + 55 + 56 + 57 + 58 + 59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 |
|
chromosomes
+
+
+ property
+
+
+¶Returns:
+Type | +Description | +
---|---|
+ | +
+
+
+ The list of chromosomes that are included in the BayesPRSModel + |
+
m: int
+
+
+ property
+
+
+¶See Also
+Returns:
+Type | +Description | +
---|---|
+ int
+ |
+
+
+
+ The number of variants in the model. + |
+
n: int
+
+
+ property
+
+
+¶Returns:
+Type | +Description | +
---|---|
+ int
+ |
+
+
+
+ The number of samples in the model. If not available, average the per-SNP sample sizes. + |
+
n_snps: int
+
+
+ property
+
+
+¶See Also
+Returns:
+Type | +Description | +
---|---|
+ int
+ |
+
+
+
+ The number of SNPs in the model. + |
+
__init__(gdl)
+
+¶Initialize the Bayesian PRS model.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
gdl |
+ + | +
+
+
+ An instance of |
+ + required + | +
viprs/model/BayesPRSModel.py
fit(*args, **kwargs)
+
+¶A generic method to fit the Bayesian PRS model. This method should be implemented by the +specific Bayesian PRS model.
+ + +Raises:
+Type | +Description | +
---|---|
+ NotImplementedError
+ |
+
+
+
+ If the method is not implemented in the child class. + |
+
viprs/model/BayesPRSModel.py
get_heritability()
+
+¶A generic method to get an estimate of the heritability, or proportion of variance explained by SNPs.
+ + +Raises:
+Type | +Description | +
---|---|
+ NotImplementedError
+ |
+
+
+
+ If the method is not implemented in the child class. + |
+
viprs/model/BayesPRSModel.py
get_pip()
+
+¶Returns:
+Type | +Description | +
---|---|
+ | +
+
+
+ The posterior inclusion probability for each variant in the model. + |
+
get_posterior_mean_beta()
+
+¶Returns:
+Type | +Description | +
---|---|
+ | +
+
+
+ The posterior mean of the effect sizes (BETA) for each variant in the model. + |
+
get_posterior_variance_beta()
+
+¶Returns:
+Type | +Description | +
---|---|
+ | +
+
+
+ The posterior variance of the effect sizes (BETA) for each variant in the model. + |
+
get_proportion_causal()
+
+¶A generic method to get an estimate of the proportion of causal variants.
+ + +Raises:
+Type | +Description | +
---|---|
+ NotImplementedError
+ |
+
+
+
+ If the method is not implemented in the child class. + |
+
viprs/model/BayesPRSModel.py
harmonize_data(gdl=None, parameter_table=None)
+
+¶Harmonize the inferred effect sizes with a new GWADataLoader object. This method is useful +when the user wants to predict on new samples or when the effect sizes are inferred from a +different set of samples. The method aligns the effect sizes with the SNP table in the +GWADataLoader object.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
gdl |
+ + | +
+
+
+ An instance of |
+
+ None
+ |
+
parameter_table |
+ + | +
+
+
+ A |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ | +
+
+
+ A tuple of the harmonized posterior inclusion probability, posterior mean for the effect sizes, and posterior variance for the effect sizes. + |
+
viprs/model/BayesPRSModel.py
153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 |
|
predict(test_gdl=None)
+
+¶Given the inferred effect sizes, predict the phenotype for the training samples in
+the GWADataLoader object or new test samples. If test_gdl
is not provided, genotypes
+from training samples will be used (if available).
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
test_gdl |
+ + | +
+
+
+ A GWADataLoader object containing genotype data for new test samples. + |
+
+ None
+ |
+
Raises:
+Type | +Description | +
---|---|
+ ValueError
+ |
+
+
+
+ If the posterior means for BETA are not set. AssertionError if the GWADataLoader object does not contain genotype data. + |
+
viprs/model/BayesPRSModel.py
pseudo_validate(test_gdl, metric='pearson_correlation')
+
+¶Evaluate the prediction accuracy of the inferred PRS using external GWAS summary statistics.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
test_gdl |
+ + | +
+
+
+ A |
+ + required + | +
metric |
+ + | +
+
+
+ The metric to use for evaluation. Options: 'r2' or 'pearson_correlation'. + |
+
+ 'pearson_correlation'
+ |
+
Returns:
+Type | +Description | +
---|---|
+ | +
+
+
+ The pseudo-validation metric. + |
+
viprs/model/BayesPRSModel.py
read_inferred_parameters(f_names, sep='\\s+')
+
+¶Read a file with the inferred parameters.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
f_names |
+ + | +
+
+
+ A path (or list of paths) to the file with the effect sizes. + |
+ + required + | +
sep |
+ + | +
+
+
+ The delimiter for the file(s). + |
+
+ '\\s+'
+ |
+
viprs/model/BayesPRSModel.py
set_model_parameters(parameter_table)
+
+¶Parses a pandas dataframe with model parameters and assigns them +to the corresponding class attributes.
+For example:
+ * Columns with BETA
, will be assigned to self.post_mean_beta
.
+ * Columns with PIP
will be assigned to self.pip
.
+ * Columns with VAR_BETA
, will be assigned to self.post_var_beta
.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
parameter_table |
+ + | +
+
+
+ A pandas table or dataframe. + |
+ + required + | +
viprs/model/BayesPRSModel.py
to_table(col_subset=('CHR', 'SNP', 'POS', 'A1', 'A2'), per_chromosome=False)
+
+¶Output the posterior estimates for the effect sizes to a pandas dataframe.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
col_subset |
+ + | +
+
+
+ The subset of columns to include in the tables (in addition to the effect sizes). + |
+
+ ('CHR', 'SNP', 'POS', 'A1', 'A2')
+ |
+
per_chromosome |
+ + | +
+
+
+ If True, return a separate table for each chromosome. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ | +
+
+
+ A pandas Dataframe with the posterior estimates for the effect sizes. + |
+
viprs/model/BayesPRSModel.py
write_inferred_parameters(f_name, per_chromosome=False, sep='\t')
+
+¶A convenience method to write the inferred posterior for the effect sizes to file.
+TODO: + * Support outputting scoring files compatible with PGS catalog format: + https://www.pgscatalog.org/downloads/#dl_scoring_files
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
f_name |
+ + | +
+
+
+ The filename (or directory) where to write the effect sizes + |
+ + required + | +
per_chromosome |
+ + | +
+
+
+ If True, write a file for each chromosome separately. + |
+
+ False
+ |
+
sep |
+ + | +
+
+
+ The delimiter for the file (tab by default). + |
+
+ '\t'
+ |
+
viprs/model/BayesPRSModel.py
LDPredInf
+
+
+¶
+ Bases: BayesPRSModel
A wrapper class implementing the LDPred-inf model. +The LDPred-inf model is a Bayesian model that uses summary statistics +from GWAS to estimate the posterior mean effect sizes of the SNPs. It is equivalent +to performing ridge regression, with the penalty proportional to the inverse of +the per-SNP heritability.
+Refer to the following references for details about the LDPred-inf model: +* Vilhjálmsson et al. AJHG. 2015 +* Privé et al. Bioinformatics. 2020
+ + +Attributes:
+Name | +Type | +Description | +
---|---|---|
gdl |
+ + | +
+
+
+ An instance of |
+
h2 |
+ + | +
+
+
+ The heritability for the trait (can also be chromosome-specific) + |
+
viprs/model/LDPredInf.py
4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + 15 + 16 + 17 + 18 + 19 + 20 + 21 + 22 + 23 + 24 + 25 + 26 + 27 + 28 + 29 + 30 + 31 + 32 + 33 + 34 + 35 + 36 + 37 + 38 + 39 + 40 + 41 + 42 + 43 + 44 + 45 + 46 + 47 + 48 + 49 + 50 + 51 + 52 + 53 + 54 + 55 + 56 + 57 + 58 + 59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 |
|
__init__(gdl, h2=None)
+
+¶Initialize the LDPred-inf model.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
gdl |
+ + | +
+
+
+ An instance of GWADataLoader + |
+ + required + | +
h2 |
+ + | +
+
+
+ The heritability for the trait (can also be chromosome-specific) + |
+
+ None
+ |
+
viprs/model/LDPredInf.py
fit(solver='minres', **solver_kwargs)
+
+¶Fit the summary statistics-based ridge regression, +following the specifications of the LDPred-inf model.
+Warning
+Not tested yet.
+Here, we use lsqr
or minres
solvers to solve the system of equations:
(D + lam*I)BETA = BETA_HAT
+where D is the LD matrix, BETA is the ridge regression +estimate that we wish to obtain and BETA_HAT is the vector of +marginal effect sizes estimated from GWAS.
+In this case, lam = M / (N*h2), where M is the number of SNPs, +N is the number of samples and h2 is the heritability +of the trait.
+https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.linalg.lsqr.html +https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.linalg.minres.html
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
solver |
+ + | +
+
+
+ The solver for the system of linear equations. Options: |
+
+ 'minres'
+ |
+
solver_kwargs |
+ + | +
+
+
+ keyword arguments for the solver. + |
+
+ {}
+ |
+
viprs/model/LDPredInf.py
43 + 44 + 45 + 46 + 47 + 48 + 49 + 50 + 51 + 52 + 53 + 54 + 55 + 56 + 57 + 58 + 59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 |
|
get_heritability()
+
+¶Returns:
+Type | +Description | +
---|---|
+ | +
+
+
+ The heritability estimate for the trait of interest. + |
+
VIPRS
+
+
+¶
+ Bases: BayesPRSModel
The base class for performing Variational Inference of Polygenic Risk Scores (VIPRS).
+This class implements the Variational EM algorithm for estimating the posterior distribution +of the effect sizes using GWAS summary statistics. The model assumes a spike-and-slab mixture +prior on the effect size distribution, with the spike component representing the null effects +and the slab component representing the non-null effects.
+Details for the algorithm can be found in the Supplementary Material of the following paper:
+++ + +Zabad S, Gravel S, Li Y. Fast and accurate Bayesian polygenic risk modeling with variational inference. +Am J Hum Genet. 2023 May 4;110(5):741-761. doi: 10.1016/j.ajhg.2023.03.009. +Epub 2023 Apr 7. PMID: 37030289; PMCID: PMC10183379.
+
Attributes:
+Name | +Type | +Description | +
---|---|---|
gdl |
+ + | +
+
+
+ An instance of GWADataLoader containing harmonized GWAS summary statistics and LD matrices. + |
+
var_gamma |
+ + | +
+
+
+ A dictionary of the variational gamma parameter, denoting the probability that the variant comes from the slab component. + |
+
var_mu |
+ + | +
+
+
+ A dictionary of the variational mu parameter, denoting the mean of the effect size for each variant. + |
+
var_tau |
+ + | +
+
+
+ A dictionary of the variational tau parameter, denoting the precision of the effect size for each variant. + |
+
eta |
+ + | +
+
+
+ A dictionary of the posterior mean of the effect size, E[B] = gamma*mu. + |
+
zeta |
+ + | +
+
+
+ A dictionary of the expectation of B^2 under the posterior, E[B^2] = gamma*(mu^2 + 1./tau). + |
+
eta_diff |
+ + | +
+
+
+ A dictionary of the difference between the etas in two consecutive iterations. + |
+
q |
+ + | +
+
+
+ A dictionary of the q-factor, which keeps track of the multiplication of eta with the LD matrix. + |
+
ld_data |
+ + | +
+
+
+ A dictionary of the |
+
ld_indptr |
+ + | +
+
+
+ A dictionary of the |
+
ld_left_bound |
+ + | +
+
+
+ A dictionary of the left boundaries of the LD matrices. + |
+
std_beta |
+ + | +
+
+
+ A dictionary of the standardized marginal effect sizes from GWAS. + |
+
Nj |
+ + | +
+
+
+ A dictionary of the sample size per SNP from the GWAS study. + |
+
threads |
+ + | +
+
+
+ The number of threads to use when fitting the model. + |
+
fix_params |
+ + | +
+
+
+ A dictionary of hyperparameters with their fixed values. + |
+
float_precision |
+ + | +
+
+
+ The precision of the floating point variables. Options are: 'float32' or 'float64'. + |
+
order |
+ + | +
+
+
+ The order of the arrays in memory. Options are: 'C' or 'F'. + |
+
low_memory |
+ + | +
+
+
+ A boolean flag to indicate whether to use low memory mode. + |
+
dequantize_on_the_fly |
+ + | +
+
+
+ A boolean flag to indicate whether to dequantize the LD matrix on the fly. + |
+
use_cpp |
+ + | +
+
+
+ A boolean flag to indicate whether to use the C++ backend. + |
+
use_blas |
+ + | +
+
+
+ A boolean flag to indicate whether to use BLAS for linear algebra operations. + |
+
optim_result |
+ + | +
+
+
+ An instance of OptimizeResult tracking the progress of the optimization algorithm. + |
+
verbose |
+ + | +
+
+
+ Verbosity of the information printed to standard output. Can be boolean or an integer. + |
+
history |
+ + | +
+
+
+ A dictionary to store the history of the optimization procedure (e.g. the objective as a function of iteration number). + |
+
tracked_theta |
+ + | +
+
+
+ A list of hyperparameters to track throughout the optimization procedure. Useful for debugging/model checking. + |
+
viprs/model/VIPRS.py
14 + 15 + 16 + 17 + 18 + 19 + 20 + 21 + 22 + 23 + 24 + 25 + 26 + 27 + 28 + 29 + 30 + 31 + 32 + 33 + 34 + 35 + 36 + 37 + 38 + 39 + 40 + 41 + 42 + 43 + 44 + 45 + 46 + 47 + 48 + 49 + 50 + 51 + 52 + 53 + 54 + 55 + 56 + 57 + 58 + 59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 +408 +409 +410 +411 +412 +413 
+414 +415 +416 +417 +418 +419 +420 +421 +422 +423 +424 +425 +426 +427 +428 +429 +430 +431 +432 +433 +434 +435 +436 +437 +438 +439 +440 +441 +442 +443 +444 +445 +446 +447 +448 +449 +450 +451 +452 +453 +454 +455 +456 +457 +458 +459 +460 +461 +462 +463 +464 +465 +466 +467 +468 +469 +470 +471 +472 +473 +474 +475 +476 +477 +478 +479 +480 +481 +482 +483 +484 +485 +486 +487 +488 +489 +490 +491 +492 +493 +494 +495 +496 +497 +498 +499 +500 +501 +502 +503 +504 +505 +506 +507 +508 +509 +510 +511 +512 +513 +514 +515 +516 +517 +518 +519 +520 +521 +522 +523 +524 +525 +526 +527 +528 +529 +530 +531 +532 +533 +534 +535 +536 +537 +538 +539 +540 +541 +542 +543 +544 +545 +546 +547 +548 +549 +550 +551 +552 +553 +554 +555 +556 +557 +558 +559 +560 +561 +562 +563 +564 +565 +566 +567 +568 +569 +570 +571 +572 +573 +574 +575 +576 +577 +578 +579 +580 +581 +582 +583 +584 +585 +586 +587 +588 +589 +590 +591 +592 +593 +594 +595 +596 +597 +598 +599 +600 +601 +602 +603 +604 +605 +606 +607 +608 +609 +610 +611 +612 +613 +614 +615 +616 +617 +618 +619 +620 +621 +622 +623 +624 +625 +626 +627 +628 +629 +630 +631 +632 +633 +634 +635 +636 +637 +638 +639 +640 +641 +642 +643 +644 +645 +646 +647 +648 +649 +650 +651 +652 +653 +654 +655 +656 +657 +658 +659 +660 +661 +662 +663 +664 +665 +666 +667 +668 +669 +670 +671 +672 +673 +674 +675 +676 +677 +678 +679 +680 +681 +682 +683 +684 +685 +686 +687 +688 +689 +690 +691 +692 +693 +694 +695 +696 +697 +698 +699 +700 +701 +702 +703 +704 +705 +706 +707 +708 +709 +710 +711 +712 +713 +714 +715 +716 +717 +718 +719 +720 +721 +722 +723 +724 +725 +726 +727 +728 +729 +730 +731 +732 +733 +734 +735 +736 +737 +738 +739 +740 +741 +742 +743 +744 +745 +746 +747 +748 +749 +750 +751 +752 +753 +754 +755 +756 +757 +758 +759 +760 +761 +762 +763 +764 +765 +766 +767 +768 +769 +770 +771 +772 +773 +774 +775 +776 +777 +778 +779 +780 +781 +782 +783 +784 +785 +786 +787 +788 +789 +790 +791 +792 +793 +794 +795 +796 +797 +798 +799 +800 +801 +802 +803 +804 +805 +806 +807 +808 +809 +810 +811 +812 +813 
+814 +815 +816 +817 +818 +819 +820 +821 +822 +823 +824 +825 +826 +827 +828 +829 +830 +831 +832 +833 +834 +835 +836 +837 +838 +839 +840 +841 +842 +843 +844 +845 +846 +847 +848 +849 +850 +851 +852 +853 +854 +855 +856 +857 +858 +859 +860 +861 +862 +863 +864 +865 +866 +867 +868 +869 +870 +871 +872 +873 +874 +875 +876 +877 |
|
__init__(gdl, fix_params=None, tracked_theta=None, verbose=True, float_precision='float32', order='F', low_memory=True, use_blas=True, use_cpp=True, dequantize_on_the_fly=False, threads=1)
+
+¶Initialize the VIPRS model.
+.. note:: + The initialization of the model involves loading the LD matrix to memory.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
gdl |
+ + | +
+
+
+ An instance of GWADataLoader containing harmonized GWAS summary statistics and LD matrices. + |
+ + required + | +
fix_params |
+ + | +
+
+
+ A dictionary of hyperparameters with their fixed values. + |
+
+ None
+ |
+
tracked_theta |
+ + | +
+
+
+ A list of hyperparameters to track throughout the optimization procedure. Useful for debugging/model checking. Currently, we allow the user to track the following: * The proportion of causal variants ( |
+
+ None
+ |
+
verbose |
+ + | +
+
+
+ Verbosity of the information printed to standard output. Can be boolean or an integer. Provide a number greater than 1 for more detailed output. + |
+
+ True
+ |
+
float_precision |
+ + | +
+
+
+ The precision of the floating point variables. Options are: 'float32' or 'float64'. + |
+
+ 'float32'
+ |
+
order |
+ + | +
+
+
+ The order of the arrays in memory. Options are: 'C' or 'F'. + |
+
+ 'F'
+ |
+
low_memory |
+ + | +
+
+
+ A boolean flag to indicate whether to use low memory mode. + |
+
+ True
+ |
+
use_blas |
+ + | +
+
+
+ A boolean flag to indicate whether to use BLAS for linear algebra operations. + |
+
+ True
+ |
+
use_cpp |
+ + | +
+
+
+ A boolean flag to indicate whether to use the C++ backend. + |
+
+ True
+ |
+
dequantize_on_the_fly |
+ + | +
+
+
+ A boolean flag to indicate whether to dequantize the LD matrix on the fly. + |
+
+ False
+ |
+
threads |
+ + | +
+
+
+ The number of threads to use when fitting the model. + |
+
+ 1
+ |
+
viprs/model/VIPRS.py
62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 |
|
compute_eta()
+
+¶Returns:
+Type | +Description | +
---|---|
+ | +
+
+
+ The mean for the effect size under the variational posterior. + |
+
compute_pip()
+
+¶compute_zeta()
+
+¶Returns:
+Type | +Description | +
---|---|
+ | +
+
+
+ The expectation of the squared effect size under the variational posterior. + |
+
viprs/model/VIPRS.py
e_step()
+
+¶Run the E-Step of the Variational EM algorithm. +Here, we update the variational parameters for each variant using coordinate +ascent optimization techniques. The update equations are outlined in +the Supplementary Material of the following paper:
+++ +Zabad S, Gravel S, Li Y. Fast and accurate Bayesian polygenic risk modeling with variational inference. +Am J Hum Genet. 2023 May 4;110(5):741-761. doi: 10.1016/j.ajhg.2023.03.009. +Epub 2023 Apr 7. PMID: 37030289; PMCID: PMC10183379.
+
viprs/model/VIPRS.py
346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 |
|
elbo(sum_axis=None)
+
+¶Compute the variational objective, the Evidence Lower-BOund (ELBO),
+from GWAS summary statistics and the reference LD data. This implementation assumes
+that the product of the LD matrix with the current estimate of the effect sizes
+is already computed and stored in the q
dictionary. If this is not the case,
+we recommend computing q first and then calling this method.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
sum_axis |
+ + | +
+
+
+ The axis along which to sum the ELBO. If None, the ELBO is returned as a scalar. + |
+
+ None
+ |
+
viprs/model/VIPRS.py
481 +482 +483 +484 +485 +486 +487 +488 +489 +490 +491 +492 +493 +494 +495 +496 +497 +498 +499 +500 +501 +502 +503 +504 +505 +506 +507 +508 +509 +510 +511 +512 +513 +514 +515 +516 +517 +518 +519 +520 +521 +522 +523 +524 +525 +526 +527 +528 +529 +530 +531 +532 +533 +534 +535 +536 +537 +538 +539 +540 +541 +542 +543 +544 +545 +546 +547 +548 +549 +550 +551 +552 +553 +554 +555 |
|
fit(max_iter=1000, theta_0=None, param_0=None, continued=False, min_iter=3, f_abs_tol=1e-06, x_abs_tol=1e-07, drop_r_tol=0.01, patience=5)
+
+¶A convenience method to fit the model using the Variational EM algorithm.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
max_iter |
+ + | +
+
+
+ Maximum number of iterations. + |
+
+ 1000
+ |
+
theta_0 |
+ + | +
+
+
+ A dictionary of values to initialize the hyperparameters + |
+
+ None
+ |
+
param_0 |
+ + | +
+
+
+ A dictionary of values to initialize the variational parameters + |
+
+ None
+ |
+
continued |
+ + | +
+
+
+ If true, continue the model fitting for more iterations from current parameters instead of starting over. + |
+
+ False
+ |
+
min_iter |
+ + | +
+
+
+ The minimum number of iterations to run before checking for convergence. + |
+
+ 3
+ |
+
f_abs_tol |
+ + | +
+
+
+ The absolute tolerance threshold for the objective (ELBO). + |
+
+ 1e-06
+ |
+
x_abs_tol |
+ + | +
+
+
+ The absolute tolerance threshold for the variational parameters. + |
+
+ 1e-07
+ |
+
drop_r_tol |
+ + | +
+
+
+ The relative tolerance for the drop in the ELBO to be considered as a red flag. It usually happens around convergence that the objective fluctuates due to numerical errors. This is a way to differentiate such random fluctuations from actual drops in the objective. + |
+
+ 0.01
+ |
+
patience |
+ + | +
+
+
+ The maximum number of times the objective is allowed to drop before termination. + |
+
+ 5
+ |
+
viprs/model/VIPRS.py
728 +729 +730 +731 +732 +733 +734 +735 +736 +737 +738 +739 +740 +741 +742 +743 +744 +745 +746 +747 +748 +749 +750 +751 +752 +753 +754 +755 +756 +757 +758 +759 +760 +761 +762 +763 +764 +765 +766 +767 +768 +769 +770 +771 +772 +773 +774 +775 +776 +777 +778 +779 +780 +781 +782 +783 +784 +785 +786 +787 +788 +789 +790 +791 +792 +793 +794 +795 +796 +797 +798 +799 +800 +801 +802 +803 +804 +805 +806 +807 +808 +809 +810 +811 +812 +813 +814 +815 +816 +817 +818 +819 +820 +821 +822 +823 +824 +825 +826 +827 +828 +829 +830 +831 +832 +833 +834 +835 +836 +837 +838 +839 +840 +841 +842 +843 +844 +845 +846 +847 +848 +849 +850 +851 +852 +853 +854 +855 +856 +857 +858 +859 +860 +861 +862 +863 +864 +865 +866 +867 +868 +869 +870 +871 +872 +873 +874 +875 +876 +877 |
|
get_average_effect_size_variance()
+
+¶Returns:
+Type | +Description | +
---|---|
+ | +
+
+
+ The average per-SNP variance for the prior mixture components + |
+
viprs/model/VIPRS.py
get_heritability()
+
+¶Returns:
+Type | +Description | +
---|---|
+ | +
+
+
+ An estimate of the SNP heritability, or proportion of variance explained by SNPs. + |
+
get_null_pi(chrom=None)
+
+¶Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
chrom |
+ + | +
+
+
+ If provided, get the mixing proportion for the null component on a given chromosome. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ | +
+
+
+ The value of the prior probability of a variant being null, |
+
viprs/model/VIPRS.py
get_pi(chrom=None)
+
+¶Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
chrom |
+ + | +
+
+
+ Get the value of |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ | +
+
+
+ The value of the prior probability of a variant being causal, |
+
viprs/model/VIPRS.py
get_proportion_causal()
+
+¶Returns:
+Type | +Description | +
---|---|
+ | +
+
+
+ The proportion of causal variants in the model. + |
+
get_sigma_epsilon()
+
+¶Returns:
+Type | +Description | +
---|---|
+ | +
+
+
+ The value of the residual variance, |
+
get_tau_beta(chrom=None)
+
+¶Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
chrom |
+ + | +
+
+
+ Get the value of |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ | +
+
+
+ The value of the prior precision on the effect size(s), |
+
viprs/model/VIPRS.py
init_optim_meta()
+
+¶Initialize the various quantities/objects to keep track of the optimization process. + This method initializes the "history" object (which keeps track of the objective and other + hyperparameters requested by the user), in addition to the OptimizeResult objects.
+ +viprs/model/VIPRS.py
initialize(theta_0=None, param_0=None)
+
+¶A convenience method to initialize all the objects associated with the model.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
theta_0 |
+ + | +
+
+
+ A dictionary of initial values for the hyperparameters theta + |
+
+ None
+ |
+
param_0 |
+ + | +
+
+
+ A dictionary of initial values for the variational parameters + |
+
+ None
+ |
+
viprs/model/VIPRS.py
initialize_theta(theta_0=None)
+
+¶Initialize the global hyperparameters of the model.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
theta_0 |
+ + | +
+
+
+ A dictionary of initial values for the hyperparameters theta + |
+
+ None
+ |
+
viprs/model/VIPRS.py
236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 |
|
initialize_variational_parameters(param_0=None)
+
+¶Initialize the variational parameters of the model.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
param_0 |
+ + | +
+
+
+ A dictionary of initial values for the variational parameters + |
+
+ None
+ |
+
viprs/model/VIPRS.py
m_step()
+
+¶Run the M-Step of the Variational EM algorithm. +Here, we update the hyperparameters of the model, by simply calling +the update functions for each hyperparameter separately.
+ +viprs/model/VIPRS.py
objective()
+
+¶The optimization objective for the variational inference problem. The objective +for the VIPRS method is the Evidence Lower-Bound (ELBO) in this case.
+See Also
+viprs/model/VIPRS.py
to_theta_table()
+
+¶Returns:
+Type | +Description | +
---|---|
+ | +
+
+
+ A |
+
viprs/model/VIPRS.py
update_pi()
+
+¶Update the prior probability of a variant being causal, or the proportion of causal variants, pi
.
viprs/model/VIPRS.py
update_posterior_moments()
+
+¶A convenience method to update the dictionaries containing the posterior moments, +including the PIP and posterior mean and variance for the effect size.
+ +viprs/model/VIPRS.py
update_sigma_epsilon()
+
+¶Update the global residual variance parameter, sigma_epsilon
.
viprs/model/VIPRS.py
update_tau_beta()
+
+¶Update the prior precision (inverse variance) for the effect size, tau_beta
.
viprs/model/VIPRS.py
update_theta_history()
+
+¶A convenience method to update the history of the hyperparameters of the model, +if the user requested that they should be tracked.
+ +viprs/model/VIPRS.py
write_inferred_theta(f_name, sep='\t')
+
+¶A convenience method to write the inferred (and fixed) hyperparameters of the model to file.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
f_name |
+ + | +
+
+
+ The file name + |
+ + required + | +
sep |
+ + | +
+
+
+ The separator for the hyperparameter file. + |
+
+ '\t'
+ |
+
viprs/model/VIPRS.py
VIPRSMix
+
+
+¶
+ Bases: VIPRS
A class for the Variational Inference for Polygenic Risk Scores (VIPRS) model
+parametrized with the sparse mixture prior on the effect sizes. The class inherits
+many of the methods and attributes from the VIPRS
class unchanged. However,
+there are many important updates and changes to the model, including the dimensionality
+of the arrays representing the variational parameters.
Details for the algorithm can be found in the Supplementary Material of the following paper:
+++ + +Zabad S, Gravel S, Li Y. Fast and accurate Bayesian polygenic risk modeling with variational inference. +Am J Hum Genet. 2023 May 4;110(5):741-761. doi: 10.1016/j.ajhg.2023.03.009. +Epub 2023 Apr 7. PMID: 37030289; PMCID: PMC10183379.
+
Attributes:
+Name | +Type | +Description | +
---|---|---|
K |
+ + | +
+
+
+ The number of causal (i.e. non-null) components in the mixture prior (minimum 1). When |
+
d |
+ + | +
+
+
+ Multiplier for the prior on the effect size (vector of size K). + |
+
viprs/model/VIPRSMix.py
11 + 12 + 13 + 14 + 15 + 16 + 17 + 18 + 19 + 20 + 21 + 22 + 23 + 24 + 25 + 26 + 27 + 28 + 29 + 30 + 31 + 32 + 33 + 34 + 35 + 36 + 37 + 38 + 39 + 40 + 41 + 42 + 43 + 44 + 45 + 46 + 47 + 48 + 49 + 50 + 51 + 52 + 53 + 54 + 55 + 56 + 57 + 58 + 59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 |
|
__init__(gdl, K=1, prior_multipliers=None, **kwargs)
+
+¶Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
gdl |
+ + | +
+
+
+ An instance of |
+ + required + | +
K |
+ + | +
+
+
+ The number of causal (i.e. non-null) components in the mixture prior (minimum 1). When |
+
+ 1
+ |
+
prior_multipliers |
+ + | +
+
+
+ Multiplier for the prior on the effect size (vector of size K). + |
+
+ None
+ |
+
kwargs |
+ + | +
+
+
+ Additional keyword arguments to pass to the VIPRS model. + |
+
+ {}
+ |
+
viprs/model/VIPRSMix.py
compute_eta()
+
+¶Returns:
+Type | +Description | +
---|---|
+ | +
+
+
+ The mean for the effect size under the variational posterior. + |
+
compute_pip()
+
+¶Returns:
+Type | +Description | +
---|---|
+ | +
+
+
+ The posterior inclusion probability + |
+
compute_zeta(sum_axis=1)
+
+¶Returns:
+Type | +Description | +
---|---|
+ | +
+
+
+ The expectation of the squared effect size under the variational posterior. + |
+
viprs/model/VIPRSMix.py
e_step()
+
+¶Run the E-Step of the Variational EM algorithm. +Here, we update the variational parameters for each variant using coordinate +ascent optimization techniques. The update equations are outlined in +the Supplementary Material of the following paper:
+++ +Zabad S, Gravel S, Li Y. Fast and accurate Bayesian polygenic risk modeling with variational inference. +Am J Hum Genet. 2023 May 4;110(5):741-761. doi: 10.1016/j.ajhg.2023.03.009. +Epub 2023 Apr 7. PMID: 37030289; PMCID: PMC10183379.
+
viprs/model/VIPRSMix.py
166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 |
|
get_average_effect_size_variance()
+
+¶Returns:
+Type | +Description | +
---|---|
+ | +
+
+
+ The average per-SNP variance for the prior mixture components + |
+
viprs/model/VIPRSMix.py
get_null_pi(chrom=None)
+
+¶Get the proportion of SNPs in the null component
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
chrom |
+ + | +
+
+
+ If provided, get the mixing proportion for the null component on a given chromosome. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ | +
+
+
+ The value of the mixing proportion for the null component + |
+
viprs/model/VIPRSMix.py
get_proportion_causal()
+
+¶Returns:
+Type | +Description | +
---|---|
+ | +
+
+
+ The proportion of variants in the non-null components. + |
+
viprs/model/VIPRSMix.py
initialize_theta(theta_0=None)
+
+¶Initialize the global hyperparameters of the model
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
theta_0 |
+ + | +
+
+
+ A dictionary of initial values for the hyperparameters theta + |
+
+ None
+ |
+
viprs/model/VIPRSMix.py
64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 |
|
to_theta_table()
+
+¶Returns:
+Type | +Description | +
---|---|
+ | +
+
+
+ A |
+
viprs/model/VIPRSMix.py
update_pi()
+
+¶Update the prior mixing proportions pi
viprs/model/VIPRSMix.py
update_tau_beta()
+
+¶Update the prior precision (inverse variance) for the effect sizes, tau_beta
viprs/model/VIPRSMix.py
HyperparameterGrid
+
+
+¶
+ Bases: object
A utility class to facilitate generating grids for the
+hyperparameters of the standard VIPRS
models. It is designed to
+interface with models that operate on grids of hyperparameters,
+such as VIPRSGridSearch
and VIPRSBMA
. The hyperparameters for
+the standard VIPRS model are:
sigma_epsilon
: The residual variance for the phenotype.tau_beta
: The precision (inverse variance) of the prior for the effect sizes.pi
: The proportion of non-zero effect sizes (polygenicity).Attributes:
+Name | +Type | +Description | +
---|---|---|
sigma_epsilon |
+ + | +
+
+
+ A grid of values for the residual variance hyperparameter. + |
+
tau_beta |
+ + | +
+
+
+ A grid of values for the precision of the prior for the effect sizes. + |
+
pi |
+ + | +
+
+
+ A grid of values for the proportion of non-zero effect sizes. + |
+
h2_est |
+ + | +
+
+
+ An estimate of the heritability for the trait under consideration. + |
+
h2_se |
+ + | +
+
+
+ The standard error of the heritability estimate. + |
+
n_snps |
+ + | +
+
+
+ The number of common variants that may be relevant for this analysis. + |
+
viprs/model/gridsearch/HyperparameterGrid.py
6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + 15 + 16 + 17 + 18 + 19 + 20 + 21 + 22 + 23 + 24 + 25 + 26 + 27 + 28 + 29 + 30 + 31 + 32 + 33 + 34 + 35 + 36 + 37 + 38 + 39 + 40 + 41 + 42 + 43 + 44 + 45 + 46 + 47 + 48 + 49 + 50 + 51 + 52 + 53 + 54 + 55 + 56 + 57 + 58 + 59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 |
|
__init__(sigma_epsilon_grid=None, sigma_epsilon_steps=None, tau_beta_grid=None, tau_beta_steps=None, pi_grid=None, pi_steps=None, h2_est=None, h2_se=None, n_snps=1000000.0)
+
+¶Create a hyperparameter grid for the standard VIPRS model with the +spike-and-slab prior. The hyperparameters for this model are:
+sigma_epsilon
: The residual variancetau_beta
: The precision (inverse variance) of the prior for the effect sizespi
: The proportion of non-zero effect sizesFor each of these hyperparameters, we can provide a grid of values to search over. +If the heritability estimate and standard error (from e.g. LDSC) are provided, +we can generate grids for sigma_epsilon and tau_beta that are informed by these estimates.
+For each hyperparameter to be included in the grid, the user must specify either the grid +itself, or the number of steps to use to generate the grid.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
sigma_epsilon_grid |
+ + | +
+
+
+ An array containing a grid of values for the sigma_epsilon hyperparameter. + |
+
+ None
+ |
+
sigma_epsilon_steps |
+ + | +
+
+
+ The number of steps for the sigma_epsilon grid + |
+
+ None
+ |
+
tau_beta_grid |
+ + | +
+
+
+ An array containing a grid of values for the tau_beta hyperparameter. + |
+
+ None
+ |
+
tau_beta_steps |
+ + | +
+
+
+ The number of steps for the tau_beta grid + |
+
+ None
+ |
+
pi_grid |
+ + | +
+
+
+ An array containing a grid of values for the pi hyperparameter + |
+
+ None
+ |
+
pi_steps |
+ + | +
+
+
+ The number of steps for the pi grid + |
+
+ None
+ |
+
h2_est |
+ + | +
+
+
+ An estimate of the heritability for the trait under consideration. If provided, we can generate grids for some of the hyperparameters that are consistent with this estimate. + |
+
+ None
+ |
+
h2_se |
+ + | +
+
+
+ The standard error of the heritability estimate. If provided, we can generate grids for some of the hyperparameters that are consistent with this estimate. + |
+
+ None
+ |
+
n_snps |
+ + | +
+
+
+ Number of common variants that may be relevant for this analysis. This estimate can be used to generate grids that are based on this number. + |
+
+ 1000000.0
+ |
+
viprs/model/gridsearch/HyperparameterGrid.py
combine_grids()
+
+¶Weave together the different hyperparameter grids and return a list of +dictionaries where the key is the hyperparameter name and the value is +the value for that hyperparameter.
+ + +Returns:
+Type | +Description | +
---|---|
+ | +
+
+
+ A list of dictionaries containing the hyperparameter values. + |
+
Raises:
+Type | +Description | +
---|---|
+ ValueError
+ |
+
+
+
+ If all the grids are empty. + |
+
viprs/model/gridsearch/HyperparameterGrid.py
generate_pi_grid(steps=5)
+
+¶Generate a grid of values for the pi
(proportion of non-zero effect sizes) hyperparameter.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
steps |
+ + | +
+
+
+ The number of steps for the |
+
+ 5
+ |
+
viprs/model/gridsearch/HyperparameterGrid.py
generate_sigma_epsilon_grid(steps=5)
+
+¶Generate a grid of values for the sigma_epsilon
(residual variance) hyperparameter.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
steps |
+ + | +
+
+
+ The number of steps for the sigma_epsilon grid. + |
+
+ 5
+ |
+
viprs/model/gridsearch/HyperparameterGrid.py
generate_tau_beta_grid(steps=5)
+
+¶Generate a grid of values for the tau_beta
+(precision of the prior for the effect sizes) hyperparameter.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
steps |
+ + | +
+
+
+ The number of steps for the |
+
+ 5
+ |
+
viprs/model/gridsearch/HyperparameterGrid.py
to_table()
+
+¶Returns:
+Type | +Description | +
---|---|
+ | +
+
+
+ The hyperparameter grid as a pandas |
+
BMA
+
+
+¶
+ Bases: BayesPRSModel
Bayesian Model Averaging fitting procedure
+ +viprs/model/gridsearch/HyperparameterSearch.py
396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 +408 +409 +410 +411 +412 +413 +414 +415 +416 +417 +418 +419 +420 +421 +422 +423 +424 +425 +426 +427 +428 +429 +430 +431 +432 +433 +434 +435 +436 +437 +438 +439 +440 +441 +442 +443 +444 +445 +446 +447 +448 +449 +450 +451 +452 +453 +454 +455 +456 +457 +458 +459 +460 +461 +462 +463 +464 +465 +466 +467 +468 +469 +470 +471 +472 +473 +474 +475 +476 +477 +478 +479 +480 +481 +482 +483 +484 +485 +486 +487 +488 +489 +490 +491 +492 +493 +494 +495 +496 +497 +498 +499 +500 +501 +502 +503 +504 +505 +506 +507 +508 +509 |
|
__init__(gdl, grid, model=None, normalization='softmax', verbose=False, n_jobs=1)
+
+¶Integrate out hyperparameters using Bayesian Model Averaging
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
gdl |
+ + | +
+
+
+ A GWADataLoader object + |
+ + required + | +
grid |
+ + | +
+
+
+ A HyperParameterGrid object + |
+ + required + | +
model |
+ + | +
+
+
+ A |
+
+ None
+ |
+
normalization |
+ + | +
+
+
+ The normalization scheme for the final ELBOs. Options are ( |
+
+ 'softmax'
+ |
+
verbose |
+ + | +
+
+
+ Detailed messages and print statements. + |
+
+ False
+ |
+
n_jobs |
+ + | +
+
+
+ The number of processes to use for the BMA + |
+
+ 1
+ |
+
viprs/model/gridsearch/HyperparameterSearch.py
BayesOpt
+
+
+¶
+ Bases: HyperparameterSearch
Hyperparameter search using Bayesian optimization
+ +viprs/model/gridsearch/HyperparameterSearch.py
191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 |
|
__init__(gdl, opt_params, param_bounds=None, model=None, criterion='ELBO', validation_gdl=None, verbose=False, n_jobs=1)
+
+¶Perform hyperparameter search using Bayesian optimization
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
gdl |
+ + | +
+
+
+ A GWADataLoader object + |
+ + required + | +
opt_params |
+ + | +
+
+
+ A list of the hyperparameters to optimize over (e.g. 'pi', 'sigma_epsilon', 'sigma_beta'). + |
+ + required + | +
param_bounds |
+ + | +
+
+
+ The bounds for each hyperparameter included in the optimization. A list of tuples, where each tuple records the (min, max) values for each hyperparameter. + |
+
+ None
+ |
+
model |
+ + | +
+
+
+ A |
+
+ None
+ |
+
criterion |
+ + | +
+
+
+ The objective function for the hyperparameter search (ELBO or validation). + |
+
+ 'ELBO'
+ |
+
validation_gdl |
+ + | +
+
+
+ If the objective is validation, provide the GWADataLoader object for the validation dataset. + |
+
+ None
+ |
+
verbose |
+ + | +
+
+
+ Detailed messages and print statements. + |
+
+ False
+ |
+
n_jobs |
+ + | +
+
+
+ The number of processes to use for the hyperparameter search (not applicable here). + |
+
+ 1
+ |
+
viprs/model/gridsearch/HyperparameterSearch.py
fit(max_iter=50, f_abs_tol=0.0001, n_calls=30, n_random_starts=5, acq_func='gp_hedge')
+
+¶Perform model fitting and hyperparameter search using Bayesian optimization.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
n_calls |
+ + | +
+
+
+ The number of model runs with different hyperparameter settings. + |
+
+ 30
+ |
+
n_random_starts |
+ + | +
+
+
+ The number of random starts to initialize the optimizer. + |
+
+ 5
+ |
+
acq_func |
+ + | +
+
+
+ The acquisition function (default: |
+
+ 'gp_hedge'
+ |
+
max_iter |
+ + | +
+
+
+ The maximum number of iterations within the search (default: 50). + |
+
+ 50
+ |
+
f_abs_tol |
+ + | +
+
+
+ The absolute tolerance for the objective (ELBO) within the search + |
+
+ 0.0001
+ |
+
viprs/model/gridsearch/HyperparameterSearch.py
239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 |
|
GridSearch
+
+
+¶
+ Bases: HyperparameterSearch
Hyperparameter search using Grid Search
+ +viprs/model/gridsearch/HyperparameterSearch.py
308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 |
|
__init__(gdl, grid, model=None, criterion='ELBO', validation_gdl=None, verbose=False, n_jobs=1)
+
+¶Perform hyperparameter search using grid search
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
gdl |
+ + | +
+
+
+ A GWADataLoader object + |
+ + required + | +
grid |
+ + | +
+
+
+ A HyperParameterGrid object + |
+ + required + | +
model |
+ + | +
+
+
+ A |
+
+ None
+ |
+
criterion |
+ + | +
+
+
+ The objective function for the grid search (ELBO or validation). + |
+
+ 'ELBO'
+ |
+
validation_gdl |
+ + | +
+
+
+ If the objective is validation, provide the GWADataLoader object for the validation dataset. + |
+
+ None
+ |
+
verbose |
+ + | +
+
+
+ Detailed messages and print statements. + |
+
+ False
+ |
+
n_jobs |
+ + | +
+
+
+ The number of processes to use for the grid search + |
+
+ 1
+ |
+
viprs/model/gridsearch/HyperparameterSearch.py
HyperparameterSearch
+
+
+¶
+ Bases: object
A generic class for performing hyperparameter search on the
+VIPRS
model. This interface is old and will likely be deprecated
+in future releases. It is recommended to use the VIPRSGrid
class
+and its derivatives for performing grid search instead.
viprs/model/gridsearch/HyperparameterSearch.py
34 + 35 + 36 + 37 + 38 + 39 + 40 + 41 + 42 + 43 + 44 + 45 + 46 + 47 + 48 + 49 + 50 + 51 + 52 + 53 + 54 + 55 + 56 + 57 + 58 + 59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 |
|
__init__(gdl, model=None, criterion='ELBO', validation_gdl=None, verbose=False, n_jobs=1)
+
+¶A generic hyperparameter search class that implements common functionalities +that may be required by hyperparameter search strategies.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
gdl |
+ + | +
+
+
+ A GWADataLoader object + |
+ + required + | +
model |
+ + | +
+
+
+ A |
+
+ None
+ |
+
criterion |
+ + | +
+
+
+ The objective function for the hyperparameter search. Options are: |
+
+ 'ELBO'
+ |
+
validation_gdl |
+ + | +
+
+
+ If the objective is validation, provide the GWADataLoader object for the validation dataset. + |
+
+ None
+ |
+
verbose |
+ + | +
+
+
+ Detailed messages and print statements. + |
+
+ False
+ |
+
n_jobs |
+ + | +
+
+
+ The number of processes to use for the hyperparameter search. + |
+
+ 1
+ |
+
viprs/model/gridsearch/HyperparameterSearch.py
multi_objective(models)
+
+¶This method evaluates multiple PRS models simultaneously. This can be faster for +some evaluation criteria, such as the validation R^2, because we need to +multiply the inferred effect sizes with the genotype matrix only once.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
models |
+ + | +
+
+
+ A list of PRS models that we wish to evaluate. + |
+ + required + | +
viprs/model/gridsearch/HyperparameterSearch.py
objective(model)
+
+¶A method that takes the result of fitting the model
+and returns the desired objective (either ELBO
, pseudo_validation
, or validation
).
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
model |
+ + | +
+
+
+ The PRS model to evaluate + |
+ + required + | +
viprs/model/gridsearch/HyperparameterSearch.py
to_validation_table()
+
+¶Summarize the validation results in a pandas table.
+ +viprs/model/gridsearch/HyperparameterSearch.py
write_validation_result(v_filename, sep='\t')
+
+¶After performing hyperparameter search, write a table +that records the value of the objective for each combination +of hyperparameters.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
v_filename |
+ + | +
+
+
+ The filename for the validation table. + |
+ + required + | +
sep |
+ + | +
+
+
+ The separator for the validation table + |
+
+ '\t'
+ |
+
viprs/model/gridsearch/HyperparameterSearch.py
fit_model_fixed_params(params)
+
+¶Perform model fitting using a set of fixed parameters.
+This is a helper function to allow us to use the multiprocessing
module
+to fit PRS models in parallel.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
params |
+ + | +
+
+
+ A tuple of (BayesPRSModel, fixed parameters dictionary, and kwargs for the .fit() method). + |
+ + required + | +
viprs/model/gridsearch/HyperparameterSearch.py
VIPRSBMA
+
+
+¶
+ Bases: VIPRSGrid
The VIPRSBMA
class is an extension of the VIPRSGrid
class that
+implements Bayesian model averaging for the VIPRS
models in the grid.
+Bayesian model averaging is a technique that allows us to combine the
+results of multiple models by weighting them according to their evidence.
+In this context, we weight the models by their final ELBO values.
For more details on the BMA procedure implemented here, refer to the +Supplementary material of:
+++ +Zabad S, Gravel S, Li Y. Fast and accurate Bayesian polygenic risk modeling with variational inference. +Am J Hum Genet. 2023 May 4;110(5):741-761. doi: 10.1016/j.ajhg.2023.03.009. +Epub 2023 Apr 7. PMID: 37030289; PMCID: PMC10183379.
+
viprs/model/gridsearch/VIPRSBMA.py
7 + 8 + 9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 |
|
__init__(gdl, grid, **kwargs)
+
+¶Initialize the VIPRSBMA
model.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
gdl |
+ + | +
+
+
+ An instance of |
+ + required + | +
grid |
+ + | +
+
+
+ An instance of |
+ + required + | +
kwargs |
+ + | +
+
+
+ Additional keyword arguments for the VIPRS model + |
+
+ {}
+ |
+
viprs/model/gridsearch/VIPRSBMA.py
average_models(normalization='softmax')
+
+¶Use Bayesian model averaging (BMA) to obtain final weights for each parameter. +We average the weights by using the final ELBO for each model.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
normalization |
+ + | +
+
+
+ The normalization scheme for the final ELBOs. Options are ( |
+
+ 'softmax'
+ |
+
Raises:
+Type | +Description | +
---|---|
+ KeyError
+ |
+
+
+
+ If the normalization scheme is not recognized. + |
+
viprs/model/gridsearch/VIPRSBMA.py
VIPRSGrid
+
+
+¶
+ Bases: VIPRS
A class to fit the VIPRS
model to data using a grid of hyperparameters.
+Instead of having a single set of hyperparameters, we simultaneously fit
+multiple models with different hyperparameters and compare their performance
+at the end. This class is generic and does not support any model selection or
+averaging schemes.
The class inherits all the basic attributes from the VIPRS class.
+See Also
+ +Attributes:
+Name | +Type | +Description | +
---|---|---|
grid_table |
+ + | +
+
+
+ A pandas table containing the hyperparameters for each model. + |
+
n_models |
+ + | +
+
+
+ The number of models to fit. + |
+
shapes |
+ + | +
+
+
+ A dictionary containing the shapes of the data matrices. + |
+
active_models |
+ + | +
+
+
+ A boolean array indicating which models are still active (i.e. not converged). + |
+
viprs/model/gridsearch/VIPRSGrid.py
13 + 14 + 15 + 16 + 17 + 18 + 19 + 20 + 21 + 22 + 23 + 24 + 25 + 26 + 27 + 28 + 29 + 30 + 31 + 32 + 33 + 34 + 35 + 36 + 37 + 38 + 39 + 40 + 41 + 42 + 43 + 44 + 45 + 46 + 47 + 48 + 49 + 50 + 51 + 52 + 53 + 54 + 55 + 56 + 57 + 58 + 59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 +408 +409 +410 +411 +412 
+413 +414 +415 +416 +417 +418 +419 +420 +421 +422 +423 +424 +425 |
|
models_to_keep
+
+
+ property
+
+
+¶Returns:
+Type | +Description | +
---|---|
+ | +
+
+
+ A boolean array indicating which models have converged successfully. + |
+
__init__(gdl, grid, **kwargs)
+
+¶Initialize the VIPRS
model with a grid of hyperparameters.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
gdl |
+ + | +
+
+
+ An instance of |
+ + required + | +
grid |
+ + | +
+
+
+ An instance of |
+ + required + | +
kwargs |
+ + | +
+
+
+ Additional keyword arguments to pass to the parent |
+
+ {}
+ |
+
viprs/model/gridsearch/VIPRSGrid.py
e_step()
+
+¶Run the E-Step of the Variational EM algorithm. +Here, we update the variational parameters for each variant using coordinate +ascent optimization techniques. The coordinate ascent procedure is run on all the models +in the grid simultaneously. The update equations are outlined in +the Supplementary Material of the following paper:
+++ +Zabad S, Gravel S, Li Y. Fast and accurate Bayesian polygenic risk modeling with variational inference. +Am J Hum Genet. 2023 May 4;110(5):741-761. doi: 10.1016/j.ajhg.2023.03.009. +Epub 2023 Apr 7. PMID: 37030289; PMCID: PMC10183379.
+
viprs/model/gridsearch/VIPRSGrid.py
125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 |
|
fit(max_iter=1000, theta_0=None, param_0=None, continued=False, min_iter=3, f_abs_tol=1e-06, x_abs_tol=1e-07, drop_r_tol=1e-06, patience=5)
+
+¶A convenience method to fit all the models in the grid using the Variational EM algorithm.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
max_iter |
+ + | +
+
+
+ Maximum number of iterations. + |
+
+ 1000
+ |
+
theta_0 |
+ + | +
+
+
+ A dictionary of values to initialize the hyperparameters + |
+
+ None
+ |
+
param_0 |
+ + | +
+
+
+ A dictionary of values to initialize the variational parameters + |
+
+ None
+ |
+
continued |
+ + | +
+
+
+ If true, continue the model fitting for more iterations. + |
+
+ False
+ |
+
min_iter |
+ + | +
+
+
+ The minimum number of iterations to run before checking for convergence. + |
+
+ 3
+ |
+
f_abs_tol |
+ + | +
+
+
+ The absolute tolerance threshold for the objective (ELBO). + |
+
+ 1e-06
+ |
+
x_abs_tol |
+ + | +
+
+
+ The absolute tolerance threshold for the variational parameters. + |
+
+ 1e-07
+ |
+
drop_r_tol |
+ + | +
+
+
+ The relative tolerance for the drop in the ELBO to be considered as a red flag. It usually happens around convergence that the objective fluctuates due to numerical errors. This is a way to differentiate such random fluctuations from actual drops in the objective. + |
+
+ 1e-06
+ |
+
patience |
+ + | +
+
+
+ The maximum number of times the objective is allowed to drop before termination. + |
+
+ 5
+ |
+
viprs/model/gridsearch/VIPRSGrid.py
250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 +408 +409 +410 +411 +412 +413 +414 +415 +416 +417 +418 +419 +420 +421 +422 +423 +424 +425 |
|
init_optim_meta()
+
+¶Initialize the various quantities/objects to keep track of the optimization process. + This method initializes the "history" object (which keeps track of the objective + other + hyperparameters requested by the user), in addition to the OptimizeResult objects.
+ +viprs/model/gridsearch/VIPRSGrid.py
initialize_theta(theta_0=None)
+
+¶Initialize the global hyperparameters of the model.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
theta_0 |
+ + | +
+
+
+ A dictionary of initial values for the hyperparameters theta + |
+
+ None
+ |
+
viprs/model/gridsearch/VIPRSGrid.py
to_theta_table()
+
+¶Returns:
+Type | +Description | +
---|---|
+ | +
+
+
+ A |
+
viprs/model/gridsearch/VIPRSGrid.py
to_validation_table()
+
+¶Returns:
+Type | +Description | +
---|---|
+ | +
+
+
+ The validation table summarizing the performance of each model. + |
+
Raises:
+Type | +Description | +
---|---|
+ ValueError
+ |
+
+
+
+ if the validation result is not set. + |
+
viprs/model/gridsearch/VIPRSGrid.py
write_validation_result(v_filename, sep='\t')
+
+¶After performing hyperparameter search, write a table +that records the value of the objective for each combination +of hyperparameters.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
v_filename |
+ + | +
+
+
+ The filename for the validation table. + |
+ + required + | +
sep |
+ + | +
+
+
+ The separator for the validation table + |
+
+ '\t'
+ |
+
viprs/model/gridsearch/VIPRSGrid.py
VIPRSGridSearch
+
+
+¶
+ Bases: VIPRSGrid
The VIPRSGridSearch
class is an extension of the VIPRSGrid
class that
+implements grid search for the VIPRS
models. The grid search procedure
+fits multiple models to the data, each with different hyperparameters,
+and selects the best model based on user-defined criteria.
The criteria supported are:
+ELBO
: The model with the highest ELBO is selected.validation
: The model with the highest R^2 on the validation set is selected.pseudo_validation
: The model with the highest pseudo-validation R^2 is selected.Note that the validation
and pseudo_validation
criteria require the user to provide
+validation data in the form of paired genotype/phenotype data or external GWAS summary
+statistics.
viprs/model/gridsearch/VIPRSGridSearch.py
6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + 15 + 16 + 17 + 18 + 19 + 20 + 21 + 22 + 23 + 24 + 25 + 26 + 27 + 28 + 29 + 30 + 31 + 32 + 33 + 34 + 35 + 36 + 37 + 38 + 39 + 40 + 41 + 42 + 43 + 44 + 45 + 46 + 47 + 48 + 49 + 50 + 51 + 52 + 53 + 54 + 55 + 56 + 57 + 58 + 59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 |
|
__init__(gdl, grid, **kwargs)
+
+¶Initialize the VIPRSGridSearch
model.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
gdl |
+ + | +
+
+
+ An instance of |
+ + required + | +
grid |
+ + | +
+
+
+ An instance of |
+ + required + | +
kwargs |
+ + | +
+
+
+ Additional keyword arguments to pass to the parent |
+
+ {}
+ |
+
viprs/model/gridsearch/VIPRSGridSearch.py
select_best_model(validation_gdl=None, criterion='ELBO')
+
+¶From the grid of models that were fit to the data, select the best
+model according to the specified criterion
. If the criterion is the ELBO,
+the model with the highest ELBO will be selected. If the criterion is
+validation or pseudo-validation, the model with the highest R^2 on the
+validation set will be selected.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
validation_gdl |
+ + | +
+
+
+ An instance of |
+
+ None
+ |
+
criterion |
+ + | +
+
+
+ The criterion for selecting the best model. Options are: ( |
+
+ 'ELBO'
+ |
+
viprs/model/gridsearch/VIPRSGridSearch.py
39 + 40 + 41 + 42 + 43 + 44 + 45 + 46 + 47 + 48 + 49 + 50 + 51 + 52 + 53 + 54 + 55 + 56 + 57 + 58 + 59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 |
|
VIPRS
models.plot_history(prs_model, quantity=None)
+
+¶This function plots the optimization history for various model parameters and/or objectives. For +every iteration step, we generally save quantities such as the ELBO, the heritability, etc. For the purposes +of debugging and checking model convergence, it is useful to visually observe the trajectory +of these quantities as a function of training iteration.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
prs_model |
+ + | +
+
+
+ A |
+ + required + | +
quantity |
+ + | +
+
+
+ The quantities to plot (e.g. |
+
+ None
+ |
+
viprs/plot/diagnostics.py
OptimizeResult
+
+
+¶
+ Bases: object
A class to store the results/progress of an optimization algorithm.
+Similar to the OptimizeResult
class from scipy.optimize
,
+but with a few additional fields.
viprs/utils/OptimizeResult.py
iterations
+
+
+ property
+
+
+¶Return the number of iterations at its current value.
+objective
+
+
+ property
+
+
+¶Return the objective function value at its current value.
+reset()
+
+¶Reset the stored values to their initial state.
+ + +update(fun, stop_iteration=False, success=False, message=None, increment=True)
+
+¶Update the stored values with new values.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
fun |
+ + | +
+
+
+ The new objective function value + |
+ + required + | +
stop_iteration |
+ + | +
+
+
+ A flag to indicate whether the optimization algorithm has stopped iterating + |
+
+ False
+ |
+
success |
+ + | +
+
+
+ A flag to indicate whether the optimization algorithm has succeeded + |
+
+ False
+ |
+
message |
+ + | +
+
+
+ A detailed message about the optimization result. + |
+
+ None
+ |
+
increment |
+ + | +
+
+
+ A flag to indicate whether to increment the number of iterations. + |
+
+ True
+ |
+
viprs/utils/OptimizeResult.py
dict_concat(d, axis=0)
+
+¶Concatenate the values of a dictionary into a single vector
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
d |
+ + | +
+
+
+ A dictionary where values are numeric scalars or vectors + |
+ + required + | +
axis |
+ + | +
+
+
+ Concatenate along given axis. + |
+
+ 0
+ |
+
viprs/utils/compute_utils.py
dict_dot(d1, d2)
+
+¶Perform dot product on the elements of d1 and d2
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
d1 |
+ + | +
+
+
+ A dictionary where values are numeric scalars or vectors + |
+ + required + | +
d2 |
+ + | +
+
+
+ A dictionary where values are numeric scalars or vectors + |
+ + required + | +
viprs/utils/compute_utils.py
dict_elementwise_dot(d1, d2)
+
+¶Apply element-wise product between the values of two dictionaries
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
d1 |
+ + | +
+
+
+ A dictionary where values are numeric scalars or vectors + |
+ + required + | +
d2 |
+ + | +
+
+
+ A dictionary where values are numeric scalars or vectors + |
+ + required + | +
viprs/utils/compute_utils.py
dict_elementwise_transform(d, transform)
+
+¶Apply a transformation to values of a dictionary
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
d |
+ + | +
+
+
+ A dictionary where values are numeric scalars or vectors + |
+ + required + | +
transform |
+ + | +
+
+
+ A function to apply to each value of the dictionary. + |
+ + required + | +
viprs/utils/compute_utils.py
dict_mean(d, axis=None)
+
+¶Estimate the mean of the values of a dictionary
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
d |
+ + | +
+
+
+ A dictionary where values are numeric scalars or vectors + |
+ + required + | +
axis |
+ + | +
+
+
+ Perform aggregation along given axis. + |
+
+ None
+ |
+
viprs/utils/compute_utils.py
dict_repeat(value, shapes)
+
+¶Given a value, create a dictionary where the value is repeated +according to the shapes parameter
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
shapes |
+ + | +
+
+
+ A dictionary of shapes. Key is arbitrary, value is integer input to np.repeat + |
+ + required + | +
value |
+ + | +
+
+
+ The value to repeat + |
+ + required + | +
viprs/utils/compute_utils.py
dict_set(d, value)
+
+¶Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
d |
+ + | +
+
+
+ A dictionary where values are numeric vectors + |
+ + required + | +
value |
+ + | +
+
+
+ A value to set for all vectors + |
+ + required + | +
dict_sum(d, axis=None, transform=None)
+
+¶Estimate the sum of the values of a dictionary
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
d |
+ + | +
+
+
+ A dictionary where values are numeric scalars or vectors + |
+ + required + | +
axis |
+ + | +
+
+
+ Perform aggregation along given axis. + |
+
+ None
+ |
+
transform |
+ + | +
+
+
+ Transformation to apply before summing. + |
+
+ None
+ |
+
viprs/utils/compute_utils.py
expand_column_names(c_name, shape, sep='_')
+
+¶Given a desired column name c_name
and a matrix shape
+that we'd like to apply the column name to, return a list of
+column names for every column in the matrix. The column names will be
+in the form of c_name
followed by an index, separated by sep
.
For example, if the column name is BETA
, the
+shape is (100, 3) and the separator is _
, we return a list with:
+[BETA_0
, BETA_1
, BETA_2
]
If the matrix in question is a vector, we just return the column name +without any indices appended to it.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
c_name |
+ + | +
+
+
+ A string object + |
+ + required + | +
shape |
+ + | +
+
+
+ The shape of a numpy matrix or vector + |
+ + required + | +
sep |
+ + | +
+
+
+ The separator + |
+
+ '_'
+ |
+
viprs/utils/compute_utils.py
fits_in_memory(alloc_size, max_prop=0.9)
+
+¶Check whether there's enough memory resources to load an object +with the given allocation size (in MB).
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
alloc_size |
+ + | +
+
+
+ The allocation size + |
+ + required + | +
max_prop |
+ + | +
+
+
+ The maximum proportion of available memory allowed for the object + |
+
+ 0.9
+ |
+
viprs/utils/compute_utils.py
download_ukb_wb_ld_matrix(target_dir='.', chromosome=None)
+
+¶Download the LD matrix for the White British samples in the UK Biobank.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
target_dir |
+ + | +
+
+
+ The path or directory where to store the LD matrix + |
+
+ '.'
+ |
+
chromosome |
+ + | +
+
+
+ An integer or list of integers with the chromosome numbers for which to download the LD matrices from Zenodo. + |
+
+ None
+ |
+
viprs/utils/data_utils.py