Improve phenotype analysis result sanity checks.

monarch-initiative · Sep 23, 2024 · 8a9dc2c · 8a9dc2c
1 parent d8b3974
commit 8a9dc2c
Show file tree

Hide file tree

Showing 2 changed files with 55 additions and 12 deletions.
diff --git a/src/gpsea/analysis/pcats/_impl.py b/src/gpsea/analysis/pcats/_impl.py
@@ -309,8 +309,37 @@ def __init__(
         self._corrected_pvals = (
             None if corrected_pvals is None else tuple(corrected_pvals)
         )
-        assert isinstance(gt_predicate, GenotypePolyPredicate)
         self._gt_predicate = gt_predicate
+        errors = self._check_sanity()
+        if errors:
+            raise ValueError(os.linesep.join(errors))
+
+    def _check_sanity(self) -> typing.Sequence[str]:
+        """Collect messages describing inconsistent inputs; empty if all is well."""
+        n_predicates = len(self._pheno_predicates)
+        problems = []
+        # The required sequences must have one item per phenotype predicate ...
+        required = {
+            'n_usable': self._n_usable,
+            'all_counts': self._all_counts,
+            'pvals': self._pvals,
+        }
+        for name, seq in required.items():
+            if len(seq) != n_predicates:
+                problems.append(
+                    f"`len(pheno_predicates)` must be the same as `len({name})` but "
+                    f"{len(self._pheno_predicates)}!={len(seq)}"
+                )
+        # ... and so must the corrected p values, if they are available.
+        if self._corrected_pvals is not None:
+            if len(self._corrected_pvals) != n_predicates:
+                problems.append(
+                    f"`len(pheno_predicates)` must be the same as `len(corrected_pvals)` but "
+                    f"{len(self._pheno_predicates)}!={len(self._corrected_pvals)}"
+                )
+        if not isinstance(self._gt_predicate, GenotypePolyPredicate):
+            problems.append("`gt_predicate` must be an instance of `GenotypePolyPredicate`")
+        return problems
 
     @property
     def gt_predicate(self) -> GenotypePolyPredicate:
@@ -428,6 +457,19 @@ def __init__(
         self._mtc_filter_name = mtc_filter_name
         self._mtc_filter_results = tuple(mtc_filter_results)
         self._mtc_name = mtc_name
+
+        errors = self._check_hpo_result_sanity()
+        if errors:
+            raise ValueError(os.linesep.join(errors))
+
+    def _check_hpo_result_sanity(self) -> typing.Sequence[str]:
+        """Return a message if MTC filter results do not match phenotype predicates."""
+        if len(self._pheno_predicates) == len(self._mtc_filter_results):
+            return []
+        return [
+            f"`len(pheno_predicates)` must be the same as `len(mtc_filter_results)` but "
+            f"{len(self._pheno_predicates)}!={len(self._mtc_filter_results)}"
+        ]
 
     @property
     def mtc_filter_name(self) -> str:

diff --git a/tests/analysis/pcats/test_hpo_term_analysis.py b/tests/analysis/pcats/test_hpo_term_analysis.py
@@ -1,6 +1,7 @@
 import typing
 
 import hpotk
+import numpy as np
 import pytest
 
 from gpsea.model import Cohort
@@ -64,7 +65,7 @@ def test_compare_genotype_vs_phenotypes(
                 0.48571428571428565,
                 float("nan"),
                 0.1048951048951049,
-                1.,
+                1.0,
             ],
             nan_ok=True,
         )
@@ -80,21 +81,21 @@ def test_compare_genotype_vs_phenotypes(
             nan_ok=True,
         )
 
-    def test_compare_genotype_vs_phenotypes_explodes_if_no_phenotypes_are_left_after_mtc_filter(
+    def test_compare_genotype_vs_phenotypes_can_handle_if_no_phenotypes_are_left_after_mtc_filter(
         self,
         analysis: HpoTermAnalysis,
         degenerated_cohort: Cohort,
         suox_gt_predicate: GenotypePolyPredicate,
         suox_pheno_predicates: typing.Sequence[PhenotypePolyPredicate[hpotk.TermId]],
     ):
-        with pytest.raises(ValueError) as e:
-            analysis.compare_genotype_vs_phenotypes(
-                cohort=degenerated_cohort,
-                gt_predicate=suox_gt_predicate,
-                pheno_predicates=suox_pheno_predicates,
-            )
+        result = analysis.compare_genotype_vs_phenotypes(
+            cohort=degenerated_cohort,
+            gt_predicate=suox_gt_predicate,
+            pheno_predicates=suox_pheno_predicates,
+        )
 
         assert (
-            e.value.args[0]
-            == "No phenotypes are left for the analysis after MTC filtering step"
-        )
+            result.total_tests == 0
+        ), "No tests should have been done due to MTC filtering"
+        assert np.all(np.isnan(result.pvals)), "All p values should be NaN"
+        assert result.corrected_pvals is None