Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle 'None' string in fence config #1161

Open
wants to merge 24 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
5dc1da9
adding default empty dict
AlbertSnows Jul 18, 2024
67dc533
adding sanity happy path test
AlbertSnows Jul 18, 2024
19384ad
redesigning code to handle none case without getting too much more co…
AlbertSnows Jul 18, 2024
9032344
gave a better name
AlbertSnows Jul 18, 2024
fdc937c
adding typing
AlbertSnows Jul 18, 2024
ed05a13
adding more state handling
AlbertSnows Jul 18, 2024
7c1ffe4
Trigger GitHub Actions workflow
AlbertSnows Jul 22, 2024
9ee8a78
Testing
k-burt-uch Jul 18, 2024
174424f
fix(PXP-11364): Fix failing dbgap sync unit test
k-burt-uch Jul 22, 2024
532dcc5
Trigger GitHub Actions workflow
AlbertSnows Jul 22, 2024
48ee3e0
Merge branch 'master' into fix/handle_none_string_v2
AlbertSnows Jul 23, 2024
8c4a9b7
moving to separate unit test
AlbertSnows Jul 23, 2024
5e51743
Merge branch 'master' into fix/handle_none_string_v2
AlbertSnows Jul 23, 2024
491140f
Merge branch 'master' into fix/handle_none_string_v2
AlbertSnows Jul 31, 2024
d9bc79f
Merge branch 'master' into fix/handle_none_string_v2
AlbertSnows Aug 7, 2024
a06ecaf
more documentation
AlbertSnows Aug 9, 2024
65edbd9
Merge branch 'master' into fix/handle_none_string_v2
Avantol13 Aug 9, 2024
2b63f57
more documentation
AlbertSnows Aug 9, 2024
95db2db
Merge remote-tracking branch 'origin/fix/handle_none_string_v2' into …
AlbertSnows Aug 12, 2024
8e60f67
cleaning up references in test fence create
AlbertSnows Aug 12, 2024
528feac
Merge branch 'master' into fix/handle_none_string_v2
AlbertSnows Aug 12, 2024
06cebff
documentation + minor rename
AlbertSnows Aug 12, 2024
af9c5b1
fix name, add description
AlbertSnows Sep 3, 2024
e2d5fd7
Merge branch 'master' into fix/handle_none_string_v2
AlbertSnows Sep 3, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion fence/config-default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,7 @@ dbGaP:
#
# If `parse_consent_code` is true, then a user will be given access to the exact
# same consent codes in the child studies
parent_to_child_studies_mapping:
parent_to_child_studies_mapping: {}
# 'phs001194': ['phs000571', 'phs001843']
# A consent of "c999" can indicate access to that study's "exchange area data"
# and when a user has access to one study's exchange area data, they
Expand Down
84 changes: 67 additions & 17 deletions fence/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from gen3config import Config

from cdislogging import get_logger
from collections import Counter
from typing import List, Dict, Any

logger = get_logger(__name__)

Expand Down Expand Up @@ -150,28 +152,76 @@ def post_process(self):
logger.warning(
f"IdP '{idp_id}' is using multifactor_auth_claim_info '{mfa_info['claim']}', which is neither AMR or ACR. Unable to determine if a user used MFA. Fence will continue and assume they have not used MFA."
)
self._validate_dbgap_config(self._configs["dbGaP"])

self._validate_parent_child_studies(self._configs["dbGaP"])
@staticmethod
def find_duplicates(collection):
AlbertSnows marked this conversation as resolved.
Show resolved Hide resolved
AlbertSnows marked this conversation as resolved.
Show resolved Hide resolved
"""

Args:
collection: any data type accepted by the Counter object

Returns:
a set of items that were found more than once in the collection

note: if collection is a set, this function will never find duplicates
(per the definition of a set)
"""
item_with_occurrences = Counter(collection)
duplicates = {item
for item, occurrences in item_with_occurrences.items()
if occurrences > 1}
return duplicates

@staticmethod
def get_parent_studies_safely(dbgap_config):
"""
AlbertSnows marked this conversation as resolved.
Show resolved Hide resolved
Args:
dbgap_config: { parent_to_child_studies_mapping: { k1: v1, ... } }
Returns:
[k1, k2, ...]
"""
study_mapping = dbgap_config.get('parent_to_child_studies_mapping', {})
# study mapping could be 'None'
safe_studies = list(study_mapping.keys()) if isinstance(study_mapping, dict) else []
return safe_studies

@staticmethod
def _validate_parent_child_studies(dbgap_configs):
def validate_parent_child_studies(dbgap_configs: List[Dict[str, Any]]) -> None:
AlbertSnows marked this conversation as resolved.
Show resolved Hide resolved
"""
Given a list of dictionaries that contain a parent -> child study mapping,
this function will check that all parents are unique and not duplicated
Args:
dbgap_configs: [ { parent_to_child_studies_mapping: { k1: v1, ... } }, ... ]

Returns:
Nothing. This function will throw if duplicates are found.
AlbertSnows marked this conversation as resolved.
Show resolved Hide resolved
"""
safe_list_of_parent_studies = []
for dbgap_config in dbgap_configs:
safe_parent_studies = FenceConfig.get_parent_studies_safely(dbgap_config)
safe_list_of_parent_studies.extend(safe_parent_studies)

duplicates = FenceConfig.find_duplicates(safe_list_of_parent_studies)
if len(duplicates) > 0:
raise Exception(
f"{duplicates} are duplicate parent study ids found in parent_to_child_studies_mapping for "
f"multiple dbGaP configurations.")

@staticmethod
def enforce_is_array(dbgap_configs):
AlbertSnows marked this conversation as resolved.
Show resolved Hide resolved
if isinstance(dbgap_configs, list):
configs = dbgap_configs
corrected_dbgap_configs = dbgap_configs
elif isinstance(dbgap_configs, dict):
corrected_dbgap_configs = [dbgap_configs]
else:
configs = [dbgap_configs]

all_parent_studies = set()
for dbgap_config in configs:
parent_studies = dbgap_config.get(
"parent_to_child_studies_mapping", {}
).keys()
conflicts = parent_studies & all_parent_studies
if len(conflicts) > 0:
raise Exception(
f"{conflicts} are duplicate parent study ids found in parent_to_child_studies_mapping for "
f"multiple dbGaP configurations."
)
all_parent_studies.update(parent_studies)
raise Exception("The dbgap configuration is not a recognized data structure, aborting!")
return corrected_dbgap_configs

@staticmethod
def _validate_dbgap_config(dbgap_configs):
corrected_dbgap_configs = FenceConfig.enforce_is_array(dbgap_configs)
FenceConfig.validate_parent_child_studies(corrected_dbgap_configs)


config = FenceConfig(DEFAULT_CFG_PATH)
35 changes: 34 additions & 1 deletion tests/test_app_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,20 @@ def test_app_config():
patcher.stop()


def test_validate_parent_child_studies_against_none_config():
AlbertSnows marked this conversation as resolved.
Show resolved Hide resolved
"""
our dbgap config can have nothing for the parent_to_child_studies_mapping property
this test ensures the validator handles that case
"""
none_string_config = [{"parent_to_child_studies_mapping": 'None'},
{"parent_to_child_studies_mapping": 'None'},]

try:
FenceConfig.validate_parent_child_studies(none_string_config)
except Exception:
pytest.fail("Study validation failed when given 'None' mapping!")
AlbertSnows marked this conversation as resolved.
Show resolved Hide resolved


def test_app_config_parent_child_study_mapping(monkeypatch):
invalid_dbgap_configs = [
{
Expand All @@ -136,4 +150,23 @@ def test_app_config_parent_child_study_mapping(monkeypatch):
},
]
with pytest.raises(Exception):
FenceConfig._validate_parent_child_studies(invalid_dbgap_configs)
FenceConfig.validate_parent_child_studies(invalid_dbgap_configs)

valid_dbgap_configs = [
{
"parent_to_child_studies_mapping": {
"phs001194": ["phs000571", "phs001843"],
"phs001193": ["phs000572", "phs001844"],
}
},
{
"parent_to_child_studies_mapping": {
"phs001195": ["phs0015623"],
"phs001192": ["phs0001", "phs002"],
}
},
]
try:
FenceConfig.validate_parent_child_studies(valid_dbgap_configs)
except Exception:
pytest.fail("Study validation failed when it should have passed!")
AlbertSnows marked this conversation as resolved.
Show resolved Hide resolved
Loading