diff --git a/fence/config-default.yaml b/fence/config-default.yaml index a570989c0..d2c0ca217 100755 --- a/fence/config-default.yaml +++ b/fence/config-default.yaml @@ -558,7 +558,7 @@ dbGaP: # # If `parse_consent_code` is true, then a user will be given access to the exact # same consent codes in the child studies - parent_to_child_studies_mapping: + parent_to_child_studies_mapping: {} # 'phs001194': ['phs000571', 'phs001843'] # A consent of "c999" can indicate access to that study's "exchange area data" # and when a user has access to one study's exchange area data, they diff --git a/fence/config.py b/fence/config.py index d981bfd38..ea2be1443 100644 --- a/fence/config.py +++ b/fence/config.py @@ -1,3 +1,4 @@ +import collections import os from yaml import safe_load as yaml_load import urllib.parse @@ -6,6 +7,7 @@ from gen3config import Config from cdislogging import get_logger +from typing import List, Dict, Any logger = get_logger(__name__) @@ -150,28 +152,114 @@ def post_process(self): logger.warning( f"IdP '{idp_id}' is using multifactor_auth_claim_info '{mfa_info['claim']}', which is neither AMR or ACR. Unable to determine if a user used MFA. Fence will continue and assume they have not used MFA." ) + self._validate_dbgap_config(self._configs["dbGaP"]) - self._validate_parent_child_studies(self._configs["dbGaP"]) + @staticmethod + def _get_parent_studies_safely(dbgap_config): + """ + This will get a list of parent ids from the dbgap config's mapping property + or return an empty array if the entry doesn't exist + Args: + dbgap_config: { parent_to_child_studies_mapping: { k1: v1, ... } } + Returns: + [k1, k2, ...] 
+ """ + study_mapping = dbgap_config.get('parent_to_child_studies_mapping', {}) + # study mapping could be 'None' + safe_studies = list(study_mapping.keys()) if isinstance(study_mapping, dict) else [] + return safe_studies + # TODO(fix-it): These functions should go in utils.py, but cannot be migrated until + # the variable DEFAULT_BACKOFF_SETTINGS is migrated here @staticmethod - def _validate_parent_child_studies(dbgap_configs): - if isinstance(dbgap_configs, list): - configs = dbgap_configs - else: - configs = [dbgap_configs] - - all_parent_studies = set() - for dbgap_config in configs: - parent_studies = dbgap_config.get( - "parent_to_child_studies_mapping", {} - ).keys() - conflicts = parent_studies & all_parent_studies - if len(conflicts) > 0: - raise Exception( - f"{conflicts} are duplicate parent study ids found in parent_to_child_studies_mapping for " - f"multiple dbGaP configurations." - ) - all_parent_studies.update(parent_studies) + def _some(predicate, iterable, found_nothing_value=None): + """ Returns the first element that satisfies the predicate, else fail value + Expects predicate to be a boolean lambda and iterable to be a collection + """ + for item in iterable: + result = predicate(item) + if result: + return item + return found_nothing_value + + @staticmethod + def _coerce_to_array(unknown_data_structure, exception_message="Unrecognized data structure, aborting!"): + """ + Given a dubious data structure, coerces it into a list + depending on the data type. 
+ Currently, handles list, dict, and None + Args: + exception_message: any string for more context + unknown_data_structure: either a list, dict, or None; fails otherwise + + Returns: a list of some kind depending on the provided data structure + + note: fails if the unknown data structure is any other type + """ + # TODO: these three functions can be moved out on their own when migrated to utils.py + identity = lambda v: v + wrap_in_array = lambda v: [v] + empty_array = lambda _: [] + # END TODO + data_is = lambda data_type: isinstance(unknown_data_structure, data_type) + type_to_coercion = { + list: identity, + dict: wrap_in_array, + type(None): empty_array, + } + data_type_of_parameter = FenceConfig._some(data_is, list(type_to_coercion.keys()), TypeError) + if data_type_of_parameter is TypeError: + raise ValueError(exception_message + f" | Type: {type(unknown_data_structure)}") + return type_to_coercion[data_type_of_parameter](unknown_data_structure) + + @staticmethod + def _find_duplicates(collection): + """ + + Args: + collection: any data type accepted by the Counter object + + Returns: + a set of items that were found more than once in the collection + + note: if collection is a dictionary, counter will treat it as a + duplicates mapping! + """ + item_with_occurrences = collections.Counter(collection) + duplicates = {item + for item, occurrences in item_with_occurrences.items() + if occurrences > 1} + return duplicates + + # END TODO + + + @staticmethod + def _validate_parent_child_studies(dbgap_configs: List[Dict[str, Any]]) -> None: + """ + Given a list of dictionaries that contain a parent -> child study mapping, + this function will check that all parents are unique and not duplicated + Args: + dbgap_configs: [ { parent_to_child_studies_mapping: { k1: v1, ... } }, ... ] + + Note: This function will throw if duplicates are found. 
+ """ + safe_list_of_parent_studies = [] + for dbgap_config in dbgap_configs: + safe_parent_studies = FenceConfig._get_parent_studies_safely(dbgap_config) + safe_list_of_parent_studies.extend(safe_parent_studies) + + duplicates = FenceConfig._find_duplicates(safe_list_of_parent_studies) + if len(duplicates) > 0: + raise Exception( + f"{duplicates} are duplicate parent study ids found in parent_to_child_studies_mapping for " + f"multiple dbGaP configurations.") + + @staticmethod + def _validate_dbgap_config(dbgap_configs): + error_message = "The dbgap configuration is not a recognized data structure, aborting!" + corrected_dbgap_configs = FenceConfig._coerce_to_array(dbgap_configs, error_message) + FenceConfig._validate_parent_child_studies(corrected_dbgap_configs) config = FenceConfig(DEFAULT_CFG_PATH) diff --git a/fence/scripting/fence_create.py b/fence/scripting/fence_create.py index a4b15aff8..41271d411 100644 --- a/fence/scripting/fence_create.py +++ b/fence/scripting/fence_create.py @@ -57,10 +57,9 @@ from fence.config import config from fence.sync.sync_users import UserSyncer from fence.utils import ( - create_client, get_valid_expiration, generate_client_credentials, - get_SQLAlchemyDriver, + get_SQLAlchemyDriver, logger, ) from sqlalchemy.orm.attributes import flag_modified from gen3authz.client.arborist.client import ArboristClient @@ -1824,3 +1823,69 @@ def access_token_polling_job( with driver.session as db_session: loop = asyncio.get_event_loop() loop.run_until_complete(job.update_tokens(db_session)) + + +def create_client( + DB, + username=None, + urls=[], + name="", + description="", + auto_approve=False, + is_admin=False, + grant_types=None, + confidential=True, + arborist=None, + policies=None, + allowed_scopes=None, + expires_in=None, +): + client_id, client_secret, hashed_secret = generate_client_credentials(confidential) + if arborist is not None: + arborist.create_client(client_id, policies) + driver = get_SQLAlchemyDriver(DB) + auth_method = 
"client_secret_basic" if confidential else "none" + + allowed_scopes = allowed_scopes or config["CLIENT_ALLOWED_SCOPES"] + if not set(allowed_scopes).issubset(set(config["CLIENT_ALLOWED_SCOPES"])): + raise ValueError( + "Each allowed scope must be one of: {}".format( + config["CLIENT_ALLOWED_SCOPES"] + ) + ) + + if "openid" not in allowed_scopes: + allowed_scopes.append("openid") + logger.warning('Adding required "openid" scope to list of allowed scopes.') + + with driver.session as s: + user = None + if username: + user = query_for_user(session=s, username=username) + if not user: + user = User(username=username, is_admin=is_admin) + s.add(user) + + if s.query(Client).filter(Client.name == name).first(): + if arborist is not None: + arborist.delete_client(client_id) + raise Exception("client {} already exists".format(name)) + + client = Client( + client_id=client_id, + client_secret=hashed_secret, + user=user, + redirect_uris=urls, + allowed_scopes=" ".join(allowed_scopes), + description=description, + name=name, + auto_approve=auto_approve, + grant_types=grant_types, + is_confidential=confidential, + token_endpoint_auth_method=auth_method, + expires_in=expires_in, + ) + s.add(client) + s.commit() + + return client_id, client_secret diff --git a/fence/scripting/google_monitor.py b/fence/scripting/google_monitor.py index ad232519d..ae5be6b9e 100644 --- a/fence/scripting/google_monitor.py +++ b/fence/scripting/google_monitor.py @@ -7,6 +7,7 @@ """ import traceback +import requests from gen3cirrus.google_cloud.iam import GooglePolicyMember from gen3cirrus import GoogleCloudManager from gen3cirrus.google_cloud.errors import GoogleAPIError @@ -37,7 +38,7 @@ from fence import utils from fence.config import config from fence.models import User -from fence.errors import Unauthorized +from fence.errors import Unauthorized, NotFound logger = get_logger(__name__) @@ -445,6 +446,52 @@ def _get_user_email_list_from_google_project_with_owner_role(project_id): ) +def 
send_email(from_email, to_emails, subject, text, smtp_domain): + """ + Send email to group of emails using mail gun api. + + https://app.mailgun.com/ + + Args: + from_email(str): from email + to_emails(list): list of emails to receive the messages + text(str): the text message + smtp_domain(dict): smtp domain server + + { + "smtp_hostname": "smtp.mailgun.org", + "default_login": "postmaster@mailgun.planx-pla.net", + "api_url": "https://api.mailgun.net/v3/mailgun.planx-pla.net", + "smtp_password": "password", # pragma: allowlist secret + "api_key": "api key" # pragma: allowlist secret + } + + Returns: + Http response + + Exceptions: + KeyError + + """ + if smtp_domain not in config["GUN_MAIL"] or not config["GUN_MAIL"].get( + smtp_domain + ).get("smtp_password"): + raise NotFound( + "SMTP Domain '{}' does not exist in configuration for GUN_MAIL or " + "smtp_password was not provided. " + "Cannot send email.".format(smtp_domain) + ) + + api_key = config["GUN_MAIL"][smtp_domain].get("api_key", "") + email_url = config["GUN_MAIL"][smtp_domain].get("api_url", "") + "/messages" + + return requests.post( + email_url, + auth=("api", api_key), + data={"from": from_email, "to": to_emails, "subject": subject, "text": text}, + ) + + def _send_emails_informing_service_account_removal( to_emails, invalid_service_account_reasons, invalid_project_reasons, project_id ): diff --git a/fence/utils.py b/fence/utils.py index 463fb6f75..345ac1d35 100644 --- a/fence/utils.py +++ b/fence/utils.py @@ -1,3 +1,9 @@ +""" +This file is for functions that are generalized enough to be used in varied places that need not be related. +Functions placed here are/should be low level in terms of composition, and thus references to other modules +in this project should be used sparingly. 
+""" + import bcrypt import collections from functools import wraps @@ -6,17 +12,14 @@ from random import SystemRandom import re import string -import requests from urllib.parse import urlencode from urllib.parse import parse_qs, urlsplit, urlunsplit import sys from cdislogging import get_logger import flask -from werkzeug.datastructures import ImmutableMultiDict -from fence.models import Client, User, query_for_user -from fence.errors import NotFound, UserError +from fence.errors import UserError from fence.config import config from authlib.oauth2.rfc6749.util import scope_to_list from authlib.oauth2.rfc6749.errors import InvalidScopeError @@ -57,97 +60,6 @@ def generate_client_credentials(confidential): return client_id, client_secret, hashed_secret -def create_client( - DB, - username=None, - urls=[], - name="", - description="", - auto_approve=False, - is_admin=False, - grant_types=None, - confidential=True, - arborist=None, - policies=None, - allowed_scopes=None, - expires_in=None, -): - client_id, client_secret, hashed_secret = generate_client_credentials(confidential) - if arborist is not None: - arborist.create_client(client_id, policies) - driver = get_SQLAlchemyDriver(DB) - auth_method = "client_secret_basic" if confidential else "none" - - allowed_scopes = allowed_scopes or config["CLIENT_ALLOWED_SCOPES"] - if not set(allowed_scopes).issubset(set(config["CLIENT_ALLOWED_SCOPES"])): - raise ValueError( - "Each allowed scope must be one of: {}".format( - config["CLIENT_ALLOWED_SCOPES"] - ) - ) - - if "openid" not in allowed_scopes: - allowed_scopes.append("openid") - logger.warning('Adding required "openid" scope to list of allowed scopes.') - - with driver.session as s: - user = None - if username: - user = query_for_user(session=s, username=username) - if not user: - user = User(username=username, is_admin=is_admin) - s.add(user) - - if s.query(Client).filter(Client.name == name).first(): - if arborist is not None: - arborist.delete_client(client_id) - 
raise Exception("client {} already exists".format(name)) - - client = Client( - client_id=client_id, - client_secret=hashed_secret, - user=user, - redirect_uris=urls, - allowed_scopes=" ".join(allowed_scopes), - description=description, - name=name, - auto_approve=auto_approve, - grant_types=grant_types, - is_confidential=confidential, - token_endpoint_auth_method=auth_method, - expires_in=expires_in, - ) - s.add(client) - s.commit() - - return client_id, client_secret - - -def hash_secret(f): - @wraps(f) - def wrapper(*args, **kwargs): - has_secret = "client_secret" in flask.request.form - has_client_id = "client_id" in flask.request.form - if flask.request.form and has_secret and has_client_id: - form = flask.request.form.to_dict() - with flask.current_app.db.session as session: - client = ( - session.query(Client) - .filter(Client.client_id == form["client_id"]) - .first() - ) - if client: - form["client_secret"] = bcrypt.hashpw( - form["client_secret"].encode("utf-8"), - client.client_secret.encode("utf-8"), - ).decode("utf-8") - flask.request.form = ImmutableMultiDict(form) - - return f(*args, **kwargs) - - return wrapper - - def wrap_list_required(f): @wraps(f) def wrapper(d, *args, **kwargs): @@ -254,52 +166,6 @@ def split_url_and_query_params(url): return url, query_params -def send_email(from_email, to_emails, subject, text, smtp_domain): - """ - Send email to group of emails using mail gun api. 
- - https://app.mailgun.com/ - - Args: - from_email(str): from email - to_emails(list): list of emails to receive the messages - text(str): the text message - smtp_domain(dict): smtp domain server - - { - "smtp_hostname": "smtp.mailgun.org", - "default_login": "postmaster@mailgun.planx-pla.net", - "api_url": "https://api.mailgun.net/v3/mailgun.planx-pla.net", - "smtp_password": "password", # pragma: allowlist secret - "api_key": "api key" # pragma: allowlist secret - } - - Returns: - Http response - - Exceptions: - KeyError - - """ - if smtp_domain not in config["GUN_MAIL"] or not config["GUN_MAIL"].get( - smtp_domain - ).get("smtp_password"): - raise NotFound( - "SMTP Domain '{}' does not exist in configuration for GUN_MAIL or " - "smtp_password was not provided. " - "Cannot send email.".format(smtp_domain) - ) - - api_key = config["GUN_MAIL"][smtp_domain].get("api_key", "") - email_url = config["GUN_MAIL"][smtp_domain].get("api_url", "") + "/messages" - - return requests.post( - email_url, - auth=("api", api_key), - data={"from": from_email, "to": to_emails, "subject": subject, "text": text}, - ) - - def get_valid_expiration_from_request( expiry_param="expires_in", max_limit=None, default=None ): @@ -428,6 +294,8 @@ def get_SQLAlchemyDriver(db_conn_url): # Default settings to control usage of backoff library. +# TODO(fix-it): this variable should be moved to config.py to remove the reliance on the config +# module. Many files reference this property, so it should be handled in its own PR. 
DEFAULT_BACKOFF_SETTINGS = { "on_backoff": log_backoff_retry, "on_giveup": log_backoff_giveup, diff --git a/tests/scripting/test_fence-create.py b/tests/scripting/test_fence-create.py index deae90d34..4b1e439b6 100644 --- a/tests/scripting/test_fence-create.py +++ b/tests/scripting/test_fence-create.py @@ -6,13 +6,12 @@ import pytest import gen3cirrus -from gen3cirrus.google_cloud.errors import GoogleAuthError from userdatamodel.models import Group from fence.config import config from fence.errors import UserError from fence.jwt.validate import validate_jwt -from fence.utils import create_client, get_SQLAlchemyDriver +from fence.utils import get_SQLAlchemyDriver from fence.models import ( AccessPrivilege, Project, diff --git a/tests/test_app_config.py b/tests/test_app_config.py index ec7b7b8b7..8ee46ec84 100755 --- a/tests/test_app_config.py +++ b/tests/test_app_config.py @@ -120,6 +120,20 @@ def test_app_config(): patcher.stop() +def test_validate_parent_child_studies_against_none_config(): + """ + our dbgap config can have nothing for the parent_to_child_studies_mapping property + this test ensures the validator handles that case + """ + none_string_config = [{"parent_to_child_studies_mapping": 'None'}, + {"parent_to_child_studies_mapping": 'None'},] + + try: + FenceConfig._validate_parent_child_studies(none_string_config) + except Exception: + pytest.fail("Study validation failed when given 'None' mapping!") + + def test_app_config_parent_child_study_mapping(monkeypatch): invalid_dbgap_configs = [ { @@ -137,3 +151,22 @@ def test_app_config_parent_child_study_mapping(monkeypatch): ] with pytest.raises(Exception): FenceConfig._validate_parent_child_studies(invalid_dbgap_configs) + + valid_dbgap_configs = [ + { + "parent_to_child_studies_mapping": { + "phs001194": ["phs000571", "phs001843"], + "phs001193": ["phs000572", "phs001844"], + } + }, + { + "parent_to_child_studies_mapping": { + "phs001195": ["phs0015623"], + "phs001192": ["phs0001", "phs002"], + } + }, + ] 
+ try: + FenceConfig._validate_parent_child_studies(valid_dbgap_configs) + except Exception: + pytest.fail("Study validation failed when it should have passed!") diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 000000000..93cb95972 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,46 @@ +import pytest + +from fence.config import FenceConfig + + +def test_find_duplicates(): + """ + Tests that the find duplicates function handles different kinds of duplicates cases + """ + dup_case = FenceConfig._find_duplicates([1, 1, 2]) + unique_case = FenceConfig._find_duplicates([1, 2, 3]) + tuple_dup_case = FenceConfig._find_duplicates((1, 1, 1, 2, 2)) + tuple_unique_case = FenceConfig._find_duplicates((1, 2, 3)) + assert (dup_case == {1} and unique_case == set() and + tuple_dup_case == {1, 2} and tuple_unique_case == set()) + + +def test_coerce_to_array(): + """ + Tests that we get arrays back in expected cases + """ + identity_case = FenceConfig._coerce_to_array([1, 2, 3]) + wrap_case = FenceConfig._coerce_to_array({"foo": "bar"}) + none_case = FenceConfig._coerce_to_array(None) + assert (identity_case == [1, 2, 3] and wrap_case == [{"foo": "bar"}] and + none_case == []) + + test_message = "foo" + with pytest.raises(ValueError): + bad_case = FenceConfig._coerce_to_array("uh oh") + with pytest.raises(ValueError): + bad_case_custom = FenceConfig._coerce_to_array("uh oh", test_message) + + +def test_some(): + """ + Tests that we get the first passing value out of a list in expected + use cases + """ + test_pred = lambda v: 0 < v < 2 + one_case = FenceConfig._some(test_pred, [3, 2, 1]) + two_case = FenceConfig._some(test_pred, [3, 2, 0.5, 2, 1]) + none_case = FenceConfig._some(test_pred, [0, 0, 0]) + custom_none_case = FenceConfig._some(test_pred, [0], "foo") + assert (one_case == 1 and two_case == 0.5 and + none_case is None and custom_none_case == "foo")