diff --git a/api/bin/create_erds.py b/api/bin/create_erds.py
index 11e12b1b7..117c2f822 100755
--- a/api/bin/create_erds.py
+++ b/api/bin/create_erds.py
@@ -12,7 +12,7 @@
 import src.db.models.staging.opportunity as staging_opportunity_models
 import src.db.models.staging.synopsis as staging_synopsis_models
 import src.logging
-from src.db.models import opportunity_models
+from src.db.models import agency_models, opportunity_models
 from src.db.models.transfer import topportunity_models
 
 logger = logging.getLogger(__name__)
@@ -23,7 +23,10 @@
 ERD_FOLDER = pathlib.Path(__file__).parent.resolve()
 
 # If we want to generate separate files for more specific groups, we can set that up here
-API_MODULES = (opportunity_models,)
+API_MODULES = (
+    opportunity_models,
+    agency_models,
+)
 STAGING_TABLE_MODULES = (
     staging_opportunity_models,
     staging_forecast_models,
diff --git a/api/src/adapters/search/opensearch_client.py b/api/src/adapters/search/opensearch_client.py
index cb97a9c8c..24d3bdb1d 100644
--- a/api/src/adapters/search/opensearch_client.py
+++ b/api/src/adapters/search/opensearch_client.py
@@ -1,5 +1,5 @@
 import logging
-from typing import Any, Sequence
+from typing import Any, Generator, Iterable
 
 import opensearchpy
 
@@ -75,7 +75,7 @@ def delete_index(self, index_name: str) -> None:
     def bulk_upsert(
         self,
         index_name: str,
-        records: Sequence[dict[str, Any]],
+        records: Iterable[dict[str, Any]],
         primary_key_field: str,
         *,
         refresh: bool = True
@@ -103,10 +103,51 @@ def bulk_upsert(
         logger.info(
             "Upserting records to %s",
             index_name,
-            extra={"index_name": index_name, "record_count": int(len(bulk_operations) / 2)},
+            extra={
+                "index_name": index_name,
+                "record_count": int(len(bulk_operations) / 2),
+                "operation": "update",
+            },
         )
         self._client.bulk(index=index_name, body=bulk_operations, refresh=refresh)
 
+    def bulk_delete(self, index_name: str, ids: Iterable[Any], *, refresh: bool = True) -> None:
+        """
+        Bulk delete records from an index
+
+        See: https://opensearch.org/docs/latest/api-reference/document-apis/bulk/ for details.
+        In this method, we delete records based on the IDs passed in.
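+
+        For example (an illustrative sketch, the index name and IDs here are assumptions)::
+
+            search_client.bulk_delete("my_index", [1, 2, 3])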
+ """ + bulk_operations = [] + + for _id in ids: + # { "delete": { "_id": "tt2229499" } } + bulk_operations.append({"delete": {"_id": _id}}) + + logger.info( + "Deleting records from %s", + index_name, + extra={ + "index_name": index_name, + "record_count": len(bulk_operations), + "operation": "delete", + }, + ) + self._client.bulk(index=index_name, body=bulk_operations, refresh=refresh) + + def index_exists(self, index_name: str) -> bool: + """ + Check if an index OR alias exists by a given name + """ + return self._client.indices.exists(index_name) + + def alias_exists(self, alias_name: str) -> bool: + """ + Check if an alias exists + """ + existing_index_mapping = self._client.cat.aliases(alias_name, format="json") + return len(existing_index_mapping) > 0 + def swap_alias_index( self, index_name: str, alias_name: str, *, delete_prior_indexes: bool = False ) -> None: @@ -144,11 +185,71 @@ def search_raw(self, index_name: str, search_query: dict) -> dict: return self._client.search(index=index_name, body=search_query) def search( - self, index_name: str, search_query: dict, include_scores: bool = True + self, + index_name: str, + search_query: dict, + include_scores: bool = True, + params: dict | None = None, ) -> SearchResponse: - response = self._client.search(index=index_name, body=search_query) + if params is None: + params = {} + + response = self._client.search(index=index_name, body=search_query, params=params) return SearchResponse.from_opensearch_response(response, include_scores) + def scroll( + self, + index_name: str, + search_query: dict, + include_scores: bool = True, + duration: str = "10m", + ) -> Generator[SearchResponse, None, None]: + """ + Scroll (iterate) over a large result set a given search query. + + This query uses additional resources to keep the response open, but + keeps a consistent set of results and is useful for backend processes + that need to fetch a large amount of search data. After processing the results, + the scroll lock is closed for you. + + This method is setup as a generator method and the results can be iterated over:: + + for response in search_client.scroll("my_index", {"size": 10000}): + for record in response.records: + process_record(record) + + + See: https://opensearch.org/docs/latest/api-reference/scroll/ + """ + + # start scroll + response = self.search( + index_name=index_name, + search_query=search_query, + include_scores=include_scores, + params={"scroll": duration}, + ) + scroll_id = response.scroll_id + + yield response + + # iterate + while True: + raw_response = self._client.scroll({"scroll_id": scroll_id, "scroll": duration}) + response = SearchResponse.from_opensearch_response(raw_response, include_scores) + + # The scroll ID can change between queries according to the docs, so we + # keep updating the value while iterating in case they change. 
+ scroll_id = response.scroll_id + + if len(response.records) == 0: + break + + yield response + + # close scroll + self._client.clear_scroll(scroll_id=scroll_id) + def _get_connection_parameters(opensearch_config: OpensearchConfig) -> dict[str, Any]: # TODO - we'll want to add the AWS connection params here when we set that up diff --git a/api/src/adapters/search/opensearch_response.py b/api/src/adapters/search/opensearch_response.py index c8bb16cb6..a54c6ecc7 100644 --- a/api/src/adapters/search/opensearch_response.py +++ b/api/src/adapters/search/opensearch_response.py @@ -10,6 +10,8 @@ class SearchResponse: aggregations: dict[str, dict[str, int]] + scroll_id: str | None + @classmethod def from_opensearch_response( cls, raw_json: dict[str, typing.Any], include_scores: bool = True @@ -40,6 +42,8 @@ def from_opensearch_response( ] } """ + scroll_id = raw_json.get("_scroll_id", None) + hits = raw_json.get("hits", {}) hits_total = hits.get("total", {}) total_records = hits_total.get("value", 0) @@ -59,7 +63,7 @@ def from_opensearch_response( raw_aggs: dict[str, dict[str, typing.Any]] = raw_json.get("aggregations", {}) aggregations = _parse_aggregations(raw_aggs) - return cls(total_records, records, aggregations) + return cls(total_records, records, aggregations, scroll_id) def _parse_aggregations( diff --git a/api/src/api/opportunities_v1/opportunity_schemas.py b/api/src/api/opportunities_v1/opportunity_schemas.py index 9de3bf40d..6366321ff 100644 --- a/api/src/api/opportunities_v1/opportunity_schemas.py +++ b/api/src/api/opportunities_v1/opportunity_schemas.py @@ -337,38 +337,34 @@ class OpportunitySearchFilterV1Schema(Schema): ) expected_number_of_awards = fields.Nested( IntegerSearchSchemaBuilder("ExpectedNumberAwardsFilterV1Schema") - .with_minimum_value(example=0) - .with_maximum_value(example=25) + .with_integer_range(min_example=0, max_example=25) .build() ) award_floor = fields.Nested( IntegerSearchSchemaBuilder("AwardFloorFilterV1Schema") - .with_minimum_value(example=0) - .with_maximum_value(example=10_000) + .with_integer_range(min_example=0, max_example=10_000) .build() ) award_ceiling = fields.Nested( IntegerSearchSchemaBuilder("AwardCeilingFilterV1Schema") - .with_minimum_value(example=0) - .with_maximum_value(example=10_000_000) + .with_integer_range(min_example=0, max_example=10_000_000) .build() ) estimated_total_program_funding = fields.Nested( IntegerSearchSchemaBuilder("EstimatedTotalProgramFundingFilterV1Schema") - .with_minimum_value(example=0) - .with_maximum_value(example=10_000_000) + .with_integer_range(min_example=0, max_example=10_000_000) .build() ) post_date = fields.Nested( - DateSearchSchemaBuilder("PostDateFilterV1Schema").with_start_date().with_end_date().build() + DateSearchSchemaBuilder("PostDateFilterV1Schema").with_date_range().build() ) close_date = fields.Nested( - DateSearchSchemaBuilder("CloseDateFilterV1Schema").with_start_date().with_end_date().build() + DateSearchSchemaBuilder("CloseDateFilterV1Schema").with_date_range().build() ) diff --git a/api/src/api/schemas/search_schema.py b/api/src/api/schemas/search_schema.py index 35be5a1b6..0bd5acf99 100644 --- a/api/src/api/schemas/search_schema.py +++ b/api/src/api/schemas/search_schema.py @@ -1,5 +1,5 @@ from enum import StrEnum -from typing import Any, Pattern, Type +from typing import Any, Callable, Pattern, Type from marshmallow import ValidationError, validates_schema @@ -37,8 +37,9 @@ def validates_non_empty(self, data: dict, **kwargs: Any) -> None: class BaseSearchSchemaBuilder: def 
__init__(self, schema_class_name: str): + # schema fields are the fields and functions of the class + self.schema_fields: dict[str, fields.MixinField | Callable[..., Any]] = {} # The schema class name is used on the endpoint - self.schema_fields: dict[str, fields.MixinField] = {} self.schema_class_name = schema_class_name def build(self) -> Schema: @@ -147,13 +148,23 @@ class IntegerSearchSchemaBuilder(BaseSearchSchemaBuilder): class OpportunitySearchFilterSchema(Schema): example_int_field = fields.Nested( IntegerSearchSchemaBuilder("ExampleIntFieldSchema") - .with_minimum_value(example=1) - .with_maximum_value(example=25) + .with_integer_range(min_example=1, max_example=25) .build() ) """ - def with_minimum_value( + def with_integer_range( + self, + min_example: int | None = None, + max_example: int | None = None, + positive_only: bool = True, + ) -> "IntegerSearchSchemaBuilder": + self._with_minimum_value(min_example, positive_only) + self._with_maximum_value(max_example, positive_only) + self._with_int_range_validator() + return self + + def _with_minimum_value( self, example: int | None = None, positive_only: bool = True ) -> "IntegerSearchSchemaBuilder": metadata = {} @@ -169,7 +180,7 @@ def with_minimum_value( ) return self - def with_maximum_value( + def _with_maximum_value( self, example: int | None = None, positive_only: bool = True ) -> "IntegerSearchSchemaBuilder": metadata = {} @@ -185,6 +196,28 @@ def with_maximum_value( ) return self + def _with_int_range_validator(self) -> "IntegerSearchSchemaBuilder": + # Define a schema validator function that we'll use to define any + # rules that go across fields in the validation + @validates_schema + def validate_int_range(_: Any, data: dict, **kwargs: Any) -> None: + min_value = data.get("min", None) + max_value = data.get("max", None) + + # Error if min and max value are None (either explicitly set, or because they are missing) + if min_value is None and max_value is None: + raise ValidationError( + [ + MarshmallowErrorContainer( + ValidationErrorType.REQUIRED, + "At least one of min or max must be provided.", + ) + ] + ) + + self.schema_fields["validate_int_range"] = validate_int_range + return self + class BoolSearchSchemaBuilder(BaseSearchSchemaBuilder): """ @@ -250,30 +283,38 @@ class DateSearchSchemaBuilder(BaseSearchSchemaBuilder): Usage:: # In a search request schema, you would use it like so: - example_start_date_field = fields.Nested( - DateSearchSchemaBuilder("ExampleStartDateFieldSchema") - .with_start_date() - .build() - ) - - example_end_date_field = fields.Nested( - DateSearchSchemaBuilder("ExampleEndDateFieldSchema") - .with_end_date() - .build() - ) - example_startend_date_field = fields.Nested( DateSearchSchemaBuilder("ExampleStartEndDateFieldSchema") - .with_start_date() - .with_end_date() + .with_date_range() .build() ) """ - def with_start_date(self) -> "DateSearchSchemaBuilder": + def with_date_range(self) -> "DateSearchSchemaBuilder": self.schema_fields["start_date"] = fields.Date(allow_none=True) + self.schema_fields["end_date"] = fields.Date(allow_none=True) + self._with_date_range_validator() + return self - def with_end_date(self) -> "DateSearchSchemaBuilder": - self.schema_fields["end_date"] = fields.Date(allow_none=True) + def _with_date_range_validator(self) -> "DateSearchSchemaBuilder": + # Define a schema validator function that we'll use to define any + # rules that go across fields in the validation + @validates_schema + def validate_date_range(_: Any, data: dict, **kwargs: Any) -> None: + start_date 
= data.get("start_date", None) + end_date = data.get("end_date", None) + + # Error if start and end date are None (either explicitly set, or because they are missing) + if start_date is None and end_date is None: + raise ValidationError( + [ + MarshmallowErrorContainer( + ValidationErrorType.REQUIRED, + "At least one of start_date or end_date must be provided.", + ) + ] + ) + + self.schema_fields["validate_date_range"] = validate_date_range return self diff --git a/api/src/constants/lookup_constants.py b/api/src/constants/lookup_constants.py index a33135705..f686f95f2 100644 --- a/api/src/constants/lookup_constants.py +++ b/api/src/constants/lookup_constants.py @@ -105,3 +105,14 @@ class FundingInstrument(StrEnum): GRANT = "grant" # G PROCUREMENT_CONTRACT = "procurement_contract" # PC OTHER = "other" # O + + +class AgencyDownloadFileType(StrEnum): + XML = "xml" + PDF = "pdf" + + +class AgencySubmissionNotificationSetting(StrEnum): + NEVER = "never" + FIRST_APPLICATION_ONLY = "first_application_only" + ALWAYS = "always" diff --git a/api/src/data_migration/transformation/subtask/transform_agency.py b/api/src/data_migration/transformation/subtask/transform_agency.py new file mode 100644 index 000000000..b7414901f --- /dev/null +++ b/api/src/data_migration/transformation/subtask/transform_agency.py @@ -0,0 +1,422 @@ +import logging +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any + +from pydantic import Field +from sqlalchemy import select +from sqlalchemy.orm import selectinload + +import src.data_migration.transformation.transform_constants as transform_constants +import src.data_migration.transformation.transform_util as transform_util +from src.constants.lookup_constants import ( + AgencyDownloadFileType, + AgencySubmissionNotificationSetting, +) +from src.data_migration.transformation.subtask.abstract_transform_subtask import ( + AbstractTransformSubTask, +) +from src.db.models.agency_models import Agency, AgencyContactInfo, LinkAgencyDownloadFileType +from src.db.models.staging.tgroups import Tgroups +from src.task.task import Task +from src.util.env_config import PydanticBaseEnvConfig + +logger = logging.getLogger(__name__) + +NULLABLE_FIELDS = { + "AgencyCode", # Note this is the sub_agency_code in our system + "AgencyContactEMail2", +} + +AGENCY_FIELD_MAP = { + "AgencyName": "agency_name", + "AgencyCode": "sub_agency_code", + "AgencyCFDA": "assistance_listing_number", + "AgencyDownload": "agency_download_file_types", + "AgencyNotify": "agency_submission_notification_setting", + "ldapGp": "ldap_group", + "description": "description", + "label": "label", + "multilevel": "is_multilevel_agency", + "HasS2SCert": "has_system_to_system_certificate", + "ViewPkgsInGracePeriod": "can_view_packages_in_grace_period", + "multiproject": "is_multiproject", + "ImageWS": "is_image_workspace_enabled", + "ValidationWS": "is_validation_workspace_enabled", +} + +AGENCY_CONTACT_INFO_FIELD_MAP = { + "AgencyContactName": "contact_name", + "AgencyContactAddress1": "address_line_1", + "AgencyContactAddress2": "address_line_2", + "AgencyContactCity": "city", + "AgencyContactState": "state", + "AgencyContactZipCode": "zip_code", + "AgencyContactTelephone": "phone_number", + "AgencyContactEMail": "primary_email", + "AgencyContactEMail2": "secondary_email", +} + +NOT_MAPPED_FIELDS = { + "AgencyEnroll", + "ForecastPOC", + "ForecastPOCEmail", + "ForecastPOCEmailDesc", + "ForecastPOCPhone", + "SynopsisPOC", + "SynopsisPOCEmail", + "SynopsisPOCEmailDesc", + "PackagePOC", 
+ # These fields were only found in the test environment + "ASSISTCompatible", + "SAMValidation", +} + +REQUIRED_FIELDS = { + "AgencyName", + "AgencyCFDA", + "AgencyDownload", + "AgencyNotify", + "ldapGp", + "description", + "label", + "AgencyContactName", + "AgencyContactAddress1", + "AgencyContactCity", + "AgencyContactState", + "AgencyContactZipCode", + "AgencyContactTelephone", + "AgencyContactEMail", +} + + +class AgencyConfig(PydanticBaseEnvConfig): + # TODO - we might want to put this somewhere more central + # as we might want to filter these out in other places + test_agency_config: set[str] = Field( + default={"GDIT", "IVV", "IVPDF", "0001", "FGLT", "NGMS", "NGMS-Sub1", "SECSCAN"} + ) + + +@dataclass +class TgroupAgency: + """ + Container class for holding all tgroup records for + a given agency. + """ + + agency_code: str + tgroups: list[Tgroups] = field(default_factory=list) + + has_update: bool = False + + def add_tgroup(self, tgroup: Tgroups) -> None: + if tgroup.transformed_at is None: + self.has_update = True + + self.tgroups.append(tgroup) + + def get_updated_field_names(self) -> set[str]: + return {tgroup.get_field_name() for tgroup in self.tgroups if tgroup.transformed_at is None} + + +@dataclass +class AgencyUpdates: + """ + Container class for holding all of the necessary updates + for an agency + """ + + agency_updates: dict[str, Any] = field(default_factory=dict) + agency_contact_info_updates: dict[str, Any] = field(default_factory=dict) + agency_download_file_types: set[AgencyDownloadFileType] = field(default_factory=set) + + agency_created_at: datetime | None = None + agency_updated_at: datetime | None = None + + +class TransformAgency(AbstractTransformSubTask): + def __init__(self, task: Task, agency_config: AgencyConfig | None = None) -> None: + super().__init__(task) + + if agency_config is None: + agency_config = AgencyConfig() + + self.agency_config = agency_config + + def transform_records(self) -> None: + # fetch tgroup records + tgroup_map = self.fetch_tgroup_mapping() + + # Fetch all existing agencies + agency_map = self.fetch_agency_mapping() + + for agency_code, tgroup_agency in tgroup_map.items(): + agency = agency_map.get(agency_code) + + try: + self.process_tgroups(tgroup_agency, agency) + except ValueError: + self.increment( + transform_constants.Metrics.TOTAL_ERROR_COUNT, + prefix=transform_constants.AGENCY, + ) + logger.exception("Failed to process agency", extra={"agency_code": agency_code}) + + def fetch_tgroup_mapping(self) -> dict[str, TgroupAgency]: + tgroups = self.db_session.scalars(select(Tgroups)) + + tgroup_mapping: dict[str, TgroupAgency] = {} + + for tgroup in tgroups: + agency_code = tgroup.get_agency_code() + + if agency_code not in tgroup_mapping: + tgroup_mapping[agency_code] = TgroupAgency(agency_code) + + tgroup_mapping[agency_code].add_tgroup(tgroup) + + return tgroup_mapping + + def fetch_agency_mapping(self) -> dict[str, Agency]: + agencies = self.db_session.scalars( + select(Agency).options(selectinload(Agency.agency_contact_info)) + ) + + return {agency.agency_code: agency for agency in agencies} + + def process_tgroups(self, tgroup_agency: TgroupAgency, agency: Agency | None) -> None: + log_extra = {"agency_code": tgroup_agency.agency_code} + logger.info("Processing agency", extra=log_extra) + if not tgroup_agency.has_update: + logger.info("No updates for agency", extra=log_extra) + return + + # Only increment counter for agencies with something to update + self.increment( + 
transform_constants.Metrics.TOTAL_RECORDS_PROCESSED, prefix=transform_constants.AGENCY + ) + + # New agency insert case + is_insert = False + if agency is None: + is_insert = True + # If any field that is required for creating an agency is missing, we want to error + missing_required_fields = REQUIRED_FIELDS - tgroup_agency.get_updated_field_names() + if missing_required_fields: + raise ValueError( + "Cannot create agency %s as required fields are missing: %s" + % (tgroup_agency.agency_code, ",".join(missing_required_fields)) + ) + + logger.info("Creating new agency", extra=log_extra) + agency = Agency(agency_code=tgroup_agency.agency_code) + agency.agency_contact_info = AgencyContactInfo() + else: + logger.info("Updating agency", extra=log_extra) + + updates = get_agency_updates(tgroup_agency) + apply_updates( + agency, updates.agency_updates, updates.agency_created_at, updates.agency_updated_at + ) + apply_updates( + agency.agency_contact_info, + updates.agency_contact_info_updates, + updates.agency_created_at, + updates.agency_updated_at, + ) + self.update_agency_download_file_types(agency, updates.agency_download_file_types) + + # Set whether the agency is a test agency based on the config + is_test_agency = tgroup_agency.agency_code in self.agency_config.test_agency_config + agency.is_test_agency = is_test_agency + + # After we have fully updated the agency, set the transformed_at timestamp + # for all tgroup records that weren't already set. + for tgroup in tgroup_agency.tgroups: + if tgroup.transformed_at is None: + tgroup.transformed_at = self.transform_time + + if is_insert: + self.increment( + transform_constants.Metrics.TOTAL_RECORDS_INSERTED, + prefix=transform_constants.AGENCY, + ) + else: + self.increment( + transform_constants.Metrics.TOTAL_RECORDS_UPDATED, prefix=transform_constants.AGENCY + ) + + self.db_session.add(agency) + logger.info("Processed agency", extra=log_extra) + + def update_agency_download_file_types( + self, agency: Agency, agency_download_file_types: set[AgencyDownloadFileType] + ) -> None: + # If the download file types we have set is already the same, just return + if agency.agency_download_file_types == agency_download_file_types: + return + + file_types_to_delete = set(agency.agency_download_file_types) - agency_download_file_types + file_types_to_add = agency_download_file_types - set(agency.agency_download_file_types) + + for link_agency_download_file_type in agency.link_agency_download_file_types: + if link_agency_download_file_type.agency_download_file_type in file_types_to_delete: + self.db_session.delete(link_agency_download_file_type) + + for file_type_to_add in file_types_to_add: + self.db_session.add( + LinkAgencyDownloadFileType( + agency=agency, agency_download_file_type=file_type_to_add + ) + ) + + +############################ +# Transformation / utility functions +############################ + +AGENCY_DOWNLOAD_FILE_TYPE_MAP = { + "0": set(), + "1": {AgencyDownloadFileType.XML}, + "2": {AgencyDownloadFileType.XML, AgencyDownloadFileType.PDF}, + "3": {AgencyDownloadFileType.PDF}, +} + +AGENCY_SUBMISSION_NOTIFICATION_SETTING_MAP = { + "1": AgencySubmissionNotificationSetting.NEVER, + "2": AgencySubmissionNotificationSetting.FIRST_APPLICATION_ONLY, + "3": AgencySubmissionNotificationSetting.ALWAYS, +} + + +def get_agency_updates(tgroup_agency: TgroupAgency) -> AgencyUpdates: + updates = AgencyUpdates() + + for tgroup in tgroup_agency.tgroups: + if not tgroup.is_modified: + continue + + tgroup_field_name = tgroup.get_field_name() + + # TODO - 
how we want to actually handle deleted rows likely needs more investigation + # and discussion - do we assume that if certain fields are deleted that the + # entire agency should be deleted? Can they even be deleted once an opportunity refers to them? + # Rather than focus too much on that detail right now, I'm deferring + # a more thorough investigation to later + # For now - we'll error any agency that has deleted rows except for a few + # specific fields we know are safe to delete. + if tgroup.is_deleted: + if tgroup_field_name not in NULLABLE_FIELDS: + raise ValueError( + "Field %s in tgroups cannot be deleted as it is not nullable" + % tgroup_field_name + ) + value = None + else: + value = convert_field_values(tgroup_field_name, tgroup.value) + + if tgroup_field_name == "AgencyDownload": + updates.agency_download_file_types = value # type: ignore[assignment] + + elif tgroup_field_name in AGENCY_FIELD_MAP: + field_name = AGENCY_FIELD_MAP[tgroup_field_name] + updates.agency_updates[field_name] = value + + elif tgroup_field_name in AGENCY_CONTACT_INFO_FIELD_MAP: + field_name = AGENCY_CONTACT_INFO_FIELD_MAP[tgroup_field_name] + updates.agency_contact_info_updates[field_name] = value + + elif tgroup_field_name in NOT_MAPPED_FIELDS: + logger.info( + "Skipping processing of field %s for %s", + tgroup_field_name, + tgroup_agency.agency_code, + ) + continue + + else: + raise ValueError("Unknown tgroups agency field %s" % tgroup_field_name) + + # We effectively need to merge the created_at/updated_at timestamps to the earliest/latest respectively + created_at, updated_at = transform_util.get_create_update_timestamps( + tgroup.created_date, tgroup.last_upd_date + ) + + if updates.agency_created_at is None or created_at < updates.agency_created_at: + updates.agency_created_at = created_at + + if updates.agency_updated_at is None or updated_at > updates.agency_updated_at: + updates.agency_updated_at = updated_at + + return updates + + +def convert_field_values(field_name: str, value: str | None) -> Any: + if field_name == "AgencyDownload": + return transform_agency_download_file_types(value) + elif field_name == "AgencyNotify": + return transform_agency_notify(value) + elif field_name == "multilevel": + return transform_util.convert_true_false_bool(value) + elif field_name == "HasS2SCert": + return transform_util.convert_yn_bool(value) + elif field_name == "multiproject": + return transform_util.convert_yn_bool(value) + elif field_name == "ViewPkgsInGracePeriod": + return transform_util.convert_yn_bool(value) + elif field_name == "ImageWS": + return transform_util.convert_yn_bool(value) + elif field_name == "ValidationWS": + return transform_util.convert_yn_bool(value) + elif field_name == "AgencyContactAddress2": + return transform_util.convert_null_like_to_none(value) + + return value + + +def transform_agency_download_file_types(value: str | None) -> set[AgencyDownloadFileType]: + if value not in AGENCY_DOWNLOAD_FILE_TYPE_MAP: + raise ValueError("Unrecognized agency download file type value %s" % value) + + return AGENCY_DOWNLOAD_FILE_TYPE_MAP[value] + + +def transform_agency_notify(value: str | None) -> AgencySubmissionNotificationSetting: + if value not in AGENCY_SUBMISSION_NOTIFICATION_SETTING_MAP: + raise ValueError("Unrecognized agency notify setting value: %s" % value) + + return AGENCY_SUBMISSION_NOTIFICATION_SETTING_MAP[value] + + +def apply_updates( + record: Agency | AgencyContactInfo | None, + updates: dict[str, Any], + created_at: datetime | None, + updated_at: datetime | None, +) 
-> None: + # Note MyPy doesn't quite follow the typing in this function because it thinks + # created_at/updated_at aren't ever None. While they aren't ever null in the DB, + # before we insert a record they may not be set. Hence the type:ignores here + + if record is None: + # This shouldn't happen but need to make mypy happy because agency contact info + # can technically be null + raise ValueError("Cannot pass none value into apply_updates") + + for field_name, value in updates.items(): + setattr(record, field_name, value) + + # We will only set created_at if the value doesn't already exist on the record + # It would be confusing to change the created_at timestamp after the initial insert + if record.created_at is None and created_at is not None: # type: ignore[unreachable] + record.created_at = created_at # type: ignore[unreachable] + + # Updated at we'll either set if the value currently is null (ie. we're doing an insert) + # or if it is greater than whatever already exists. + if record.updated_at is None and updated_at is not None: # type: ignore[unreachable] + record.updated_at = updated_at # type: ignore[unreachable] + elif ( + record.updated_at is not None and updated_at is not None and record.updated_at < updated_at + ): + record.updated_at = updated_at diff --git a/api/src/data_migration/transformation/transform_constants.py b/api/src/data_migration/transformation/transform_constants.py index 9d50e2069..16d023fbd 100644 --- a/api/src/data_migration/transformation/transform_constants.py +++ b/api/src/data_migration/transformation/transform_constants.py @@ -34,6 +34,7 @@ APPLICANT_TYPE = "applicant_type" FUNDING_CATEGORY = "funding_category" FUNDING_INSTRUMENT = "funding_instrument" +AGENCY = "agency" class Metrics(StrEnum): diff --git a/api/src/data_migration/transformation/transform_oracle_data_task.py b/api/src/data_migration/transformation/transform_oracle_data_task.py index ed5f33a3c..b7ce8e0fd 100644 --- a/api/src/data_migration/transformation/transform_oracle_data_task.py +++ b/api/src/data_migration/transformation/transform_oracle_data_task.py @@ -5,6 +5,7 @@ import src.data_migration.transformation.transform_constants as transform_constants from src.adapters import db +from src.data_migration.transformation.subtask.transform_agency import TransformAgency from src.data_migration.transformation.subtask.transform_applicant_type import ( TransformApplicantType, ) @@ -37,6 +38,7 @@ class TransformOracleDataTaskConfig(PydanticBaseEnvConfig): enable_applicant_type: bool = True # TRANSFORM_ORACLE_DATA_ENABLE_APPLICANT_TYPE enable_funding_category: bool = True # TRANSFORM_ORACLE_DATA_ENABLE_FUNDING_CATEGORY enable_funding_instrument: bool = True # TRANSFORM_ORACLE_DATA_ENABLE_FUNDING_INSTRUMENT + enable_agency: bool = False # TRANSFORM_ORACLE_DATA_ENABLE_AGENCY class TransformOracleDataTask(Task): @@ -76,3 +78,6 @@ def run_task(self) -> None: if self.transform_config.enable_funding_instrument: TransformFundingInstrument(self).run() + + if self.transform_config.enable_agency: + TransformAgency(self).run() diff --git a/api/src/data_migration/transformation/transform_util.py b/api/src/data_migration/transformation/transform_util.py index d8bf58a1b..216134a4c 100644 --- a/api/src/data_migration/transformation/transform_util.py +++ b/api/src/data_migration/transformation/transform_util.py @@ -1,5 +1,6 @@ import logging from datetime import datetime +from typing import Tuple from src.constants.lookup_constants import ( ApplicantType, @@ -377,38 +378,47 @@ def 
convert_est_timestamp_to_utc(timestamp: datetime | None) -> datetime | None: return datetime_util.adjust_timezone(aware_timestamp, "UTC") -def transform_update_create_timestamp( - source: StagingBase, target: TimestampMixin, log_extra: dict | None = None -) -> None: - # Convert the source timestamps to UTC - # Note: the type ignores are because created_date/last_upd_date are added - # on the individual class definitions, not the base class - due to how - # we need to maintain the column order of the legacy system. - # Every legacy table does have these columns. - created_timestamp = convert_est_timestamp_to_utc(source.created_date) # type: ignore[attr-defined] - updated_timestamp = convert_est_timestamp_to_utc(source.last_upd_date) # type: ignore[attr-defined] +def get_create_update_timestamps( + source_created_date: datetime | None, + source_last_upd_date: datetime | None, + log_extra: dict | None = None, +) -> Tuple[datetime, datetime]: + created_timestamp = convert_est_timestamp_to_utc(source_created_date) + updated_timestamp = convert_est_timestamp_to_utc(source_last_upd_date) - if created_timestamp is not None: - target.created_at = created_timestamp - else: - # This is incredibly rare, but possible - because our system requires - # we set something, we'll default to the current time and log a warning. + # This is incredibly rare, but possible - because our system requires + # we set something, we'll default to the current time and log a warning. + if created_timestamp is None: if log_extra is None: log_extra = {} logger.warning( - f"{source.__class__} does not have a created_date timestamp set, setting value to now.", + "Record does not have a created_date timestamp set, assuming value to be now.", extra=log_extra, ) - target.created_at = datetime_util.utcnow() + created_timestamp = datetime_util.utcnow() - if updated_timestamp is not None: - target.updated_at = updated_timestamp - else: + if updated_timestamp is None: # In the legacy system, they don't set whether something was updated # until it receives an update. We always set the value, and on initial insert # want it to be the same as the created_at. - target.updated_at = target.created_at + updated_timestamp = created_timestamp + + return created_timestamp, updated_timestamp + + +def transform_update_create_timestamp( + source: StagingBase, target: TimestampMixin, log_extra: dict | None = None +) -> None: + # Convert the source timestamps to UTC + # Note: the type ignores are because created_date/last_upd_date are added + # on the individual class definitions, not the base class - due to how + # we need to maintain the column order of the legacy system. + # Every legacy table does have these columns. 
+ created_timestamp, updated_timestamp = get_create_update_timestamps(source.created_date, source.last_upd_date, log_extra) # type: ignore[attr-defined] + + target.created_at = created_timestamp + target.updated_at = updated_timestamp TRUTHY = {"Y", "Yes"} @@ -431,6 +441,23 @@ def convert_yn_bool(value: str | None) -> bool | None: raise ValueError("Unexpected Y/N bool value: %s" % value) +def convert_true_false_bool(value: str | None) -> bool | None: + if value is None or value == "": + return None + + return value == "TRUE" + + +def convert_null_like_to_none(value: str | None) -> str | None: + if value is None: + return None + + if value.lower() == "null": + return None + + return value + + def convert_action_type_to_is_deleted(value: str | None) -> bool: # Action type can be U (update) or D (delete) # however many older records seem to not have this set at all diff --git a/api/src/db/foreign/__init__.py b/api/src/db/foreign/__init__.py index 9d55582c3..7406213dc 100644 --- a/api/src/db/foreign/__init__.py +++ b/api/src/db/foreign/__init__.py @@ -2,8 +2,8 @@ # SQLAlchemy models for foreign tables. # -from . import forecast, foreignbase, opportunity, synopsis +from . import forecast, foreignbase, opportunity, synopsis, tgroups metadata = foreignbase.metadata -__all__ = ["metadata", "forecast", "opportunity", "synopsis"] +__all__ = ["metadata", "forecast", "opportunity", "synopsis", "tgroups"] diff --git a/api/src/db/foreign/tgroups.py b/api/src/db/foreign/tgroups.py new file mode 100644 index 000000000..029f90724 --- /dev/null +++ b/api/src/db/foreign/tgroups.py @@ -0,0 +1,14 @@ +# +# SQLAlchemy models for foreign tables. +# +# The order of the columns must match the remote Oracle database. The names are not required to +# match by oracle_fdw, but we are matching them for maintainability. +# + +from src.db.legacy_mixin import tgroups_mixin + +from . import foreignbase + + +class Tgroups(foreignbase.ForeignBase, tgroups_mixin.TGroupsMixin): + __tablename__ = "tgroups" diff --git a/api/src/db/legacy_mixin/tgroups_mixin.py b/api/src/db/legacy_mixin/tgroups_mixin.py new file mode 100644 index 000000000..026cd4029 --- /dev/null +++ b/api/src/db/legacy_mixin/tgroups_mixin.py @@ -0,0 +1,20 @@ +# +# SQLAlchemy models for foreign tables. +# +# The order of the columns must match the remote Oracle database. The names are not required to +# match by oracle_fdw, but we are matching them for maintainability. +# + +import datetime + +from sqlalchemy.orm import Mapped, declarative_mixin, mapped_column + + +@declarative_mixin +class TGroupsMixin: + keyfield: Mapped[str] = mapped_column(primary_key=True) + value: Mapped[str | None] + created_date: Mapped[datetime.datetime | None] + last_upd_date: Mapped[datetime.datetime | None] + creator_id: Mapped[str | None] + last_upd_id: Mapped[str | None] diff --git a/api/src/db/migrations/versions/2024_07_08_add_agency_related_tables.py b/api/src/db/migrations/versions/2024_07_08_add_agency_related_tables.py new file mode 100644 index 000000000..404c5ced1 --- /dev/null +++ b/api/src/db/migrations/versions/2024_07_08_add_agency_related_tables.py @@ -0,0 +1,223 @@ +"""Add agency related tables + +Revision ID: 4f7acbb61548 +Revises: 61c58638e56b +Create Date: 2024-07-08 12:43:45.240782 + +""" +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision = "4f7acbb61548" +down_revision = "61c58638e56b" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table( + "agency_contact_info", + sa.Column("agency_contact_info_id", sa.BigInteger(), nullable=False), + sa.Column("contact_name", sa.Text(), nullable=False), + sa.Column("address_line_1", sa.Text(), nullable=False), + sa.Column("address_line_2", sa.Text(), nullable=True), + sa.Column("city", sa.Text(), nullable=False), + sa.Column("state", sa.Text(), nullable=False), + sa.Column("zip_code", sa.Text(), nullable=False), + sa.Column("phone_number", sa.Text(), nullable=False), + sa.Column("primary_email", sa.Text(), nullable=False), + sa.Column("secondary_email", sa.Text(), nullable=True), + sa.Column( + "created_at", + sa.TIMESTAMP(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.Column( + "updated_at", + sa.TIMESTAMP(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.PrimaryKeyConstraint("agency_contact_info_id", name=op.f("agency_contact_info_pkey")), + schema="api", + ) + op.create_table( + "lk_agency_download_file_type", + sa.Column("agency_download_file_type_id", sa.Integer(), nullable=False), + sa.Column("description", sa.Text(), nullable=False), + sa.Column( + "created_at", + sa.TIMESTAMP(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.Column( + "updated_at", + sa.TIMESTAMP(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.PrimaryKeyConstraint( + "agency_download_file_type_id", name=op.f("lk_agency_download_file_type_pkey") + ), + schema="api", + ) + op.create_table( + "lk_agency_submission_notification_setting", + sa.Column("agency_submission_notification_setting_id", sa.Integer(), nullable=False), + sa.Column("description", sa.Text(), nullable=False), + sa.Column( + "created_at", + sa.TIMESTAMP(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.Column( + "updated_at", + sa.TIMESTAMP(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.PrimaryKeyConstraint( + "agency_submission_notification_setting_id", + name=op.f("lk_agency_submission_notification_setting_pkey"), + ), + schema="api", + ) + op.create_table( + "agency", + sa.Column("agency_id", sa.BigInteger(), nullable=False), + sa.Column("agency_name", sa.Text(), nullable=False), + sa.Column("agency_code", sa.Text(), nullable=False), + sa.Column("sub_agency_code", sa.Text(), nullable=True), + sa.Column("assistance_listing_number", sa.Text(), nullable=False), + sa.Column("agency_submission_notification_setting_id", sa.Integer(), nullable=False), + sa.Column("agency_contact_info_id", sa.BigInteger(), nullable=True), + sa.Column("is_test_agency", sa.Boolean(), nullable=False), + sa.Column("ldap_group", sa.Text(), nullable=False), + sa.Column("description", sa.Text(), nullable=False), + sa.Column("label", sa.Text(), nullable=False), + sa.Column("is_multilevel_agency", sa.Boolean(), nullable=False), + sa.Column("is_multiproject", sa.Boolean(), nullable=False), + sa.Column("has_system_to_system_certificate", sa.Boolean(), nullable=False), + sa.Column("can_view_packages_in_grace_period", sa.Boolean(), nullable=False), + sa.Column("is_image_workspace_enabled", sa.Boolean(), nullable=False), + sa.Column("is_validation_workspace_enabled", sa.Boolean(), nullable=False), + sa.Column( + "created_at", + sa.TIMESTAMP(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.Column( + "updated_at", + sa.TIMESTAMP(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.ForeignKeyConstraint( + ["agency_contact_info_id"], 
+ ["api.agency_contact_info.agency_contact_info_id"], + name=op.f("agency_agency_contact_info_id_agency_contact_info_fkey"), + ), + sa.ForeignKeyConstraint( + ["agency_submission_notification_setting_id"], + [ + "api.lk_agency_submission_notification_setting.agency_submission_notification_setting_id" + ], + name=op.f( + "agency_agency_submission_notification_setting_id_lk_agency_submission_notification_setting_fkey" + ), + ), + sa.PrimaryKeyConstraint("agency_id", name=op.f("agency_pkey")), + schema="api", + ) + op.create_index( + op.f("agency_agency_code_idx"), "agency", ["agency_code"], unique=True, schema="api" + ) + op.create_table( + "link_agency_download_file_type", + sa.Column("agency_id", sa.BigInteger(), nullable=False), + sa.Column("agency_download_file_type_id", sa.Integer(), nullable=False), + sa.Column( + "created_at", + sa.TIMESTAMP(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.Column( + "updated_at", + sa.TIMESTAMP(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.ForeignKeyConstraint( + ["agency_download_file_type_id"], + ["api.lk_agency_download_file_type.agency_download_file_type_id"], + name=op.f( + "link_agency_download_file_type_agency_download_file_type_id_lk_agency_download_file_type_fkey" + ), + ), + sa.ForeignKeyConstraint( + ["agency_id"], + ["api.agency.agency_id"], + name=op.f("link_agency_download_file_type_agency_id_agency_fkey"), + ), + sa.PrimaryKeyConstraint( + "agency_id", + "agency_download_file_type_id", + name=op.f("link_agency_download_file_type_pkey"), + ), + schema="api", + ) + op.create_table( + "tgroups", + sa.Column("keyfield", sa.Text(), nullable=False), + sa.Column("value", sa.Text(), nullable=True), + sa.Column("created_date", sa.TIMESTAMP(timezone=True), nullable=True), + sa.Column("last_upd_date", sa.TIMESTAMP(timezone=True), nullable=True), + sa.Column("creator_id", sa.Text(), nullable=True), + sa.Column("last_upd_id", sa.Text(), nullable=True), + sa.Column("is_deleted", sa.Boolean(), nullable=False), + sa.Column("transformed_at", sa.TIMESTAMP(timezone=True), nullable=True), + sa.Column( + "created_at", + sa.TIMESTAMP(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.Column( + "updated_at", + sa.TIMESTAMP(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.Column("deleted_at", sa.TIMESTAMP(timezone=True), nullable=True), + sa.Column("transformation_notes", sa.Text(), nullable=True), + sa.PrimaryKeyConstraint("keyfield", name=op.f("tgroups_pkey")), + schema="staging", + ) + op.create_index( + op.f("tgroups_transformed_at_idx"), + "tgroups", + ["transformed_at"], + unique=False, + schema="staging", + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_index(op.f("tgroups_transformed_at_idx"), table_name="tgroups", schema="staging") + op.drop_table("tgroups", schema="staging") + op.drop_table("link_agency_download_file_type", schema="api") + op.drop_index(op.f("agency_agency_code_idx"), table_name="agency", schema="api") + op.drop_table("agency", schema="api") + op.drop_table("lk_agency_submission_notification_setting", schema="api") + op.drop_table("lk_agency_download_file_type", schema="api") + op.drop_table("agency_contact_info", schema="api") + # ### end Alembic commands ### diff --git a/api/src/db/models/__init__.py b/api/src/db/models/__init__.py index 2a82158ff..5ed62061e 100644 --- a/api/src/db/models/__init__.py +++ b/api/src/db/models/__init__.py @@ -1,6 +1,6 @@ import logging -from . import base, lookup_models, opportunity_models +from . import agency_models, base, lookup_models, opportunity_models from .transfer import topportunity_models logger = logging.getLogger(__name__) @@ -9,4 +9,10 @@ # This is used by tests to create the test database. metadata = base.metadata -__all__ = ["metadata", "opportunity_models", "lookup_models", "topportunity_models"] +__all__ = [ + "metadata", + "opportunity_models", + "lookup_models", + "topportunity_models", + "agency_models", +] diff --git a/api/src/db/models/agency_models.py b/api/src/db/models/agency_models.py new file mode 100644 index 000000000..14075233d --- /dev/null +++ b/api/src/db/models/agency_models.py @@ -0,0 +1,110 @@ +from sqlalchemy import BigInteger, ForeignKey +from sqlalchemy.ext.associationproxy import AssociationProxy, association_proxy +from sqlalchemy.orm import Mapped, mapped_column, relationship + +from src.adapters.db.type_decorators.postgres_type_decorators import LookupColumn +from src.constants.lookup_constants import ( + AgencyDownloadFileType, + AgencySubmissionNotificationSetting, +) +from src.db.models.base import ApiSchemaTable, TimestampMixin +from src.db.models.lookup_models import ( + LkAgencyDownloadFileType, + LkAgencySubmissionNotificationSetting, +) + + +class AgencyContactInfo(ApiSchemaTable, TimestampMixin): + __tablename__ = "agency_contact_info" + + agency_contact_info_id: Mapped[int] = mapped_column(BigInteger, primary_key=True) + + contact_name: Mapped[str] + + address_line_1: Mapped[str] + address_line_2: Mapped[str | None] + city: Mapped[str] + + # Note that while it would make sense to do an enum for state + # it doesn't look to be limited to US states and includes some foreign states + # as well as numbers(?) 
in the existing system + state: Mapped[str] + zip_code: Mapped[str] + phone_number: Mapped[str] + primary_email: Mapped[str] + secondary_email: Mapped[str | None] + + +class Agency(ApiSchemaTable, TimestampMixin): + __tablename__ = "agency" + + agency_id: Mapped[int] = mapped_column(BigInteger, primary_key=True) + + agency_name: Mapped[str] + + agency_code: Mapped[str] = mapped_column(index=True, unique=True) + sub_agency_code: Mapped[str | None] + + assistance_listing_number: Mapped[str] + + agency_submission_notification_setting: Mapped[ + AgencySubmissionNotificationSetting + ] = mapped_column( + "agency_submission_notification_setting_id", + LookupColumn(LkAgencySubmissionNotificationSetting), + ForeignKey(LkAgencySubmissionNotificationSetting.agency_submission_notification_setting_id), + ) + + agency_contact_info_id: Mapped[BigInteger | None] = mapped_column( + BigInteger, ForeignKey(AgencyContactInfo.agency_contact_info_id) + ) + agency_contact_info: Mapped[AgencyContactInfo | None] = relationship(AgencyContactInfo) + + # There are several agencies in the data we're ingesting that + # are clearly meant for testing, I'm not certain we want to flag + # them in this way, but adding it for now - can revisit later + # From the legacy system configurations, this should be the following agencies + # GDIT,IVV,IVPDF,0001,FGLT,NGMS,NGMS-Sub1,SECSCAN + # including any subagencies + is_test_agency: Mapped[bool] + + # These values come from the legacy system, but their exact usage isn't entirely + # clear at this point in time. + ldap_group: Mapped[str] + description: Mapped[str] + label: Mapped[str] + + is_multilevel_agency: Mapped[bool] = mapped_column(default=False) + is_multiproject: Mapped[bool] = mapped_column(default=False) + has_system_to_system_certificate: Mapped[bool] = mapped_column(default=False) + can_view_packages_in_grace_period: Mapped[bool] = mapped_column(default=False) + is_image_workspace_enabled: Mapped[bool] = mapped_column(default=False) + is_validation_workspace_enabled: Mapped[bool] = mapped_column(default=False) + + link_agency_download_file_types: Mapped[list["LinkAgencyDownloadFileType"]] = relationship( + back_populates="agency", uselist=True, cascade="all, delete-orphan" + ) + + agency_download_file_types: AssociationProxy[set[AgencyDownloadFileType]] = association_proxy( + "link_agency_download_file_types", + "agency_download_file_type", + creator=lambda obj: LinkAgencyDownloadFileType(agency_download_file_type=obj), + ) + + +class LinkAgencyDownloadFileType(ApiSchemaTable, TimestampMixin): + __tablename__ = "link_agency_download_file_type" + + agency_id: Mapped[int] = mapped_column( + BigInteger, + ForeignKey(Agency.agency_id), + primary_key=True, + ) + agency: Mapped[Agency] = relationship(Agency) + + agency_download_file_type: Mapped[AgencyDownloadFileType] = mapped_column( + "agency_download_file_type_id", + LookupColumn(LkAgencyDownloadFileType), + ForeignKey(LkAgencyDownloadFileType.agency_download_file_type_id), + primary_key=True, + ) diff --git a/api/src/db/models/lookup_models.py b/api/src/db/models/lookup_models.py index 91b71e7cd..d954d6422 100644 --- a/api/src/db/models/lookup_models.py +++ b/api/src/db/models/lookup_models.py @@ -1,6 +1,8 @@ from sqlalchemy.orm import Mapped, mapped_column from src.constants.lookup_constants import ( + AgencyDownloadFileType, + AgencySubmissionNotificationSetting, ApplicantType, FundingCategory, FundingInstrument, @@ -93,6 +95,18 @@ ] ) +AGENCY_DOWNLOAD_FILE_TYPE_CONFIG = LookupConfig( + 
[LookupStr(AgencyDownloadFileType.XML, 1), LookupStr(AgencyDownloadFileType.PDF, 2)] +) + +AGENCY_SUBMISSION_NOTIFICATION_SETTING_CONFIG = LookupConfig( + [ + LookupStr(AgencySubmissionNotificationSetting.NEVER, 1), + LookupStr(AgencySubmissionNotificationSetting.FIRST_APPLICATION_ONLY, 2), + LookupStr(AgencySubmissionNotificationSetting.ALWAYS, 3), + ] +) + @LookupRegistry.register_lookup(OPPORTUNITY_CATEGORY_CONFIG) class LkOpportunityCategory(LookupTable, TimestampMixin): @@ -162,3 +176,32 @@ def from_lookup(cls, lookup: Lookup) -> "LkOpportunityStatus": return LkOpportunityStatus( opportunity_status_id=lookup.lookup_val, description=lookup.get_description() ) + + +@LookupRegistry.register_lookup(AGENCY_DOWNLOAD_FILE_TYPE_CONFIG) +class LkAgencyDownloadFileType(LookupTable, TimestampMixin): + __tablename__ = "lk_agency_download_file_type" + + agency_download_file_type_id: Mapped[int] = mapped_column(primary_key=True) + description: Mapped[str] + + @classmethod + def from_lookup(cls, lookup: Lookup) -> "LkAgencyDownloadFileType": + return LkAgencyDownloadFileType( + agency_download_file_type_id=lookup.lookup_val, description=lookup.get_description() + ) + + +@LookupRegistry.register_lookup(AGENCY_SUBMISSION_NOTIFICATION_SETTING_CONFIG) +class LkAgencySubmissionNotificationSetting(LookupTable, TimestampMixin): + __tablename__ = "lk_agency_submission_notification_setting" + + agency_submission_notification_setting_id: Mapped[int] = mapped_column(primary_key=True) + description: Mapped[str] + + @classmethod + def from_lookup(cls, lookup: Lookup) -> "LkAgencySubmissionNotificationSetting": + return LkAgencySubmissionNotificationSetting( + agency_submission_notification_setting_id=lookup.lookup_val, + description=lookup.get_description(), + ) diff --git a/api/src/db/models/staging/__init__.py b/api/src/db/models/staging/__init__.py index d89da9dc9..6a2de9c45 100644 --- a/api/src/db/models/staging/__init__.py +++ b/api/src/db/models/staging/__init__.py @@ -1,5 +1,5 @@ -from . import forecast, opportunity, staging_base, synopsis +from . import forecast, opportunity, staging_base, synopsis, tgroups metadata = staging_base.metadata -__all__ = ["metadata", "opportunity", "forecast", "synopsis"] +__all__ = ["metadata", "opportunity", "forecast", "synopsis", "tgroups"] diff --git a/api/src/db/models/staging/staging_base.py b/api/src/db/models/staging/staging_base.py index 0705a866e..12bca9685 100644 --- a/api/src/db/models/staging/staging_base.py +++ b/api/src/db/models/staging/staging_base.py @@ -74,3 +74,7 @@ class StagingParamMixin: ) transformation_notes: Mapped[str | None] + + @property + def is_modified(self) -> bool: + return self.transformed_at is None diff --git a/api/src/db/models/staging/tgroups.py b/api/src/db/models/staging/tgroups.py new file mode 100644 index 000000000..97e70f3f0 --- /dev/null +++ b/api/src/db/models/staging/tgroups.py @@ -0,0 +1,17 @@ +from src.db.legacy_mixin import tgroups_mixin +from src.db.models.staging.staging_base import StagingBase, StagingParamMixin + + +class Tgroups(StagingBase, tgroups_mixin.TGroupsMixin, StagingParamMixin): + __tablename__ = "tgroups" + + def get_agency_code(self) -> str: + # The keyfield is formatted as: + # Agency-- + # so to get the agency code, we need to parse out the middle bit + # so we split and drop the first + last field and rejoin it. 
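+        # For example (illustrative values only): a keyfield of "Agency-NGMS-Sub1-AgencyName"
+        # yields the agency code "NGMS-Sub1" and the field name "AgencyName".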
+ tokens = self.keyfield.split("-") + return "-".join(tokens[1:-1]) + + def get_field_name(self) -> str: + return self.keyfield.split("-")[-1] diff --git a/api/src/search/backend/load_opportunities_to_index.py b/api/src/search/backend/load_opportunities_to_index.py index 630ecf616..dcf778037 100644 --- a/api/src/search/backend/load_opportunities_to_index.py +++ b/api/src/search/backend/load_opportunities_to_index.py @@ -38,21 +38,52 @@ def __init__( self, db_session: db.Session, search_client: search.SearchClient, + is_full_refresh: bool = True, config: LoadOpportunitiesToIndexConfig | None = None, ) -> None: super().__init__(db_session) self.search_client = search_client + self.is_full_refresh = is_full_refresh if config is None: config = LoadOpportunitiesToIndexConfig() self.config = config - current_timestamp = get_now_us_eastern_datetime().strftime("%Y-%m-%d_%H-%M-%S") - self.index_name = f"{self.config.index_prefix}-{current_timestamp}" + if is_full_refresh: + current_timestamp = get_now_us_eastern_datetime().strftime("%Y-%m-%d_%H-%M-%S") + self.index_name = f"{self.config.index_prefix}-{current_timestamp}" + else: + self.index_name = self.config.alias_name self.set_metrics({"index_name": self.index_name}) def run_task(self) -> None: + if self.is_full_refresh: + logger.info("Running full refresh") + self.full_refresh() + else: + logger.info("Running incremental load") + self.incremental_updates_and_deletes() + + def incremental_updates_and_deletes(self) -> None: + existing_opportunity_ids = self.fetch_existing_opportunity_ids_in_index() + + # load the records incrementally + # TODO - The point of this incremental load is to support upcoming work + # to load only opportunities that have changes as we'll eventually be indexing + # files which will take longer. 
However - the structure of the data isn't yet + # known so I want to hold on actually setting up any change-detection logic + loaded_opportunity_ids = set() + for opp_batch in self.fetch_opportunities(): + loaded_opportunity_ids.update(self.load_records(opp_batch)) + + # Delete + opportunity_ids_to_delete = existing_opportunity_ids - loaded_opportunity_ids + + if len(opportunity_ids_to_delete) > 0: + self.search_client.bulk_delete(self.index_name, opportunity_ids_to_delete) + + def full_refresh(self) -> None: # create the index self.search_client.create_index( self.index_name, @@ -93,11 +124,32 @@ def fetch_opportunities(self) -> Iterator[Sequence[Opportunity]]: .partitions() ) - def load_records(self, records: Sequence[Opportunity]) -> None: + def fetch_existing_opportunity_ids_in_index(self) -> set[int]: + if not self.search_client.alias_exists(self.index_name): + raise RuntimeError( + "Alias %s does not exist, please run the full refresh job before the incremental job" + % self.index_name + ) + + opportunity_ids: set[int] = set() + + for response in self.search_client.scroll( + self.config.alias_name, + {"size": 10000, "_source": ["opportunity_id"]}, + include_scores=False, + ): + for record in response.records: + opportunity_ids.add(record["opportunity_id"]) + + return opportunity_ids + + def load_records(self, records: Sequence[Opportunity]) -> set[int]: logger.info("Loading batch of opportunities...") schema = OpportunityV1Schema() json_records = [] + loaded_opportunity_ids = set() + for record in records: logger.info( "Preparing opportunity for upload to search index", @@ -109,4 +161,8 @@ def load_records(self, records: Sequence[Opportunity]) -> None: json_records.append(schema.dump(record)) self.increment(self.Metrics.RECORDS_LOADED) + loaded_opportunity_ids.add(record.opportunity_id) + self.search_client.bulk_upsert(self.index_name, json_records, "opportunity_id") + + return loaded_opportunity_ids diff --git a/api/src/search/backend/load_search_data.py b/api/src/search/backend/load_search_data.py index cf6f0445f..5b82e5a6d 100644 --- a/api/src/search/backend/load_search_data.py +++ b/api/src/search/backend/load_search_data.py @@ -1,3 +1,5 @@ +import click + import src.adapters.db as db import src.adapters.search as search from src.adapters.db import flask_db @@ -8,8 +10,13 @@ @load_search_data_blueprint.cli.command( "load-opportunity-data", help="Load opportunity data from our database to the search index" ) +@click.option( + "--full-refresh/--incremental", + default=True, + help="Whether to run a full refresh, or only incrementally update oppportunities", +) @flask_db.with_db_session() -def load_opportunity_data(db_session: db.Session) -> None: +def load_opportunity_data(db_session: db.Session, full_refresh: bool) -> None: search_client = search.SearchClient() - LoadOpportunitiesToIndex(db_session, search_client).run() + LoadOpportunitiesToIndex(db_session, search_client, full_refresh).run() diff --git a/api/src/search/search_models.py b/api/src/search/search_models.py new file mode 100644 index 000000000..e3982323a --- /dev/null +++ b/api/src/search/search_models.py @@ -0,0 +1,21 @@ +from datetime import date + +from pydantic import BaseModel + + +class StrSearchFilter(BaseModel): + one_of: list[str] | None = None + + +class BoolSearchFilter(BaseModel): + one_of: list[bool] | None = None + + +class IntSearchFilter(BaseModel): + min: int | None = None + max: int | None = None + + +class DateSearchFilter(BaseModel): + start_date: date | None = None + end_date: date | None = None 
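+
+
+# Illustrative usage (values here are assumptions, not real data):
+#   IntSearchFilter(min=0, max=25)
+#   DateSearchFilter(start_date=date(2024, 1, 1))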
diff --git a/api/src/services/opportunities_v1/search_opportunities.py b/api/src/services/opportunities_v1/search_opportunities.py index e3252e90e..e6f1efb69 100644 --- a/api/src/services/opportunities_v1/search_opportunities.py +++ b/api/src/services/opportunities_v1/search_opportunities.py @@ -8,6 +8,12 @@ from src.api.opportunities_v1.opportunity_schemas import OpportunityV1Schema from src.pagination.pagination_models import PaginationInfo, PaginationParams, SortDirection from src.search.search_config import get_search_config +from src.search.search_models import ( + BoolSearchFilter, + DateSearchFilter, + IntSearchFilter, + StrSearchFilter, +) logger = logging.getLogger(__name__) @@ -28,6 +34,11 @@ "funding_instrument": "summary.funding_instruments.keyword", "funding_category": "summary.funding_categories.keyword", "applicant_type": "summary.applicant_types.keyword", + "is_cost_sharing": "summary.is_cost_sharing", + "expected_number_of_awards": "summary.expected_number_of_awards", + "award_floor": "summary.award_floor", + "award_ceiling": "summary.award_ceiling", + "estimated_total_program_funding": "summary.estimated_total_program_funding", } SEARCH_FIELDS = [ @@ -45,11 +56,31 @@ SCHEMA = OpportunityV1Schema() +class OpportunityFilters(BaseModel): + applicant_type: StrSearchFilter | None = None + funding_instrument: StrSearchFilter | None = None + funding_category: StrSearchFilter | None = None + funding_applicant_type: StrSearchFilter | None = None + opportunity_status: StrSearchFilter | None = None + agency: StrSearchFilter | None = None + assistance_listing_number: StrSearchFilter | None = None + + is_cost_sharing: BoolSearchFilter | None = None + + expected_number_of_awards: IntSearchFilter | None = None + award_floor: IntSearchFilter | None = None + award_ceiling: IntSearchFilter | None = None + estimated_total_program_funding: IntSearchFilter | None = None + + post_date: DateSearchFilter | None = None + close_date: DateSearchFilter | None = None + + class SearchOpportunityParams(BaseModel): pagination: PaginationParams query: str | None = Field(default=None) - filters: dict | None = Field(default=None) + filters: OpportunityFilters | None = Field(default=None) def _adjust_field_name(field: str) -> str: @@ -68,16 +99,30 @@ def _get_sort_by(pagination: PaginationParams) -> list[tuple[str, SortDirection] return sort_by -def _add_search_filters(builder: search.SearchQueryBuilder, filters: dict | None) -> None: +def _add_search_filters( + builder: search.SearchQueryBuilder, filters: OpportunityFilters | None +) -> None: if filters is None: return - for field, field_filters in filters.items(): - # one_of filters translate to an opensearch term filter - # see: https://opensearch.org/docs/latest/query-dsl/term/terms/ - one_of_filters = field_filters.get("one_of", None) - if one_of_filters: - builder.filter_terms(_adjust_field_name(field), one_of_filters) + for field in filters.model_fields_set: + field_filters = getattr(filters, field) + field_name = _adjust_field_name(field) + + # We use the type of the search filter to determine what methods + # we call on the builder. This way we can make sure we have the proper + # type mappings. 
+ if isinstance(field_filters, StrSearchFilter) and field_filters.one_of: + builder.filter_terms(field_name, field_filters.one_of) + + elif isinstance(field_filters, BoolSearchFilter) and field_filters.one_of: + builder.filter_terms(field_name, field_filters.one_of) + + elif isinstance(field_filters, IntSearchFilter): + builder.filter_int_range(field_name, field_filters.min, field_filters.max) + + elif isinstance(field_filters, DateSearchFilter): + builder.filter_date_range(field_name, field_filters.start_date, field_filters.end_date) def _add_aggregations(builder: search.SearchQueryBuilder) -> None: diff --git a/api/tests/lib/seed_local_db.py b/api/tests/lib/seed_local_db.py index ad87525ab..4562d2fe1 100644 --- a/api/tests/lib/seed_local_db.py +++ b/api/tests/lib/seed_local_db.py @@ -9,6 +9,7 @@ import src.util.datetime_util as datetime_util import tests.src.db.models.factories as factories from src.adapters.db import PostgresDBClient +from src.db.models.agency_models import Agency from src.db.models.opportunity_models import Opportunity from src.db.models.transfer.topportunity_models import TransferTopportunity from src.util.local import error_if_not_local @@ -115,6 +116,41 @@ def _build_opportunities(db_session: db.Session, iterations: int, include_histor logger.info("Finished creating records in the transfer_topportunity table") +# Agencies we want to create locally - if we want to create significantly more +# we can consider shoving this into a CSV that we load instead. +AGENCIES_TO_CREATE = [ + { + "agency_code": "USAID", + "agency_name": "Agency for International Development", + }, + { + "agency_code": "ARPAH", + "agency_name": "Advanced Research Projects Agency for Health", + }, + { + "agency_code": "DOC", + "agency_name": "Agency for International Development", + }, + { + "agency_code": "DOC-EDA", + "agency_name": "Agency for International Development", + }, +] + + +def _build_agencies(db_session: db.Session) -> None: + # Create a static set of agencies, only if they don't already exist + agencies = db_session.query(Agency).all() + agency_codes = set([a.agency_code for a in agencies]) + + for agency_to_create in AGENCIES_TO_CREATE: + if agency_to_create["agency_code"] in agency_codes: + continue + + logger.info("Creating agency %s in agency table", agency_to_create["agency_code"]) + factories.AgencyFactory.create(**agency_to_create) + + @click.command() @click.option( "--iterations", @@ -141,3 +177,5 @@ def seed_local_db(iterations: int, include_history: bool) -> None: # Need to commit to force any updates made # after factories created objects db_session.commit() + + _build_agencies(db_session) diff --git a/api/tests/src/adapters/search/test_opensearch_client.py b/api/tests/src/adapters/search/test_opensearch_client.py index 916c6effd..8de2c2cc9 100644 --- a/api/tests/src/adapters/search/test_opensearch_client.py +++ b/api/tests/src/adapters/search/test_opensearch_client.py @@ -65,6 +65,25 @@ def test_bulk_upsert(search_client, generic_index): assert search_client._client.get(generic_index, record["id"])["_source"] == record +def test_bulk_delete(search_client, generic_index): + records = [ + {"id": 1, "title": "Green Eggs & Ham", "notes": "why are the eggs green?"}, + {"id": 2, "title": "The Cat in the Hat", "notes": "silly cat wears a hat"}, + {"id": 3, "title": "One Fish, Two Fish, Red Fish, Blue Fish", "notes": "fish"}, + ] + + search_client.bulk_upsert(generic_index, records, primary_key_field="id") + + search_client.bulk_delete(generic_index, [1]) + + resp = 
search_client.search(generic_index, {}, include_scores=False) + assert resp.records == records[1:] + + search_client.bulk_delete(generic_index, [2, 3]) + resp = search_client.search(generic_index, {}, include_scores=False) + assert resp.records == [] + + def test_swap_alias_index(search_client, generic_index): alias_name = f"tmp-alias-{uuid.uuid4().int}" @@ -101,3 +120,76 @@ def test_swap_alias_index(search_client, generic_index): # Verify the tmp one was deleted assert search_client._client.indices.exists(tmp_index) is False + + +def test_index_or_alias_exists(search_client, generic_index): + # Create a few aliased indexes + index_a = f"test-index-a-{uuid.uuid4().int}" + index_b = f"test-index-b-{uuid.uuid4().int}" + index_c = f"test-index-c-{uuid.uuid4().int}" + + search_client.create_index(index_a) + search_client.create_index(index_b) + search_client.create_index(index_c) + + alias_index_a = f"test-alias-a-{uuid.uuid4().int}" + alias_index_b = f"test-alias-b-{uuid.uuid4().int}" + alias_index_c = f"test-alias-c-{uuid.uuid4().int}" + + search_client.swap_alias_index(index_a, alias_index_a) + search_client.swap_alias_index(index_b, alias_index_b) + search_client.swap_alias_index(index_c, alias_index_c) + + # Checking the indexes directly - we expect the index method to return true + # and the alias method to not + assert search_client.index_exists(index_a) is True + assert search_client.index_exists(index_b) is True + assert search_client.index_exists(index_c) is True + + assert search_client.alias_exists(index_a) is False + assert search_client.alias_exists(index_b) is False + assert search_client.alias_exists(index_c) is False + + # We just created these aliases, they should exist + assert search_client.index_exists(alias_index_a) is True + assert search_client.index_exists(alias_index_b) is True + assert search_client.index_exists(alias_index_c) is True + + assert search_client.alias_exists(alias_index_a) is True + assert search_client.alias_exists(alias_index_b) is True + assert search_client.alias_exists(alias_index_c) is True + + # Other random things won't be found for either case + assert search_client.index_exists("test-index-a") is False + assert search_client.index_exists("asdasdasd") is False + assert search_client.index_exists(alias_index_a + "-other") is False + + assert search_client.alias_exists("test-index-a") is False + assert search_client.alias_exists("asdasdasd") is False + assert search_client.alias_exists(alias_index_a + "-other") is False + + +def test_scroll(search_client, generic_index): + records = [ + {"id": 1, "title": "Green Eggs & Ham", "notes": "why are the eggs green?"}, + {"id": 2, "title": "The Cat in the Hat", "notes": "silly cat wears a hat"}, + {"id": 3, "title": "One Fish, Two Fish, Red Fish, Blue Fish", "notes": "fish"}, + {"id": 4, "title": "Fox in Socks", "notes": "why he wearing socks?"}, + {"id": 5, "title": "The Lorax", "notes": "trees"}, + {"id": 6, "title": "Oh, the Places You'll Go", "notes": "graduation gift"}, + {"id": 7, "title": "Hop on Pop", "notes": "Let him sleep"}, + {"id": 8, "title": "How the Grinch Stole Christmas", "notes": "who"}, + ] + + search_client.bulk_upsert(generic_index, records, primary_key_field="id") + + results = [] + + for response in search_client.scroll(generic_index, {"size": 3}): + assert response.total_records == 8 + results.append(response) + + assert len(results) == 3 + assert len(results[0].records) == 3 + assert len(results[1].records) == 3 + assert len(results[2].records) == 2 diff --git 
a/api/tests/src/api/opportunities_v1/test_opportunity_route_search.py b/api/tests/src/api/opportunities_v1/test_opportunity_route_search.py index c1661e431..ae5ee250b 100644 --- a/api/tests/src/api/opportunities_v1/test_opportunity_route_search.py +++ b/api/tests/src/api/opportunities_v1/test_opportunity_route_search.py @@ -72,6 +72,11 @@ def build_opp( funding_categories: list, post_date: date, close_date: date | None, + is_cost_sharing: bool, + expected_number_of_awards: int | None, + award_floor: int | None, + award_ceiling: int | None, + estimated_total_program_funding: int | None, ) -> Opportunity: opportunity = OpportunityFactory.build( opportunity_title=opportunity_title, @@ -98,6 +103,11 @@ def build_opp( funding_categories=funding_categories, post_date=post_date, close_date=close_date, + is_cost_sharing=is_cost_sharing, + expected_number_of_awards=expected_number_of_awards, + award_floor=award_floor, + award_ceiling=award_ceiling, + estimated_total_program_funding=estimated_total_program_funding, ) opportunity.current_opportunity_summary = CurrentOpportunitySummaryFactory.build( @@ -135,6 +145,11 @@ def build_opp( funding_categories=[FundingCategory.EDUCATION], post_date=date(2020, 3, 1), close_date=date(2027, 6, 1), + is_cost_sharing=True, + expected_number_of_awards=3, + award_floor=50_000, + award_ceiling=5_000_000, + estimated_total_program_funding=15_000_000, ) NASA_INNOVATIONS = build_opp( @@ -149,6 +164,11 @@ def build_opp( funding_categories=[FundingCategory.SCIENCE_TECHNOLOGY_AND_OTHER_RESEARCH_AND_DEVELOPMENT], post_date=date(2019, 3, 1), close_date=None, + is_cost_sharing=False, + expected_number_of_awards=1, + award_floor=5000, + award_ceiling=5000, + estimated_total_program_funding=5000, ) NASA_SUPERSONIC = build_opp( @@ -163,6 +183,11 @@ def build_opp( funding_categories=[FundingCategory.SCIENCE_TECHNOLOGY_AND_OTHER_RESEARCH_AND_DEVELOPMENT], post_date=date(2021, 3, 1), close_date=date(2030, 6, 1), + is_cost_sharing=True, + expected_number_of_awards=9, + award_floor=10_000, + award_ceiling=50_000, + estimated_total_program_funding=None, ) NASA_K12_DIVERSITY = build_opp( @@ -177,6 +202,11 @@ def build_opp( funding_categories=[FundingCategory.EDUCATION], post_date=date(2025, 3, 1), close_date=date(2018, 6, 1), + is_cost_sharing=False, + expected_number_of_awards=None, + award_floor=None, + award_ceiling=None, + estimated_total_program_funding=None, ) LOC_TEACHING = build_opp( @@ -197,6 +227,11 @@ def build_opp( funding_categories=[FundingCategory.EDUCATION], post_date=date(2031, 3, 1), close_date=date(2010, 6, 1), + is_cost_sharing=True, + expected_number_of_awards=100, + award_floor=500, + award_ceiling=1_000, + estimated_total_program_funding=10_000, ) LOC_HIGHER_EDUCATION = build_opp( @@ -214,6 +249,11 @@ def build_opp( funding_categories=[FundingCategory.OTHER], post_date=date(2026, 3, 1), close_date=None, + is_cost_sharing=False, + expected_number_of_awards=1, + award_floor=None, + award_ceiling=None, + estimated_total_program_funding=15_000_000, ) DOS_DIGITAL_LITERACY = build_opp( @@ -233,6 +273,11 @@ def build_opp( funding_categories=[FundingCategory.OTHER], post_date=date(2028, 3, 1), close_date=date(2023, 6, 1), + is_cost_sharing=True, + expected_number_of_awards=2, + award_floor=5, + award_ceiling=10, + estimated_total_program_funding=15, ) DOC_SPACE_COAST = build_opp( @@ -251,6 +296,11 @@ def build_opp( funding_categories=[FundingCategory.OTHER, FundingCategory.REGIONAL_DEVELOPMENT], post_date=date(2017, 3, 1), close_date=date(2019, 6, 1), + 
is_cost_sharing=False, + expected_number_of_awards=1000, + award_floor=1, + award_ceiling=2, + estimated_total_program_funding=2000, ) DOC_MANUFACTURING = build_opp( @@ -269,6 +319,11 @@ def build_opp( ], post_date=date(2013, 3, 1), close_date=date(2035, 6, 1), + is_cost_sharing=True, + expected_number_of_awards=25, + award_floor=50_000_000, + award_ceiling=5_000_000_000, + estimated_total_program_funding=15_000_000_000, ) OPPORTUNITIES = [ @@ -709,33 +764,257 @@ def test_search_filters_200(self, client, api_auth_token, search_request, expect call_search_and_validate(client, api_auth_token, search_request, expected_results) @pytest.mark.parametrize( - "search_request", + "search_request, expected_results", [ - # Post Date - (get_search_request(post_date={"start_date": None})), - (get_search_request(post_date={"end_date": None})), - (get_search_request(post_date={"start_date": "2020-01-01"})), - (get_search_request(post_date={"end_date": "2020-02-01"})), - (get_search_request(post_date={"start_date": None, "end_date": None})), - (get_search_request(post_date={"start_date": "2020-01-01", "end_date": None})), - (get_search_request(post_date={"start_date": None, "end_date": "2020-02-01"})), - (get_search_request(post_date={"start_date": "2020-01-01", "end_date": "2020-02-01"})), - # Close Date - (get_search_request(close_date={"start_date": None})), - (get_search_request(close_date={"end_date": None})), - (get_search_request(close_date={"start_date": "2020-01-01"})), - (get_search_request(close_date={"end_date": "2020-02-01"})), - (get_search_request(close_date={"start_date": None, "end_date": None})), - (get_search_request(close_date={"start_date": "2020-01-01", "end_date": None})), - (get_search_request(close_date={"start_date": None, "end_date": "2020-02-01"})), - (get_search_request(close_date={"start_date": "2020-01-01", "end_date": "2020-02-01"})), + # Post date + ( + get_search_request( + post_date={"start_date": "1970-01-01", "end_date": "2050-01-01"} + ), + OPPORTUNITIES, + ), + ( + get_search_request( + post_date={"start_date": "1999-01-01", "end_date": "2000-01-01"} + ), + [], + ), + ( + get_search_request( + post_date={"start_date": "2015-01-01", "end_date": "2018-01-01"} + ), + [DOC_SPACE_COAST], + ), + ( + get_search_request( + post_date={"start_date": "2019-06-01", "end_date": "2024-01-01"} + ), + [NASA_SPACE_FELLOWSHIP, NASA_SUPERSONIC], + ), + (get_search_request(post_date={"end_date": "2016-01-01"}), [DOC_MANUFACTURING]), + # Close date + ( + get_search_request( + close_date={"start_date": "1970-01-01", "end_date": "2050-01-01"} + ), + [ + NASA_SPACE_FELLOWSHIP, + NASA_SUPERSONIC, + NASA_K12_DIVERSITY, + LOC_TEACHING, + DOS_DIGITAL_LITERACY, + DOC_SPACE_COAST, + DOC_MANUFACTURING, + ], + ), + ( + get_search_request(close_date={"start_date": "2019-01-01"}), + [ + NASA_SPACE_FELLOWSHIP, + NASA_SUPERSONIC, + DOS_DIGITAL_LITERACY, + DOC_SPACE_COAST, + DOC_MANUFACTURING, + ], + ), + ( + get_search_request(close_date={"end_date": "2019-01-01"}), + [NASA_K12_DIVERSITY, LOC_TEACHING], + ), + ( + get_search_request( + close_date={"start_date": "2015-01-01", "end_date": "2019-12-01"} + ), + [NASA_K12_DIVERSITY, DOC_SPACE_COAST], + ), ], ) - def test_search_validate_date_filters_200(self, client, api_auth_token, search_request): - resp = client.post( - "/v1/opportunities/search", json=search_request, headers={"X-Auth": api_auth_token} - ) - assert resp.status_code == 200 + def test_search_filters_date_200( + self, client, api_auth_token, search_request, expected_results + ): + 
call_search_and_validate(client, api_auth_token, search_request, expected_results) + + @pytest.mark.parametrize( + "search_request, expected_results", + [ + # Is cost sharing + (get_search_request(is_cost_sharing_one_of=[True, False]), OPPORTUNITIES), + (get_search_request(is_cost_sharing_one_of=["1", "0"]), OPPORTUNITIES), + ( + get_search_request(is_cost_sharing_one_of=["t"]), + [ + NASA_SPACE_FELLOWSHIP, + NASA_SUPERSONIC, + LOC_TEACHING, + DOS_DIGITAL_LITERACY, + DOC_MANUFACTURING, + ], + ), + ( + get_search_request(is_cost_sharing_one_of=["on"]), + [ + NASA_SPACE_FELLOWSHIP, + NASA_SUPERSONIC, + LOC_TEACHING, + DOS_DIGITAL_LITERACY, + DOC_MANUFACTURING, + ], + ), + ( + get_search_request(is_cost_sharing_one_of=["false"]), + [NASA_INNOVATIONS, NASA_K12_DIVERSITY, LOC_HIGHER_EDUCATION, DOC_SPACE_COAST], + ), + ( + get_search_request(is_cost_sharing_one_of=["no"]), + [NASA_INNOVATIONS, NASA_K12_DIVERSITY, LOC_HIGHER_EDUCATION, DOC_SPACE_COAST], + ), + ], + ) + def test_search_bool_filters_200( + self, client, api_auth_token, search_request, expected_results + ): + call_search_and_validate(client, api_auth_token, search_request, expected_results) + + @pytest.mark.parametrize( + "search_request, expected_results", + [ + # Expected Number of Awards + ( + get_search_request(expected_number_of_awards={"min": 0, "max": 1000}), + [ + NASA_SPACE_FELLOWSHIP, + NASA_INNOVATIONS, + NASA_SUPERSONIC, + LOC_TEACHING, + LOC_HIGHER_EDUCATION, + DOS_DIGITAL_LITERACY, + DOC_SPACE_COAST, + DOC_MANUFACTURING, + ], + ), + ( + get_search_request(expected_number_of_awards={"min": 5, "max": 10}), + [NASA_SUPERSONIC], + ), + ( + get_search_request(expected_number_of_awards={"min": 12}), + [LOC_TEACHING, DOC_SPACE_COAST, DOC_MANUFACTURING], + ), + ( + get_search_request(expected_number_of_awards={"min": 7}), + [NASA_SUPERSONIC, LOC_TEACHING, DOC_SPACE_COAST, DOC_MANUFACTURING], + ), + # Award Floor + ( + get_search_request(award_floor={"min": 0, "max": 10_000_000_000}), + [ + NASA_SPACE_FELLOWSHIP, + NASA_INNOVATIONS, + NASA_SUPERSONIC, + LOC_TEACHING, + DOS_DIGITAL_LITERACY, + DOC_SPACE_COAST, + DOC_MANUFACTURING, + ], + ), + ( + get_search_request(award_floor={"min": 1, "max": 5_000}), + [ + NASA_INNOVATIONS, + LOC_TEACHING, + DOS_DIGITAL_LITERACY, + DOC_SPACE_COAST, + ], + ), + ( + get_search_request(award_floor={"min": 5_000, "max": 10_000}), + [ + NASA_INNOVATIONS, + NASA_SUPERSONIC, + ], + ), + # Award Ceiling + ( + get_search_request(award_ceiling={"min": 0, "max": 10_000_000_000}), + [ + NASA_SPACE_FELLOWSHIP, + NASA_INNOVATIONS, + NASA_SUPERSONIC, + LOC_TEACHING, + DOS_DIGITAL_LITERACY, + DOC_SPACE_COAST, + DOC_MANUFACTURING, + ], + ), + ( + get_search_request(award_ceiling={"min": 5_000, "max": 50_000}), + [ + NASA_INNOVATIONS, + NASA_SUPERSONIC, + ], + ), + ( + get_search_request(award_ceiling={"min": 50_000}), + [ + NASA_SPACE_FELLOWSHIP, + NASA_SUPERSONIC, + DOC_MANUFACTURING, + ], + ), + # Estimated Total Program Funding + ( + get_search_request( + estimated_total_program_funding={"min": 0, "max": 100_000_000_000} + ), + [ + NASA_SPACE_FELLOWSHIP, + NASA_INNOVATIONS, + LOC_TEACHING, + LOC_HIGHER_EDUCATION, + DOS_DIGITAL_LITERACY, + DOC_SPACE_COAST, + DOC_MANUFACTURING, + ], + ), + ( + get_search_request(estimated_total_program_funding={"min": 0, "max": 5_000}), + [ + NASA_INNOVATIONS, + DOS_DIGITAL_LITERACY, + DOC_SPACE_COAST, + ], + ), + # Mix + ( + get_search_request( + expected_number_of_awards={"min": 0}, + award_floor={"max": 10_000}, + award_ceiling={"max": 10_000_000}, + 
estimated_total_program_funding={"min": 10_000}, + ), + [LOC_TEACHING], + ), + ( + get_search_request( + expected_number_of_awards={"max": 10}, + award_floor={"min": 1_000, "max": 10_000}, + award_ceiling={"max": 10_000_000}, + ), + [NASA_INNOVATIONS, NASA_SUPERSONIC], + ), + ( + get_search_request( + expected_number_of_awards={"min": 1, "max": 2}, + award_floor={"min": 0, "max": 1000}, + award_ceiling={"min": 10000, "max": 10000000}, + estimated_total_program_funding={"min": 123456, "max": 345678}, + ), + [], + ), + ], + ) + def test_search_int_filters_200(self, client, api_auth_token, search_request, expected_results): + call_search_and_validate(client, api_auth_token, search_request, expected_results) @pytest.mark.parametrize( "search_request", @@ -760,7 +1039,7 @@ def test_search_validate_date_filters_200(self, client, api_auth_token, search_r (get_search_request(close_date={"end_date": 5})), ], ) - def test_search_validate_date_filters_422(self, client, api_auth_token, search_request): + def test_search_validate_date_filters_format_422(self, client, api_auth_token, search_request): resp = client.post( "/v1/opportunities/search", json=search_request, headers={"X-Auth": api_auth_token} ) @@ -771,6 +1050,34 @@ def test_search_validate_date_filters_422(self, client, api_auth_token, search_r assert json["message"] == "Validation error" assert error["message"] == "Not a valid date." + @pytest.mark.parametrize( + "search_request", + [ + # Post Date + (get_search_request(post_date={"start_date": None, "end_date": None})), + (get_search_request(post_date={"start_date": None})), + (get_search_request(post_date={"end_date": None})), + (get_search_request(post_date={})), + # Close Date + (get_search_request(close_date={"start_date": None, "end_date": None})), + (get_search_request(close_date={"start_date": None})), + (get_search_request(close_date={"end_date": None})), + (get_search_request(close_date={})), + ], + ) + def test_search_validate_date_filters_nullability_422( + self, client, api_auth_token, search_request + ): + resp = client.post( + "/v1/opportunities/search", json=search_request, headers={"X-Auth": api_auth_token} + ) + assert resp.status_code == 422 + + json = resp.get_json() + error = json["errors"][0] + assert json["message"] == "Validation error" + assert error["message"] == "At least one of start_date or end_date must be provided." + @pytest.mark.parametrize( "search_request", [ @@ -812,23 +1119,6 @@ def test_search_validate_assistance_listing_filters_422( assert json["message"] == "Validation error" assert error["message"] == "String does not match expected pattern." - @pytest.mark.parametrize( - "search_request", - [ - get_search_request(is_cost_sharing_one_of=[True, False]), - get_search_request(is_cost_sharing_one_of=["1", "0"]), - get_search_request(is_cost_sharing_one_of=["t", "f"]), - get_search_request(is_cost_sharing_one_of=["true", "false"]), - get_search_request(is_cost_sharing_one_of=["on", "off"]), - get_search_request(is_cost_sharing_one_of=["yes", "no"]), - ], - ) - def test_search_validate_is_cost_sharing_200(self, client, api_auth_token, search_request): - resp = client.post( - "/v1/opportunities/search", json=search_request, headers={"X-Auth": api_auth_token} - ) - assert resp.status_code == 200 - @pytest.mark.parametrize( "search_request", [ @@ -852,33 +1142,6 @@ def test_search_validate_is_cost_sharing_filters_422( assert json["message"] == "Validation error" assert error["message"] == "Not a valid boolean." 
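For context on what these parametrized cases exercise: `one_of` filters are expected to become OpenSearch `terms` clauses on the mapped fields, while `min`/`max` and `start_date`/`end_date` pairs become `range` clauses. The query builder itself is not part of this diff, so the body below is only an approximation of the generated filter section; the `summary.post_date` path in particular is an assumption.

```python
# Approximate OpenSearch body for a request filtering on cost sharing, award floor,
# and post date; an omitted bound is simply left out of its range clause.
example_search_body = {
    "query": {
        "bool": {
            "filter": [
                {"terms": {"summary.is_cost_sharing": [True]}},
                {"range": {"summary.award_floor": {"gte": 0, "lte": 1_000}}},
                {"range": {"summary.post_date": {"gte": "2020-01-01", "lte": "2020-02-01"}}},
            ]
        }
    }
}
```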
- @pytest.mark.parametrize( - "search_request", - [ - get_search_request( - expected_number_of_awards={"min": 0}, - award_floor={"max": 35}, - award_ceiling={"max": "10000000"}, - estimated_total_program_funding={"min": "123456"}, - ), - get_search_request( - expected_number_of_awards={"min": 1, "max": 2}, - award_floor={"min": 0, "max": 1000}, - award_ceiling={"min": 10000, "max": 10000000}, - estimated_total_program_funding={"min": 123456, "max": 345678}, - ), - get_search_request(expected_number_of_awards={"min": 1, "max": 2}), - get_search_request(award_floor={"min": 0, "max": 1000}), - get_search_request(award_ceiling={"min": "10000", "max": 10000000}), - get_search_request(estimated_total_program_funding={"min": 123456, "max": "345678"}), - ], - ) - def test_search_validate_award_values_200(self, client, api_auth_token, search_request): - resp = client.post( - "/v1/opportunities/search", json=search_request, headers={"X-Auth": api_auth_token} - ) - assert resp.status_code == 200 - @pytest.mark.parametrize( "search_request", [ @@ -925,6 +1188,44 @@ def test_search_validate_award_values_negative_422( for error in json["errors"]: assert error["message"] == "Must be greater than or equal to 0." + @pytest.mark.parametrize( + "search_request", + [ + # Both set to None + get_search_request( + expected_number_of_awards={"min": None, "max": None}, + award_floor={"min": None, "max": None}, + award_ceiling={"min": None, "max": None}, + estimated_total_program_funding={"min": None, "max": None}, + ), + # Min only set + get_search_request( + expected_number_of_awards={"min": None}, + award_floor={"min": None}, + award_ceiling={"min": None}, + estimated_total_program_funding={"min": None}, + ), + # Max only set + get_search_request( + expected_number_of_awards={"max": None}, + award_floor={"max": None}, + award_ceiling={"max": None}, + estimated_total_program_funding={"max": None}, + ), + ], + ) + def test_search_validate_award_values_nullability_422( + self, client, api_auth_token, search_request + ): + resp = client.post( + "/v1/opportunities/search", json=search_request, headers={"X-Auth": api_auth_token} + ) + + json = resp.get_json() + assert json["message"] == "Validation error" + for error in json["errors"]: + assert error["message"] == "At least one of min or max must be provided." 
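The nullability cases above expect the "at least one of min or max" rule to be enforced at the schema level rather than per field. The request schemas themselves are outside this diff; a minimal marshmallow sketch of such a rule (class and field names are illustrative) could look like this:

```python
from marshmallow import Schema, ValidationError, fields, validates_schema


class IntRangeFilterSchema(Schema):
    """Illustrative request schema for an integer min/max filter."""

    min = fields.Integer(allow_none=True)
    max = fields.Integer(allow_none=True)

    @validates_schema
    def validate_at_least_one_bound(self, data, **kwargs):
        # Explicit nulls and missing keys are treated the same, matching the
        # "min only set" / "max only set" 422 cases in the tests above.
        if data.get("min") is None and data.get("max") is None:
            raise ValidationError("At least one of min or max must be provided.")
```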
+ @pytest.mark.parametrize( "search_request, expected_results", [ diff --git a/api/tests/src/data_migration/transformation/conftest.py b/api/tests/src/data_migration/transformation/conftest.py index 443c113b6..d78af140d 100644 --- a/api/tests/src/data_migration/transformation/conftest.py +++ b/api/tests/src/data_migration/transformation/conftest.py @@ -7,6 +7,7 @@ from src.constants.lookup_constants import ApplicantType, FundingCategory, FundingInstrument from src.data_migration.transformation.transform_oracle_data_task import TransformOracleDataTask from src.db.models import staging +from src.db.models.agency_models import Agency from src.db.models.opportunity_models import ( LinkOpportunitySummaryApplicantType, LinkOpportunitySummaryFundingCategory, @@ -299,13 +300,42 @@ def setup_funding_category( return source_funding_category +def setup_agency( + agency_code: str, + create_existing: bool, + is_already_processed: bool = False, + deleted_fields: set | None = None, + already_processed_fields: set | None = None, + source_values: dict | None = None, +): + if source_values is None: + source_values = {} + + tgroups = f.create_tgroups_agency( + agency_code, + is_already_processed=is_already_processed, + deleted_fields=deleted_fields, + already_processed_fields=already_processed_fields, + **source_values, + ) + + if create_existing: + f.AgencyFactory.create(agency_code=agency_code) + + return tgroups + + def validate_matching_fields( source, destination, fields: list[Tuple[str, str]], expect_all_to_match: bool ): mismatched_fields = [] for source_field, destination_field in fields: - source_value = getattr(source, source_field) + if isinstance(source, dict): + source_value = source.get(source_field) + else: + source_value = getattr(source, source_field) + destination_value = getattr(destination, destination_field) # Some fields that we copy in are datetime typed (although behave as dates and we convert as such) @@ -657,3 +687,63 @@ def validate_funding_category( [("creator_id", "created_by"), ("last_upd_id", "updated_by")], expect_values_to_match, ) + + +AGENCY_FIELD_MAPPING = [ + ("AgencyName", "agency_name"), + ("AgencyCode", "sub_agency_code"), + ("AgencyCFDA", "assistance_listing_number"), + ("ldapGp", "ldap_group"), + ("description", "description"), + ("label", "label"), +] + +AGENCY_CONTACT_FIELD_MAPPING = [ + ("AgencyContactName", "contact_name"), + ("AgencyContactAddress1", "address_line_1"), + ("AgencyContactCity", "city"), + ("AgencyContactState", "state"), + ("AgencyContactZipCode", "zip_code"), + ("AgencyContactTelephone", "phone_number"), + ("AgencyContactEMail", "primary_email"), +] + + +def validate_agency( + db_session, + source_tgroups: list[staging.tgroups.Tgroups], + expect_in_db: bool = True, + expect_values_to_match: bool = True, + is_test_agency: bool = False, + non_matching_fields: set | None = None, +): + agency_code = source_tgroups[0].get_agency_code() + agency = db_session.query(Agency).filter(Agency.agency_code == agency_code).one_or_none() + + if not expect_in_db: + assert agency is None + return + + assert agency is not None + + # need to restructure the tgroups into a dict + tgroup_map = {tgroup.get_field_name(): tgroup.value for tgroup in source_tgroups} + + if non_matching_fields is not None: + agency_field_mapping = [m for m in AGENCY_FIELD_MAPPING if m[0] not in non_matching_fields] + else: + agency_field_mapping = AGENCY_FIELD_MAPPING + + validate_matching_fields(tgroup_map, agency, agency_field_mapping, expect_values_to_match) + assert agency.is_test_agency 
== is_test_agency + + if non_matching_fields is not None: + agency_contact_field_mapping = [ + m for m in AGENCY_CONTACT_FIELD_MAPPING if m[0] not in non_matching_fields + ] + else: + agency_contact_field_mapping = AGENCY_CONTACT_FIELD_MAPPING + + validate_matching_fields( + tgroup_map, agency.agency_contact_info, agency_contact_field_mapping, expect_values_to_match + ) diff --git a/api/tests/src/data_migration/transformation/subtask/test_transform_agency.py b/api/tests/src/data_migration/transformation/subtask/test_transform_agency.py new file mode 100644 index 000000000..906ee8e64 --- /dev/null +++ b/api/tests/src/data_migration/transformation/subtask/test_transform_agency.py @@ -0,0 +1,277 @@ +from datetime import datetime + +import pytest + +import src.data_migration.transformation.transform_constants as transform_constants +from src.constants.lookup_constants import ( + AgencyDownloadFileType, + AgencySubmissionNotificationSetting, +) +from src.data_migration.transformation.subtask.transform_agency import ( + TgroupAgency, + TransformAgency, + apply_updates, + transform_agency_download_file_types, + transform_agency_notify, +) +from tests.src.data_migration.transformation.conftest import ( + BaseTransformTestClass, + setup_agency, + validate_agency, +) +from tests.src.db.models.factories import AgencyFactory + + +class TestTransformAgency(BaseTransformTestClass): + @pytest.fixture() + def transform_agency(self, transform_oracle_data_task): + return TransformAgency(transform_oracle_data_task) + + def test_process_agencies(self, db_session, transform_agency): + insert_agency1 = setup_agency("INSERT-AGENCY-1", create_existing=False) + insert_agency2 = setup_agency("INSERT-AGENCY-2", create_existing=False) + insert_agency3 = setup_agency("INSERT-AGENCY-3", create_existing=False) + insert_agency4 = setup_agency("INSERT-AGENCY-4", create_existing=False) + insert_test_agency = setup_agency("GDIT", create_existing=False) + + # Already processed fields are ones that were handled on a prior run and won't be updated + # during this specific run + update_agency1 = setup_agency("UPDATE-AGENCY-1", create_existing=True) + update_agency2 = setup_agency( + "UPDATE-AGENCY-2", create_existing=True, deleted_fields={"AgencyContactEMail2"} + ) + update_agency3 = setup_agency( + "UPDATE-AGENCY-3", + create_existing=True, + already_processed_fields={ + "AgencyName", + "AgencyCFDA", + "description", + "AgencyContactName", + "AgencyContactAddress1", + }, + ) + update_test_agency = setup_agency("SECSCAN", create_existing=True) + + already_processed1 = setup_agency( + "ALREADY-PROCESSED-1", create_existing=True, is_already_processed=True + ) + already_processed2 = setup_agency( + "ALREADY-PROCESSED-2", create_existing=True, is_already_processed=True + ) + already_processed3 = setup_agency( + "ALREADY-PROCESSED-3", create_existing=True, is_already_processed=True + ) + + insert_error = setup_agency( + "INSERT-ERROR", create_existing=False, source_values={"AgencyName": None} + ) + update_error1 = setup_agency( + "UPDATE-ERROR-1", create_existing=True, source_values={"AgencyDownload": "xyz"} + ) + update_error2 = setup_agency( + "UPDATE-ERROR-2", create_existing=True, source_values={"UnknownField": "xyz"} + ) + + transform_agency.run_subtask() + + validate_agency(db_session, insert_agency1) + validate_agency(db_session, insert_agency2) + validate_agency(db_session, insert_agency3) + validate_agency(db_session, insert_agency4) + validate_agency(db_session, insert_test_agency, is_test_agency=True) + + 
validate_agency(db_session, update_agency1) + validate_agency(db_session, update_agency2) + validate_agency( + db_session, + update_agency3, + non_matching_fields={ + "AgencyName", + "AgencyCFDA", + "description", + "AgencyContactName", + "AgencyContactAddress1", + }, + ) + validate_agency(db_session, update_test_agency, is_test_agency=True) + + validate_agency(db_session, already_processed1, expect_values_to_match=False) + validate_agency(db_session, already_processed2, expect_values_to_match=False) + validate_agency(db_session, already_processed3, expect_values_to_match=False) + + validate_agency(db_session, insert_error, expect_in_db=False) + validate_agency(db_session, update_error1, expect_values_to_match=False) + validate_agency(db_session, update_error2, expect_values_to_match=False) + + metrics = transform_agency.metrics + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_PROCESSED] == 12 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_INSERTED] == 5 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_UPDATED] == 4 + assert metrics[transform_constants.Metrics.TOTAL_ERROR_COUNT] == 3 + + # Rerunning does mostly nothing, it will attempt to re-process the three that errored + # but otherwise won't find anything else + db_session.commit() # commit to end any existing transactions as run_subtask starts a new one + transform_agency.run_subtask() + + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_PROCESSED] == 15 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_INSERTED] == 5 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_UPDATED] == 4 + assert metrics[transform_constants.Metrics.TOTAL_ERROR_COUNT] == 6 + + def test_process_tgroups_missing_fields_for_insert(self, db_session, transform_agency): + # Fields set to None don't get a tgroup record created + insert_that_will_fail = setup_agency( + "ERROR-CASE-MISSING-FIELDS", + create_existing=False, + source_values={"AgencyName": None, "AgencyContactCity": None}, + ) + + with pytest.raises( + ValueError, + match="Cannot create agency ERROR-CASE-MISSING-FIELDS as required fields are missing", + ): + transform_agency.process_tgroups( + TgroupAgency("ERROR-CASE-MISSING-FIELDS", insert_that_will_fail, has_update=True), + None, + ) + + validate_agency(db_session, insert_that_will_fail, expect_in_db=False) + + def test_process_tgroups_unknown_field(self, db_session, transform_agency): + insert_that_will_fail = setup_agency( + "ERROR-CASE-UNKNOWN-FIELD", create_existing=False, source_values={"MysteryField": "X"} + ) + + with pytest.raises(ValueError, match="Unknown tgroups agency field"): + transform_agency.process_tgroups( + TgroupAgency("ERROR-CASE-UNKNOWN-FIELD", insert_that_will_fail, has_update=True), + None, + ) + + validate_agency(db_session, insert_that_will_fail, expect_in_db=False) + + def test_process_tgroups_disallowed_deleted_fields(self, db_session, transform_agency): + update_that_will_fail = setup_agency( + "ERROR-CASE-DELETED-FIELD", create_existing=True, deleted_fields={"AgencyContactCity"} + ) + + with pytest.raises( + ValueError, + match="Field AgencyContactCity in tgroups cannot be deleted as it is not nullable", + ): + transform_agency.process_tgroups( + TgroupAgency("ERROR-CASE-DELETED-FIELD", update_that_will_fail, has_update=True), + None, + ) + + validate_agency(db_session, update_that_will_fail, expect_values_to_match=False) + + def test_process_tgroups_invalid_file_type(self, db_session, transform_agency): + insert_that_will_fail = setup_agency( + 
"ERROR-CASE-BAD-DOWNLOAD", create_existing=False, source_values={"AgencyDownload": "X"} + ) + + with pytest.raises(ValueError, match="Unrecognized agency download file type value"): + transform_agency.process_tgroups( + TgroupAgency("ERROR-CASE-BAD-DOWNLOAD", insert_that_will_fail, has_update=True), + None, + ) + + validate_agency(db_session, insert_that_will_fail, expect_in_db=False) + + def test_process_tgroups_invalid_agency_notify(self, db_session, transform_agency): + insert_that_will_fail = setup_agency( + "ERROR-CASE-BAD-NOTIFY", create_existing=False, source_values={"AgencyNotify": "4"} + ) + + with pytest.raises(ValueError, match="Unrecognized agency notify setting value"): + transform_agency.process_tgroups( + TgroupAgency("ERROR-CASE-BAD-NOTIFY", insert_that_will_fail, has_update=True), None + ) + + validate_agency(db_session, insert_that_will_fail, expect_in_db=False) + + +@pytest.mark.parametrize( + "value,expected_value", + [ + ("0", set()), + ("1", {AgencyDownloadFileType.XML}), + ("2", {AgencyDownloadFileType.XML, AgencyDownloadFileType.PDF}), + ("3", {AgencyDownloadFileType.PDF}), + ], +) +def test_transform_agency_download_file_types(value, expected_value): + assert transform_agency_download_file_types(value) == expected_value + + +@pytest.mark.parametrize("value", ["A", "B", "NULL", "", None]) +def test_transform_agency_download_file_types_unexpected_values(value): + with pytest.raises(ValueError, match="Unrecognized agency download file type value"): + transform_agency_download_file_types(value) + + +@pytest.mark.parametrize( + "value,expected_value", + [ + ("1", AgencySubmissionNotificationSetting.NEVER), + ("2", AgencySubmissionNotificationSetting.FIRST_APPLICATION_ONLY), + ("3", AgencySubmissionNotificationSetting.ALWAYS), + ], +) +def test_transform_agency_notify(value, expected_value): + assert transform_agency_notify(value) == expected_value + + +@pytest.mark.parametrize("value", ["A", "B", "NULL", "", None]) +def test_transform_agency_notify_unexpected_value(value): + with pytest.raises(ValueError, match="Unrecognized agency notify setting value"): + transform_agency_notify(value) + + +@pytest.mark.parametrize( + "agency_created_at,agency_updated_at,created_at,updated_at,expect_created_at_to_change,expect_updated_at_to_change", + [ + (None, None, None, None, False, False), + (None, None, datetime.now(), datetime.now(), True, True), + ( + datetime(2020, 1, 1), + datetime(2021, 1, 1), + datetime(2019, 12, 31), + datetime(2021, 1, 2), + False, + True, + ), + ( + datetime(2020, 1, 1), + datetime(2021, 1, 1), + datetime(2020, 12, 31), + datetime(2020, 1, 1), + False, + False, + ), + ], +) +def test_apply_updates_timestamps( + agency_created_at, + agency_updated_at, + created_at, + updated_at, + expect_created_at_to_change, + expect_updated_at_to_change, +): + agency = AgencyFactory.build(created_at=agency_created_at, updated_at=agency_updated_at) + + apply_updates(agency, {}, created_at, updated_at) + + if expect_created_at_to_change: + assert agency.created_at == created_at + else: + assert agency.created_at == agency_created_at + + if expect_updated_at_to_change: + assert agency.updated_at == updated_at + else: + assert agency.updated_at == agency_updated_at diff --git a/api/tests/src/db/models/factories.py b/api/tests/src/db/models/factories.py index 9aa87663f..9e215a78f 100644 --- a/api/tests/src/db/models/factories.py +++ b/api/tests/src/db/models/factories.py @@ -24,6 +24,8 @@ import src.db.models.transfer.topportunity_models as transfer_topportunity_models 
import src.util.datetime_util as datetime_util from src.constants.lookup_constants import ( + AgencyDownloadFileType, + AgencySubmissionNotificationSetting, ApplicantType, FundingCategory, FundingInstrument, @@ -31,6 +33,7 @@ OpportunityCategoryLegacy, OpportunityStatus, ) +from src.db.models import agency_models def sometimes_none(factory_value, none_chance: float = 0.5): @@ -137,7 +140,9 @@ class CustomProvider(BaseProvider): "{{word}}-###-##", ] - YN_BOOLEAN_VALUES = ["Y", "N", "Yes", "No"] + YN_BOOLEAN_VALUES = ["Y", "N"] + + YN_YESNO_BOOLEAN_VALUES = ["Y", "N", "Yes", "No"] def agency(self) -> str: return self.random_element(self.AGENCIES) @@ -177,6 +182,9 @@ def summary_description(self) -> str: def yn_boolean(self) -> str: return self.random_element(self.YN_BOOLEAN_VALUES) + def yn_yesno_boolean(self) -> str: + return self.random_element(self.YN_YESNO_BOOLEAN_VALUES) + fake = faker.Faker() fake.add_provider(CustomProvider) @@ -638,6 +646,61 @@ class Meta: applicant_type = factory.Iterator(ApplicantType) +class AgencyContactInfoFactory(BaseFactory): + class Meta: + model = agency_models.AgencyContactInfo + + contact_name = factory.Faker("name") + address_line_1 = factory.Faker("street_address") + address_line_2 = sometimes_none(factory.Sequence(lambda n: f"Room {n}")) + city = factory.Faker("city") + state = factory.Faker("state_abbr") + zip_code = factory.Faker("street_address") + phone_number = factory.Faker("basic_phone_number") + primary_email = factory.Faker("email") + secondary_email = sometimes_none(factory.Faker("email")) + + +class AgencyFactory(BaseFactory): + class Meta: + model = agency_models.Agency + + agency_name = factory.Faker("agency_name") + + agency_code = factory.Faker("agency") + sub_agency_code = factory.LazyAttribute(lambda a: a.agency_code.split("-")[0]) + + assistance_listing_number = factory.Faker("random_int", min=1, max=999) + + agency_submission_notification_setting = factory.fuzzy.FuzzyChoice( + AgencySubmissionNotificationSetting + ) + + agency_contact_info = factory.SubFactory(AgencyContactInfoFactory) + agency_contact_info_id = factory.LazyAttribute( + lambda a: a.agency_contact_info.agency_contact_info_id if a.agency_contact_info else None + ) + + is_test_agency = False + + ldap_group = factory.LazyAttribute(lambda a: a.agency_code) + description = factory.LazyAttribute(lambda a: a.agency_name) + label = factory.LazyAttribute(lambda a: a.agency_name) + is_multilevel_agency = factory.Faker("boolean") + is_multiproject = factory.Faker("boolean") + has_system_to_system_certificate = factory.Faker("boolean") + can_view_packages_in_grace_period = factory.Faker("boolean") + is_image_workspace_enabled = factory.Faker("boolean") + is_validation_workspace_enabled = factory.Faker("boolean") + + agency_download_file_types = factory.Faker( + "random_elements", + length=random.randint(1, 2), + elements=[a for a in AgencyDownloadFileType], + unique=True, + ) + + #################################### # Staging Table Factories #################################### @@ -797,7 +860,7 @@ class Meta: syn_desc = factory.Faker("summary_description") oth_cat_fa_desc = sometimes_none(factory.Faker("paragraph", nb_sentences=1)) - cost_sharing = sometimes_none(factory.Faker("yn_boolean"), none_chance=0.1) + cost_sharing = sometimes_none(factory.Faker("yn_yesno_boolean"), none_chance=0.1) # These int values are stored as strings number_of_awards = sometimes_none( factory.LazyFunction(lambda: str(fake.random_int(1, 25))), none_chance=0.1 @@ -828,7 +891,7 @@ class Meta: 
factory.Faker("date_time_between", start_date="-5y", end_date="now") ) create_ts = factory.Faker("date_time_between", start_date="-10y", end_date="-5y") - sendmail = sometimes_none(factory.Faker("yn_boolean")) + sendmail = sometimes_none(factory.Faker("yn_yesno_boolean")) response_date_desc = sometimes_none(factory.Faker("paragraph", nb_sentences=2)) applicant_elig_desc = sometimes_none(factory.Faker("paragraph", nb_sentences=5)) version_nbr = factory.Faker("random_int", min=0, max=10) @@ -871,7 +934,7 @@ class Meta: forecast_desc = factory.Faker("summary_description") oth_cat_fa_desc = sometimes_none(factory.Faker("paragraph", nb_sentences=1)) - cost_sharing = sometimes_none(factory.Faker("yn_boolean"), none_chance=0.1) + cost_sharing = sometimes_none(factory.Faker("yn_yesno_boolean"), none_chance=0.1) # These int values are stored as strings number_of_awards = sometimes_none( factory.LazyFunction(lambda: str(fake.random_int(1, 25))), none_chance=0.1 @@ -901,7 +964,7 @@ class Meta: factory.Faker("date_time_between", start_date="-5y", end_date="now") ) create_ts = factory.Faker("date_time_between", start_date="-10y", end_date="-5y") - sendmail = sometimes_none(factory.Faker("yn_boolean")) + sendmail = sometimes_none(factory.Faker("yn_yesno_boolean")) applicant_elig_desc = sometimes_none(factory.Faker("paragraph", nb_sentences=5)) version_nbr = factory.Faker("random_int", min=0, max=10) modification_comments = sometimes_none(factory.Faker("paragraph", nb_sentences=1)) @@ -1230,6 +1293,29 @@ class Params: ) +class StagingTgroupsFactory(BaseFactory): + class Meta: + model = staging.tgroups.Tgroups + + keyfield = "" + value = "" + + is_deleted = False + + created_date = factory.Faker("date_time_between", start_date="-10y", end_date="-5y") + last_upd_date = sometimes_none( + factory.Faker("date_time_between", start_date="-5y", end_date="now") + ) + + last_upd_id = factory.Faker("first_name") + creator_id = factory.Faker("first_name") + + class Params: + already_transformed = factory.Trait( + transformed_at=factory.Faker("date_time_between", start_date="-7d", end_date="-1d") + ) + + #################################### # Transfer Table Factories #################################### @@ -1532,3 +1618,92 @@ def build(self) -> opportunity_models.Opportunity: revision_number -= 1 return self.opportunity + + +class StagingTgroupsAgencyFactory(factory.DictFactory): + """ + This does not need to be called directly, and instead you should use + create_tgroups_agency (defined below) in order to call this. + + We use this to help organize factories / the ability to override and set + values for the tgroups agency data which is spread across many rows. 
+ + Note: Any value that is "None" will not be included in the created + tgroups records (empty strings, or strings of values like "null" will be) + """ + + AgencyName = factory.Faker("agency_name") + AgencyCode = "" # see: create_tgroups_agency for how this gets set + AgencyCFDA = factory.Faker("random_int", min=1, max=99) + AgencyDownload = factory.Faker("random_int", min=1, max=3) + AgencyNotify = factory.Faker("random_int", min=1, max=3) + AgencyEnroll = "" # see: create_tgroups_agency for how this gets set + + AgencyContactName = factory.Faker("name") + AgencyContactAddress1 = factory.Faker("street_address") + AgencyContactAddress2 = factory.Maybe( + decider=factory.LazyAttribute(lambda s: random.random() > 0.5), + yes_declaration=factory.Sequence(lambda n: f"Room {n}"), + no_declaration="NULL", + ) + AgencyContactCity = factory.Faker("city") + AgencyContactState = factory.Faker("state_abbr") + AgencyContactZipCode = factory.Faker("postcode") + AgencyContactTelephone = Generators.PhoneNumber + AgencyContactEMail = factory.Faker("email") + AgencyContactEMail2 = sometimes_none(factory.Faker("email")) + + ldapGp = "" # see: create_tgroups_agency for how this gets set + description = factory.LazyAttribute(lambda g: g.AgencyName) + label = factory.LazyAttribute(lambda g: g.AgencyName) + multilevel = sometimes_none("TRUE", none_chance=0.8) + + HasS2SCert = sometimes_none(factory.Faker("yn_boolean"), none_chance=0.8) + ViewPkgsInGracePeriod = sometimes_none(factory.Faker("yn_boolean"), none_chance=0.8) + multiproject = sometimes_none(factory.Faker("yn_boolean"), none_chance=0.8) + ImageWS = sometimes_none(factory.Faker("yn_boolean"), none_chance=0.8) + ValidationWS = sometimes_none(factory.Faker("yn_boolean"), none_chance=0.8) + + +def create_tgroups_agency( + agency_code: str, + is_deleted: bool = False, + is_already_processed: bool = False, + deleted_fields: set | None = None, + already_processed_fields: set | None = None, + **kwargs, +) -> list[staging.tgroups.Tgroups]: + # The agency_code value is actually just the first bit (the top-level agency) + kwargs["AgencyCode"] = agency_code.split("-")[0] + kwargs["AgencyEnroll"] = agency_code + kwargs["ldapGp"] = agency_code + + field_values = StagingTgroupsAgencyFactory.build(**kwargs) + + groups = [] + + field_prefix = f"Agency-{agency_code}-" + + if already_processed_fields is None: + already_processed_fields = set() + + if deleted_fields is None: + deleted_fields = set() + + for field_name, value in field_values.items(): + if value is None: + continue + + is_field_already_processed = is_already_processed or field_name in already_processed_fields + is_field_deleted = is_deleted or field_name in deleted_fields + + tgroup = StagingTgroupsFactory.create( + keyfield=field_prefix + field_name, + value=str(value), + is_deleted=is_field_deleted, + already_transformed=is_field_already_processed, + ) + + groups.append(tgroup) + + return groups diff --git a/api/tests/src/search/backend/test_load_opportunities_to_index.py b/api/tests/src/search/backend/test_load_opportunities_to_index.py index e939f569f..9a3961f2b 100644 --- a/api/tests/src/search/backend/test_load_opportunities_to_index.py +++ b/api/tests/src/search/backend/test_load_opportunities_to_index.py @@ -4,17 +4,18 @@ LoadOpportunitiesToIndex, LoadOpportunitiesToIndexConfig, ) +from src.util.datetime_util import get_now_us_eastern_datetime from tests.conftest import BaseTestClass from tests.src.db.models.factories import OpportunityFactory -class TestLoadOpportunitiesToIndex(BaseTestClass): 
+class TestLoadOpportunitiesToIndexFullRefresh(BaseTestClass): @pytest.fixture(scope="class") def load_opportunities_to_index(self, db_session, search_client, opportunity_index_alias): config = LoadOpportunitiesToIndexConfig( alias_name=opportunity_index_alias, index_prefix="test-load-opps" ) - return LoadOpportunitiesToIndex(db_session, search_client, config) + return LoadOpportunitiesToIndex(db_session, search_client, True, config) def test_load_opportunities_to_index( self, @@ -83,3 +84,70 @@ def test_load_opportunities_to_index( assert set([opp.opportunity_id for opp in opportunities]) == set( [record["opportunity_id"] for record in resp.records] ) + + +class TestLoadOpportunitiesToIndexPartialRefresh(BaseTestClass): + @pytest.fixture(scope="class") + def load_opportunities_to_index(self, db_session, search_client, opportunity_index_alias): + config = LoadOpportunitiesToIndexConfig( + alias_name=opportunity_index_alias, index_prefix="test-load-opps" + ) + return LoadOpportunitiesToIndex(db_session, search_client, False, config) + + def test_load_opportunities_to_index( + self, + truncate_opportunities, + enable_factory_create, + db_session, + search_client, + opportunity_index_alias, + load_opportunities_to_index, + ): + index_name = "partial-refresh-index-" + get_now_us_eastern_datetime().strftime( + "%Y-%m-%d_%H-%M-%S" + ) + search_client.create_index(index_name) + search_client.swap_alias_index( + index_name, load_opportunities_to_index.config.alias_name, delete_prior_indexes=True + ) + + # Load a bunch of records into the DB + opportunities = [] + opportunities.extend(OpportunityFactory.create_batch(size=6, is_posted_summary=True)) + opportunities.extend(OpportunityFactory.create_batch(size=3, is_forecasted_summary=True)) + opportunities.extend(OpportunityFactory.create_batch(size=2, is_closed_summary=True)) + opportunities.extend( + OpportunityFactory.create_batch(size=8, is_archived_non_forecast_summary=True) + ) + opportunities.extend( + OpportunityFactory.create_batch(size=6, is_archived_forecast_summary=True) + ) + + load_opportunities_to_index.run() + + resp = search_client.search(opportunity_index_alias, {"size": 100}) + assert resp.total_records == len(opportunities) + + # Add a few more opportunities that will be created + opportunities.extend(OpportunityFactory.create_batch(size=3, is_posted_summary=True)) + + # Delete some opportunities + opportunities_to_delete = [opportunities.pop(), opportunities.pop(), opportunities.pop()] + for opportunity in opportunities_to_delete: + db_session.delete(opportunity) + + load_opportunities_to_index.run() + + resp = search_client.search(opportunity_index_alias, {"size": 100}) + assert resp.total_records == len(opportunities) + + def test_load_opportunities_to_index_index_does_not_exist(self, db_session, search_client): + config = LoadOpportunitiesToIndexConfig( + alias_name="fake-index-that-will-not-exist", index_prefix="test-load-opps" + ) + load_opportunities_to_index = LoadOpportunitiesToIndex( + db_session, search_client, False, config + ) + + with pytest.raises(RuntimeError, match="please run the full refresh job"): + load_opportunities_to_index.run() diff --git a/documentation/api/database/erds/api-schema.png b/documentation/api/database/erds/api-schema.png index f97bd993c..3ea9cb144 100644 Binary files a/documentation/api/database/erds/api-schema.png and b/documentation/api/database/erds/api-schema.png differ diff --git a/documentation/api/database/erds/full-schema.png b/documentation/api/database/erds/full-schema.png index 
9923cdfef..66d3af5bc 100644 Binary files a/documentation/api/database/erds/full-schema.png and b/documentation/api/database/erds/full-schema.png differ diff --git a/documentation/api/development.md b/documentation/api/development.md index c49d99d8c..733143907 100644 --- a/documentation/api/development.md +++ b/documentation/api/development.md @@ -11,18 +11,29 @@ A very simple [docker-compose.yml](../../docker-compose.yml) has been included t 1. Install the version of Python specified in [pyproject.toml](../../api/pyproject.toml) [pyenv](https://github.com/pyenv/pyenv#installation) is one popular option for installing Python, or [asdf](https://asdf-vm.com/). - -2. After installing and activating the right version of Python, install + - If using pyenv run `pyenv local <version>` to ensure that version will be used in subsequent steps +2. Ensure that `python -V` and `python3 -V` are picking up that version. + - If not, run `pyenv init -` and/or restart your shell to ensure it was run automatically +3. After installing and activating the right version of Python, install [poetry](https://python-poetry.org/docs/#installation) and follow the instructions to add poetry to your path if necessary. ```bash curl -sSL https://install.python-poetry.org | python3 - ``` -3. If you are using an M1 mac, you will need to install postgres as well: `brew install postgresql` (The psycopg2-binary is built from source on M1 macs which requires the postgres executable to be present) - 4. You'll also need [Docker Desktop](https://www.docker.com/products/docker-desktop/) +## Database setup: Run Migrations/Seeds + +1. If you haven't done local development before, you'll need to execute the migrations and seed the DB with data using the steps in [database-local-usage.md](database/database-local-usage.md) + +## OpenSearch setup + +1. Run `make init-opensearch` to set up the OpenSearch container +2. Run `make populate-search-opportunities` to push data previously seeded in the DB into the search index + +If your DB or OpenSearch end up in an odd place, you can reset all the persistent storage using `make volume-recreate` + ## Run the application 1. Make sure you have [Docker Desktop](https://www.docker.com/products/docker-desktop/) installed & running. @@ -36,11 +47,14 @@ A very simple [docker-compose.yml](../../docker-compose.yml) has been included t `make test` will run all of the tests. Additional arguments can be passed to this command which will be passed to pytest like so: `make test args="tests/api/route -v"` which would run all tests in the route folder with verbosity increased. See the [Pytest Docs](https://docs.pytest.org/en/7.1.x/reference/reference.html#command-line-flags) for more details on CLI flags you can set. -`make clean-volumes` will spin down the docker containers + delete the volumes. This can be useful to reset your DB, or fix any bad states your local environment may have gotten into. +`make clean-volumes` will spin down the docker containers + delete the volumes. + +`make volume-recreate` deletes the volumes and then re-initializes the persistent portions of the stack. This can be useful to reset your DB, or fix any bad states your local environment may have gotten into. See the [Makefile](../../api/Makefile) for a full list of commands you can run. The `make console` command initializes a Python REPL environment pre-configured with database connectivity. This allows developers to perform database queries, utilize factories for data generation, and interact with the application's models directly.
+ - Writing a query: `dbs.query(Opportunity).all()` - Saving some factory generated data to the db: `f.OpportunityFactory.create()` @@ -51,8 +65,8 @@ Running in Docker is the default, but on some machines like the M1 Mac, running You can switch which way many of these components are run by setting the `PY_RUN_APPROACH` env variable in your terminal. -* `export PY_RUN_APPROACH=local` will run these components natively -* `export PY_RUN_APPROACH=docker` will run these within Docker +- `export PY_RUN_APPROACH=local` will run these components natively +- `export PY_RUN_APPROACH=docker` will run these within Docker Note that even with the native mode, many components like the DB and API will only ever run in Docker, and you should always make sure that any implementations work within docker. @@ -82,7 +96,8 @@ The API can be run in debug mode that allows for remote attach debugging (curren - See `./vscode/launch.json` which has the debug config. (Named `API Remote Attach`) - Start the server in debug mode via `make start-debug` or `make start-debug run-logs`. - - This will start the `main-app` service with port 5678 exposed. + + - This will start the `main-app` service with port 5678 exposed. - The server will start in waiting mode, waiting for you to attach the debugger (see `/src/app.py`) before continuing to run. @@ -90,8 +105,6 @@ The API can be run in debug mode that allows for remote attach debugging (curren - You should now be able to hit set breakpoints throughout the API - - ## Next steps Now that you're up and running, read the [application docs](../../api/README.md) to familiarize yourself with the application. diff --git a/frontend/README.md b/frontend/README.md index 742dcbd59..6697826c5 100644 --- a/frontend/README.md +++ b/frontend/README.md @@ -166,7 +166,7 @@ It's recommended that developers configure their code editor to auto run these t VSCode instructions 1. Install the [Prettier](https://marketplace.visualstudio.com/items?itemName=esbenp.prettier-vscode) and [ESLint](https://marketplace.visualstudio.com/items?itemName=dbaeumer.vscode-eslint) extensions. -2. Add the following to a `.vscode/settings.json` Worspace Settings file: +2. Add the following to a `.vscode/settings.json` Workspace Settings file: ```json { diff --git a/frontend/src/app/layout.tsx b/frontend/src/app/layout.tsx index e914be1af..993d66550 100644 --- a/frontend/src/app/layout.tsx +++ b/frontend/src/app/layout.tsx @@ -6,5 +6,9 @@ type Props = { }; export default function RootLayout({ children }: Props) { - return children; + return ( + + {children} + + ); }