Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reparse Memory Management #3172

Merged
merged 48 commits into from
Sep 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
cc536b5
- Updated method to batch delete records in elastic to avoid pulling …
elipe17 Sep 4, 2024
a27086c
- Instantiate document once
elipe17 Sep 4, 2024
6f2105f
- Updated clean and reparse to not load entire queryset into memory
elipe17 Sep 5, 2024
223bc40
Merge branch 'develop' into 3170-reparse-memory-mgmt
elipe17 Sep 5, 2024
a8093e5
- Update names of functions
elipe17 Sep 10, 2024
f34b014
- Update sequential function to return boolean to allow testing
elipe17 Sep 10, 2024
ccd1816
Merge branch 'develop' into 3170-reparse-memory-mgmt
elipe17 Sep 11, 2024
d96f455
- new indices
elipe17 Sep 11, 2024
b1f71dd
Merge branch '3170-reparse-memory-mgmt' of https://github.com/raft-te…
elipe17 Sep 11, 2024
5621f7f
- linting
elipe17 Sep 11, 2024
9a03ff3
- Added tests for most exception paths
elipe17 Sep 11, 2024
778c827
-linting
elipe17 Sep 11, 2024
4d2ee69
- Adding refresh
elipe17 Sep 12, 2024
d8a3b44
- testing not duplicating container
elipe17 Sep 12, 2024
fa77bac
- revert test change
elipe17 Sep 12, 2024
01dc2c3
- remove param
elipe17 Sep 12, 2024
876260c
- Add missing tests
elipe17 Sep 12, 2024
76624fc
- adding test
elipe17 Sep 12, 2024
644703f
- Add remaining tests
elipe17 Sep 12, 2024
09bf83d
- Add more debug logging
elipe17 Sep 13, 2024
9fe3d1c
- running only failing test
elipe17 Sep 13, 2024
9421869
- more debugging
elipe17 Sep 13, 2024
d456c8b
- backend only
elipe17 Sep 13, 2024
f408833
- remove test_reparse only
elipe17 Sep 13, 2024
a3aa2f4
- Reset settings after parsing
elipe17 Sep 13, 2024
9e28a66
- update file
elipe17 Sep 13, 2024
d7b543a
- Update factories to use the correct types for fields
elipe17 Sep 13, 2024
e1bfce3
- paremetrize test
elipe17 Sep 13, 2024
3f89673
- revert debugging changes
elipe17 Sep 13, 2024
dc55e3e
Merge branch 'develop' into 3170-reparse-memory-mgmt
elipe17 Sep 13, 2024
c3002e0
- Add tests for more coverage
elipe17 Sep 13, 2024
4075194
Merge branch '3170-reparse-memory-mgmt' of https://github.com/raft-te…
elipe17 Sep 13, 2024
96372d8
- linting
elipe17 Sep 13, 2024
830b6d2
- UPdate logic for setting reparse to finished
elipe17 Sep 13, 2024
51ec731
- linting
elipe17 Sep 13, 2024
b9cb34e
- add basic tests for db backup
elipe17 Sep 13, 2024
a85b232
- remove mock
elipe17 Sep 13, 2024
76fa734
- add main routine test for backup
elipe17 Sep 13, 2024
0dfb37e
- linting
elipe17 Sep 13, 2024
16c1e11
- Added comment for posterity
elipe17 Sep 19, 2024
0fd06c2
- Update reparse to not fail/exit when elastic throws index update. i…
elipe17 Sep 23, 2024
8187285
Merge branch 'develop' into 3170-reparse-memory-mgmt
ADPennington Sep 24, 2024
e752589
Merge branch 'develop' into 3170-reparse-memory-mgmt
ADPennington Sep 26, 2024
23924fb
- update terraform for testing
elipe17 Sep 26, 2024
93bd544
Merge branch '3170-reparse-memory-mgmt' of https://github.com/raft-te…
elipe17 Sep 26, 2024
0543f29
- Update terraform based on cloud.gov response
elipe17 Sep 30, 2024
f0a6c55
Merge branch 'develop' into 3170-reparse-memory-mgmt
elipe17 Sep 30, 2024
a981311
Merge branch 'develop' into 3170-reparse-memory-mgmt
ADPennington Sep 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions tdrs-backend/tdpservice/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,12 @@ def test_private_key():
yield get_private_key(key)


@pytest.fixture()
def system_user():
"""Create system user."""
return UserFactory.create(username='system')


# Register factories with pytest-factoryboy for automatic dependency injection
# of model-related fixtures into tests.
register(OwaspZapScanFactory)
Expand Down
7 changes: 3 additions & 4 deletions tdrs-backend/tdpservice/parsers/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,11 +211,10 @@ def rollback_records(unsaved_records, datafile):
f"Encountered error while indexing datafile documents: \n{e}",
"error"
)
logger.warn("Encountered an Elastic exception, enforcing DB cleanup.")
logger.warning("Encountered an Elastic exception, enforcing DB cleanup.")
num_deleted, models = qset.delete()
logger.info("Succesfully performed DB cleanup after elastic failure.")
log_parser_exception(datafile,
"Succesfully performed DB cleanup after elastic failure.",
"Succesfully performed DB cleanup after elastic failure in rollback_records.",
"info"
)
except DatabaseError as e:
Expand Down Expand Up @@ -310,7 +309,7 @@ def delete_serialized_records(duplicate_manager, dfs):
total_deleted += num_deleted
dfs.total_number_of_records_created -= num_deleted
log_parser_exception(dfs.datafile,
"Succesfully performed DB cleanup after elastic failure.",
"Succesfully performed DB cleanup after elastic failure in delete_serialized_records.",
"info"
)
except DatabaseError as e:
Expand Down
82 changes: 41 additions & 41 deletions tdrs-backend/tdpservice/parsers/test/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,43 +184,43 @@ class Meta:
EMPLOYMENT_STATUS = 1
WORK_ELIGIBLE_INDICATOR = "01"
WORK_PART_STATUS = "01"
UNSUB_EMPLOYMENT = 1
SUB_PRIVATE_EMPLOYMENT = 1
SUB_PUBLIC_EMPLOYMENT = 1
WORK_EXPERIENCE_HOP = 1
WORK_EXPERIENCE_EA = 1
WORK_EXPERIENCE_HOL = 1
OJT = 1
JOB_SEARCH_HOP = 1
JOB_SEARCH_EA = 1
JOB_SEARCH_HOL = 1
COMM_SERVICES_HOP = 1
COMM_SERVICES_EA = 1
COMM_SERVICES_HOL = 1
VOCATIONAL_ED_TRAINING_HOP = 1
VOCATIONAL_ED_TRAINING_EA = 1
VOCATIONAL_ED_TRAINING_HOL = 1
JOB_SKILLS_TRAINING_HOP = 1
JOB_SKILLS_TRAINING_EA = 1
JOB_SKILLS_TRAINING_HOL = 1
ED_NO_HIGH_SCHOOL_DIPL_HOP = 1
ED_NO_HIGH_SCHOOL_DIPL_EA = 1
ED_NO_HIGH_SCHOOL_DIPL_HOL = 1
SCHOOL_ATTENDENCE_HOP = 1
SCHOOL_ATTENDENCE_EA = 1
SCHOOL_ATTENDENCE_HOL = 1
PROVIDE_CC_HOP = 1
PROVIDE_CC_EA = 1
PROVIDE_CC_HOL = 1
OTHER_WORK_ACTIVITIES = 1
DEEMED_HOURS_FOR_OVERALL = 1
DEEMED_HOURS_FOR_TWO_PARENT = 1
EARNED_INCOME = 1
UNEARNED_INCOME_TAX_CREDIT = 1
UNEARNED_SOCIAL_SECURITY = 1
UNEARNED_SSI = 1
UNEARNED_WORKERS_COMP = 1
OTHER_UNEARNED_INCOME = 1
UNSUB_EMPLOYMENT = "01"
SUB_PRIVATE_EMPLOYMENT = "01"
SUB_PUBLIC_EMPLOYMENT = "01"
WORK_EXPERIENCE_HOP = "01"
WORK_EXPERIENCE_EA = "01"
WORK_EXPERIENCE_HOL = "01"
OJT = "01"
JOB_SEARCH_HOP = "01"
JOB_SEARCH_EA = "01"
JOB_SEARCH_HOL = "01"
COMM_SERVICES_HOP = "01"
COMM_SERVICES_EA = "01"
COMM_SERVICES_HOL = "01"
VOCATIONAL_ED_TRAINING_HOP = "01"
VOCATIONAL_ED_TRAINING_EA = "01"
VOCATIONAL_ED_TRAINING_HOL = "01"
JOB_SKILLS_TRAINING_HOP = "01"
JOB_SKILLS_TRAINING_EA = "01"
JOB_SKILLS_TRAINING_HOL = "01"
ED_NO_HIGH_SCHOOL_DIPL_HOP = "01"
ED_NO_HIGH_SCHOOL_DIPL_EA = "01"
ED_NO_HIGH_SCHOOL_DIPL_HOL = "01"
SCHOOL_ATTENDENCE_HOP = "01"
SCHOOL_ATTENDENCE_EA = "01"
SCHOOL_ATTENDENCE_HOL = "01"
PROVIDE_CC_HOP = "01"
PROVIDE_CC_EA = "01"
PROVIDE_CC_HOL = "01"
OTHER_WORK_ACTIVITIES = "01"
DEEMED_HOURS_FOR_OVERALL = "01"
DEEMED_HOURS_FOR_TWO_PARENT = "01"
EARNED_INCOME = "01"
UNEARNED_INCOME_TAX_CREDIT = "01"
UNEARNED_SOCIAL_SECURITY = "01"
UNEARNED_SSI = "01"
UNEARNED_WORKERS_COMP = "01"
OTHER_UNEARNED_INCOME = "01"


class TanfT3Factory(factory.django.DjangoModelFactory):
Expand Down Expand Up @@ -451,10 +451,10 @@ class Meta:
CURRENT_MONTH_STATE_EXEMPT = 1
EMPLOYMENT_STATUS = 1
WORK_PART_STATUS = "01"
UNSUB_EMPLOYMENT = 1
SUB_PRIVATE_EMPLOYMENT = 1
SUB_PUBLIC_EMPLOYMENT = 1
OJT = 1
UNSUB_EMPLOYMENT = "01"
SUB_PRIVATE_EMPLOYMENT = "01"
SUB_PUBLIC_EMPLOYMENT = "01"
OJT = "01"
JOB_SEARCH = '1'
COMM_SERVICES = '1'
VOCATIONAL_ED_TRAINING = '1'
Expand Down
9 changes: 9 additions & 0 deletions tdrs-backend/tdpservice/parsers/test/test_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@


import pytest
import os
from django.contrib.admin.models import LogEntry
from django.conf import settings
from django.db.models import Q as Query
Expand Down Expand Up @@ -1739,6 +1740,9 @@ def test_parse_duplicate(file, batch_size, model, record_type, num_errors, dfs,
settings.BULK_CREATE_BATCH_SIZE = batch_size

parse.parse_datafile(datafile, dfs)

settings.BULK_CREATE_BATCH_SIZE = os.getenv("BULK_CREATE_BATCH_SIZE", 10000)

parser_errors = ParserError.objects.filter(file=datafile,
error_type=ParserErrorCategoryChoices.CASE_CONSISTENCY).order_by('id')
for e in parser_errors:
Expand Down Expand Up @@ -1782,6 +1786,9 @@ def test_parse_partial_duplicate(file, batch_size, model, record_type, num_error
settings.BULK_CREATE_BATCH_SIZE = batch_size

parse.parse_datafile(datafile, dfs)

settings.BULK_CREATE_BATCH_SIZE = os.getenv("BULK_CREATE_BATCH_SIZE", 10000)

parser_errors = ParserError.objects.filter(file=datafile,
error_type=ParserErrorCategoryChoices.CASE_CONSISTENCY).order_by('id')
for e in parser_errors:
Expand All @@ -1806,6 +1813,8 @@ def test_parse_cat_4_edge_case_file(cat4_edge_case_file, dfs):

parse.parse_datafile(cat4_edge_case_file, dfs)

settings.BULK_CREATE_BATCH_SIZE = os.getenv("BULK_CREATE_BATCH_SIZE", 10000)

parser_errors = ParserError.objects.filter(file=cat4_edge_case_file).filter(
error_type=ParserErrorCategoryChoices.CASE_CONSISTENCY)

Expand Down
2 changes: 1 addition & 1 deletion tdrs-backend/tdpservice/parsers/validators/category3.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ def validate(record, row_schema):
"Caught exception in validator: validate__WORK_ELIGIBLE_INDICATOR__HOH__AGE. " +
f"With field values: {vals}."
)
logger.error(f'Exception: {e}')
logger.debug(f'Exception: {e}')
elipe17 marked this conversation as resolved.
Show resolved Hide resolved
# Per conversation with Alex on 03/26/2024, returning the true case during exception handling to avoid
# confusing the STTs.
return true_case
Expand Down
25 changes: 14 additions & 11 deletions tdrs-backend/tdpservice/scheduling/management/db_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@


OS_ENV = os.environ
content_type = ContentType.objects.get_for_model(LogEntry)

def get_content_type():
"""Get content type for log entry."""
return ContentType.objects.get_for_model(LogEntry)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was this a result of a linting issue or bad practice for using globals? Or is there some variations among the log lines which may result in a different content_type needing a more dynamic call/evaluation?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was actually causing pytest to fail because it was making a query to the DB and it wasnt marked with @pytest.mark.django_db. The reason we were getting that error is because the globals were being executed upon import of that module. So I had to wrap it in the "getter" so that it was executed in the correct context.


def get_system_values():
"""Return dict of keys and settings to use whether local or deployed."""
Expand Down Expand Up @@ -91,7 +94,7 @@ def backup_database(file_name,
logger.info(msg)
LogEntry.objects.log_action(
user_id=system_user.pk,
content_type_id=content_type.pk,
content_type_id=get_content_type().pk,
object_id=None,
object_repr="Executed Database Backup",
action_flag=ADDITION,
Expand Down Expand Up @@ -123,7 +126,7 @@ def restore_database(file_name, postgres_client, database_uri, system_user):
msg = "Completed database creation."
LogEntry.objects.log_action(
user_id=system_user.pk,
content_type_id=content_type.pk,
content_type_id=get_content_type().pk,
object_id=None,
object_repr="Executed Database create",
action_flag=ADDITION,
Expand All @@ -145,7 +148,7 @@ def restore_database(file_name, postgres_client, database_uri, system_user):
msg = "Completed database restoration."
LogEntry.objects.log_action(
user_id=system_user.pk,
content_type_id=content_type.pk,
content_type_id=get_content_type().pk,
object_id=None,
object_repr="Executed Database restore",
action_flag=ADDITION,
Expand Down Expand Up @@ -177,7 +180,7 @@ def upload_file(file_name, bucket, sys_values, system_user, object_name=None, re
msg = "Successfully uploaded {} to s3://{}/{}.".format(file_name, bucket, object_name)
LogEntry.objects.log_action(
user_id=system_user.pk,
content_type_id=content_type.pk,
content_type_id=get_content_type().pk,
object_id=None,
object_repr="Executed database backup S3 upload",
action_flag=ADDITION,
Expand Down Expand Up @@ -208,7 +211,7 @@ def download_file(bucket,
msg = "Successfully downloaded s3 file {}/{} to {}.".format(bucket, object_name, file_name)
LogEntry.objects.log_action(
user_id=system_user.pk,
content_type_id=content_type.pk,
content_type_id=get_content_type().pk,
object_id=None,
object_repr="Executed database backup S3 download",
action_flag=ADDITION,
Expand Down Expand Up @@ -293,7 +296,7 @@ def main(argv, sys_values, system_user):
if arg_to_backup:
LogEntry.objects.log_action(
user_id=system_user.pk,
content_type_id=content_type.pk,
content_type_id=get_content_type().pk,
object_id=None,
object_repr="Begining Database Backup",
action_flag=ADDITION,
Expand All @@ -316,7 +319,7 @@ def main(argv, sys_values, system_user):

LogEntry.objects.log_action(
user_id=system_user.pk,
content_type_id=content_type.pk,
content_type_id=get_content_type().pk,
object_id=None,
object_repr="Finished Database Backup",
action_flag=ADDITION,
Expand All @@ -329,7 +332,7 @@ def main(argv, sys_values, system_user):
elif arg_to_restore:
LogEntry.objects.log_action(
user_id=system_user.pk,
content_type_id=content_type.pk,
content_type_id=get_content_type().pk,
object_id=None,
object_repr="Begining Database Restore",
action_flag=ADDITION,
Expand All @@ -352,7 +355,7 @@ def main(argv, sys_values, system_user):

LogEntry.objects.log_action(
user_id=system_user.pk,
content_type_id=content_type.pk,
content_type_id=get_content_type().pk,
object_id=None,
object_repr="Finished Database Restore",
action_flag=ADDITION,
Expand All @@ -377,7 +380,7 @@ def run_backup(arg):
logger.error(f"Caught Exception in run_backup. Exception: {e}.")
LogEntry.objects.log_action(
user_id=system_user.pk,
content_type_id=content_type.pk,
content_type_id=get_content_type().pk,
object_id=None,
object_repr="Exception in run_backup",
action_flag=ADDITION,
Expand Down
69 changes: 69 additions & 0 deletions tdrs-backend/tdpservice/scheduling/test/test_db_backup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
"""Test cases for db_backup.py functions."""

import os
import pytest
from tdpservice.scheduling.management import db_backup
from django.contrib.admin.models import LogEntry

@pytest.mark.django_db
def test_backup_database(system_user):
"""Test backup functionality."""
file_name = "/tmp/test_backup.pg"
ret = db_backup.backup_database(file_name, "",
"postgres://tdpuser:something_secure@postgres:5432/tdrs_test",
system_user)

assert ret is True
assert os.path.getsize(file_name) > 0
os.remove(file_name)
assert os.path.exists(file_name) is False

@pytest.mark.django_db
def test_backup_database_fail_on_backup(system_user):
"""Test backup fails on psql non-zero return code."""
with pytest.raises(Exception) as e:
file_name = "/tmp/test_backup.pg"
db_backup.backup_database(file_name, "asdfasdfassfd",
"postgres://tdpuser:something_secure@postgres:5432/tdrs_test",
system_user)

assert str(e.value) == "pg_dump command failed with a non zero exit code."
assert os.path.exists(file_name) is False

@pytest.mark.django_db
def test_backup_database_fail_on_general_exception():
"""Test backup succeeds but raises exception on string user for log entry."""
with pytest.raises(Exception) as e:
file_name = "/tmp/test_backup.pg"
db_backup.backup_database(file_name, "",
"postgres://tdpuser:something_secure@postgres:5432/tdrs_test",
"system_user")

assert str(e.value) == "'str' object has no attribute 'pk'"
assert os.path.exists(file_name) is True
os.remove(file_name)
assert os.path.exists(file_name) is False


@pytest.mark.django_db
def test_get_database_credentials():
"""Test get credentials."""
creds = db_backup.get_database_credentials("postgres://tdpuser:something_secure@postgres:5432/tdrs_test")
assert creds == ["tdpuser", "something_secure", "postgres", "5432", "tdrs_test"]

@pytest.mark.django_db
def test_main_backup(mocker, system_user):
"""Test call the main function."""
mocker.patch(
'tdpservice.scheduling.management.db_backup.upload_file',
return_value=True
)
sys_vals = {"DATABASE_URI": "postgres://tdpuser:something_secure@postgres:5432",
"DATABASE_DB_NAME": "tdrs_test",
"POSTGRES_CLIENT_DIR": "",
"S3_BUCKET": "",
"S3_REGION": ""}

db_backup.main(['-b', '-f', '/tmp/test_backup.pg'], sys_values=sys_vals, system_user=system_user)
assert LogEntry.objects.get(change_message="Begining database backup.").pk is not None
assert LogEntry.objects.get(change_message="Finished database backup.").pk is not None
Loading
Loading