Skip to content

Commit

Permalink
Merge pull request #955 from openedx/mkeating/ENT-8248-revert-2
Browse files Browse the repository at this point in the history
revert: feat: take exec ed course data from course run instead of additional_metadata attempt 2
  • Loading branch information
marlonkeating committed Sep 27, 2024
2 parents 0e910c0 + caa1ff1 commit 4a20261
Show file tree
Hide file tree
Showing 6 changed files with 190 additions and 232 deletions.
37 changes: 0 additions & 37 deletions enterprise_catalog/apps/api/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
from enterprise_catalog.apps.catalog.models import CatalogQuery, ContentMetadata
from enterprise_catalog.apps.catalog.serializers import (
DEFAULT_NORMALIZED_PRICE,
NormalizedContentMetadataSerializer,
_find_best_mode_seat,
)
from enterprise_catalog.apps.catalog.tests.factories import (
Expand Down Expand Up @@ -55,37 +54,6 @@ def mock_task(self, *args, **kwargs): # pylint: disable=unused-argument
mock_task.name = 'mock_task'


def _hydrate_normalized_metadata(metadata_record):
    """
    Populate the ``normalized_metadata`` and ``normalized_metadata_by_run``
    fields on a single ContentMetadata record's ``json_metadata``.
    """
    course_json = metadata_record.json_metadata

    # Course-level normalized metadata, derived from the full course payload.
    course_json['normalized_metadata'] = NormalizedContentMetadataSerializer(
        {'course_metadata': course_json},
    ).data

    # Per-run normalized metadata, keyed by each course run's key.
    course_json['normalized_metadata_by_run'] = {}
    for course_run in course_json.get('course_runs', []):
        run_serializer = NormalizedContentMetadataSerializer({
            'course_run_metadata': course_run,
            'course_metadata': course_json,
        })
        course_json['normalized_metadata_by_run'][course_run['key']] = run_serializer.data


def _hydrate_course_normalized_metadata():
    """
    Populate normalized_metadata fields for every course-typed ContentMetadata
    record.

    Needed for tests that generate test ContentMetadata via factories, which
    do not populate normalized_metadata by default.
    """
    for course_record in ContentMetadata.objects.filter(content_type=COURSE):
        _hydrate_normalized_metadata(course_record)
        course_record.save()


@ddt.ddt
class TestTaskResultFunctions(TestCase):
"""
Expand Down Expand Up @@ -858,8 +826,6 @@ def setUp(self):
self.course_run_metadata_unpublished.catalog_queries.set([course_run_catalog_query])
self.course_run_metadata_unpublished.save()

_hydrate_course_normalized_metadata()

def _set_up_factory_data_for_algolia(self):
expected_catalog_uuids = sorted([
str(self.enterprise_catalog_courses.uuid),
Expand Down Expand Up @@ -1059,7 +1025,6 @@ def test_index_algolia_program_common_uuids_only(self, mock_search_client):
test_course_1.save()
test_course_2.save()
test_course_3.save()
_hydrate_course_normalized_metadata()

actual_algolia_products_sent = []

Expand Down Expand Up @@ -1158,7 +1123,6 @@ def test_index_algolia_program_unindexable_content(self, mock_search_client):
test_course_1.save()
test_course_2.save()
test_course_3.save()
_hydrate_course_normalized_metadata()

actual_algolia_products_sent = []

Expand Down Expand Up @@ -2173,7 +2137,6 @@ def test_index_algolia_duplicate_content_uuids(self, mock_search_client):
)
course_run_for_duplicate = ContentMetadataFactory(content_type=COURSE_RUN, parent_content_key='duplicateX')
course_run_for_duplicate.catalog_queries.set([self.enterprise_catalog_course_runs.catalog_query])
_hydrate_course_normalized_metadata()

actual_algolia_products_sent_sequence = []

Expand Down
186 changes: 96 additions & 90 deletions enterprise_catalog/apps/api/v1/export_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,98 +169,66 @@ def program_hit_to_row(hit):
return csv_row


def _base_csv_row_data(hit):
""" Returns the formatted, shared attributes common across all course types. """
title = hit.get('title')
aggregation_key = hit.get('aggregation_key')
language = hit.get('language')
transcript_languages = ', '.join(hit.get('transcript_languages', []))
marketing_url = hit.get('marketing_url')
short_description = strip_tags(hit.get('short_description', ''))
subjects = ', '.join(hit.get('subjects', []))
skills = ', '.join([skill['name'] for skill in hit.get('skills', [])])
outcome = strip_tags(hit.get('outcome', '')) # What You’ll Learn

# FIXME: currently ignores partner names when a course has multiple partners
partner_name = hit['partners'][0]['name'] if hit.get('partners') else None
def course_hit_to_row(hit):
"""
Helper function to construct a CSV row according to a single Algolia result course hit.
"""
csv_row = []
csv_row.append(hit.get('title'))

if hit.get('partners'):
csv_row.append(hit['partners'][0]['name'])
else:
csv_row.append(None)

empty_advertised_course_run = {}
advertised_course_run = hit.get('advertised_course_run', empty_advertised_course_run)
course_run_key = advertised_course_run.get('key')
min_effort = advertised_course_run.get('min_effort')
max_effort = advertised_course_run.get('max_effort')
weeks_to_complete = advertised_course_run.get('weeks_to_complete') # Length

if start_date := advertised_course_run.get('start'):
start_date = parser.parse(start_date).strftime(DATE_FORMAT)
csv_row.append(start_date)

if end_date := advertised_course_run.get('end'):
end_date = parser.parse(end_date).strftime(DATE_FORMAT)

if enroll_by := advertised_course_run.get('enroll_by'):
enroll_by = datetime.datetime.fromtimestamp(enroll_by).strftime(DATE_FORMAT)

return {
'title': title,
'partner_name': partner_name,
'start_date': start_date,
'end_date': end_date,
'enroll_by': enroll_by,
'aggregation_key': aggregation_key,
'course_run_key': course_run_key,
'language': language,
'transcript_languages': transcript_languages,
'marketing_url': marketing_url,
'short_description': short_description,
'subjects': subjects,
'skills': skills,
'min_effort': min_effort,
'max_effort': max_effort,
'weeks_to_complete': weeks_to_complete,
'outcome': outcome,
'advertised_course_run': advertised_course_run,
}


def course_hit_to_row(hit):
"""
Helper function to construct a CSV row according to a single Algolia result course hit.
"""
row_data = _base_csv_row_data(hit)
csv_row = []
csv_row.append(row_data.get('title'))
csv_row.append(row_data.get('partner_name'))

advertised_course_run = row_data.get('advertised_course_run')

csv_row.append(row_data.get('start_date'))
csv_row.append(row_data.get('end_date'))
csv_row.append(end_date)

# upgrade_deadline deprecated in favor of enroll_by
if upgrade_deadline := advertised_course_run.get('upgrade_deadline'):
upgrade_deadline = datetime.datetime.fromtimestamp(upgrade_deadline).strftime(DATE_FORMAT)
csv_row.append(upgrade_deadline)
csv_row.append(row_data.get('enroll_by'))

if enroll_by := advertised_course_run.get('enroll_by'):
enroll_by = datetime.datetime.fromtimestamp(enroll_by).strftime(DATE_FORMAT)
csv_row.append(enroll_by)

pacing_type = advertised_course_run.get('pacing_type')
key = advertised_course_run.get('key')

csv_row.append(', '.join(hit.get('programs', [])))
csv_row.append(', '.join(hit.get('program_titles', [])))

pacing_type = advertised_course_run.get('pacing_type')
csv_row.append(pacing_type)

csv_row.append(hit.get('level_type'))

csv_row.append(hit.get('first_enrollable_paid_seat_price'))
csv_row.append(row_data.get('language'))
csv_row.append(row_data.get('transcript_languages'))
csv_row.append(row_data.get('marketing_url'))
csv_row.append(row_data.get('short_description'))
csv_row.append(row_data.get('subjects'))
csv_row.append(row_data.get('course_run_key'))
csv_row.append(row_data.get('aggregation_key'))
csv_row.append(row_data.get('skills'))
csv_row.append(row_data.get('min_effort'))
csv_row.append(row_data.get('max_effort'))
csv_row.append(row_data.get('weeks_to_complete'))
csv_row.append(row_data.get('outcome'))
csv_row.append(hit.get('language'))
csv_row.append(', '.join(hit.get('transcript_languages', [])))
csv_row.append(hit.get('marketing_url'))
csv_row.append(strip_tags(hit.get('short_description', '')))

csv_row.append(', '.join(hit.get('subjects', [])))
csv_row.append(key)
csv_row.append(hit.get('aggregation_key'))

skills = [skill['name'] for skill in hit.get('skills', [])]
csv_row.append(', '.join(skills))

advertised_course_run = hit.get('advertised_course_run', {})
csv_row.append(advertised_course_run.get('min_effort'))
csv_row.append(advertised_course_run.get('max_effort'))
csv_row.append(advertised_course_run.get('weeks_to_complete')) # Length

csv_row.append(strip_tags(hit.get('outcome', ''))) # What You’ll Learn

csv_row.append(strip_tags(hit.get('prerequisites_raw', ''))) # Pre-requisites

Expand All @@ -270,33 +238,71 @@ def course_hit_to_row(hit):
return csv_row


def fetch_and_format_registration_date(obj):
    """
    Extract and reformat an exec ed registration deadline for CSV export.

    Reads ``registration_deadline`` from ``obj`` (expected to look like
    ``2002-02-15T12:12:20``), keeps only the date portion, and returns it
    formatted as ``MM-DD-YYYY``.

    Returns None when the deadline is absent, None, or the date portion is
    not in ``YYYY-MM-DD`` form (a warning is logged in the latter case).
    """
    enroll_by_date = obj.get('registration_deadline')
    if not enroll_by_date:
        # Guard: a missing/None deadline previously raised AttributeError on
        # .split(), since only ValueError was caught below.
        return None
    stripped_enroll_by = enroll_by_date.split("T")[0]
    formatted_enroll_by = None
    try:
        enroll_by_datetime_obj = datetime.datetime.strptime(stripped_enroll_by, '%Y-%m-%d')
        formatted_enroll_by = enroll_by_datetime_obj.strftime('%m-%d-%Y')
    except ValueError as exc:
        logger.info(f"Unable to format registration deadline, failed with error: {exc}")
    return formatted_enroll_by


def exec_ed_course_to_row(hit):
"""
Helper function to construct a CSV row according to a single executive education course hit.
"""
row_data = _base_csv_row_data(hit)
csv_row = []
csv_row.append(row_data.get('title'))
csv_row.append(row_data.get('partners'))
csv_row.append(hit.get('title'))

csv_row.append(row_data.get('start_date'))
csv_row.append(row_data.get('end_date'))
csv_row.append(row_data.get('enroll_by'))
if hit.get('partners'):
csv_row.append(hit['partners'][0]['name'])
else:
csv_row.append(None)
if hit.get('additional_metadata'):
start_date = None
additional_md = hit['additional_metadata']
if additional_md.get('start_date'):
start_date = parser.parse(additional_md['start_date']).strftime(DATE_FORMAT)
csv_row.append(start_date)

end_date = None
if additional_md.get('end_date'):
end_date = parser.parse(additional_md['end_date']).strftime(DATE_FORMAT)
csv_row.append(end_date)
formatted_enroll_by = fetch_and_format_registration_date(additional_md)
else:
csv_row.append(None) # no start date
csv_row.append(None) # no end date
formatted_enroll_by = None

csv_row.append(formatted_enroll_by)

adv_course_run = hit.get('advertised_course_run', {})
key = adv_course_run.get('key')

price = float(hit['entitlements'][0]['price'])
csv_row.append(math.trunc(price))
csv_row.append(row_data.get('language'))
csv_row.append(row_data.get('transcript_languages'))
csv_row.append(row_data.get('marketing_url'))
csv_row.append(row_data.get('short_description'))
csv_row.append(row_data.get('subjects'))
csv_row.append(row_data.get('course_run_key'))
csv_row.append(row_data.get('aggregation_key'))
csv_row.append(row_data.get('skills'))
csv_row.append(row_data.get('min_effort'))
csv_row.append(row_data.get('max_effort'))
csv_row.append(row_data.get('weeks_to_complete'))
csv_row.append(row_data.get('outcome'))
csv_row.append(hit.get('language'))
csv_row.append(', '.join(hit.get('transcript_languages', [])))
csv_row.append(hit.get('marketing_url'))
csv_row.append(strip_tags(hit.get('short_description', '')))

csv_row.append(', '.join(hit.get('subjects', [])))
csv_row.append(key)
csv_row.append(hit.get('aggregation_key'))

skills = [skill['name'] for skill in hit.get('skills', [])]
csv_row.append(', '.join(skills))

csv_row.append(adv_course_run.get('min_effort'))
csv_row.append(adv_course_run.get('max_effort'))
csv_row.append(adv_course_run.get('weeks_to_complete')) # Length

csv_row.append(strip_tags(hit.get('outcome', ''))) # What You’ll Learn

csv_row.append(strip_tags(hit.get('full_description', '')))

return csv_row
Expand Down
13 changes: 13 additions & 0 deletions enterprise_catalog/apps/api/v1/tests/test_export_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,16 @@ def test_retrieve_available_fields(self):
"""
# assert that ALGOLIA_ATTRIBUTES_TO_RETRIEVE is a SUBSET of ALGOLIA_FIELDS
assert set(export_utils.ALGOLIA_ATTRIBUTES_TO_RETRIEVE) <= set(algolia_utils.ALGOLIA_FIELDS)

def test_fetch_and_format_registration_date(self):
    """
    Test that the export helper properly fetches and reformats executive
    education registration dates.
    """
    # Expected hit format from Algolia, properly reformatted for CSV
    # download (YYYY-MM-DD -> MM-DD-YYYY; time portion after 'T' ignored).
    assert export_utils.fetch_and_format_registration_date(
        {'registration_deadline': '2002-02-15T12:12:200'}
    ) == '02-15-2002'
    # Some other format from Algolia (not YYYY-MM-DD), should return None.
    assert export_utils.fetch_and_format_registration_date(
        {'registration_deadline': '02-15-2015T12:12:200'}
    ) is None
27 changes: 18 additions & 9 deletions enterprise_catalog/apps/catalog/algolia_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ def course_run_not_active_checker():
return not is_course_run_active(advertised_course_run)

def deadline_passed_checker():
return _has_enroll_by_deadline_passed(course_json_metadata)
return _has_enroll_by_deadline_passed(course_json_metadata, advertised_course_run)

for should_not_index_function, log_message in (
(no_advertised_course_run_checker, 'no advertised course run'),
Expand All @@ -242,16 +242,25 @@ def deadline_passed_checker():
return True


def _has_enroll_by_deadline_passed(course_json_metadata):
def _has_enroll_by_deadline_passed(course_json_metadata, advertised_course_run):
"""
Helper to determine if the enrollment deadline has passed for the given course
based on normalized_metadata's enroll_by_date
"""
enroll_by_deadline = course_json_metadata.get('normalized_metadata')['enroll_by_date']
enroll_by_deadline_timestamp = datetime.datetime.strptime(
enroll_by_deadline,
'%Y-%m-%dT%H:%M:%S%z',
).timestamp()
and advertised course run. For course metadata records with a `course_type` of "course" (e.g. OCM courses),
this is based on the verified upgrade deadline.
For 2u exec ed courses, this is based on the registration deadline.
"""
enroll_by_deadline_timestamp = 0
if course_json_metadata.get('course_type') == EXEC_ED_2U_COURSE_TYPE:
additional_metadata = course_json_metadata.get('additional_metadata') or {}
registration_deadline = additional_metadata.get('registration_deadline')
if registration_deadline:
enroll_by_deadline_timestamp = datetime.datetime.strptime(
registration_deadline,
'%Y-%m-%dT%H:%M:%S%z',
).timestamp()
else:
enroll_by_deadline_timestamp = _get_verified_upgrade_deadline(advertised_course_run)

return enroll_by_deadline_timestamp < localized_utcnow().timestamp()


Expand Down
12 changes: 3 additions & 9 deletions enterprise_catalog/apps/catalog/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,18 +136,12 @@ def get_enroll_start_date(self, obj) -> str: # pylint: disable=unused-argument
def get_enroll_by_date(self, obj) -> str: # pylint: disable=unused-argument
if not self.course_run_metadata:
return None

if self.is_exec_ed_2u_course:
return self.course_run_metadata.get('enrollment_end')

all_seats = self.course_run_metadata.get('seats', [])

seat = _find_best_mode_seat(all_seats)
upgrade_deadline = None
if seat := _find_best_mode_seat(all_seats):
if seat:
upgrade_deadline = seat.get('upgrade_deadline_override') or seat.get('upgrade_deadline')

enrollment_end = self.course_run_metadata.get('enrollment_end')
return min(filter(None, [upgrade_deadline, enrollment_end]), default=None)
return upgrade_deadline or self.course_run_metadata.get('enrollment_end')

@extend_schema_field(serializers.FloatField)
def get_content_price(self, obj) -> float: # pylint: disable=unused-argument
Expand Down
Loading

0 comments on commit 4a20261

Please sign in to comment.