Skip to content

Commit

Permalink
More refactoring based on meeting notes
Browse files Browse the repository at this point in the history
  • Loading branch information
mbertrand committed Jul 15, 2024
1 parent 7b1ba70 commit c270eb5
Showing 1 changed file with 59 additions and 9 deletions.
68 changes: 59 additions & 9 deletions learning_resources/etl/mitpe.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Professional Education ETL"""

import copy
import html
import json
import logging
from datetime import UTC, datetime
Expand All @@ -10,6 +11,7 @@

import requests
from django.conf import settings
from django.utils.html import strip_tags

from learning_resources.constants import (
CertificationType,
Expand All @@ -36,7 +38,8 @@
105: "In Person",
}

# Human-readable label for each course status id.
MITPE_STATUS_DICT = {
    14: "Open",
    15: "Closed",
    17: "Waitlist",
}

# Whether a given course status id allows the resource to be published.
STATUS_DICT = {
    14: True,  # open
    15: False,  # closed
    17: True,  # waitlisted
}


def _fetch_data(url, params=None) -> list[dict]:
Expand Down Expand Up @@ -102,14 +105,41 @@ def parse_topics(resource_data: dict) -> list[dict]:
Returns:
list of dict: list containing topic dicts with a name attribute
"""
topic_names = []
subtopic_names = []
topic_relationships = resource_data["relationships"]["field_course_topics"]
topics_url = topic_relationships["links"].get("related", {}).get("href")
if topic_relationships["data"] and topics_url:
topic_details = _fetch_data(topics_url)
return transform_topics(
[{"name": topic["attributes"]["name"]} for topic in topic_details]
)
return []
topic_names = [
":".join(
[
topic_name.strip()
for topic_name in topic["attributes"]["name"].split(":")
]
)
for topic in topic_details
]
subtopic_relationships = resource_data["relationships"]["field_subtopic"]
subtopics_url = subtopic_relationships["links"].get("related", {}).get("href")
if subtopic_relationships["data"] and subtopics_url:
subtopic_details = _fetch_data(subtopics_url)
subtopic_names = [
":".join(
[
subtopic_name.strip()
for subtopic_name in strip_tags(
html.unescape(
subtopic["attributes"]["description"]["processed"]
)
).split(":")
]
)
for subtopic in subtopic_details
]
return transform_topics(
[{"name": topic_name} for topic_name in (topic_names + subtopic_names)]
)


def parse_instructors(resource_data: dict) -> list[dict]:
Expand Down Expand Up @@ -298,6 +328,28 @@ def _transform_runs(resource_data: dict) -> list[dict]:
return runs


def parse_published(resource_data: dict, runs: list[dict]) -> bool:
    """
    Return the published status of the resource

    A resource is published only when all of the following hold: its course
    status id maps to a truthy value in STATUS_DICT, its
    "field_do_not_show_in_catalog" attribute is falsy, and at least one run
    has "published" set to True.

    A missing/null course status, or a status id that is not a key of
    STATUS_DICT, is logged and treated as unpublished instead of raising.

    Args:
        resource_data: course or program data
        runs: list of course or program runs

    Returns:
        bool: published status of the resource
    """
    # An empty to-one relationship may be serialized with "data": null, so
    # guard every level rather than indexing straight through.
    status_relationship = (
        resource_data["relationships"].get("field_course_status") or {}
    )
    status_meta = (status_relationship.get("data") or {}).get("meta") or {}
    status_id = status_meta.get("drupal_internal__target_id")

    if status_id not in STATUS_DICT:
        logging.getLogger(__name__).warning(
            "Unrecognized course status %s, treating resource as unpublished",
            status_id,
        )
        return False

    return bool(
        STATUS_DICT[status_id]
        and not resource_data["attributes"]["field_do_not_show_in_catalog"]
        # any() short-circuits and avoids building a throwaway list
        and any(run["published"] is True for run in runs)
    )


def transform_course(resource_data: dict) -> dict or None:
"""
Transform raw resource data into a format suitable for
Expand Down Expand Up @@ -332,8 +384,7 @@ def transform_course(resource_data: dict) -> dict or None:
"learning_format": parse_format(
resource_data["relationships"]["field_location_tag"]
),
"published": not resource_data["attributes"]["field_do_not_show_in_catalog"]
and len([run for run in runs if run["published"] is True]) > 0,
"published": parse_published(resource_data, runs),
"topics": parse_topics(resource_data),
"runs": runs,
"unique_field": UNIQUE_FIELD,
Expand Down Expand Up @@ -371,8 +422,7 @@ def transform_program(resource_data: dict) -> dict or None:
"learning_format": parse_format(
resource_data["relationships"]["field_location_tag"]
),
"published": not resource_data["attributes"]["field_do_not_show_in_catalog"]
and len([run for run in runs if run["published"] is True]) > 0,
"published": parse_published(resource_data, runs),
"topics": parse_topics(resource_data),
"course_ids": [
course["id"]
Expand Down

0 comments on commit c270eb5

Please sign in to comment.