Skip to content

Commit

Permalink
Update Professional Ed ETL pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
mbertrand committed Sep 20, 2024
1 parent f3a3145 commit 30bed3f
Show file tree
Hide file tree
Showing 11 changed files with 58 additions and 335 deletions.
4 changes: 0 additions & 4 deletions app.json
Original file line number Diff line number Diff line change
Expand Up @@ -455,10 +455,6 @@
"description": "The time in seconds between periodic syncs of podcasts",
"required": false
},
"PROLEARN_CATALOG_API_URL": {
"description": "Base URL for the Prolearn search API",
"required": false
},
"RECAPTCHA_SITE_KEY": {
"description": "Google Recaptcha site key",
"required": false
Expand Down
30 changes: 30 additions & 0 deletions data_fixtures/migrations/0015_unpublish_prolearn_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from django.db import migrations

from learning_resources.etl.constants import ETLSource
from learning_resources.models import LearningResource
from learning_resources.utils import resource_unpublished_actions


def unpublish_prolearn_data(apps, schema_editor):
    """
    Mark every prolearn-sourced learning resource as unpublished.

    Each resource whose ``etl_source`` is the prolearn source is saved with
    ``published=False`` and then passed to ``resource_unpublished_actions``.

    Args:
        apps: app registry supplied by ``RunPython`` (not used here)
        schema_editor: schema editor supplied by ``RunPython`` (not used here)
    """
    # NOTE(review): this queries the concrete model imported at module level
    # rather than a historical model from ``apps`` - confirm that is intended.
    prolearn_resources = LearningResource.objects.filter(
        etl_source=ETLSource.prolearn.value
    )
    for learning_resource in prolearn_resources:
        learning_resource.published = False
        learning_resource.save()
        resource_unpublished_actions(learning_resource)


class Migration(migrations.Migration):
    """Data migration that unpublishes all prolearn learning resources."""

    dependencies = [("data_fixtures", "0014_add_department_SP")]

    operations = [
        migrations.RunPython(
            code=unpublish_prolearn_data,
            # Rolling back leaves the data untouched; the reverse is a no-op.
            reverse_code=migrations.RunPython.noop,
        ),
    ]
1 change: 0 additions & 1 deletion env/codespaces.env
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ MITOL_SECURE_SSL_REDIRECT=False
MITOL_DB_DISABLE_SSL=True
MITOL_FEATURES_DEFAULT=True
MITOL_USE_S3=False
PROLEARN_CATALOG_API_URL=https://prolearn.mit.edu/graphql
MITOL_API_BASE_URL="https://${CODESPACE_NAME}-8063.app.github.dev"
MITOL_APP_BASE_URL="https://${CODESPACE_NAME}-8062.app.github.dev"
MITX_ONLINE_BASE_URL=https://mitxonline.mit.edu/
Expand Down
11 changes: 8 additions & 3 deletions learning_resources/etl/mitpe.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,14 @@
from django.conf import settings

from learning_resources.constants import (
Availability,
CertificationType,
LearningResourceType,
OfferedBy,
PlatformType,
)
from learning_resources.etl.constants import ETLSource
from learning_resources.etl.utils import transform_format, transform_topics
from learning_resources.etl.utils import transform_delivery, transform_topics
from main.utils import clean_data

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -232,6 +233,8 @@ def _transform_runs(resource_data: dict) -> list[dict]:
else [],
"url": parse_resource_url(resource_data),
"instructors": parse_instructors(resource_data),
"delivery": transform_delivery(resource_data["learning_format"]),
"availability": Availability.dated.name,
}
for run_data in runs_data
]
Expand Down Expand Up @@ -265,10 +268,11 @@ def transform_course(resource_data: dict) -> dict or None:
"course": {
"course_numbers": [],
},
"learning_format": transform_format(resource_data["learning_format"]),
"published": True,
"topics": parse_topics(resource_data),
"runs": runs,
"delivery": transform_delivery(resource_data["learning_format"]),
"availability": Availability.dated.name,
}
return None

Expand Down Expand Up @@ -302,10 +306,11 @@ def transform_program(resource_data: dict) -> dict or None:
for course_title in resource_data["courses"].split("|")
if course_title
],
"learning_format": transform_format(resource_data["learning_format"]),
"published": True,
"topics": parse_topics(resource_data),
"runs": runs,
"delivery": transform_delivery(resource_data["learning_format"]),
"availability": Availability.dated.name,
}
return None

Expand Down
22 changes: 16 additions & 6 deletions learning_resources/etl/mitpe_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import pytest

from learning_resources.constants import Availability, LearningResourceDelivery
from learning_resources.etl import mitpe
from learning_resources.factories import (
LearningResourceOfferorFactory,
Expand All @@ -31,7 +32,6 @@
},
"description": "Profissionais de áreas técnicas estão acostumados a falar ou apresentar dados para perfis que compartem os mesmos interesses e campo de atuação, mas podem encontrar dificuldades em transmitir suas ideias para pessoas de outros setores.\n",
"course": {"course_numbers": []},
"learning_format": ["online"],
"published": True,
"topics": [{"name": "Data Science"}],
"runs": [
Expand All @@ -48,8 +48,12 @@
"prices": ["1870"],
"url": "https://professional.mit.edu/course-catalog/comunicacao-persuasiva-pensamento-critico-para-aprimorar-mensagem-portuguese",
"instructors": [{"full_name": "Edward Schiappa"}, {"full_name": ""}],
"delivery": [LearningResourceDelivery.online.name],
"availability": Availability.dated.name,
}
],
"delivery": [LearningResourceDelivery.online.name],
"availability": Availability.dated.name,
},
{
"readable_id": "e3be75f6-f7c9-432b-9c24-70c7132e1583",
Expand All @@ -67,7 +71,6 @@
},
"description": "Become a stronger leader of innovation and design-thinking in your workplace. Join us for a highly interactive and engaging course that will teach you powerful new approaches for creating innovative solutions, crafting vision that gets buy-in, and developing solutions that people love. You'll learn our proven 10-Step Design Process and gain the strategies and hands-on experience to make your mark as a leader of innovation. Don't miss this opportunity to take your leadership capabilities to the next level.\n\nThis course may be taken individually or as part of the Professional Certificate Program in Innovation and Technology.\n",
"course": {"course_numbers": []},
"learning_format": ["in_person"],
"published": True,
"topics": [{"name": "Data Science"}, {"name": "Product Innovation"}],
"runs": [
Expand All @@ -88,8 +91,12 @@
{"full_name": "Reza Rahaman"},
{"full_name": ""},
],
"delivery": [LearningResourceDelivery.in_person.name],
"availability": Availability.dated.name,
}
],
"delivery": [LearningResourceDelivery.in_person.name],
"availability": Availability.dated.name,
},
]
EXPECTED_PROGRAMS = [
Expand All @@ -108,7 +115,6 @@
"url": "https://professional.mit.edu/sites/default/files/2020-08/Smart%20Manufacturing.jpg",
},
"description": "A fábrica do futuro já está aqui. Participe do programa online Manufatura Inteligente: Produção na Indústria 4.0 e aproveite a experiência de mais de cem anos de colaboração do MIT com vários setores. Aprenda as chaves para criar uma indústria inteligente em qualquer escala e saiba como software, sensores e sistemas são integrados para essa finalidade. Com este programa interativo, você passará da criação de modelos a sistemas de fabricação e análise avançada de dados para desenvolver estratégias que gerem uma vantagem competitiva.\n",
"learning_format": ["online"],
"published": True,
"topics": [{"name": "Product Innovation"}],
"runs": [
Expand All @@ -125,21 +131,25 @@
"prices": ["1870"],
"url": "https://professional.mit.edu/course-catalog/manufatura-inteligente-producao-na-industria-40-portuguese",
"instructors": [{"full_name": ""}, {"full_name": "Brian Anthony"}],
"delivery": [LearningResourceDelivery.online.name],
"availability": Availability.dated.name,
}
],
"courses": [EXPECTED_COURSES[0], EXPECTED_COURSES[1]],
"delivery": [LearningResourceDelivery.online.name],
"availability": Availability.dated.name,
}
]


@pytest.fixture()
@pytest.fixture
def prof_ed_settings(settings):
"""Fixture to set Professional Education API URL"""
settings.PROFESSIONAL_EDUCATION_RESOURCES_API_URL = "http://pro_edu_api.com"
return settings


@pytest.fixture()
@pytest.fixture
def mock_fetch_data(mocker):
"""Mock fetch_data function"""

Expand Down Expand Up @@ -176,7 +186,7 @@ def test_extract(settings, mock_fetch_data, prof_ed_api_url):
assert len(results) == 0


@pytest.mark.django_db()
@pytest.mark.django_db
def test_transform(settings, mock_fetch_data, prof_ed_settings):
"""Test transform function, and effectively most other functions"""
settings.MITPE_BASE_API_URL = "http://pro_edu_api.edu"
Expand Down
8 changes: 4 additions & 4 deletions learning_resources/etl/sloan_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Tests for prolearn etl functions"""
"""Tests for sloan etl functions"""

import json
from urllib.parse import urljoin
Expand Down Expand Up @@ -28,7 +28,7 @@

@pytest.fixture(autouse=True)
def mock_sloan_api_setting(settings): # noqa: PT004
"""Set the prolearn api url"""
"""Set the sloan api url"""
settings.SEE_API_URL = "http://localhost/test/programs/api"
settings.SEE_API_CLIENT_ID = "test"
settings.SEE_API_CLIENT_SECRET = "test" # noqa: S105
Expand All @@ -38,14 +38,14 @@ def mock_sloan_api_setting(settings): # noqa: PT004

@pytest.fixture
def mock_sloan_courses_data():
"""Mock prolearn MIT Professional Education courses data"""
"""Mock Sloan MIT Professional Education courses data"""
with open("./test_json/test_sloan_courses.json") as f: # noqa: PTH123
return json.loads(f.read())


@pytest.fixture
def mock_sloan_runs_data():
"""Mock prolearn MIT Professional Education courses data"""
"""Mock Sloan MIT Professional Education runs data"""
with open("./test_json/test_sloan_runs.json") as f: # noqa: PTH123
return json.loads(f.read())

Expand Down

This file was deleted.

4 changes: 0 additions & 4 deletions main/settings_celery.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,6 @@
"task": "learning_resources.tasks.get_mitpe_data",
"schedule": crontab(minute=0, hour=21), # 5:00pm EST
},
"update-prolearn-courses-every-1-days": {
"task": "learning_resources.tasks.get_prolearn_data",
"schedule": crontab(minute=0, hour=5), # 1:00am EST
},
"update-xpro-courses-every-1-days": {
"task": "learning_resources.tasks.get_xpro_data",
"schedule": crontab(minute=0, hour=5), # 1:00am EST
Expand Down
3 changes: 0 additions & 3 deletions main/settings_course_etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,6 @@
# Base URL for Micromasters data
MICROMASTERS_CATALOG_API_URL = get_string("MICROMASTERS_CATALOG_API_URL", None)

# Base URL for Prolearn data
PROLEARN_CATALOG_API_URL = get_string("PROLEARN_CATALOG_API_URL", None)

# Iterator chunk size for MITx and xPRO courses
LEARNING_COURSE_ITERATOR_CHUNK_SIZE = get_int("LEARNING_COURSE_ITERATOR_CHUNK_SIZE", 20)

Expand Down
Loading

0 comments on commit 30bed3f

Please sign in to comment.