Skip to content

Commit

Permalink
Merge pull request #1829 from He3lixxx/importers
Browse files Browse the repository at this point in the history
Fix importers
  • Loading branch information
richardebeling committed Dec 13, 2022
2 parents 6711f09 + 81d5e47 commit 47afab5
Show file tree
Hide file tree
Showing 5 changed files with 182 additions and 34 deletions.
5 changes: 3 additions & 2 deletions evap/staff/importers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,11 @@ class Category(Enum):
DUPL = _CATEGORY_TUPLE("duplicate", gettext_lazy("Possible duplicates"), 9)
EXISTS = _CATEGORY_TUPLE("existing", gettext_lazy("Existing courses"), 10)
IGNORED = _CATEGORY_TUPLE("ignored", gettext_lazy("Ignored duplicates"), 11)
ALREADY_PARTICIPATING = _CATEGORY_TUPLE("already_participating", gettext_lazy("Existing participants"), 12)

DEGREE = _CATEGORY_TUPLE("degree", gettext_lazy("Degree mismatches"), 12)
DEGREE = _CATEGORY_TUPLE("degree", gettext_lazy("Degree mismatches"), 13)
TOO_MANY_ENROLLMENTS = _CATEGORY_TUPLE(
"too_many_enrollments", gettext_lazy("Unusually high number of enrollments"), 13
"too_many_enrollments", gettext_lazy("Unusually high number of enrollments"), 14
)

level: Level
Expand Down
109 changes: 86 additions & 23 deletions evap/staff/importers/enrollment.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from typing_extensions import TypeGuard

from evap.evaluation.models import Contribution, Course, CourseType, Degree, Evaluation, Semester, UserProfile
from evap.evaluation.tools import ilen
from evap.evaluation.tools import clean_email, ilen, unordered_groupby
from evap.staff.tools import create_user_list_html_string_for_message

from .base import (
Expand All @@ -34,7 +34,9 @@
)


@dataclass
class InvalidValue:
# We make this a dataclass to make sure all instances compare equal.
pass


Expand All @@ -58,11 +60,13 @@ class CourseData:
# An existing course that this imported one should be merged with. See #1596
merge_into_course: MaybeInvalid[Optional[Course]] = invalid_value

def equals_when_ignoring_degrees(self, other) -> bool:
def key(data):
return (data.name_de, data.name_en, data.course_type, data.is_graded, data.responsible_email)
def __post_init__(self):
self.name_de = self.name_de.strip()
self.name_en = self.name_en.strip()
self.responsible_email = clean_email(self.responsible_email)

return key(self) == key(other)
def differing_fields(self, other) -> Set[str]:
return {field.name for field in fields(self) if getattr(self, field.name) != getattr(other, field.name)}


class ValidCourseData(CourseData):
Expand Down Expand Up @@ -250,12 +254,12 @@ def _map_row(self, row: EnrollmentInputRow) -> EnrollmentParsedRow:
self.invalid_is_graded_tracker.add_location_for_key(row.location, e.invalid_is_graded)

course_data = CourseData(
name_de=row.evaluation_name_de.strip(),
name_en=row.evaluation_name_en.strip(),
name_de=row.evaluation_name_de,
name_en=row.evaluation_name_en,
degrees=degrees,
course_type=course_type,
is_graded=is_graded,
responsible_email=row.responsible_email.strip(),
responsible_email=row.responsible_email,
)

return EnrollmentParsedRow(
Expand Down Expand Up @@ -326,7 +330,7 @@ def __init__(self, semester: Semester):
self.courses_by_name_en = {course.name_en: course for course in courses}

@staticmethod
def get_merge_hindrances(course_data: CourseData, merge_candidate: Course):
def get_merge_hindrances(course_data: CourseData, merge_candidate: Course) -> List[str]:
hindrances = []
if merge_candidate.type != course_data.course_type:
hindrances.append(_("the course type does not match"))
Expand All @@ -342,7 +346,7 @@ def get_merge_hindrances(course_data: CourseData, merge_candidate: Course):

return hindrances

def set_course_merge_target(self, course_data: CourseData):
def set_course_merge_target(self, course_data: CourseData) -> None:
course_with_same_name_en = self.courses_by_name_en.get(course_data.name_en, None)
course_with_same_name_de = self.courses_by_name_de.get(course_data.name_de, None)

Expand Down Expand Up @@ -393,7 +397,7 @@ def __init__(self, *args, semester: Semester, **kwargs):
self.name_en_by_name_de: Dict[str, str] = {}
self.name_de_mismatch_tracker: FirstLocationAndCountTracker = FirstLocationAndCountTracker()

def check_course_data(self, course_data: CourseData, location: ExcelFileLocation):
def check_course_data(self, course_data: CourseData, location: ExcelFileLocation) -> None:
try:
self.course_merge_logic.set_course_merge_target(course_data)

Expand Down Expand Up @@ -474,6 +478,48 @@ def finalize(self) -> None:
)


class ExistingParticipationChecker(Checker, RowCheckerMixin):
"""Warn if users are already stored as participants for a course in the database"""

def __init__(self, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)
self.participant_emails_per_course_name_en: DefaultDict[str, Set[str]] = defaultdict(set)

def check_row(self, row: EnrollmentParsedRow) -> None:
# invalid value will still be set for courses with merge conflicts
if row.course_data.merge_into_course is not None and row.course_data.merge_into_course != invalid_value:
self.participant_emails_per_course_name_en[row.course_data.name_en].add(row.student_data.email)

def finalize(self) -> None:
# To reduce database traffic, we only load existing participations for users and evaluations seen in the import
# file. They could still contain false positives, so we need to check each import row against these tuples
seen_user_emails = [
email for email_set in self.participant_emails_per_course_name_en.values() for email in email_set
]
seen_evaluation_names = self.participant_emails_per_course_name_en.keys()

existing_participation_pairs = [
(participation.evaluation.course.name_en, participation.userprofile.email)
for participation in Evaluation.participants.through.objects.filter(
evaluation__course__name_en__in=seen_evaluation_names, userprofile__email__in=seen_user_emails
).prefetch_related("userprofile", "evaluation__course")
]

existing_participant_emails_per_course_name_en = unordered_groupby(existing_participation_pairs)

for course_name_en, import_participant_emails in self.participant_emails_per_course_name_en.items():
existing_participant_emails = set(existing_participant_emails_per_course_name_en.get(course_name_en, []))
colliding_participant_emails = existing_participant_emails.intersection(import_participant_emails)

if colliding_participant_emails:
self.importer_log.add_warning(
_(
"Course {course_name}: {participant_count} participants from the import file already participate in the evaluation."
).format(course_name=course_name_en, participant_count=len(colliding_participant_emails)),
category=ImporterLogEntry.Category.ALREADY_PARTICIPATING,
)


class CourseDataMismatchChecker(Checker):
"""Assert CourseData is consistent between rows"""

Expand All @@ -483,22 +529,26 @@ def __init__(self, *args, **kwargs) -> None:
self.course_data_by_name_en: Dict[str, CourseData] = {}
self.tracker: FirstLocationAndCountTracker = FirstLocationAndCountTracker()

def check_course_data(self, course_data: CourseData, location: ExcelFileLocation):
def check_course_data(self, course_data: CourseData, location: ExcelFileLocation) -> None:
if not all_fields_valid(course_data):
return

stored = self.course_data_by_name_en.setdefault(course_data.name_en, course_data)

# degrees would be merged if course data is equal otherwise.
if not course_data.equals_when_ignoring_degrees(stored):
self.tracker.add_location_for_key(location, course_data.name_en)
differing_fields = course_data.differing_fields(stored) - {"degrees"}
if differing_fields:
self.tracker.add_location_for_key(location, (course_data.name_en, tuple(differing_fields)))

def finalize(self) -> None:
for key, location_string in self.tracker.aggregated_keys_and_location_strings():
for (course_name, differing_fields), location_string in self.tracker.aggregated_keys_and_location_strings():
self.importer_log.add_error(
_('{location}: The data of course "{name}" differs from its data in a previous row.').format(
_(
'{location}: The data of course "{name}" differs from its data in the columns ({columns}) in a previous row.'
).format(
location=location_string,
name=key,
name=course_name,
columns=", ".join(differing_fields),
),
category=ImporterLogEntry.Category.COURSE,
)
Expand Down Expand Up @@ -577,6 +627,7 @@ def import_enrollments(
UserDataAdapter(UserDataEmptyFieldsChecker(test_run, importer_log)),
UserDataAdapter(UserDataMismatchChecker(test_run, importer_log)),
UserDataAdapter(UserDataValidationChecker(test_run, importer_log)),
ExistingParticipationChecker(test_run, importer_log),
]:
checker.check_rows(parsed_rows)

Expand Down Expand Up @@ -635,7 +686,7 @@ def normalize_rows(

assert all_fields_valid(row.course_data)
course_data = course_data_by_name_en.setdefault(row.course_data.name_en, row.course_data)
assert course_data.equals_when_ignoring_degrees(row.course_data)
assert course_data.differing_fields(row.course_data) <= {"degrees"}

# Not a checker to keep merging and notifying about the merge together.
if not row.course_data.degrees.issubset(course_data.degrees):
Expand Down Expand Up @@ -756,13 +807,25 @@ def store_participations_in_db(enrollment_rows: Iterable[EnrollmentParsedRow]):
for evaluation in Evaluation.objects.select_related("course").filter(course__name_en__in=course_names_en)
}

participants_through_objects = [
Evaluation.participants.through(
evaluation=evaluations_by_course_name_en[row.course_data.name_en],
userprofile=users_by_email[row.student_data.email],
)
course_id_participant_id_pairs_in_file = {
(evaluations_by_course_name_en[row.course_data.name_en].pk, users_by_email[row.student_data.email].pk)
for row in enrollment_rows
}

existing_course_id_participant_id_pairs = {
(participation.evaluation_id, participation.userprofile_id)
for participation in Evaluation.participants.through.objects.filter(
evaluation__in=evaluations_by_course_name_en.values()
)
}

course_id_participant_id_pairs = course_id_participant_id_pairs_in_file - existing_course_id_participant_id_pairs

participants_through_objects = [
Evaluation.participants.through(evaluation_id=evaluation_id, userprofile_id=userprofile_id)
for (evaluation_id, userprofile_id) in course_id_participant_id_pairs
]

Evaluation.participants.through.objects.bulk_create(participants_through_objects)
Evaluation.update_log_after_m2m_bulk_create(
evaluations_by_course_name_en.values(),
Expand Down
16 changes: 8 additions & 8 deletions evap/staff/templates/staff_message_rendering_template.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@
{% for category, errorlist in importer_log.errors_by_category.items %}
<div class="card collapsible card-outline-danger mb-3">
<div class="card-header">
<a class="collapse-toggle" data-bs-toggle="collapse" href="#{{ category.id }}Card" aria-expanded="false"
aria-controls="{{ category.id }}Card">
{{ category.display_name }}
<a class="collapse-toggle" data-bs-toggle="collapse" href="#{{ category.value.id }}Card" aria-expanded="false"
aria-controls="{{ category.value.id }}Card">
{{ category.value.display_name }}
</a>
</div>
<div class="collapse show" id="{{ category.id }}Card">
<div class="collapse show" id="{{ category.value.id }}Card">
<div class="card-body">
{% for error in errorlist %}
<div class="alert alert-danger alert-dismissible">
Expand All @@ -29,12 +29,12 @@
{% for category, warninglist in importer_log.warnings_by_category.items %}
<div class="card card-outline-warning collapsible mb-3">
<div class="card-header">
<a class="collapse-toggle" data-bs-toggle="collapse" href="#{{ category.id }}Card" aria-expanded="false"
aria-controls="{{ category.id }}Card">
{{ category.display_name }}
<a class="collapse-toggle" data-bs-toggle="collapse" href="#{{ category.value.id }}Card" aria-expanded="false"
aria-controls="{{ category.value.id }}Card">
{{ category.value.display_name }}
</a>
</div>
<div class="collapse show" id="{{ category.id }}Card">
<div class="collapse show" id="{{ category.value.id }}Card">
<div class="card-body">
{% for warning in warninglist %}
<div class="alert alert-warning alert-dismissible">
Expand Down
82 changes: 81 additions & 1 deletion evap/staff/tests/test_importers.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from copy import deepcopy
from datetime import date, datetime
from unittest.mock import patch

Expand Down Expand Up @@ -72,6 +73,14 @@ def test_created_users(self):
self.assertTrue(UserProfile.objects.filter(email="[email protected]").exists())
self.assertTrue(UserProfile.objects.filter(email="[email protected]").exists())

@patch("evap.staff.importers.user.clean_email", new=(lambda email: "cleaned_" + email))
def test_emails_are_cleaned(self):
original_user_count = UserProfile.objects.count()
__, __ = import_users(self.valid_excel_file_content, test_run=False)
self.assertEqual(UserProfile.objects.count(), 2 + original_user_count)
self.assertTrue(UserProfile.objects.filter(email="[email protected]").exists())
self.assertTrue(UserProfile.objects.filter(email="[email protected]").exists())

def test_duplicate_warning(self):
user = baker.make(UserProfile, first_name="Lucilia", last_name="Manilium", email="[email protected]")

Expand Down Expand Up @@ -316,6 +325,23 @@ def test_valid_file_import(self):
expected_user_count = old_user_count + 23
self.assertEqual(UserProfile.objects.all().count(), expected_user_count)

@patch("evap.staff.importers.user.clean_email", new=(lambda email: "cleaned_" + email))
@patch("evap.staff.importers.enrollment.clean_email", new=(lambda email: "cleaned_" + email))
def test_emails_are_cleaned(self):
import_enrollments(self.default_excel_content, self.semester, None, None, test_run=True)

old_user_count = UserProfile.objects.all().count()

import_enrollments(
self.default_excel_content, self.semester, self.vote_start_datetime, self.vote_end_date, test_run=False
)
expected_user_count = old_user_count + 23
self.assertEqual(UserProfile.objects.all().count(), expected_user_count)

self.assertTrue(UserProfile.objects.filter(email="[email protected]").exists())
self.assertTrue(UserProfile.objects.filter(email="[email protected]").exists())
self.assertTrue(UserProfile.objects.filter(email="[email protected]").exists())

def test_degrees_are_merged(self):
excel_content = excel_data.create_memory_excel_file(excel_data.test_enrollment_data_degree_merge_filedata)

Expand Down Expand Up @@ -450,7 +476,7 @@ def test_invalid_file_error(self):
[msg.message for msg in importer_log_test.errors_by_category()[ImporterLogEntry.Category.COURSE]],
[
'Sheet "MA Belegungen", row 18: The German name for course "Bought" is already used for another course in the import file.',
'Sheet "MA Belegungen", row 20: The data of course "Cost" differs from its data in a previous row.',
'Sheet "MA Belegungen", row 20: The data of course "Cost" differs from its data in the columns (responsible_email) in a previous row.',
],
)
self.assertEqual(
Expand Down Expand Up @@ -669,6 +695,60 @@ def test_user_data_mismatch_to_database(self):
import_enrollments(excel_content, self.semester, None, None, test_run=True)
self.assertGreater(mock.call_count, 50)

def test_duplicate_participation(self):
input_data = deepcopy(excel_data.test_enrollment_data_filedata)
# create a duplicate participation by duplicating a line
input_data["MA Belegungen"].append(input_data["MA Belegungen"][1])
excel_content = excel_data.create_memory_excel_file(input_data)

importer_log = import_enrollments(excel_content, self.semester, None, None, test_run=True)
self.assertEqual(importer_log.errors_by_category(), {})
self.assertEqual(importer_log.warnings_by_category(), {})

old_user_count = UserProfile.objects.all().count()

importer_log = import_enrollments(
self.default_excel_content, self.semester, self.vote_start_datetime, self.vote_end_date, test_run=False
)
self.assertEqual(importer_log.errors_by_category(), {})
self.assertEqual(importer_log.warnings_by_category(), {})

self.assertEqual(Evaluation.objects.all().count(), 23)
expected_user_count = old_user_count + 23
self.assertEqual(UserProfile.objects.all().count(), expected_user_count)

def test_existing_participation(self):
_, existing_evaluation = self.create_existing_course()
user = baker.make(
UserProfile, first_name="Lucilia", last_name="Manilium", email="[email protected]"
)
existing_evaluation.participants.add(user)

importer_log = import_enrollments(self.default_excel_content, self.semester, None, None, test_run=True)

expected_warnings = ["Course Shake: 1 participants from the import file already participate in the evaluation."]
self.assertEqual(
[
msg.message
for msg in importer_log.warnings_by_category()[ImporterLogEntry.Category.ALREADY_PARTICIPATING]
],
expected_warnings,
)
self.assertFalse(importer_log.has_errors())

importer_log = import_enrollments(
self.default_excel_content, self.semester, self.vote_start_datetime, self.vote_end_date, test_run=False
)

self.assertEqual(
[
msg.message
for msg in importer_log.warnings_by_category()[ImporterLogEntry.Category.ALREADY_PARTICIPATING]
],
expected_warnings,
)
self.assertFalse(importer_log.has_errors())


class TestPersonImport(TestCase):
@classmethod
Expand Down
4 changes: 4 additions & 0 deletions evap/staff/tests/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,8 @@ def test_warning_handling(self):
form["excel_file"] = ("import.xls", self.valid_excel_file_content)

reply = form.submit(name="operation", value="test")

self.assertContains(reply, "Name mismatches")
self.assertContains(
reply,
"The existing user would be overwritten with the following data:<br />"
Expand Down Expand Up @@ -1075,6 +1077,7 @@ def test_warning_handling(self):
)

reply = form.submit(name="operation", value="test")
self.assertContains(reply, "Name mismatches")
self.assertContains(
reply,
"The existing user would be overwritten with the following data:<br />"
Expand Down Expand Up @@ -2270,6 +2273,7 @@ def test_import_contributors_warning_handling(self):
form["ce-excel_file"] = ("import.xls", self.valid_excel_file_content)

reply = form.submit(name="operation", value="test-contributors")
self.assertContains(reply, "Name mismatches")
self.assertContains(
reply,
"The existing user would be overwritten with the following data:<br />"
Expand Down

0 comments on commit 47afab5

Please sign in to comment.