Skip to content

Commit

Permalink
Merge pull request #757 from ImageMarkup/simplify-metadata-sanitization
Browse files Browse the repository at this point in the history
Simplify metadata sanitization
  • Loading branch information
danlamanna committed Aug 25, 2023
2 parents a25f98c + 9b4ede6 commit 602e119
Show file tree
Hide file tree
Showing 8 changed files with 33 additions and 43 deletions.
2 changes: 1 addition & 1 deletion isic/core/api/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def resolve_metadata(image: Image) -> dict:
"clinical": {},
}

for key, value in image.accession.redacted_metadata.items():
for key, value in image.metadata.items():
# this is the only field that we expose that isn't in the FIELD_REGISTRY
# since it's a derived field.
if key == "age_approx":
Expand Down
28 changes: 22 additions & 6 deletions isic/core/models/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@

from .isic_id import IsicId

RESTRICTED_METADATA_FIELDS = ["age"]


class ImageQuerySet(models.QuerySet):
def from_search_query(self, query: str):
Expand Down Expand Up @@ -79,6 +77,26 @@ def __str__(self):
def get_absolute_url(self):
return reverse("core/image-detail", args=[self.pk])

@staticmethod
def _image_metadata(accession_metadata: dict) -> dict:
"""
Return the metadata for an image given its accession metadata.
Note that the metadata for the image includes a rounded age approximation, but not the
original age.
The static version of this method is useful when dealing with dictionary representations.
"""
if "age" in accession_metadata:
accession_metadata["age_approx"] = Accession._age_approx(accession_metadata["age"])
del accession_metadata["age"]

return accession_metadata

@property
def metadata(self) -> dict:
return Image._image_metadata(self.accession.metadata)

def to_elasticsearch_document(self, body_only=False) -> dict:
# Can only be called on images that were fetched with with_elasticsearch_properties.
document = {
Expand All @@ -93,9 +111,7 @@ def to_elasticsearch_document(self, body_only=False) -> dict:
"collections": self.coll_pks,
}

# Fields in the search index have to be redacted otherwise the documents that match could
# leak what their true metadata values are.
document.update(self.accession.redacted_metadata)
document.update(self.metadata)

if body_only:
return document
Expand Down Expand Up @@ -148,7 +164,7 @@ class ImagePermissions:
def view_full_metadata_list(
user_obj: User, qs: QuerySet[Image] | None = None
) -> QuerySet[Image]:
# Allows viewing unstructured metadata as well as the redacted metadata fields.
# Allows viewing unstructured metadata as well as the metadata history.
#
# This is only used in an SSR context, the API doesn't yet reveal more.
qs = qs if qs is not None else Image._default_manager.all()
Expand Down
4 changes: 2 additions & 2 deletions isic/core/services/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def _image_metadata_csv_headers(*, qs: QuerySet[Image]) -> list[str]:
.distinct()
)

# TODO: this is a very leaky part of RESTRICTED_METADATA_FIELDS that
# TODO: this is a very leaky part of sensitive metadata handling that
# should be refactored.
if "age" in used_metadata_keys:
used_metadata_keys.append("age_approx")
Expand All @@ -45,6 +45,6 @@ def image_metadata_csv_rows(*, qs: QuerySet[Image]) -> Iterator[dict]:
"isic_id": image["isic_id"],
"attribution": image["accession__cohort__attribution"],
"copyright_license": image["accession__copyright_license"],
**Accession._redact_metadata(image["accession__metadata"]),
**Image._image_metadata(image["accession__metadata"]),
}
}
2 changes: 1 addition & 1 deletion isic/core/templates/core/partials/image_modal.html
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
</div>

<div class="my-2 bg-gray-100 p-4">
<pre x-show="selectedTab == 'Metadata'">{{ image.accession.redacted_metadata|formatted }}</pre>
<pre x-show="selectedTab == 'Metadata'">{{ image.metadata|formatted }}</pre>
</div>
</div>
{% endif %}
Expand Down
11 changes: 4 additions & 7 deletions isic/core/tests/test_search.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
import itertools

import pytest
from pytest_lazyfixture import lazy_fixture

from isic.core.models.image import RESTRICTED_METADATA_FIELDS
from isic.core.search import add_to_search_index, get_elasticsearch_client


Expand Down Expand Up @@ -138,17 +135,17 @@ def test_core_api_image_search_invalid_query(route, searchable_images, authentic

@pytest.mark.django_db
@pytest.mark.parametrize(
"restricted_field,route",
itertools.product(RESTRICTED_METADATA_FIELDS, ["/api/v2/images/", "/api/v2/images/search/"]),
"route",
["/api/v2/images/", "/api/v2/images/search/"],
)
def test_core_api_image_hides_fields(
authenticated_client, searchable_image_with_private_field, restricted_field, route
authenticated_client, searchable_image_with_private_field, route
):
r = authenticated_client.get(route)
assert r.status_code == 200, r.json()
assert r.json()["count"] == 1, r.json()
for image in r.json()["results"]:
assert restricted_field not in image["metadata"]
assert "age" not in image["metadata"]


@pytest.mark.django_db
Expand Down
8 changes: 2 additions & 6 deletions isic/core/tests/test_view_image_detail.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
import pytest
from pytest_lazyfixture import lazy_fixture

from isic.core.models.image import RESTRICTED_METADATA_FIELDS


@pytest.mark.django_db
@pytest.mark.parametrize(
Expand Down Expand Up @@ -64,8 +62,7 @@ def test_view_image_detail_public(client, detailed_image):
assert "unstructured_metadata" not in r.context
assert "metadata_versions" not in r.context

for field in RESTRICTED_METADATA_FIELDS:
assert field not in r.context["metadata"]
assert "age" not in r.context["metadata"]

assert all([coll.public for coll in r.context["pinned_collections"]])
assert len(r.context["pinned_collections"]) == 1
Expand All @@ -88,8 +85,7 @@ def test_view_image_detail_uploader(client, detailed_image):
# TODO: uploaders can see all metadata added to their images forever?
assert "metadata_versions" in r.context

for field in RESTRICTED_METADATA_FIELDS:
assert field in r.context["metadata"]
assert "age" not in r.context["metadata"]

assert all([coll.public for coll in r.context["pinned_collections"]])
assert len(r.context["pinned_collections"]) == 1
Expand Down
4 changes: 1 addition & 3 deletions isic/core/views/images.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,12 @@ def image_detail(request, pk):
"studies": studies,
}

ctx["metadata"] = dict(sorted(image.metadata.items()))
if request.user.has_perm("core.view_full_metadata", image):
ctx["metadata"] = dict(sorted(image.accession.metadata.items()))
ctx["unstructured_metadata"] = dict(sorted(image.accession.unstructured_metadata.items()))
ctx["metadata_versions"] = image.accession.metadata_versions.select_related(
"creator"
).differences()
else:
ctx["metadata"] = dict(sorted(image.accession.redacted_metadata.items()))

ctx["sections"] = {
"metadata": "Metadata",
Expand Down
17 changes: 0 additions & 17 deletions isic/ingest/models/accession.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,23 +314,6 @@ def _age_approx(age: int) -> int:
def age_approx(self) -> int | None:
return self._age_approx(self.metadata["age"]) if "age" in self.metadata else None

@staticmethod
def _redact_metadata(metadata: dict) -> dict:
from isic.core.models.image import RESTRICTED_METADATA_FIELDS

if "age" in metadata:
metadata["age_approx"] = Accession._age_approx(metadata["age"])

for f in RESTRICTED_METADATA_FIELDS:
if f in metadata:
del metadata[f]

return metadata

@property
def redacted_metadata(self) -> dict:
return self._redact_metadata(dict(self.metadata))

def _require_unpublished(self):
if self.published:
raise ValidationError("Can't modify the accession as it's already been published.")
Expand Down

0 comments on commit 602e119

Please sign in to comment.