Skip to content

Commit

Permalink
Allow comma-separated parent_sample_id values in URL checking (#1320)
Browse files Browse the repository at this point in the history
* allow comma-separated values in parent_sample_id field
  • Loading branch information
gesinaphillips committed Apr 10, 2024
1 parent e25d263 commit 4fc83e6
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 22 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
- Directory validation changes for "shared" uploads
- Update Phenocycler directory schema
- Remove bad paths from LC-MS directory schema
- Allow multiple comma-separated parent_sample_id values

## v0.0.18

Expand Down
7 changes: 3 additions & 4 deletions examples/dataset-examples/bad-cedar-assay-histology/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,8 @@ Metadata TSV Validation Errors:
examples/dataset-examples/bad-cedar-assay-histology/upload/bad-histology-metadata.tsv:
URL Errors:
- 'On row 2, column "parent_sample_id", value "wrong" fails because of error
"HTTPError": 401 Client Error: Unauthorized for url: https://entity.api.hubmapconsortium.org/entities/wrong.'
- 'On row 3, column "parent_sample_id", value "HBM854.FXDQ.783" fails because
of error "HTTPError": 401 Client Error: Unauthorized for url: https://entity.api.hubmapconsortium.org/entities/HBM854.FXDQ.783'
"HTTPError": Field value is not valid; URL https://entity.api.hubmapconsortium.org/entities/wrong
returned a 400 Error.'
Validation Errors:
- On row 0, column "parent_sample_id", value "wrong" fails because of error
"invalidValueFormat".
Expand All @@ -27,4 +26,4 @@ Reference Errors:
Hint: 'If validation fails because of extra whitespace in the TSV, try:
src/cleanup_whitespace.py --tsv_in original.tsv --tsv_out clean.tsv.'
```
```
54 changes: 36 additions & 18 deletions src/ingest_validation_tools/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -567,26 +567,44 @@ def _check_matching_urls(
for i, row in enumerate(rows):
check = {k: v for k, v in row.items() if k in constrained_fields}
for field, value in check.items():
try:
url = constrained_fields[field] + value
if field != "orcid_id":
headers = self.app_context.get("request_header", {})
headers["Authorization"] = f"Bearer {self.globus_token}"
else:
headers = {}
response = requests.get(url, headers=headers)
response.raise_for_status()
except Exception as e:
error = {
"errorType": type(e).__name__,
"column": field,
"row": i + 2,
"value": value,
"error_text": e.__str__(),
}
url_errors.append(self._get_message(error, report_type))
if field == "parent_sample_id":
ids = value.split(",")
for id in ids:
error = self._check_single_url(field, id.strip(), constrained_fields, i)
if error:
url_errors.append(self._get_message(error, report_type))
else:
error = self._check_single_url(field, value, constrained_fields, i)
if error:
url_errors.append(self._get_message(error, report_type))
return url_errors

def _check_single_url(
self,
field: str,
value: str,
constrained_fields: Dict[str, str],
row_num: int,
) -> Optional[Dict]:
try:
url = constrained_fields[field] + value
if field != "orcid_id":
headers = self.app_context.get("request_header", {})
headers["Authorization"] = f"Bearer {self.globus_token}"
else:
headers = {}
response = requests.get(url, headers=headers)
response.raise_for_status()
except Exception as e:
error = {
"errorType": type(e).__name__,
"column": field,
"row": row_num + 2,
"value": value,
"error_text": e.__str__(),
}
return error

def _get_message(
self,
error: Dict[str, str],
Expand Down

0 comments on commit 4fc83e6

Please sign in to comment.