Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Preparation progress dashboard utility script #579

Merged
merged 33 commits into from
May 10, 2024
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
be72a1a
Add imports to relevant models for user permission
aristizabal95 Apr 15, 2024
db33ada
Implement permission for mlcube owners to see dset emails
aristizabal95 Apr 16, 2024
2d40c02
Implement get_user function
aristizabal95 Apr 17, 2024
c38ea96
Implement rano-dashboard script
aristizabal95 Apr 17, 2024
ebd7788
Remove unnecessary try-except block
aristizabal95 Apr 17, 2024
6d82c54
Merge branch 'main' into preparation-dashboard
aristizabal95 Apr 17, 2024
76ba633
Organize code
aristizabal95 Apr 17, 2024
d3c14c9
Set correct default paths
aristizabal95 Apr 17, 2024
3f9dc0c
Apply linter fixes
aristizabal95 Apr 17, 2024
d06067a
Merge branch 'main' into preparation-dashboard
aristizabal95 Apr 24, 2024
ec5659e
Use int ids for user tests
aristizabal95 Apr 24, 2024
83f21e0
Merge branch 'preparation-dashboard' of https://github.com/aristizaba…
aristizabal95 Apr 24, 2024
74dae67
Check for anon user. Add test
aristizabal95 Apr 24, 2024
d759045
fix linting issues
aristizabal95 Apr 24, 2024
960d686
Fix user tests
aristizabal95 Apr 25, 2024
1d152eb
Increase version number
aristizabal95 Apr 25, 2024
b8873a8
Remove duplicate dashboard
aristizabal95 Apr 26, 2024
9952c59
Move medperf initialization to the very beginning
aristizabal95 May 2, 2024
aff1c3b
Revert to using floats for indices
aristizabal95 May 2, 2024
81d47f1
Use queryset.exists() instead of length check
aristizabal95 May 2, 2024
21da575
Build dashboard as a package
aristizabal95 May 2, 2024
e907e1c
Require stages file to be passed explicitly
aristizabal95 May 2, 2024
b48b2e4
Add a README explaining how to use the dashboard
aristizabal95 May 2, 2024
f03b425
Display a warning about report visibility
aristizabal95 May 2, 2024
82eec07
Add warning of email visibility on dset submission
aristizabal95 May 3, 2024
52019d9
Fix linter issues
aristizabal95 May 3, 2024
8f5d0bf
Fix linter issues
aristizabal95 May 3, 2024
af4a07c
Merge branch 'main' into preparation-dashboard
aristizabal95 May 6, 2024
b987a08
Update server/user/tests/test_pk.py
aristizabal95 May 10, 2024
ffee542
Remove typer app from get_data
aristizabal95 May 10, 2024
134dad4
Merge branch 'preparation-dashboard' of https://github.com/aristizaba…
aristizabal95 May 10, 2024
e64f010
Remove stages.csv from PR
aristizabal95 May 10, 2024
1d0d7b3
Make stages_path a required argument
aristizabal95 May 10, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cli/medperf/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.1.2"
__version__ = "0.1.3"
5 changes: 5 additions & 0 deletions cli/medperf/commands/dataset/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,8 +309,13 @@ def prompt_for_report_sending_approval(self):
+ " dataset subjects have reached Stage 1, and that 60% of your dataset subjects"
+ " have reached Stage 3:"
)
warning = (
"Note that reports will be visible by the Data Preparation MLCube owner and by the"
" Benchmark owner to keep track of progress and provide support if needed."
)
config.ui.print(msg)
dict_pretty_print(example)
config.ui.print_warning(warning)
aristizabal95 marked this conversation as resolved.
Show resolved Hide resolved

msg = (
" \nDo you approve the automatic submission of summaries similar to the one above"
Expand Down
5 changes: 5 additions & 0 deletions cli/medperf/commands/dataset/submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,11 @@ def upload(self):
submission_dict = self.dataset.todict()
dict_pretty_print(submission_dict)
msg = "Do you approve the registration of the presented data to MedPerf? [Y/n] "
warning = (
"Upon submission, your email address will be visible to the Data Preparation"
+ " Owner for traceability and debugging purposes."
)
self.ui.print_warning(warning)
self.approved = self.approved or approval_prompt(msg)

if self.approved:
Expand Down
14 changes: 14 additions & 0 deletions cli/medperf/comms/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,3 +285,17 @@ def update_dataset(self, dataset_id: int, data: dict):
dataset_id (int): ID of the dataset to update
data (dict): Updated information of the dataset.
"""

@abstractmethod
def get_user(self, user_id: int) -> dict:
    """Retrieves the specified user.

    This will only return if the current user has permission to view the
    requested user: they are the same user, an admin, or an owner of a
    data preparation mlcube used by the requested user.

    Args:
        user_id (int): User UID

    Returns:
        dict: Requested user information
    """
20 changes: 20 additions & 0 deletions cli/medperf/comms/rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -531,3 +531,23 @@ def get_mlcube_datasets(self, mlcube_id: int) -> dict:

datasets = self.__get_list(f"{self.server_url}/mlcubes/{mlcube_id}/datasets/")
return datasets

def get_user(self, user_id: int) -> dict:
    """Fetch a user's profile from the server.

    Only succeeds when the authenticated user is allowed to view the
    target user: they are the same user, an admin, or an owner of a data
    preparation mlcube used by the target user.

    Args:
        user_id (int): User UID

    Returns:
        dict: Requested user information

    Raises:
        CommunicationRequestError: if the server responds with a non-200 status.
    """
    response = self.__auth_get(f"{self.server_url}/users/{user_id}/")
    if response.status_code == 200:
        return response.json()
    # Non-200: log the raw response, then surface a readable error
    log_response_error(response)
    details = format_errors_dict(response.json())
    raise CommunicationRequestError(f"Could not retrieve user: {details}")
26 changes: 26 additions & 0 deletions scripts/dashboard/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Medperf Data Preparation Dashboard

The medperf data preparation dashboard provides visualization of the usage of a data preparation mlcube and the stages data owners are at. This will hopefully provide insights into how far along the process is going, and whether users are having trouble specific to the execution of the data preparation pipeline.

## Installation

To install, execute the following command at this folder:

```
pip install -e .
```

## How to use

To use, you need to have a few assets and identifiers beforehand:
- MLCube ID: The ID of the MLCube that is being used as a data preparation MLCube. To be able to see progress, you must be the owner of this MLCube
- Stages File: A `CSV` file that contains the human-readable information of each of the stages that the data preparation MLCube contains. The CSV should have the following columns: `Status Code, status_name, comment, docs_url, color`
- Institutions File: A `CSV` file that maps emails to institutions that are expected to be part of the preparation procedure. The CSV should have the following columns: `institution, email`

Once all requirements are covered, you can execute the following command:

```
medperf-dashboard -m <MLCube ID> -s <Stages File> -i <Institutions File>
```

Running this command will fetch the latest reports from the medperf server, and start a local server that will contain the visualization of the progress. To access this server, head to `http://localhost:8050` on your preferred browser.
5 changes: 5 additions & 0 deletions scripts/dashboard/medperf_dashboard/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Entry point for `python -m medperf_dashboard`.
from medperf.init import initialize

# Initialize medperf first: the dashboard import below must only happen
# after initialization (this ordering is deliberate — do not reorder).
initialize()

# NOTE(review): `t_app` is imported but not called here; importing the
# module presumably sets up/launches the dashboard app as a side effect —
# confirm against preparation_dashboard's module-level code.
from .preparation_dashboard import t_app  # noqa
51 changes: 51 additions & 0 deletions scripts/dashboard/medperf_dashboard/assets/stages.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
Status Code,status_name,comment,docs_url,color
0,Identified,,,cornflowerblue
1,Validated,,,lightgreen
-1.1,Missing Modalities,There are missing modalities. Please check the data,,darkorange
-1.2,Extra Modalities,There are extra modalities. Please check the data,,indigo
-1.3,Validation Failed,,,orangered
2,Converted To Nifti,,,deeppink
-2,Nifti Conversion Failed,,,slategray
3,Brain Extract Finished,,,gold
-3,Brain Extract Failed,,,plum
4,Tumor Extract Finished,,,pink
-4,Tumor Extract Failed,,,rebeccapurple
5,Manual Review Completed,,,mediumslateblue
-5,Manual Review Required,"Baseline tumor segmentations have been generated. Manual corrections are required to proceed. Here are the following recommended methods.

### Local Manual Corrections
If you're doing manual corrections on this machine, and you're running the manual Data Preparation pipeline locally, you may find buttons at the bottom of this page to aid you in the process of manual correction. These buttons automatically start the review process with ITK-Snap using the generated tumor segmentation mask. Please ensure ITK-Snap has been installed with command-line tools to use this method. You may edit the file as many times as needed. Once you're done with this subject, press the ""Mark as finalized"" button to let the pipeline know the subject is done.

In some cases, local manual correction is not possible, or the default baseline segmentation is not sufficiently good to work with. In those cases, it is recommended to do manual correction through the **Packaging method**.

### Packaging Method
If using the monitoring tool, you may package cases for review by pressing the 'Package cases for review' button under **SUMMARY**. This will create a tarball file on the working directory (displayed on the monitoring tool header), containing all the cases for review. You may untar the segmentation with the following command

``` tar -xzf review_cases.tar.gz ```

If working remotely, move the tarball file to your local machine. Untar the tarball file, select the best baseline segmentation and make any necessary corrections. Once you're done, move the finalized file to the `finalized` folder, retaining the original name.

If using the monitoring tool, create a new tarball with the previously untarred files, as well as finalized cases. You can do so with the following command

``` tar -czf reviewed_cases.tar.gz -C review_cases . ```

Place this new tarball file on the directory the monitoring tool is running (displayed on the monitoring tool header). The tool will automatically detect the finalized cases and place them in the expected locations.

### Brain Mask Correction
There might be situations where the brain mask is not correct. If working locally you may use the `Review Brain Mask` button to automatically make brain mask corrections with ITK-Snap. If instead you're using the previously mentioned tarball, you may find the brain mask segmentation as `brainMask_fused.nii.gz`. Please make the necessary corrections under that file, overwriting the contents. Once the new tarball is created and placed within the directory the monitoring tool is running, it will be automatically identified and placed in the correct location.
**NOTE** Modifying the brain mask invalidates the tumor segmentations, and re-runs the tumor segmentation procedure using this new mask.
",,mediumseagreen
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is the text here intentional?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems to me this is just a multiline str value of comment field for the -5 stage. At least csv is parsed successfully

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And I don't see any usage of this comment field in the dashboard. I believe it is here just because stages.csv file was taken as it is used in monitor (?). Anyway, it does not break anything

-5.1,Multiple Annotations Error,More than one segmentation file was identified in the manually corrected output folder. Please ensure that there is only one manually corrected segmentation inside the labels path.,,mediumpurple
-6,Comparison Complete,,,mediumturquoise
-6.1,Exact Match Identified,"The automated and manually corrected segmentation files are identical. Was this intentional? If so, are you certain that no manual corrections were required? If not, please redo manual correction for this exam.",,orchid
-6.2,Annotation Comparison Failed,The original segmentation for the reviewed file was not identified. This most probably means the annotation file was renamed. Please ensure the reviewed file retains its original name.,,peachpuff
7,Annotation Confirmed,,,silver
8,Done,,,green
-0.101,Generate Report Unhandled Error,,,tomato
-1.101,Initial Validation Unhandled Error,,,firebrick
-2.101,Nifti Conversion Unhandled Error,,,teal
-3.101,Brain Extract Unhandled Error,,,saddlebrown
-4.101,Tumor Extract Unhandled Error,,,sandybrown
-5.101,Manual Annotation Unhandled Error,,,peru
-6.101,Label Segmentation Comparison Unhandled Error,,,moccasin
-7.101,Annotation Confirmation Unhandled Error,,,mediumvioletred
aristizabal95 marked this conversation as resolved.
Show resolved Hide resolved
126 changes: 126 additions & 0 deletions scripts/dashboard/medperf_dashboard/get_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import os
import pandas as pd
import datetime

from medperf.entities.dataset import Dataset
from medperf import config

from .utils import get_institution_from_email, get_reports_path, stage_id2name

from typer import Typer, Option, run

app = Typer()


def get_dsets(mlcube_id):
    """Fetch all datasets registered against the given data-preparation MLCube.

    Each dataset dict is augmented with a "user" entry holding the owner's
    profile as returned by the medperf server.
    """
    # NOTE(review): this issues one get_user call per dataset, so owners
    # with several datasets are fetched repeatedly; a batched endpoint
    # would reduce the number of API calls.
    datasets = [entity.todict() for entity in Dataset.all(filters={"mlcube": mlcube_id})]
    for record in datasets:
        record["user"] = config.comms.get_user(record["owner"])
    return datasets


def build_dset_df(dsets, user2institution, stages_df):
    """Build a table of valid datasets joined with per-stage progress fractions.

    Args:
        dsets: dataset dicts from get_dsets(), each with a "user" entry.
        user2institution: mapping of owner email -> institution name.
        stages_df: stages table indexed by "Status Code", used to turn
            numeric stage ids into human-readable column names.

    Returns:
        pd.DataFrame: one row per valid dataset with identity columns plus
        one column per stage holding the fraction of subjects at that stage.
    """
    formatted_dsets = []
    for dset in dsets:
        email = dset["user"]["email"]
        institution = get_institution_from_email(email, user2institution)
        formatted_dset = {
            "name": dset["name"],
            "owner": dset["owner"],
            "email": email,
            "is_valid": dset["is_valid"],
            "created_at": dset["created_at"],
            "modified_at": dset["modified_at"],
            "institution": institution,
        }
        if len(dset["report"]):
            # Contains a readable report
            report = dset["report"]
            exec_status = report["execution_status"]
            formatted_dset["execution_status"] = exec_status
            formatted_dset["progress"] = report["progress"]

        formatted_dsets.append(formatted_dset)
    dsets_df = pd.DataFrame(formatted_dsets)

    # NOTE(review): Series.fillna({}) is a no-op (a dict argument maps index
    # labels to fill values), so rows without a report keep NaN here; the
    # fillna("0.0%") below appears to be what actually backfills them —
    # confirm. Also assumes at least one dataset has a report, otherwise the
    # "progress" column is missing and this raises KeyError — TODO confirm.
    progress = dsets_df["progress"].fillna({})
    progress_df = pd.DataFrame(progress.values.tolist())
    # Replace numeric stage-id column labels with human-readable stage names
    progress_df.rename(columns=lambda x: stage_id2name(x, stages_df), inplace=True)
    # Progress values are strings like "42.0%"; convert to fractions in [0, 1].
    # NOTE(review): DataFrame.map requires pandas >= 2.1 (formerly applymap).
    progress_df = progress_df.fillna("0.0%").map(lambda x: float(x[:-1]) / 100)
    # Collapse columns that mapped to the same stage name.
    # NOTE(review): groupby(axis=1) is deprecated in recent pandas — verify
    # against the pinned pandas version.
    progress_df = progress_df.groupby(level=0, axis=1).sum()

    full_table = dsets_df.join(progress_df)
    # Keep only datasets still marked valid on the server
    full_table = full_table[full_table["is_valid"]]
    full_table.drop(columns=["owner", "progress"], inplace=True)

    return full_table


def write_dsets_df(dsets_df, full_path):
    """Persist the datasets table as CSV files under ``full_path``.

    Writes the full table twice (a stable filename plus a UTC-timestamped
    snapshot) and a "latest" table that keeps only the most recently
    modified row per institution, dropping columns that are entirely zero.
    """
    now_utc = datetime.datetime.now(datetime.timezone.utc)

    # Most recent record for each institution
    per_institution = dsets_df.sort_values("modified_at").groupby("institution").last()
    # Drop columns with no non-zero value (stages nobody has reached)
    nonzero_columns = (per_institution != 0).any(axis=0)
    per_institution = per_institution.loc[:, nonzero_columns]

    dsets_df.to_csv(os.path.join(full_path, "full_table.csv"))
    dsets_df.to_csv(os.path.join(full_path, f"{now_utc}.csv"))
    per_institution.to_csv(os.path.join(full_path, "latest_table.csv"))


def write_sites(dsets_df, institutions_df, full_path):
    """Write the union of expected and registered institutions to sites.txt.

    Args:
        dsets_df: datasets table; must contain an "institution" column.
        institutions_df: expected participants; must contain an
            "institution" column.
        full_path: directory where sites.txt is written (must exist).
    """
    sites_path = os.path.join(full_path, "sites.txt")

    expected_sites = institutions_df["institution"].values.tolist()
    registered_sites = dsets_df["institution"].values.tolist()
    # Sort so the file is deterministic; iterating a bare set() would make
    # the line order vary between runs and produce noisy diffs.
    sites = sorted(set(expected_sites + registered_sites))

    with open(sites_path, "w") as f:
        f.write("\n".join(sites))


def get_data(mlcube_id, stages_path, institutions_path, out_path):
    """Fetch preparation reports for an MLCube and write progress CSVs.

    Args:
        mlcube_id: ID of the data-preparation MLCube to inspect.
        stages_path: CSV describing the pipeline stages ("Status Code", ...).
        institutions_path: CSV mapping institutions to contact emails.
        out_path: base directory under which report CSVs are written.
    """
    datasets = get_dsets(mlcube_id)
    reports_dir = get_reports_path(out_path, mlcube_id)
    os.makedirs(reports_dir, exist_ok=True)

    institutions_df = pd.read_csv(institutions_path)
    # Map contact email -> institution (CSV columns: institution, email)
    email2institution = {
        email: inst for inst, email in institutions_df.itertuples(index=False)
    }
    stages_df = pd.read_csv(stages_path).set_index("Status Code")

    table = build_dset_df(datasets, email2institution, stages_df)
    write_dsets_df(table, reports_dir)
    write_sites(table, institutions_df, reports_dir)


@app.command()
def main(
    mlcube_id: int = Option(
        # Fixed typo in user-facing help text: "prparation" -> "preparation"
        ..., "-m", "--mlcube", help="MLCube ID to inspect preparation from"
    ),
    stages_path: str = Option(
        "assets/stages.csv", "-s", "--stages", help="Path to stages.csv"
    ),
    institutions_path: str = Option(
        ...,
        "-i",
        "--institutions",
        help="Path to a CSV file containing institution-email information",
    ),
    out_path: str = Option(
        "reports", "-o", "--out-path", help="location to store progress CSVs"
    ),
):
    """CLI entry point: fetch preparation progress and write CSV reports."""
    get_data(mlcube_id, stages_path, institutions_path, out_path)


# Allow running this module directly (e.g. `python get_data.py`)
if __name__ == "__main__":
    run(main)
Loading
Loading