Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Preparation progress dashboard utility script #579

Merged
merged 33 commits into from
May 10, 2024
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
be72a1a
Add imports to relevant models for user permission
aristizabal95 Apr 15, 2024
db33ada
Implement permission for mlcube owners to see dset emails
aristizabal95 Apr 16, 2024
2d40c02
Implement get_user function
aristizabal95 Apr 17, 2024
c38ea96
Implement rano-dashboard script
aristizabal95 Apr 17, 2024
ebd7788
Remove unnecessary try-except block
aristizabal95 Apr 17, 2024
6d82c54
Merge branch 'main' into preparation-dashboard
aristizabal95 Apr 17, 2024
76ba633
Organize code
aristizabal95 Apr 17, 2024
d3c14c9
Set correct default paths
aristizabal95 Apr 17, 2024
3f9dc0c
Apply linter fixes
aristizabal95 Apr 17, 2024
d06067a
Merge branch 'main' into preparation-dashboard
aristizabal95 Apr 24, 2024
ec5659e
Use int ids for user tests
aristizabal95 Apr 24, 2024
83f21e0
Merge branch 'preparation-dashboard' of https://github.com/aristizaba…
aristizabal95 Apr 24, 2024
74dae67
Check for anon user. Add test
aristizabal95 Apr 24, 2024
d759045
fix linting issues
aristizabal95 Apr 24, 2024
960d686
Fix user tests
aristizabal95 Apr 25, 2024
1d152eb
Increase version number
aristizabal95 Apr 25, 2024
b8873a8
Remove duplicate dashboard
aristizabal95 Apr 26, 2024
9952c59
Move medperf initialization to the very beginning
aristizabal95 May 2, 2024
aff1c3b
Revert to using floats for indices
aristizabal95 May 2, 2024
81d47f1
Use queryset.exists() instead of length check
aristizabal95 May 2, 2024
21da575
Build dashboard as a package
aristizabal95 May 2, 2024
e907e1c
Require stages file to be passed explicitly
aristizabal95 May 2, 2024
b48b2e4
Add a README explaining how to use the dashboard
aristizabal95 May 2, 2024
f03b425
Display a warning about report visibility
aristizabal95 May 2, 2024
82eec07
Add warning of email visibility on dset submission
aristizabal95 May 3, 2024
52019d9
Fix linter issues
aristizabal95 May 3, 2024
8f5d0bf
Fix linter issues
aristizabal95 May 3, 2024
af4a07c
Merge branch 'main' into preparation-dashboard
aristizabal95 May 6, 2024
b987a08
Update server/user/tests/test_pk.py
aristizabal95 May 10, 2024
ffee542
Remove typer app from get_data
aristizabal95 May 10, 2024
134dad4
Merge branch 'preparation-dashboard' of https://github.com/aristizaba…
aristizabal95 May 10, 2024
e64f010
Remove stages.csv from PR
aristizabal95 May 10, 2024
1d0d7b3
Make stages_path a required argument
aristizabal95 May 10, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cli/medperf/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.1.2"
__version__ = "0.1.3"
5 changes: 5 additions & 0 deletions cli/medperf/commands/dataset/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,8 +309,13 @@ def prompt_for_report_sending_approval(self):
+ " dataset subjects have reached Stage 1, and that 60% of your dataset subjects"
+ " have reached Stage 3:"
)
warning = (
"Note that reports will be visible by the Data Preparation MLCube owner and by the"
" Benchmark owner to keep track of progress and provide support if needed."
)
config.ui.print(msg)
dict_pretty_print(example)
config.ui.print_warning(warning)
aristizabal95 marked this conversation as resolved.
Show resolved Hide resolved

msg = (
" \nDo you approve the automatic submission of summaries similar to the one above"
Expand Down
5 changes: 5 additions & 0 deletions cli/medperf/commands/dataset/submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,11 @@ def upload(self):
submission_dict = self.dataset.todict()
dict_pretty_print(submission_dict)
msg = "Do you approve the registration of the presented data to MedPerf? [Y/n] "
warning = (
"Upon submission, your email address will be visible to the Data Preparation"
+ " Owner for traceability and debugging purposes."
)
self.ui.print_warning(warning)
self.approved = self.approved or approval_prompt(msg)

if self.approved:
Expand Down
14 changes: 14 additions & 0 deletions cli/medperf/comms/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,3 +285,17 @@ def update_dataset(self, dataset_id: int, data: dict):
dataset_id (int): ID of the dataset to update
data (dict): Updated information of the dataset.
"""

@abstractmethod
def get_user(self, user_id: int) -> dict:
    """Retrieves the specified user.

    This will only return if the current user has permission to view the
    requested user: they are the same user, an admin, or an owner of a
    data preparation mlcube used by the requested user.

    Args:
        user_id (int): User UID

    Returns:
        dict: Requested user information
    """
20 changes: 20 additions & 0 deletions cli/medperf/comms/rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -531,3 +531,23 @@ def get_mlcube_datasets(self, mlcube_id: int) -> dict:

datasets = self.__get_list(f"{self.server_url}/mlcubes/{mlcube_id}/datasets/")
return datasets

def get_user(self, user_id: int) -> dict:
    """Fetch a user's profile from the server.

    Only succeeds when the authenticated user is allowed to view the
    target user: they are the same user, an admin, or an owner of a data
    preparation mlcube used by the target user.

    Args:
        user_id (int): User UID

    Returns:
        dict: Requested user information

    Raises:
        CommunicationRequestError: if the server responds with a non-200 status.
    """
    response = self.__auth_get(f"{self.server_url}/users/{user_id}/")
    if response.status_code == 200:
        return response.json()
    # Non-200: log the raw response, then surface a readable error
    log_response_error(response)
    details = format_errors_dict(response.json())
    raise CommunicationRequestError(f"Could not retrieve user: {details}")
26 changes: 26 additions & 0 deletions scripts/dashboard/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Medperf Data Preparation Dashboard

The medperf data preparation dashboard provides visualization of the usage of a data preparation mlcube and the stages data owners are at. This will hopefully provide insights into how far along the process is going, and whether users are having trouble specific to the execution of the data preparation pipeline.

## Installation

To install, execute the following command at this folder:

```
pip install -e .
```

## How to use

To use, you need to have a few assets and identifiers beforehand:
- MLCube ID: The ID of the MLCube that is being used as a data preparation MLCube. To be able to see progress, you must be the owner of this MLCube
- Stages File: A `CSV` file that contains the human-readable information of each of the stages that the data preparation MLCube contains. The CSV should have the following columns: `Status Code, status_name, comment, docs_url, color`
- Institutions File: A `CSV` file that maps emails to institutions that are expected to be part of the preparation procedure. The CSV should have the following columns: `institution, email`

Once all requirements are covered, you can execute the following command:

```
medperf-dashboard -m <MLCube ID> -s <Stages File> -i <Institutions File>
```

Running this command will fetch the latest reports from the medperf server, and start a local server that will contain the visualization of the progress. To access this server, head to `http://localhost:8050` on your preferred browser.
5 changes: 5 additions & 0 deletions scripts/dashboard/medperf_dashboard/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Entry point for `python -m medperf_dashboard`.
from medperf.init import initialize

# Initialize medperf first: the dashboard import below must only happen
# after initialization (this ordering is deliberate — do not reorder).
initialize()

# NOTE(review): `t_app` is imported but not called here; importing the
# module presumably sets up/launches the dashboard app as a side effect —
# confirm against preparation_dashboard's module-level code.
from .preparation_dashboard import t_app  # noqa
51 changes: 51 additions & 0 deletions scripts/dashboard/medperf_dashboard/assets/stages.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
Status Code,status_name,comment,docs_url,color
0,Identified,,,cornflowerblue
1,Validated,,,lightgreen
-1.1,Missing Modalities,There are missing modalities. Please check the data,,darkorange
-1.2,Extra Modalities,There are extra modalities. Please check the data,,indigo
-1.3,Validation Failed,,,orangered
2,Converted To Nifti,,,deeppink
-2,Nifti Conversion Failed,,,slategray
3,Brain Extract Finished,,,gold
-3,Brain Extract Failed,,,plum
4,Tumor Extract Finished,,,pink
-4,Tumor Extract Failed,,,rebeccapurple
5,Manual Review Completed,,,mediumslateblue
-5,Manual Review Required,"Baseline tumor segmentations have been generated. Manual corrections are required to proceed. Here are the following recommended methods.

### Local Manual Corrections
If you're doing manual corrections on this machine, and you're running the manual Data Preparation pipeline locally, you may find buttons at the bottom of this page to aid you in the process of manual correction. These buttons automatically start the review process with ITK-Snap using the generated tumor segmentation mask. Please ensure ITK-Snap has been installed with command-line tools to use this method. You may edit the file as many times as needed. Once you're done with this subject, press the ""Mark as finalized"" button to let the pipeline know the subject is done.

In some cases, local manual correction is not possible, or the default baseline segmentation is not sufficiently good to work with. In those cases, it is recommended to do manual correction through the **Packaging method**.

### Packaging Method
If using the monitoring tool, you may package cases for review by pressing the 'Package cases for review' button under **SUMMARY**. This will create a tarball file on the working directory (displayed on the monitoring tool header), containing all the cases for review. You may untar the segmentation with the following command

``` tar -xzf review_cases.tar.gz ```

If working remotely, move the tarball file to your local machine. Untar the tarball file, select the best baseline segmentation and make any necessary corrections. Once you're done, move the finalized file to the `finalized` folder, retaining the original name.

If using the monitoring tool, create a new tarball with the previously untarred files, as well as finalized cases. You can do so with the following command

``` tar -czf reviewed_cases.tar.gz -C review_cases . ```

Place this new tarball file on the directory the monitoring tool is running (displayed on the monitoring tool header). The tool will automatically detect the finalized cases and place them in the expected locations.

### Brain Mask Correction
There might be situations where the brain mask is not correct. If working locally you may use the `Review Brain Mask` button to automatically make brain mask corrections with ITK-Snap. If instead you're using the previously mentioned tarball, you may find the brain mask segmentation as `brainMask_fused.nii.gz`. Please make the necessary corrections under that file, overwriting the contents. Once the new tarball is created and placed within the directory the monitoring tool is running, it will be automatically identified and placed in the correct location.
**NOTE** Modifying the brain mask invalidates the tumor segmentations, and re-runs the tumor segmentation procedure using this new mask.
",,mediumseagreen
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is the text here intentional?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems to me this is just a multiline str value of comment field for the -5 stage. At least csv is parsed successfully

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And I don't see any usage of this comment field in the dashboard. I believe it is here just because stages.csv file was taken as it is used in monitor (?). Anyway, it does not break anything

-5.1,Multiple Annotations Error,More than one segmentation file was identified in the manually corrected output folder. Please ensure that there is only one manually corrected segmentation inside the labels path.,,mediumpurple
-6,Comparison Complete,,,mediumturquoise
-6.1,Exact Match Identified,"The automated and manually corrected segmentation files are identical. Was this intentional? If so, are you certain that no manual corrections were required? If not, please redo manual correction for this exam.",,orchid
-6.2,Annotation Comparison Failed,The original segmentation for the reviewed file was not identified. This most probably means the annotation file was renamed. Please ensure the reviewed file retains its original name.,,peachpuff
7,Annotation Confirmed,,,silver
8,Done,,,green
-0.101,Generate Report Unhandled Error,,,tomato
-1.101,Initial Validation Unhandled Error,,,firebrick
-2.101,Nifti Conversion Unhandled Error,,,teal
-3.101,Brain Extract Unhandled Error,,,saddlebrown
-4.101,Tumor Extract Unhandled Error,,,sandybrown
-5.101,Manual Annotation Unhandled Error,,,peru
-6.101,Label Segmentation Comparison Unhandled Error,,,moccasin
-7.101,Annotation Confirmation Unhandled Error,,,mediumvioletred
aristizabal95 marked this conversation as resolved.
Show resolved Hide resolved
126 changes: 126 additions & 0 deletions scripts/dashboard/medperf_dashboard/get_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import os
import pandas as pd
import datetime

from medperf.entities.dataset import Dataset
from medperf import config

from .utils import get_institution_from_email, get_reports_path, stage_id2name

from typer import Typer, Option, run

app = Typer()


def get_dsets(mlcube_id):
    """Fetch all datasets registered against the given data-preparation MLCube.

    Each dataset dict is augmented with a "user" entry holding the owner's
    profile as returned by the medperf server.
    """
    # NOTE(review): this issues one get_user call per dataset, so owners
    # with several datasets are fetched repeatedly; a batched endpoint
    # would reduce the number of API calls.
    datasets = [entity.todict() for entity in Dataset.all(filters={"mlcube": mlcube_id})]
    for record in datasets:
        record["user"] = config.comms.get_user(record["owner"])
    return datasets


def build_dset_df(dsets, user2institution, stages_df):
    """Build a table of valid datasets joined with per-stage progress fractions.

    Args:
        dsets: dataset dicts from get_dsets(), each with a "user" entry.
        user2institution: mapping of owner email -> institution name.
        stages_df: stages table indexed by "Status Code", used to turn
            numeric stage ids into human-readable column names.

    Returns:
        pd.DataFrame: one row per valid dataset with identity columns plus
        one column per stage holding the fraction of subjects at that stage.
    """
    formatted_dsets = []
    for dset in dsets:
        email = dset["user"]["email"]
        institution = get_institution_from_email(email, user2institution)
        formatted_dset = {
            "name": dset["name"],
            "owner": dset["owner"],
            "email": email,
            "is_valid": dset["is_valid"],
            "created_at": dset["created_at"],
            "modified_at": dset["modified_at"],
            "institution": institution,
        }
        if len(dset["report"]):
            # Contains a readable report
            report = dset["report"]
            exec_status = report["execution_status"]
            formatted_dset["execution_status"] = exec_status
            formatted_dset["progress"] = report["progress"]

        formatted_dsets.append(formatted_dset)
    dsets_df = pd.DataFrame(formatted_dsets)

    # NOTE(review): Series.fillna({}) is a no-op (a dict argument maps index
    # labels to fill values), so rows without a report keep NaN here; the
    # fillna("0.0%") below appears to be what actually backfills them —
    # confirm. Also assumes at least one dataset has a report, otherwise the
    # "progress" column is missing and this raises KeyError — TODO confirm.
    progress = dsets_df["progress"].fillna({})
    progress_df = pd.DataFrame(progress.values.tolist())
    # Replace numeric stage-id column labels with human-readable stage names
    progress_df.rename(columns=lambda x: stage_id2name(x, stages_df), inplace=True)
    # Progress values are strings like "42.0%"; convert to fractions in [0, 1].
    # NOTE(review): DataFrame.map requires pandas >= 2.1 (formerly applymap).
    progress_df = progress_df.fillna("0.0%").map(lambda x: float(x[:-1]) / 100)
    # Collapse columns that mapped to the same stage name.
    # NOTE(review): groupby(axis=1) is deprecated in recent pandas — verify
    # against the pinned pandas version.
    progress_df = progress_df.groupby(level=0, axis=1).sum()

    full_table = dsets_df.join(progress_df)
    # Keep only datasets still marked valid on the server
    full_table = full_table[full_table["is_valid"]]
    full_table.drop(columns=["owner", "progress"], inplace=True)

    return full_table


def write_dsets_df(dsets_df, full_path):
    """Persist the datasets table as CSV files under ``full_path``.

    Writes the full table twice (a stable filename plus a UTC-timestamped
    snapshot) and a "latest" table that keeps only the most recently
    modified row per institution, dropping columns that are entirely zero.
    """
    now_utc = datetime.datetime.now(datetime.timezone.utc)

    # Most recent record for each institution
    per_institution = dsets_df.sort_values("modified_at").groupby("institution").last()
    # Drop columns with no non-zero value (stages nobody has reached)
    nonzero_columns = (per_institution != 0).any(axis=0)
    per_institution = per_institution.loc[:, nonzero_columns]

    dsets_df.to_csv(os.path.join(full_path, "full_table.csv"))
    dsets_df.to_csv(os.path.join(full_path, f"{now_utc}.csv"))
    per_institution.to_csv(os.path.join(full_path, "latest_table.csv"))


def write_sites(dsets_df, institutions_df, full_path):
    """Write the union of expected and registered institutions to sites.txt.

    Args:
        dsets_df: datasets table; must contain an "institution" column.
        institutions_df: expected participants; must contain an
            "institution" column.
        full_path: directory where sites.txt is written (must exist).
    """
    sites_path = os.path.join(full_path, "sites.txt")

    expected_sites = institutions_df["institution"].values.tolist()
    registered_sites = dsets_df["institution"].values.tolist()
    # Sort so the file is deterministic; iterating a bare set() would make
    # the line order vary between runs and produce noisy diffs.
    sites = sorted(set(expected_sites + registered_sites))

    with open(sites_path, "w") as f:
        f.write("\n".join(sites))


def get_data(mlcube_id, stages_path, institutions_path, out_path):
    """Fetch preparation reports for an MLCube and write progress CSVs.

    Args:
        mlcube_id: ID of the data-preparation MLCube to inspect.
        stages_path: CSV describing the pipeline stages ("Status Code", ...).
        institutions_path: CSV mapping institutions to contact emails.
        out_path: base directory under which report CSVs are written.
    """
    datasets = get_dsets(mlcube_id)
    reports_dir = get_reports_path(out_path, mlcube_id)
    os.makedirs(reports_dir, exist_ok=True)

    institutions_df = pd.read_csv(institutions_path)
    # Map contact email -> institution (CSV columns: institution, email)
    email2institution = {
        email: inst for inst, email in institutions_df.itertuples(index=False)
    }
    stages_df = pd.read_csv(stages_path).set_index("Status Code")

    table = build_dset_df(datasets, email2institution, stages_df)
    write_dsets_df(table, reports_dir)
    write_sites(table, institutions_df, reports_dir)


@app.command()
def main(
    mlcube_id: int = Option(
        # Fixed typo in user-facing help text: "prparation" -> "preparation"
        ..., "-m", "--mlcube", help="MLCube ID to inspect preparation from"
    ),
    stages_path: str = Option(
        "assets/stages.csv", "-s", "--stages", help="Path to stages.csv"
    ),
    institutions_path: str = Option(
        ...,
        "-i",
        "--institutions",
        help="Path to a CSV file containing institution-email information",
    ),
    out_path: str = Option(
        "reports", "-o", "--out-path", help="location to store progress CSVs"
    ),
):
    """CLI entry point: fetch preparation progress and write CSV reports."""
    get_data(mlcube_id, stages_path, institutions_path, out_path)


# Allow running this module directly (e.g. `python get_data.py`)
if __name__ == "__main__":
    run(main)
Loading
Loading