Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

YOLO pipeline for 2d MS lesion detection #10

Open
wants to merge 26 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
182cd6a
Functions for pre-processing our data into the format expected by YOL…
cspino Feb 13, 2024
a821ddc
Only save slices that contain spinal cord
cspino Feb 20, 2024
9de9213
Scripts for data pre-processing
cspino Mar 5, 2024
640e155
remove jupyter notebook
cspino Mar 5, 2024
3f918ea
Scripts for yolo model training and testing
cspino Mar 5, 2024
7e8f227
New scripts for model validation
cspino Mar 31, 2024
533c5d9
remove yolo_testing.py
cspino Mar 31, 2024
639b365
remove datasets folder
cspino Mar 31, 2024
1158360
Update requirements
cspino Mar 31, 2024
d9f81e2
remove runs folder
cspino Mar 31, 2024
355619f
Bug fixes
cspino Mar 31, 2024
263e2ab
Update training params
cspino Mar 31, 2024
5ba4327
Fix metadata
cspino Mar 31, 2024
e5c7352
move post-processing to validation.py
cspino Apr 4, 2024
fb40654
Show boxes on appropriate slices only & 3d validation
cspino Apr 4, 2024
f95da21
Check whether boxes are on consecutive slices before merging
cspino Apr 4, 2024
f18235b
remove unnecessary functions from yolo_inference.py
cspino Apr 4, 2024
fe73665
Update validation tests
cspino Apr 6, 2024
46728a3
Generate PR curve and PR-AUC
cspino Apr 6, 2024
fc1d905
Add unit tests for pre-processing
cspino Apr 6, 2024
0220c17
Add larger pre-processing script that calls does all pre-processing s…
cspino Apr 6, 2024
2ad42e1
remove unnecessary prints and imports
cspino Apr 6, 2024
a26e33c
Training script takes params file as input
cspino Apr 7, 2024
3035c31
Hyperparameter tune script takes params file as input
cspino Apr 7, 2024
9a524fb
Remove PR curve png
cspino Apr 12, 2024
2e7ef51
Last few tweaks
cspino Apr 17, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 111 additions & 0 deletions 2d_lesion_detection/PR_curve.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
"""
Script for generating Precision-Recall curve and PR-AUC

First, run yolo inference with a low confidence threshold (LOWER_CONF),
then give those predictions as --preds
"""

import os
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
from pathlib import Path
import subprocess
import tempfile
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Bounds of the confidence-threshold sweep used to build the PR curve.
# They are passed to np.arange(LOWER_CONF, UPPER_CONF, 0.01) in _main, so
# LOWER_CONF is the first threshold evaluated and UPPER_CONF is the
# (exclusive) stop value of the sweep.
# Per the module docstring, inference should be run beforehand with
# LOWER_CONF as its confidence threshold.
LOWER_CONF = 0.01
UPPER_CONF = 0.5

def _read_predictions(preds_dir):
    """Load every prediction txt file found directly in ``preds_dir``.

    Args:
        preds_dir: Folder containing the prediction txt files.

    Returns:
        A dict mapping each txt filename to a list of ``(line, confidence)``
        tuples, where ``line`` is the raw text line and ``confidence`` is its
        last whitespace-separated field parsed as a float. Blank lines are
        skipped so they cannot break the parsing.
    """
    predictions = {}
    for name in os.listdir(preds_dir):
        if not name.endswith(".txt"):
            continue  # only keep txts
        with open(os.path.join(preds_dir, name), "r") as infile:
            predictions[name] = [
                (line, float(line.split()[-1]))
                for line in infile
                if line.strip()
            ]
    return predictions


def _write_filtered_predictions(predictions, dest_dir, conf):
    """Write copies of the predictions keeping only boxes above ``conf``.

    A file is only created when at least one of its boxes has a confidence
    strictly higher than ``conf``.

    Args:
        predictions: Mapping returned by ``_read_predictions``.
        dest_dir: Folder in which the filtered txt files are written.
        conf: Confidence threshold.
    """
    for name, boxes in predictions.items():
        kept = [line for line, confidence in boxes if confidence > conf]
        if kept:
            # only create file if there are boxes
            with open(Path(dest_dir)/name, "w") as outfile:
                outfile.writelines(kept)


def _main():
    """Command-line entry point: sweep confidence thresholds and plot the PR curve.

    For every threshold in ``np.arange(LOWER_CONF, UPPER_CONF, 0.01)`` the
    predictions are filtered into a temporary folder, ``validation.py`` is
    run on them as a subprocess, and the 'Recall' and 'Precision' values of
    the last row of the resulting ``metrics_report.csv`` are collected.
    The curve is saved as a png in the output folder and the area under it
    is printed.

    Raises:
        subprocess.CalledProcessError: if ``validation.py`` exits with a
            non-zero status.
    """
    # Local import so this function does not depend on the file's import block.
    import sys

    parser = ArgumentParser(
        prog = 'PR_curve',
        description = 'Generate PR curve and AUC-PR for yolo model',
        formatter_class = ArgumentDefaultsHelpFormatter)
    parser.add_argument('-g', '--gt-path',
                        required= True,
                        type = str,
                        help = 'Path to YOLO dataset folder of ground truth txt files')
    parser.add_argument('-p', '--preds',
                        required = True,
                        type = Path,
                        help = 'Path to prediction folder containing txt files with confidence values.')
    parser.add_argument('-c', '--canproco',
                        required= True,
                        type = str,
                        help = 'Path to canproco database')
    parser.add_argument('-o', '--output',
                        required = True,
                        type = Path,
                        help = 'Output directory to save the PR curve to.')
    # argparse does not apply `type` to a non-string default, so the float
    # default used to reach subprocess.run unconverted and raise TypeError.
    # Parse the value as a float and convert it explicitly when building
    # the command instead.
    parser.add_argument('-i', '--iou',
                        default= 0.2,
                        type = float,
                        help = 'IoU threshold for a TP')

    args = parser.parse_args()

    # Create output folder if it doesn't exist
    os.makedirs(args.output, exist_ok=True)

    # The prediction folder is not modified by the sweep, so read it once
    # instead of re-reading every file for each threshold.
    predictions = _read_predictions(args.preds)

    recalls = []
    precisions = []
    for conf in np.arange(LOWER_CONF, UPPER_CONF, 0.01):
        print(f"\n\nComputing metrics for {conf} conf")
        with tempfile.TemporaryDirectory() as tmpdir:
            preds_dir = Path(tmpdir)/"preds"
            val_dir = Path(tmpdir)/"val"
            preds_dir.mkdir(parents=True, exist_ok=True)
            val_dir.mkdir(parents=True, exist_ok=True)

            # 1. Create new txt files with only boxes that have confidence higher than conf
            print("Copying over txt files")
            _write_filtered_predictions(predictions, preds_dir, conf)

            # 2. Call validation and get recall and precision
            print("Calling validation")
            # sys.executable guarantees the same interpreter (and therefore
            # the same environment) as the one running this script.
            command = [sys.executable,
                       "validation.py",
                       "-g", args.gt_path,
                       "-p", str(preds_dir),
                       "-o", str(val_dir),
                       "-c", args.canproco,
                       "-i", str(args.iou)]
            subprocess.run(command, check=True)

            # 3. Get recall and precision and add to the lists
            print("Getting recall and precision")
            df = pd.read_csv(val_dir/"metrics_report.csv")

            # Extract Recall and Precision from the last row
            # NOTE(review): presumably the last row holds the overall
            # metrics -- confirm against validation.py.
            recalls.append(df.iloc[-1]['Recall'])
            precisions.append(df.iloc[-1]['Precision'])

    # Plot
    plt.plot(recalls, precisions, marker='.')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title(f'Precision-Recall Curve with {args.iou} iou threshold')
    plt.savefig(args.output/f'precision_recall_curve_{args.iou}iou.png')

    # Calculate PR-AUC
    # np.trapz was renamed np.trapezoid in NumPy 2.0; use whichever exists.
    trapezoid = np.trapezoid if hasattr(np, "trapezoid") else np.trapz
    # The lists are integrated in reverse order.
    # NOTE(review): this presumably relies on recall decreasing as the
    # confidence threshold increases, so that the reversed recalls are
    # ascending -- confirm.
    auc_pr = trapezoid(precisions[::-1], recalls[::-1])
    print('Area under Precision-Recall curve (AUC-PR):', auc_pr)


# Only run the command-line interface when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    _main()
100 changes: 100 additions & 0 deletions 2d_lesion_detection/complete_pre_process.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
"""
Main script for pre-processing
Calls sc_seg_from_list.py, make_yolo_dataset.py and modify_unlabeled_proportion.py

Generates a YOLO dataset from a list of scans
"""
import subprocess
import sys
import tempfile
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
from pathlib import Path


def call_sc_seg_from_list(json_path:str|Path, database:str|Path):
    """
    Calls sc_seg_from_list.py as a subprocess

    The script is launched with the interpreter that is running this file
    (sys.executable) rather than whichever "python" happens to be on PATH,
    so it runs in the same environment.

    Args:
        json_path: value passed to the script's -j option
        database: value passed to the script's -d option

    Raises:
        subprocess.CalledProcessError: if the script exits with a non-zero status
    """
    print("Getting spinal cord segmentation...")
    command = [
        sys.executable,
        "sc_seg_from_list.py",
        "-j", str(json_path),
        "-d", str(database)
    ]
    subprocess.run(command, check=True)

def call_make_yolo_dataset(json_path:str|Path,
                           database:str|Path,
                           output_dir:str|Path):
    """
    Calls make_yolo_dataset.py as a subprocess

    The script is launched with the interpreter that is running this file
    (sys.executable) rather than whichever "python" happens to be on PATH,
    so it runs in the same environment.

    Args:
        json_path: value passed to the script's -j option
        database: value passed to the script's -d option
        output_dir: value passed to the script's -o option

    Raises:
        subprocess.CalledProcessError: if the script exits with a non-zero status
    """
    print("Converting to YOLO format...")
    command = [
        sys.executable,
        "make_yolo_dataset.py",
        "-j", str(json_path),
        "-d", str(database),
        "-o", str(output_dir)
    ]
    subprocess.run(command, check=True)

def call_modify_unlabeled_proportion(input_path:str|Path,
                                     output_path:str|Path,
                                     ratio: str|float):
    """
    Calls modify_unlabeled_proportion.py as a subprocess

    The script is launched with the interpreter that is running this file
    (sys.executable) rather than whichever "python" happens to be on PATH,
    so it runs in the same environment.

    Args:
        input_path: value passed to the script's -i option
        output_path: value passed to the script's -o option
        ratio: value passed to the script's -r option (converted with str())

    Raises:
        subprocess.CalledProcessError: if the script exits with a non-zero status
    """
    print("Modifying unlabeled proportion...")
    command = [
        sys.executable,
        "modify_unlabeled_proportion.py",
        "-i", str(input_path),
        "-o", str(output_path),
        "-r", str(ratio)
    ]
    subprocess.run(command, check=True)

def _main():
    """
    Command-line entry point for the complete pre-processing

    Steps:
      1. call_sc_seg_from_list on the json list and database
      2. call_make_yolo_dataset, writing either directly to the output
         directory or, when a ratio is given, to a temporary directory
      3. when a ratio is given, call_modify_unlabeled_proportion from the
         temporary dataset to the output directory
    """
    parser = ArgumentParser(
        prog = 'complete_pre_process',
        description = 'Generates YOLO format dataset from a list of scans and a BIDS database.',
        formatter_class = ArgumentDefaultsHelpFormatter)
    parser.add_argument('-j', '--json-list',
                        required = True,
                        type = Path,
                        help = 'path to json list of scans to process')
    parser.add_argument('-d', '--database',
                        required = True,
                        type = Path,
                        help = 'path to BIDS database (canproco)')
    parser.add_argument('-o', '--output-dir',
                        required = True,
                        type = Path,
                        help = 'Output directory for YOLO dataset')
    parser.add_argument('-r', '--ratio',
                        default = None,
                        type = float,
                        help = 'Proportion of dataset that should be unlabeled. '
                               'By default, the ratio is not modified and the whole dataset is kept.')

    args = parser.parse_args()

    # Make sure all necessary spinal cord segmentations are present
    call_sc_seg_from_list(args.json_list, args.database)

    # Compare against None rather than relying on truthiness: an explicit
    # ratio of 0 is falsy and would otherwise be silently ignored.
    if args.ratio is not None:
        # If ratio needs to be modified, call make_yolo_dataset in a temp dir
        with tempfile.TemporaryDirectory() as tmpdir:
            tmp_dataset = Path(tmpdir)/"yolo_dataset"
            call_make_yolo_dataset(args.json_list, args.database, tmp_dataset)
            call_modify_unlabeled_proportion(tmp_dataset, args.output_dir, args.ratio)

    else:
        # Otherwise, save dataset to output_dir directly
        call_make_yolo_dataset(args.json_list, args.database, args.output_dir)

    print(f"Dataset was saved to {args.output_dir}")


# Only run the command-line interface when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    _main()
Loading