From c76fbe27698db16ccddbf8ac2fe502063d4da390 Mon Sep 17 00:00:00 2001 From: Benjamin Cretois Date: Fri, 22 Mar 2024 13:36:48 +0100 Subject: [PATCH] [FIX] evaluateDCASE for the baseline model --- CONFIG.yaml | 4 +-- CONFIG_PREDICT.yaml | 5 +-- evaluate/_utils_compute.py | 2 +- evaluate/_utils_writing.py | 4 ++- evaluate/evaluateDCASE.py | 33 ++++++++++++++----- evaluate/evaluation_metrics/evaluation_all.py | 2 +- prototypicalbeats/trainer.py | 2 +- shell_scripts/log.txt | 1 + shell_scripts/train_baseline.sh | 2 +- shell_scripts/validate_baseline.sh | 5 +-- shell_scripts/validate_beats.sh | 10 +++--- 11 files changed, 44 insertions(+), 26 deletions(-) create mode 100644 shell_scripts/log.txt diff --git a/CONFIG.yaml b/CONFIG.yaml index ac66c4a..6f3ee58 100644 --- a/CONFIG.yaml +++ b/CONFIG.yaml @@ -40,9 +40,9 @@ trainer: model: distance: euclidean # other option is mahalanobis lr: 1.0e-05 - model_type: beats # beats, pann or baseline + model_type: baseline # beats, pann or baseline state: train # train or validate - for which model should be loaded - model_path: /data/DCASE/models/BEATs/BEATs_iter3_plus_AS2M.pt + model_path: None #/data/DCASE/models/BEATs/BEATs_iter3_plus_AS2M.pt specaugment_params: null n_way: 20 # specaugment_params: diff --git a/CONFIG_PREDICT.yaml b/CONFIG_PREDICT.yaml index af7fa58..f63387e 100644 --- a/CONFIG_PREDICT.yaml +++ b/CONFIG_PREDICT.yaml @@ -33,7 +33,7 @@ data: # Otherwise the hash of the folders will be different!! trainer: - max_epochs: 5 + max_epochs: 20 default_root_dir: /data accelerator: gpu gpus: 1 @@ -60,7 +60,8 @@ predict: tolerance: 0 filter_by_p_values: False # Whether we filter outliers by their pvalues n_subsample: 1 # Whether each segment should be subsampled - self_detect_support: False # Whether to use the self-training loop + self_detect_support: True # Whether to use the self-training loop + threshold_p_value: 0.2 plot: tsne: True diff --git a/evaluate/_utils_compute.py b/evaluate/_utils_compute.py index c588bbb..4280efb 100644 --- a/evaluate/_utils_compute.py +++ b/evaluate/_utils_compute.py @@ -112,7 +112,7 @@ def train_model( callbacks=[ pl.callbacks.LearningRateMonitor(logging_interval="step"), pl.callbacks.EarlyStopping( - monitor="train_acc", mode="max", patience=max_epochs + monitor="train_acc", mode="max", patience=3 ), ], default_root_dir="logs/", diff --git a/evaluate/_utils_writing.py b/evaluate/_utils_writing.py index 8fa890f..2925276 100644 --- a/evaluate/_utils_writing.py +++ b/evaluate/_utils_writing.py @@ -139,6 +139,7 @@ def plot_2_d_representation(prototypes, q_embeddings, labels, output, + model_type, perplexity=5): import matplotlib.pyplot as plt @@ -157,7 +158,8 @@ def plot_2_d_representation(prototypes, z_pos_supports.to("cpu").detach().numpy(), z_neg_supports.to("cpu").detach().numpy(), q_embeddings]) - feat = feat[:, -1, :] + if model_type == "beats": + feat = feat[:, -1, :] all_labels = np.concatenate([prototypes_labels, pos_supports_labels, diff --git a/evaluate/evaluateDCASE.py b/evaluate/evaluateDCASE.py index c5ec8b0..98acae6 100644 --- a/evaluate/evaluateDCASE.py +++ b/evaluate/evaluateDCASE.py @@ -92,9 +92,14 @@ def compute( support_samples_pos = reshape_support(support_samples_pos, tensor_length=cfg["data"]["tensor_length"], n_subsample=cfg["predict"]["n_subsample"]) - z_pos_supports, _ = model.get_embeddings(support_samples_pos, padding_mask=None) - - _, d_supports_to_POS_prototypes = calculate_distance(model_type, z_pos_supports, prototypes[pos_index]) + + if cfg["model"]["model_type"] == "beats": + z_pos_supports, _ = model.get_embeddings(support_samples_pos, padding_mask=None) + _, d_supports_to_POS_prototypes = calculate_distance(model_type, z_pos_supports, prototypes[pos_index]) + else: + z_pos_supports = model.get_embeddings(support_samples_pos, padding_mask=None) + _, d_supports_to_POS_prototypes = calculate_distance(model_type, z_pos_supports, prototypes[pos_index]) + d_supports_to_POS_prototypes = d_supports_to_POS_prototypes.squeeze() ecdf = ECDF(d_supports_to_POS_prototypes.detach().numpy()) @@ -105,7 +110,13 @@ def compute( support_samples_neg = reshape_support(support_samples_neg, tensor_length=cfg["data"]["tensor_length"], n_subsample=cfg["predict"]["n_subsample"]) - z_neg_supports, _ = model.get_embeddings(support_samples_neg, padding_mask=None) + + #z_neg_supports, _ = model.get_embeddings(support_samples_neg, padding_mask=None) + + if cfg["model"]["model_type"] == "beats": + z_neg_supports, _ = model.get_embeddings(support_samples_neg, padding_mask=None) + else: + z_neg_supports = model.get_embeddings(support_samples_neg, padding_mask=None) ### Get the query dataset ### df_query = to_dataframe(query_spectrograms, query_labels) @@ -152,7 +163,7 @@ def compute( ######################################################### # Detect POS samples - detected_pos_indices = np.where(p_values_pos == 1)[0] + detected_pos_indices = np.where(p_values_pos == cfg["predict"]["threshold_p_value"])[0] print(f"[INFO] SELF DETECTED {detected_pos_indices} POS SAMPLES") # BECAUSE CUDA ERROR WHEN RESAMPLING TOO MANY SAMPLES @@ -171,7 +182,6 @@ def compute( df_extension_neg = df_query.iloc[sampled_neg_indices].copy() df_extension_neg["category"] = "NEG" else: - print(df_neg) df_extension_neg = df_neg # Append both POS and NEG samples to the support set @@ -187,9 +197,13 @@ def compute( tensor_length=cfg["data"]["tensor_length"], n_subsample=cfg["predict"]["n_subsample"]) - z_pos_supports, _ = model.get_embeddings(support_samples_pos.to("cuda"), padding_mask=None) - - _, d_supports_to_POS_prototypes = calculate_distance(model_type, z_pos_supports.to("cuda"), prototypes[pos_index].to("cuda")) + if cfg["model"]["model_type"] == "beats": + z_pos_supports, _ = model.get_embeddings(support_samples_pos.to("cuda"), padding_mask=None) + _, d_supports_to_POS_prototypes = calculate_distance(model_type, z_pos_supports.to("cuda"), prototypes[pos_index].to("cuda")) + else: + z_pos_supports = model.get_embeddings(support_samples_pos.to("cuda"), padding_mask=None) + _, d_supports_to_POS_prototypes = calculate_distance(model_type, z_pos_supports.to("cuda"), prototypes[pos_index].to("cuda")) + d_supports_to_POS_prototypes = d_supports_to_POS_prototypes.squeeze() ecdf = ECDF(d_supports_to_POS_prototypes.to("cpu").detach().numpy()) @@ -284,6 +298,7 @@ def compute( q_embeddings, labels, output, + cfg["model"]["model_type"], cfg["plot"]["perplexity"]) # Compute the scores for the analysed file -- just as information diff --git a/evaluate/evaluation_metrics/evaluation_all.py b/evaluate/evaluation_metrics/evaluation_all.py index a71a158..e47e83e 100644 --- a/evaluate/evaluation_metrics/evaluation_all.py +++ b/evaluate/evaluation_metrics/evaluation_all.py @@ -273,7 +273,7 @@ def evaluate(pred_file_path, ref_file_path, team_name, dataset, savepath, metada if __name__ == "__main__": - all_files = glob.glob("/data/DCASEfewshot/validate/d8f698b184e75c3ef4e830f9da4f148071fb4c56/results/beats/models/**/eval_out.csv", + all_files = glob.glob("/data/DCASEfewshot/validate/d8f698b184e75c3ef4e830f9da4f148071fb4c56/results/beats/models/BEATS_SELF_LEARNING_PTHR=02/**/eval_out.csv", recursive=True) l_fscores = [] diff --git a/prototypicalbeats/trainer.py b/prototypicalbeats/trainer.py index 384d3a8..3c60362 100755 --- a/prototypicalbeats/trainer.py +++ b/prototypicalbeats/trainer.py @@ -20,7 +20,7 @@ def add_arguments_to_parser(self, parser): "trainer.callbacks": [ EarlyStopping( monitor="val_loss", - patience=15, + patience=5, verbose=True, mode="min" ), diff --git a/shell_scripts/log.txt b/shell_scripts/log.txt new file mode 100644 index 0000000..7eeeacc --- /dev/null +++ b/shell_scripts/log.txt @@ -0,0 +1 @@ +Seq Host Starttime JobRuntime Send Receive Exitval Signal Command diff --git a/shell_scripts/train_baseline.sh b/shell_scripts/train_baseline.sh index 1738c7f..bbb72e7 100755 --- a/shell_scripts/train_baseline.sh +++ b/shell_scripts/train_baseline.sh @@ -1,7 +1,7 @@ #!/bin/bash cd .. -BASE_FOLDER=$1 +BASE_FOLDER=/home/benjamin.cretois/data/DCASE CONFIG_PATH="/app/CONFIG.yaml" # Check if BASE_FOLDER is not set or empty diff --git a/shell_scripts/validate_baseline.sh b/shell_scripts/validate_baseline.sh index 394eff1..c78bcea 100755 --- a/shell_scripts/validate_baseline.sh +++ b/shell_scripts/validate_baseline.sh @@ -1,6 +1,7 @@ #!/bin/bash -BASE_DIR=$1 +# Set the base directory +BASE_DIR=/home/benjamin.cretois/data/DCASE cd .. @@ -10,4 +11,4 @@ docker run -v $BASE_DIR:/data -v $PWD:/app \ poetry run python /app/evaluate/evaluateDCASE.py \ 'model.model_type="baseline"' \ 'model.state="validate"' \ - 'model.model_path="/data/lightning_logs/BASELINE/lightning_logs/version_1/checkpoints/epoch=50-step=5100.ckpt"' \ No newline at end of file + 'model.model_path="/data/lightning_logs/BASELINE/lightning_logs/version_0/checkpoints/epoch=59-step=30000.ckpt"' \ No newline at end of file diff --git a/shell_scripts/validate_beats.sh b/shell_scripts/validate_beats.sh index 97f4347..5bbc851 100755 --- a/shell_scripts/validate_beats.sh +++ b/shell_scripts/validate_beats.sh @@ -1,15 +1,13 @@ #!/bin/bash -#BASE_DIR=$1 -BASE_DIR=/home/benjamin.cretois/data/DCASE #/data/Prosjekter3/823001_19_metodesats_analyse_23_36_cretois +# Set the base directory +BASE_DIR=/home/benjamin.cretois/data/DCASE -cd .. - -docker run -v $BASE_DIR:/data -v $PWD:/app \ +exec docker run -v $BASE_DIR:/data -v $PWD/..:/app \ --gpus all \ --shm-size=10gb \ beats \ poetry run python /app/evaluate/evaluateDCASE.py \ 'model.model_type="beats"' \ 'model.state="train"' \ - 'model.model_path="/data/models/BEATs/BEATs_iter3_plus_AS2M.pt"' \ No newline at end of file + 'model.model_path="/data/models/BEATs/BEATs_iter3_plus_AS2M.pt"'