[ADD] F1: 0.48

NINAnor · Mar 18, 2024 · df52251 · df52251
1 parent 4c058e4
commit df52251
Show file tree

Hide file tree

Showing 10 changed files with 41 additions and 20 deletions.
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
@@ -0,0 +1,12 @@
+# To get started with Dependabot version updates, you'll need to specify which
+# package ecosystems to update and where the package manifests are located.
+# Please see the documentation for more information:
+# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
+# https://containers.dev/guide/dependabot
+
+version: 2
+updates:
+ - package-ecosystem: "devcontainers"
+   directory: "/"
+   schedule:
+     interval: weekly
diff --git a/CONFIG.yaml b/CONFIG.yaml
@@ -8,16 +8,17 @@
 # PARAMETERS FOR DATA PROCESSING #
 ##################################
 data:
-  n_task_train: 100
-  n_task_val: 100
+  n_task_train: 500
+  n_task_val: 500
   target_fs: 16000 # used in preprocessing
-  resample: true # used in preprocessing
-  denoise: False # used in preprocessing
+  resample: True # used in preprocessing
+  denoise: True # used in preprocessing
   normalize: true # used in preprocessing
   frame_length: 25.0 # used in preprocessing
   tensor_length: 128 # used in preprocessing
   n_shot: 3 # number of images PER CLASS in the support set
   n_query: 2 # number of images PER CLASS in the query set
+  n_way: 20
   overlap: 0.5 # used in preprocessing
   n_subsample: 1 
   num_mel_bins: 128 # used in preprocessing
@@ -41,8 +42,9 @@ model:
   lr: 1.0e-05
   model_type: beats # beats, pann or baseline
   state: train # train or validate - determines which model should be loaded
-  model_path:  None
+  model_path:  /data/DCASE/models/BEATs/BEATs_iter3_plus_AS2M.pt
   specaugment_params: null
+  n_way: 20
   # specaugment_params:
   #   application_ratio: 1.0
   #   time_mask: 40  

diff --git a/CONFIG_PREDICT.yaml b/CONFIG_PREDICT.yaml
@@ -42,7 +42,7 @@ model:
   distance: euclidean # other option is mahalanobis
   lr: 1.0e-05
   model_type: beats # beats, pann or baseline
-  state: validate # train or validate - determines which model should be loaded
+  state: train # train or validate - determines which model should be loaded
   model_path: None
   specaugment_params: null
   # specaugment_params:
@@ -54,11 +54,11 @@ model:
 # PARAMETERS FOR MODEL PREDICTION #
 ###################################
 predict:
-  wav_save: True
+  wav_save: False
   overwrite: True
   n_self_detected_supports: 0
   tolerance: 0
-  filter_by_p_values: True # Whether to filter outliers by their p-values
+  filter_by_p_values: False # Whether to filter outliers by their p-values
   n_subsample: 1 # Whether each segment should be subsampled
   self_detect_support: False # Whether to use the self-training loop
 

diff --git a/callbacks/callbacks.py b/callbacks/callbacks.py
@@ -3,7 +3,7 @@
 from pytorch_lightning.callbacks.finetuning import BaseFinetuning
 
 class MilestonesFinetuning(BaseFinetuning):
-    def __init__(self, milestones: int = 10):
+    def __init__(self, milestones: int = 1):
         super().__init__()
         self.unfreeze_at_epoch = milestones
 

diff --git a/datamodules/DCASEDataModule.py b/datamodules/DCASEDataModule.py
@@ -99,7 +99,7 @@ def __init__(
         set_type: str = "Training_Set",
         n_shot: int = 5,
         n_query: int = 10,
-        n_way: int = 5,
+        n_way: int = 20,
         n_subsample: int = 1,
         overlap: float = 0.5,
         num_mel_bins: int = 128,

diff --git a/dcase_fine_tune/CONFIG.yaml b/dcase_fine_tune/CONFIG.yaml
@@ -17,8 +17,8 @@ data:
   overlap: 0.5 # used in preprocessing
   num_mel_bins: 128 # used in preprocessing
   max_segment_length: 1.0 # used in preprocessing
-  status: validate # used in preprocessing, train or validate or evaluate
-  set_type: "Validation_Set"
+  status: train # used in preprocessing, train or validate or evaluate
+  set_type: "Training_Set"
 
 
 #################################
@@ -29,12 +29,12 @@ data:
 
 trainer:
   max_epochs: 10000
-  default_root_dir: /data/lightning_logs/BEATs
+  default_root_dir: /data/lightning_logs/baseline
   accelerator: gpu
   gpus: 1
   batch_size: 64
   num_workers: 4
-  patience: 20
+  patience: 10
   min_sample_per_category: 10
   test_size: 0.2
 

diff --git a/dcase_fine_tune/FTDataModule.py b/dcase_fine_tune/FTDataModule.py
@@ -7,6 +7,7 @@
 import numpy as np
 
 from torch.utils.data import WeightedRandomSampler
+from torchsampler import ImbalancedDatasetSampler
 
 class TrainAudioDatasetDCASE(Dataset):
     def __init__(
@@ -80,12 +81,15 @@ def divide_train_val(self):
         samples_weight = np.array([weight[t] for t in data_frame_train["category"]])
         samples_weight = torch.from_numpy(samples_weight)
         samples_weight = samples_weight.double()
-        self.sampler = WeightedRandomSampler(samples_weight, len(samples_weight))
+        self.sampler = WeightedRandomSampler(samples_weight, len(samples_weight)*10)
 
         # Make the validation set
         data_frame_validation = self.data_frame.loc[validation_indices]
         data_frame_validation.reset_index(drop=True, inplace=True)
 
+        #print(data_frame_train["category"].value_counts())
+        #print(data_frame_validation["category"].value_counts())
+
         # generate subset based on indices
         self.train_set = TrainAudioDatasetDCASE(
             data_frame=data_frame_train,
@@ -100,7 +104,7 @@ def train_dataloader(self):
                                   num_workers=self.num_workers, 
                                   pin_memory=False, 
                                   collate_fn=self.collate_fn,
-                                  sampler=self.sampler
+                                  sampler=ImbalancedDatasetSampler(self.train_set)       #self.sampler
                                   )
         return train_loader
 

diff --git a/prototypicalbeats/prototraining.py b/prototypicalbeats/prototraining.py
@@ -19,7 +19,7 @@
 class ProtoBEATsModel(pl.LightningModule):
     def __init__(
         self,
-        n_way: int = 5,
+        n_way: int = 20,
         milestones: int = 5,
         lr: float = 1e-5,
         lr_scheduler_gamma: float = 1e-1,

diff --git a/shell_scripts/train_beats.sh b/shell_scripts/train_beats.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 
-BASE_FOLDER=$1
+#BASE_FOLDER=$1
+BASE_FOLDER=/home/benjamin.cretois/data/DCASE 
 CONFIG_PATH="/app/CONFIG.yaml"
 
 cd ..

diff --git a/shell_scripts/validate_beats.sh b/shell_scripts/validate_beats.sh
@@ -1,13 +1,15 @@
 #!/bin/bash
 
-BASE_DIR=$1
+#BASE_DIR=$1
+BASE_DIR=/home/benjamin.cretois/data/DCASE #/data/Prosjekter3/823001_19_metodesats_analyse_23_36_cretois
 
 cd ..
 
 docker run -v $BASE_DIR:/data -v $PWD:/app \
             --gpus all \
+            --shm-size=10gb \
             beats \
             poetry run python /app/evaluate/evaluateDCASE.py \
             'model.model_type="beats"' \
             'model.state="train"' \
-            'model.model_path="/data/models/BEATs/BEATs_iter3_plus_AS2M.pt"' 
+            'model.model_path="/data/models/BEATs/BEATs_iter3_plus_AS2M.pt"'