Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WHY IS BINARY SEGMENTATION SO DIFFICULT? #3753

Open
Mayolov opened this issue Aug 6, 2024 · 2 comments
Open

WHY IS BINARY SEGMENTATION SO DIFFICULT? #3753

Mayolov opened this issue Aug 6, 2024 · 2 comments

Comments

@Mayolov
Copy link

Mayolov commented Aug 6, 2024

Hi

I'm trying to do binary segmentation and am getting metric values that I shouldn't on the validation and test data. After 3 iterations it reports a perfect IoU when it shouldn't. The masks are just black-and-white images with shape (512, 512). I'm following this repo for mae_unet: https://github.com/implus/mae_segmentation.

I can't figure out how I'm supposed to make it work.

Test data:

+----------------------+-------+-------+
| Class                | IoU   | Acc   |
+----------------------+-------+-------+
| obj of Interest      | 100.0 | 100.0 |
| Background           | 0.0   | nan   |
+----------------------+-------+-------+
Summary:

+--------+------+-------+-------+
| Scope  | mIoU | mAcc  | aAcc  |
+--------+------+-------+-------+
| global | 50.0 | 100.0 | 100.0 |
+--------+------+-------+-------+

validation:

+----------------------+-------+-------+
| Class                | IoU   | Acc   |
+----------------------+-------+-------+
| obj of Interest      | 100.0 | 100.0 |
| Background           | 0.0   | nan   |
+----------------------+-------+-------+
Summary:

+--------+------+-------+-------+
| Scope  | mIoU | mAcc  | aAcc  |
+--------+------+-------+-------+
| global | 50.0 | 100.0 | 100.0 |
+--------+------+-------+-------+

Custom Dataloader:

from .custom import CustomDataset
from .builder import DATASETS
import os.path as osp
    
@DATASETS.register_module()
class CustomBinarySegDataset(CustomDataset):
    """Two-class segmentation dataset built on ``CustomDataset``.

    Images are looked up with the ``.png`` suffix and their segmentation
    maps with the ``_mask.png`` suffix. ``reduce_zero_label`` is forced to
    ``False``, so label 0 is kept as a regular class.

    NOTE(review): index 0 is 'Particle of Interest' and index 1 is
    'Background'. Confirm that the pixel values stored in the mask files
    follow the same order (0 -> first class, 1 -> second class).
    """

    CLASSES = ('Particle of Interest', 'Background')
    # One RGB triple per class, in the same order as CLASSES. The previous
    # one-element entries ([1], [0]) were not valid colours.
    PALETTE = [[255, 255, 255], [0, 0, 0]]

    def __init__(self, **kwargs):
        """Build the dataset.

        Args:
            **kwargs: Forwarded unchanged to ``CustomDataset.__init__``
                together with the fixed suffix / label options above.

        Raises:
            AssertionError: If ``self.img_dir`` does not exist on disk.
        """
        super().__init__(
            img_suffix='.png',
            seg_map_suffix='_mask.png',
            reduce_zero_label=False,
            **kwargs
        )
        assert osp.exists(self.img_dir), (
            'image directory does not exist: {}'.format(self.img_dir))

config:

# ---------------------------------------------------------------------------
# Model: MAE backbone with a UPerHead decoder and an auxiliary FCN head.
# ---------------------------------------------------------------------------
# Normalisation layer settings reused by both heads below.
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='/vast/home/mayolo/mae_git/mae/output_dir/checkpoint-799.pth',
    backbone=dict(
        type='MAE',
        patch_size=16,
        embed_dim=768,
        depth=12,
        num_heads=12,
        mlp_ratio=4,
        qkv_bias=True,
        use_abs_pos_emb=True,
        use_rel_pos_bias=True,
        img_size=512,
        init_values=1.0,
        drop_path_rate=0.1,
        out_indices=[3, 5, 7, 11],
    ),
    decode_head=dict(
        type='UPerHead',
        in_channels=[768, 768, 768, 768],
        in_index=[0, 1, 2, 3],
        pool_scales=(1, 2, 3, 6),
        channels=768,
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=norm_cfg,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
    ),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=768,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4),
    ),
    train_cfg=dict(),
    # Sliding-window inference with 512x512 windows and a stride of 341.
    test_cfg=dict(mode='slide', crop_size=(512, 512), stride=(341, 341)),
)

# ---------------------------------------------------------------------------
# Data
# ---------------------------------------------------------------------------
dataset_type = 'CustomBinarySegDataset'
data_root = '/vast/home/mayolo/512x512_Seg_Aug_images/base'
# NOTE(review): img_norm_cfg is defined here but no pipeline in this file
# contains a step that consumes it -- confirm whether that is intended.
img_norm_cfg = dict(mean=[0, 0, 0], std=[254, 254, 254], to_rgb=True)
crop_size = (512, 512)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(reduce_zero_label=False, type='LoadAnnotations'),
    dict(type='ConvertToGrayScaleMask'),
    dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=1),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
# NOTE(review): this pipeline has no LoadAnnotations step, so
# ConvertToGrayScaleMask presumably has no segmentation map to convert
# here -- verify how ground-truth masks are read during evaluation.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 512),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='ConvertToGrayScaleMask'),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ],
    ),
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir=data_root + '/images/train',
        ann_dir=data_root + '/annotations/train',
        pipeline=train_pipeline,
    ),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir=data_root + '/images/val',
        ann_dir=data_root + '/annotations/val',
        pipeline=test_pipeline,
    ),
    # The test split points at the same val directories as the val split.
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir=data_root + '/images/val',
        ann_dir=data_root + '/annotations/val',
        pipeline=test_pipeline,
    ),
)

# ---------------------------------------------------------------------------
# Runtime
# ---------------------------------------------------------------------------
log_config = dict(
    interval=50,
    hooks=[dict(type='TextLoggerHook', by_epoch=False)],
)
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True

# ---------------------------------------------------------------------------
# Optimisation schedule
# ---------------------------------------------------------------------------
optimizer = dict(
    type='AdamW',
    lr=0.0001,
    betas=(0.9, 0.999),
    weight_decay=0.01,
    constructor='LayerDecayOptimizerConstructor',
    paramwise_cfg=dict(num_layers=12, layer_decay_rate=0.65),
)
optimizer_config = dict(
    type='DistOptimizerHook',
    update_interval=1,
    grad_clip=None,
    coalesce=True,
    bucket_size_mb=-1,
    use_fp16=True,
)
lr_config = dict(
    policy='poly',
    warmup='linear',
    warmup_iters=1500,
    warmup_ratio=1e-06,
    power=1.0,
    min_lr=0.0,
    by_epoch=False,
)
max_iters = 200000
runner = dict(type='IterBasedRunnerAmp', max_iters=max_iters)
checkpoint_config = dict(by_epoch=False, interval=2000)
evaluation = dict(interval=100, metric='mIoU')
fp16 = None
# NOTE(review): the directory name still mentions 160k / ade20k although
# this config trains CustomBinarySegDataset for 200000 iterations.
work_dir = './work_dirs/upernet_mae_base_12_512_slide_160k_ade20k'
gpu_ids = range(0, 1)

The grayscale transform also acts as a normalization function:

@PIPELINES.register_module()
class ConvertToGrayScaleMask(object):
    """Turn every segmentation map in ``results`` into a 0/1 mask.

    Each entry listed under ``results["seg_fields"]`` is processed in
    place. Three-channel maps are first collapsed to a single grayscale
    channel and min-max scaled to the 0-255 range; every map is then
    thresholded so that values above zero become 1 and all others 0.
    """

    def __init__(self):
        """The transform takes no configuration options."""

    def __call__(self, results):
        """Binarise the segmentation maps referenced by ``seg_fields``.

        Args:
            results (dict): Pipeline result dict. Only the keys named in
                ``results["seg_fields"]`` are read and overwritten; when
                that entry is missing nothing is changed.

        Returns:
            dict: The same ``results`` object, with each segmentation map
            replaced by a ``float32`` array holding only 0 and 1.
        """
        for field in results.get("seg_fields", []):
            seg_map = results[field]
            is_three_channel = seg_map.ndim == 3 and seg_map.shape[2] == 3
            if is_three_channel:
                gray = cv2.cvtColor(seg_map, cv2.COLOR_BGR2GRAY)
                # NOTE(review): min-max scaling with alpha=0 presumably
                # sends the smallest pixel value to 0, which the threshold
                # below then treats as background -- confirm this is
                # intended for maps whose minimum is not already 0.
                seg_map = cv2.normalize(
                    gray, None, alpha=0, beta=255,
                    norm_type=cv2.NORM_MINMAX)
            # The float32 cast is kept from the original, whose author
            # notes that it "allows for test.py to work".
            results[field] = (seg_map > 0).astype(np.float32)
        return results

    def __repr__(self):
        """Return the bare class name."""
        return type(self).__name__
@dongxinyu1030
Copy link

Try using a smaller learning rate and warmup.

@0xD4rky
Copy link

0xD4rky commented Aug 19, 2024

The main thing you can do is introduce a Dice loss alongside the IoU metric, and then tune your model based on the results of both.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

3 participants