diff --git a/mmseg/configs/_base_/datasets/ade20k.py b/mmseg/configs/_base_/datasets/ade20k.py
new file mode 100644
index 0000000000..cd2d1aa50d
--- /dev/null
+++ b/mmseg/configs/_base_/datasets/ade20k.py
@@ -0,0 +1,77 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmcv.transforms import RandomFlip, RandomResize, Resize, TestTimeAug
+from mmcv.transforms.loading import LoadImageFromFile
+from mmengine.dataset.sampler import DefaultSampler, InfiniteSampler
+
+from mmseg.datasets.ade import ADE20KDataset
+from mmseg.datasets.transforms import (LoadAnnotations, PackSegInputs,
+                                       PhotoMetricDistortion, RandomCrop)
+from mmseg.evaluation import IoUMetric
+
+# ADE20K dataset settings (512x512 training crops)
+dataset_type = ADE20KDataset
+data_root = 'data/ade/ADEChallengeData2016'
+crop_size = (512, 512)
+train_pipeline = [
+    dict(type=LoadImageFromFile),
+    dict(type=LoadAnnotations),
+    dict(
+        type=RandomResize,
+        scale=(2048, 512),
+        ratio_range=(0.5, 2.0),
+        keep_ratio=True),
+    dict(type=RandomCrop, crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type=RandomFlip, prob=0.5),
+    dict(type=PhotoMetricDistortion),
+    dict(type=PackSegInputs)
+]
+test_pipeline = [
+    dict(type=LoadImageFromFile),
+    dict(type=Resize, scale=(2048, 512), keep_ratio=True),
+    # load annotations after ``Resize``: the ground truth does not need the
+    # resize transform, so it is only loaded once the image has been resized
+    dict(type=LoadAnnotations),
+    dict(type=PackSegInputs)
+]
+img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]  # scale factors used by TTA
+tta_pipeline = [
+    dict(type=LoadImageFromFile, backend_args=None),
+    dict(
+        type=TestTimeAug,
+        transforms=[[
+            dict(type=Resize, scale_factor=r, keep_ratio=True)
+            for r in img_ratios
+        ],
+                    [
+                        dict(type=RandomFlip, prob=0., direction='horizontal'),
+                        dict(type=RandomFlip, prob=1., direction='horizontal')
+                    ], [dict(type=LoadAnnotations)],
+                    [dict(type=PackSegInputs)]])
+]
+train_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type=InfiniteSampler, shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            img_path='images/training', seg_map_path='annotations/training'),
+        pipeline=train_pipeline))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type=DefaultSampler, shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            img_path='images/validation',
+            seg_map_path='annotations/validation'),
+        pipeline=test_pipeline))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+val_evaluator = dict(type=IoUMetric, iou_metrics=['mIoU'])
+test_evaluator = val_evaluator  # testing reuses the validation metric
diff --git a/mmseg/configs/_base_/datasets/ade20k_640x640.py b/mmseg/configs/_base_/datasets/ade20k_640x640.py
new file mode 100644
index 0000000000..c68d774a74
--- /dev/null
+++ b/mmseg/configs/_base_/datasets/ade20k_640x640.py
@@ -0,0 +1,77 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmcv.transforms import RandomFlip, RandomResize, Resize, TestTimeAug
+from mmcv.transforms.loading import LoadImageFromFile
+from mmengine.dataset.sampler import DefaultSampler, InfiniteSampler
+
+from mmseg.datasets.ade import ADE20KDataset
+from mmseg.datasets.transforms import (LoadAnnotations, PackSegInputs,
+                                       PhotoMetricDistortion, RandomCrop)
+from mmseg.evaluation import IoUMetric
+
+# ADE20K dataset settings (640x640 training crops)
+dataset_type = ADE20KDataset
+data_root = 'data/ade/ADEChallengeData2016'
+crop_size = (640, 640)
+train_pipeline = [
+    dict(type=LoadImageFromFile),
+    dict(type=LoadAnnotations),
+    dict(
+        type=RandomResize,
+        scale=(2560, 640),
+        ratio_range=(0.5, 2.0),
+        keep_ratio=True),
+    dict(type=RandomCrop, crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type=RandomFlip, prob=0.5),
+    dict(type=PhotoMetricDistortion),
+    dict(type=PackSegInputs)
+]
+test_pipeline = [
+    dict(type=LoadImageFromFile),
+    dict(type=Resize, scale=(2560, 640), keep_ratio=True),
+    # load annotations after ``Resize``: the ground truth does not need the
+    # resize transform, so it is only loaded once the image has been resized
+    dict(type=LoadAnnotations),
+    dict(type=PackSegInputs)
+]
+img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]  # scale factors used by TTA
+tta_pipeline = [
+    dict(type=LoadImageFromFile, backend_args=None),
+    dict(
+        type=TestTimeAug,
+        transforms=[[
+            dict(type=Resize, scale_factor=r, keep_ratio=True)
+            for r in img_ratios
+        ],
+                    [
+                        dict(type=RandomFlip, prob=0., direction='horizontal'),
+                        dict(type=RandomFlip, prob=1., direction='horizontal')
+                    ], [dict(type=LoadAnnotations)],
+                    [dict(type=PackSegInputs)]])
+]
+train_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type=InfiniteSampler, shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            img_path='images/training', seg_map_path='annotations/training'),
+        pipeline=train_pipeline))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type=DefaultSampler, shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            img_path='images/validation',
+            seg_map_path='annotations/validation'),
+        pipeline=test_pipeline))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+val_evaluator = dict(type=IoUMetric, iou_metrics=['mIoU'])
+test_evaluator = val_evaluator  # testing reuses the validation metric
diff --git a/mmseg/configs/_base_/default_runtime.py b/mmseg/configs/_base_/default_runtime.py
new file mode 100644
index 0000000000..75d02275b9
--- /dev/null
+++ b/mmseg/configs/_base_/default_runtime.py
@@ -0,0 +1,24 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmengine.runner import LogProcessor
+from mmengine.visualization import LocalVisBackend
+
+from mmseg.models.segmentors import SegTTAModel
+from mmseg.visualization import SegLocalVisualizer
+
+default_scope = None
+env_cfg = dict(
+    cudnn_benchmark=False,
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+    dist_cfg=dict(backend='nccl'),
+)
+
+vis_backends = [dict(type=LocalVisBackend)]  # passed to ``visualizer`` below
+visualizer = dict(
+    type=SegLocalVisualizer, vis_backends=vis_backends, name='visualizer')
+log_processor = dict(type=LogProcessor, window_size=50, by_epoch=False)
+
+log_level = 'INFO'
+load_from = None
+resume = False
+
+tta_model = dict(type=SegTTAModel)
diff --git a/mmseg/configs/_base_/models/segformer_mit_b0.py b/mmseg/configs/_base_/models/segformer_mit_b0.py
new file mode 100644
index 0000000000..58b89eca2b
--- /dev/null
+++ b/mmseg/configs/_base_/models/segformer_mit_b0.py
@@ -0,0 +1,49 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmseg.models.backbones import MixVisionTransformer
+from mmseg.models.data_preprocessor import SegDataPreProcessor
+from mmseg.models.decode_heads import SegformerHead
+from mmseg.models.losses import CrossEntropyLoss
+from mmseg.models.segmentors import EncoderDecoder
+
+# SegFormer model settings (MixVisionTransformer backbone + SegformerHead)
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+data_preprocessor = dict(
+    type=SegDataPreProcessor,
+    mean=[123.675, 116.28, 103.53],
+    std=[58.395, 57.12, 57.375],
+    bgr_to_rgb=True,
+    pad_val=0,
+    seg_pad_val=255)
+model = dict(
+    type=EncoderDecoder,
+    data_preprocessor=data_preprocessor,
+    pretrained=None,
+    backbone=dict(
+        type=MixVisionTransformer,
+        in_channels=3,
+        embed_dims=32,
+        num_stages=4,
+        num_layers=[2, 2, 2, 2],
+        num_heads=[1, 2, 5, 8],
+        patch_sizes=[7, 3, 3, 3],
+        sr_ratios=[8, 4, 2, 1],
+        out_indices=(0, 1, 2, 3),
+        mlp_ratio=4,
+        qkv_bias=True,
+        drop_rate=0.0,
+        attn_drop_rate=0.0,
+        drop_path_rate=0.1),
+    decode_head=dict(
+        type=SegformerHead,
+        in_channels=[32, 64, 160, 256],  # embed_dims * num_heads per stage
+        in_index=[0, 1, 2, 3],
+        channels=256,
+        dropout_ratio=0.1,
+        num_classes=19,  # the ADE20K configs override this with 150
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        loss_decode=dict(
+            type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0)),
+    # model training and testing settings
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
diff --git a/mmseg/configs/_base_/schedules/schedule_160k.py b/mmseg/configs/_base_/schedules/schedule_160k.py
new file mode 100644
index 0000000000..232c67f299
--- /dev/null
+++ b/mmseg/configs/_base_/schedules/schedule_160k.py
@@ -0,0 +1,35 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook,
+                            LoggerHook, ParamSchedulerHook)
+from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
+from mmengine.optim.scheduler.lr_scheduler import PolyLR
+from mmengine.runner.loops import IterBasedTrainLoop, TestLoop, ValLoop
+from torch.optim.sgd import SGD
+
+from mmseg.engine.hooks import SegVisualizationHook
+
+# optimizer: SGD with momentum, wrapped with gradient clipping disabled
+optimizer = dict(type=SGD, lr=0.01, momentum=0.9, weight_decay=0.0005)
+optim_wrapper = dict(type=OptimWrapper, optimizer=optimizer, clip_grad=None)
+# learning policy: a single PolyLR phase spanning iterations 0 to 160000
+param_scheduler = [
+    dict(
+        type=PolyLR,
+        eta_min=1e-4,
+        power=0.9,
+        begin=0,
+        end=160000,
+        by_epoch=False)
+]
+# training schedule: 160k iterations, validation every 16k iterations
+train_cfg = dict(type=IterBasedTrainLoop, max_iters=160000, val_interval=16000)
+val_cfg = dict(type=ValLoop)
+test_cfg = dict(type=TestLoop)
+
+default_hooks = dict(
+    timer=dict(type=IterTimerHook),
+    logger=dict(type=LoggerHook, interval=50, log_metric_by_epoch=False),
+    param_scheduler=dict(type=ParamSchedulerHook),
+    checkpoint=dict(type=CheckpointHook, by_epoch=False, interval=16000),
+    sampler_seed=dict(type=DistSamplerSeedHook),
+    visualization=dict(type=SegVisualizationHook))
diff --git a/mmseg/configs/segformer/segformer_mit_b0_8xb2_160k_ade20k_512x512.py b/mmseg/configs/segformer/segformer_mit_b0_8xb2_160k_ade20k_512x512.py
new file mode 100644
index 0000000000..1e74ba739b
--- /dev/null
+++ b/mmseg/configs/segformer/segformer_mit_b0_8xb2_160k_ade20k_512x512.py
@@ -0,0 +1,47 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmengine.config import read_base
+
+with read_base():
+    from .._base_.models.segformer_mit_b0 import *  # noqa: F401,F403
+    from .._base_.datasets.ade20k import *  # noqa: F401,F403
+    from .._base_.schedules.schedule_160k import *  # noqa: F401,F403
+    from .._base_.default_runtime import *  # noqa: F401,F403
+
+from mmengine.model.weight_init import PretrainedInit
+from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
+from mmengine.optim.scheduler.lr_scheduler import LinearLR, PolyLR
+from torch.optim.adamw import AdamW
+
+crop_size = (512, 512)  # also set as ``size`` on the data preprocessor
+data_preprocessor.update(size=crop_size)  # noqa: F405
+checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b0_20220624-7e0fe6dd.pth'  # noqa
+model.update(  # noqa: F405
+    data_preprocessor=data_preprocessor,  # noqa: F405
+    backbone=dict(init_cfg=dict(type=PretrainedInit, checkpoint=checkpoint)),
+    decode_head=dict(num_classes=150))
+
+optim_wrapper = dict(
+    type=OptimWrapper,
+    optimizer=dict(
+        type=AdamW, lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01),
+    paramwise_cfg=dict(
+        custom_keys={
+            'pos_block': dict(decay_mult=0.),
+            'norm': dict(decay_mult=0.),
+            'head': dict(lr_mult=10.)
+        }))
+
+param_scheduler = [
+    dict(type=LinearLR, start_factor=1e-6, by_epoch=False, begin=0, end=1500),
+    dict(
+        type=PolyLR,
+        eta_min=0.0,
+        power=1.0,
+        begin=1500,
+        end=160000,
+        by_epoch=False,
+    )
+]
+train_dataloader.update(batch_size=2, num_workers=2)  # noqa: F405
+val_dataloader.update(batch_size=1, num_workers=4)  # noqa: F405
+test_dataloader = val_dataloader  # noqa: F405
diff --git a/mmseg/configs/segformer/segformer_mit_b1_8xb2_160k_ade20k_512x512.py b/mmseg/configs/segformer/segformer_mit_b1_8xb2_160k_ade20k_512x512.py
new file mode 100644
index 0000000000..4db72226a0
--- /dev/null
+++ b/mmseg/configs/segformer/segformer_mit_b1_8xb2_160k_ade20k_512x512.py
@@ -0,0 +1,16 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmengine.config import read_base
+
+with read_base():
+    from .segformer_mit_b0_8xb2_160k_ade20k_512x512 import *  # noqa: F401,F403
+
+checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b1_20220624-02e5a6a1.pth'  # noqa
+
+# model settings: MiT-B1 overrides applied on top of the B0 config
+model.update(  # noqa: F405
+    backbone=dict(
+        init_cfg=dict(checkpoint=checkpoint),
+        embed_dims=64,
+        num_heads=[1, 2, 5, 8],
+        num_layers=[2, 2, 2, 2]),
+    decode_head=dict(in_channels=[64, 128, 320, 512]))
diff --git a/mmseg/configs/segformer/segformer_mit_b2_8xb2_160k_ade20k_512x512.py b/mmseg/configs/segformer/segformer_mit_b2_8xb2_160k_ade20k_512x512.py
new file mode 100644
index 0000000000..32100009a2
--- /dev/null
+++ b/mmseg/configs/segformer/segformer_mit_b2_8xb2_160k_ade20k_512x512.py
@@ -0,0 +1,16 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmengine.config import read_base
+
+with read_base():
+    from .segformer_mit_b0_8xb2_160k_ade20k_512x512 import *  # noqa: F401,F403
+
+checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b2_20220624-66e8bf70.pth'  # noqa
+
+# model settings: MiT-B2 overrides applied on top of the B0 config
+model.update(  # noqa: F405
+    backbone=dict(
+        init_cfg=dict(checkpoint=checkpoint),
+        embed_dims=64,
+        num_heads=[1, 2, 5, 8],
+        num_layers=[3, 4, 6, 3]),
+    decode_head=dict(in_channels=[64, 128, 320, 512]))
diff --git a/mmseg/configs/segformer/segformer_mit_b3_8xb2_160k_ade20k_512x512.py b/mmseg/configs/segformer/segformer_mit_b3_8xb2_160k_ade20k_512x512.py
new file mode 100644
index 0000000000..1474defddd
--- /dev/null
+++ b/mmseg/configs/segformer/segformer_mit_b3_8xb2_160k_ade20k_512x512.py
@@ -0,0 +1,16 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmengine.config import read_base
+
+with read_base():
+    from .segformer_mit_b0_8xb2_160k_ade20k_512x512 import *  # noqa: F401,F403
+
+checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b3_20220624-13b1141c.pth'  # noqa
+
+# model settings: MiT-B3 overrides applied on top of the B0 config
+model.update(  # noqa: F405
+    backbone=dict(
+        init_cfg=dict(checkpoint=checkpoint),
+        embed_dims=64,
+        num_heads=[1, 2, 5, 8],
+        num_layers=[3, 4, 18, 3]),
+    decode_head=dict(in_channels=[64, 128, 320, 512]))
diff --git a/mmseg/configs/segformer/segformer_mit_b4_8xb2_160k_ade20k_512x512.py b/mmseg/configs/segformer/segformer_mit_b4_8xb2_160k_ade20k_512x512.py
new file mode 100644
index 0000000000..779cc3abae
--- /dev/null
+++ b/mmseg/configs/segformer/segformer_mit_b4_8xb2_160k_ade20k_512x512.py
@@ -0,0 +1,16 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmengine.config import read_base
+
+with read_base():
+    from .segformer_mit_b0_8xb2_160k_ade20k_512x512 import *  # noqa: F401,F403
+
+checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b4_20220624-d588d980.pth'  # noqa
+
+# model settings: MiT-B4 overrides applied on top of the B0 config
+model.update(  # noqa: F405
+    backbone=dict(
+        init_cfg=dict(checkpoint=checkpoint),
+        embed_dims=64,
+        num_heads=[1, 2, 5, 8],
+        num_layers=[3, 8, 27, 3]),
+    decode_head=dict(in_channels=[64, 128, 320, 512]))
diff --git a/mmseg/configs/segformer/segformer_mit_b5_8xb2_160k_ade20k_512x512.py b/mmseg/configs/segformer/segformer_mit_b5_8xb2_160k_ade20k_512x512.py
new file mode 100644
index 0000000000..98418d5d04
--- /dev/null
+++ b/mmseg/configs/segformer/segformer_mit_b5_8xb2_160k_ade20k_512x512.py
@@ -0,0 +1,16 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmengine.config import read_base
+
+with read_base():
+    from .segformer_mit_b0_8xb2_160k_ade20k_512x512 import *  # noqa: F401,F403
+
+checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b5_20220624-658746d9.pth'  # noqa
+
+# model settings: MiT-B5 overrides applied on top of the B0 config
+model.update(  # noqa: F405
+    backbone=dict(
+        init_cfg=dict(checkpoint=checkpoint),
+        embed_dims=64,
+        num_heads=[1, 2, 5, 8],
+        num_layers=[3, 6, 40, 3]),
+    decode_head=dict(in_channels=[64, 128, 320, 512]))
diff --git a/mmseg/configs/segformer/segformer_mit_b5_8xb2_160k_ade20k_640x640.py b/mmseg/configs/segformer/segformer_mit_b5_8xb2_160k_ade20k_640x640.py
new file mode 100644
index 0000000000..48b532708a
--- /dev/null
+++ b/mmseg/configs/segformer/segformer_mit_b5_8xb2_160k_ade20k_640x640.py
@@ -0,0 +1,24 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmengine.config import read_base
+
+with read_base():
+    # Later star-imports win, so the 640x640 dataset must follow the B0 parent.
+    from .segformer_mit_b0_8xb2_160k_ade20k_512x512 import *  # noqa: F401,F403
+    from .._base_.datasets.ade20k_640x640 import *  # noqa: F401,F403
+
+checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b5_20220624-658746d9.pth'  # noqa
+train_dataloader.update(batch_size=2, num_workers=2)  # noqa: F405
+val_dataloader.update(batch_size=1, num_workers=4)  # noqa: F405
+test_dataloader = val_dataloader  # noqa: F405
+
+# model settings: MiT-B5 backbone with the preprocessor padded to 640x640
+crop_size = (640, 640)
+data_preprocessor.update(size=crop_size)  # noqa: F405
+model.update(  # noqa: F405
+    data_preprocessor=data_preprocessor,  # noqa: F405
+    backbone=dict(
+        init_cfg=dict(checkpoint=checkpoint),
+        embed_dims=64,
+        num_heads=[1, 2, 5, 8],
+        num_layers=[3, 6, 40, 3]),
+    decode_head=dict(in_channels=[64, 128, 320, 512]))