diff --git a/.circleci/test.yml b/.circleci/test.yml index d460690065..a968b3df9a 100644 --- a/.circleci/test.yml +++ b/.circleci/test.yml @@ -61,9 +61,9 @@ jobs: command: | pip install git+https://github.com/open-mmlab/mmengine.git@main pip install -U openmim - mim install 'mmcv==2.0.0rc3' + mim install 'mmcv>=2.0.0rc4' pip install git+https://github.com/open-mmlab/mmclassification@dev-1.x - mim install 'mmdet==3.0.0rc5' + pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x pip install -r requirements/tests.txt -r requirements/optional.txt - run: name: Build and install @@ -97,6 +97,7 @@ jobs: command: | git clone -b main --depth 1 https://github.com/open-mmlab/mmengine.git /home/circleci/mmengine git clone -b dev-1.x --depth 1 https://github.com/open-mmlab/mmclassification.git /home/circleci/mmclassification + git clone -b dev-3.x --depth 1 https://github.com/open-mmlab/mmdetection.git /home/circleci/mmdetection - run: name: Build Docker image command: | @@ -107,9 +108,9 @@ jobs: command: | docker exec mmseg pip install -e /mmengine docker exec mmseg pip install -U openmim - docker exec mmseg mim install 'mmcv==2.0.0rc3' + docker exec mmseg mim install 'mmcv>=2.0.0rc4' docker exec mmseg pip install -e /mmclassification - docker exec mmseg mim install 'mmdet==3.0.0rc5' + docker exec mmseg pip install -e /mmdetection docker exec mmseg pip install -r requirements/tests.txt -r requirements/optional.txt - run: name: Build and install diff --git a/.github/workflows/merge_stage_test.yml b/.github/workflows/merge_stage_test.yml index 7728392481..dbe526d941 100644 --- a/.github/workflows/merge_stage_test.yml +++ b/.github/workflows/merge_stage_test.yml @@ -44,9 +44,9 @@ jobs: python -V pip install -U openmim pip install git+https://github.com/open-mmlab/mmengine.git - mim install 'mmcv==2.0.0rc3' + mim install 'mmcv>=2.0.0rc4' pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x - mim install 'mmdet==3.0.0rc5' + pip install 
git+https://github.com/open-mmlab/mmdetection.git@dev-3.x - name: Install unittest dependencies run: pip install -r requirements/tests.txt -r requirements/optional.txt - name: Build and install @@ -100,9 +100,9 @@ jobs: python -V pip install -U openmim pip install git+https://github.com/open-mmlab/mmengine.git - mim install 'mmcv==2.0.0rc3' + mim install 'mmcv>=2.0.0rc4' pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x - mim install 'mmdet==3.0.0rc5' + pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x - name: Install unittest dependencies run: pip install -r requirements/tests.txt -r requirements/optional.txt - name: Build and install @@ -166,9 +166,9 @@ jobs: python -V pip install -U openmim pip install git+https://github.com/open-mmlab/mmengine.git - mim install 'mmcv==2.0.0rc3' + mim install 'mmcv>=2.0.0rc4' pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x - mim install 'mmdet==3.0.0rc5' + pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x - name: Install unittest dependencies run: pip install -r requirements/tests.txt -r requirements/optional.txt - name: Build and install @@ -209,9 +209,9 @@ jobs: python -V pip install -U openmim pip install git+https://github.com/open-mmlab/mmengine.git - mim install 'mmcv==2.0.0rc3' + mim install 'mmcv>=2.0.0rc4' pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x - mim install 'mmdet==3.0.0rc5' + pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x - name: Install unittest dependencies run: pip install -r requirements/tests.txt -r requirements/optional.txt - name: Build and install @@ -244,9 +244,9 @@ jobs: python -V pip install -U openmim pip install git+https://github.com/open-mmlab/mmengine.git - mim install 'mmcv==2.0.0rc3' + mim install 'mmcv>=2.0.0rc4' pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x - mim install 'mmdet==3.0.0rc5' + pip install 
git+https://github.com/open-mmlab/mmdetection.git@dev-3.x - name: Install unittest dependencies run: pip install -r requirements/tests.txt -r requirements/optional.txt - name: Build and install diff --git a/.github/workflows/pr_stage_test.yml b/.github/workflows/pr_stage_test.yml index df73baba8e..a6f8ec0d22 100644 --- a/.github/workflows/pr_stage_test.yml +++ b/.github/workflows/pr_stage_test.yml @@ -44,9 +44,9 @@ jobs: run: | pip install -U openmim pip install git+https://github.com/open-mmlab/mmengine.git - mim install 'mmcv==2.0.0rc3' + mim install 'mmcv>=2.0.0rc4' pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x - mim install 'mmdet==3.0.0rc5' + pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x - name: Install unittest dependencies run: pip install -r requirements/tests.txt -r requirements/optional.txt - name: Build and install @@ -100,9 +100,9 @@ jobs: python -V pip install -U openmim pip install git+https://github.com/open-mmlab/mmengine.git - mim install 'mmcv==2.0.0rc3' + mim install 'mmcv>=2.0.0rc4' pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x - mim install 'mmdet==3.0.0rc5' + pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x - name: Install unittest dependencies run: pip install -r requirements/tests.txt -r requirements/optional.txt - name: Build and install @@ -135,9 +135,9 @@ jobs: python -V pip install -U openmim pip install git+https://github.com/open-mmlab/mmengine.git - mim install 'mmcv==2.0.0rc3' + mim install 'mmcv>=2.0.0rc4' pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x - mim install 'mmdet==3.0.0rc5' + pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x - name: Install unittest dependencies run: pip install -r requirements/tests.txt -r requirements/optional.txt - name: Build and install diff --git a/README.md b/README.md index d42be540dc..308fca8716 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 
@@ The 1.x branch works with **PyTorch 1.6+**. ## What's New -v1.0.0rc4 was released on 30/01/2023. +v1.0.0rc5 was released on 01/02/2023. Please refer to [changelog.md](docs/en/notes/changelog.md) for details and release history. - Support ISNet (ICCV'2021) in projects ([#2400](https://github.com/open-mmlab/mmsegmentation/pull/2400)) diff --git a/README_zh-CN.md b/README_zh-CN.md index bbebab5d04..8db2746413 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -61,7 +61,7 @@ MMSegmentation 是一个基于 PyTorch 的语义分割开源工具箱。它是 O ## 更新日志 -最新版本 v1.0.0rc4 在 2023.01.30 发布。 +最新版本 v1.0.0rc5 在 2023.02.01 发布。 如果想了解更多版本更新细节和历史信息,请阅读[更新日志](docs/en/notes/changelog.md)。 ## 安装 diff --git a/configs/_base_/datasets/ade20k.py b/configs/_base_/datasets/ade20k.py index 5840fc17ec..2c01b2ff59 100644 --- a/configs/_base_/datasets/ade20k.py +++ b/configs/_base_/datasets/ade20k.py @@ -25,7 +25,7 @@ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), + dict(type='LoadImageFromFile', backend_args=dict(backend='local')), dict( type='TestTimeAug', transforms=[ diff --git a/configs/_base_/datasets/ade20k_640x640.py b/configs/_base_/datasets/ade20k_640x640.py index 998b06e15b..866403b27f 100644 --- a/configs/_base_/datasets/ade20k_640x640.py +++ b/configs/_base_/datasets/ade20k_640x640.py @@ -25,7 +25,7 @@ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), + dict(type='LoadImageFromFile', backend_args=dict(backend='local')), dict( type='TestTimeAug', transforms=[ diff --git a/configs/_base_/datasets/chase_db1.py b/configs/_base_/datasets/chase_db1.py index 07604b4d5a..62dd3b3cbe 100644 --- a/configs/_base_/datasets/chase_db1.py +++ b/configs/_base_/datasets/chase_db1.py @@ -26,7 +26,7 @@ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), + 
dict(type='LoadImageFromFile', backend_args=dict(backend='local')), dict( type='TestTimeAug', transforms=[ diff --git a/configs/_base_/datasets/cityscapes.py b/configs/_base_/datasets/cityscapes.py index 1698e04721..b7d95c1ec0 100644 --- a/configs/_base_/datasets/cityscapes.py +++ b/configs/_base_/datasets/cityscapes.py @@ -25,7 +25,7 @@ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), + dict(type='LoadImageFromFile', backend_args=dict(backend='local')), dict( type='TestTimeAug', transforms=[ diff --git a/configs/_base_/datasets/coco-stuff10k.py b/configs/_base_/datasets/coco-stuff10k.py index 0c2d55208e..9d3026bd4c 100644 --- a/configs/_base_/datasets/coco-stuff10k.py +++ b/configs/_base_/datasets/coco-stuff10k.py @@ -25,7 +25,7 @@ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), + dict(type='LoadImageFromFile', backend_args=dict(backend='local')), dict( type='TestTimeAug', transforms=[ diff --git a/configs/_base_/datasets/coco-stuff164k.py b/configs/_base_/datasets/coco-stuff164k.py index f77a0fd65a..c785e313ff 100644 --- a/configs/_base_/datasets/coco-stuff164k.py +++ b/configs/_base_/datasets/coco-stuff164k.py @@ -25,7 +25,7 @@ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), + dict(type='LoadImageFromFile', backend_args=dict(backend='local')), dict( type='TestTimeAug', transforms=[ diff --git a/configs/_base_/datasets/drive.py b/configs/_base_/datasets/drive.py index c6242acdb8..3bd6080aa7 100644 --- a/configs/_base_/datasets/drive.py +++ b/configs/_base_/datasets/drive.py @@ -26,7 +26,7 @@ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), + dict(type='LoadImageFromFile', backend_args=dict(backend='local')), 
dict( type='TestTimeAug', transforms=[ diff --git a/configs/_base_/datasets/hrf.py b/configs/_base_/datasets/hrf.py index c2fe84f170..b0ae34abe6 100644 --- a/configs/_base_/datasets/hrf.py +++ b/configs/_base_/datasets/hrf.py @@ -26,7 +26,7 @@ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), + dict(type='LoadImageFromFile', backend_args=dict(backend='local')), dict( type='TestTimeAug', transforms=[ diff --git a/configs/_base_/datasets/isaid.py b/configs/_base_/datasets/isaid.py index 65e256c56d..8407e06ac9 100644 --- a/configs/_base_/datasets/isaid.py +++ b/configs/_base_/datasets/isaid.py @@ -32,7 +32,7 @@ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), + dict(type='LoadImageFromFile', backend_args=dict(backend='local')), dict( type='TestTimeAug', transforms=[ diff --git a/configs/_base_/datasets/loveda.py b/configs/_base_/datasets/loveda.py index d69bdafceb..8ecc919654 100644 --- a/configs/_base_/datasets/loveda.py +++ b/configs/_base_/datasets/loveda.py @@ -25,7 +25,7 @@ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), + dict(type='LoadImageFromFile', backend_args=dict(backend='local')), dict( type='TestTimeAug', transforms=[ diff --git a/configs/_base_/datasets/pascal_context_59.py b/configs/_base_/datasets/pascal_context_59.py index 0ca02cc94b..bb144dd202 100644 --- a/configs/_base_/datasets/pascal_context_59.py +++ b/configs/_base_/datasets/pascal_context_59.py @@ -28,7 +28,7 @@ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), + dict(type='LoadImageFromFile', backend_args=dict(backend='local')), dict( type='TestTimeAug', transforms=[ diff --git a/configs/_base_/datasets/pascal_voc12.py 
b/configs/_base_/datasets/pascal_voc12.py index 8b4b77c2f9..0fa3d55764 100644 --- a/configs/_base_/datasets/pascal_voc12.py +++ b/configs/_base_/datasets/pascal_voc12.py @@ -25,7 +25,7 @@ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), + dict(type='LoadImageFromFile', backend_args=dict(backend='local')), dict( type='TestTimeAug', transforms=[ diff --git a/configs/_base_/datasets/pascal_voc12_aug.py b/configs/_base_/datasets/pascal_voc12_aug.py index 495595cdfb..8b358cc0cd 100644 --- a/configs/_base_/datasets/pascal_voc12_aug.py +++ b/configs/_base_/datasets/pascal_voc12_aug.py @@ -27,7 +27,7 @@ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), + dict(type='LoadImageFromFile', backend_args=dict(backend='local')), dict( type='TestTimeAug', transforms=[ diff --git a/configs/_base_/datasets/potsdam.py b/configs/_base_/datasets/potsdam.py index 1f4b95df2e..4439f41919 100644 --- a/configs/_base_/datasets/potsdam.py +++ b/configs/_base_/datasets/potsdam.py @@ -25,7 +25,7 @@ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), + dict(type='LoadImageFromFile', backend_args=dict(backend='local')), dict( type='TestTimeAug', transforms=[ diff --git a/configs/_base_/datasets/stare.py b/configs/_base_/datasets/stare.py index cd12740b2e..e55519b595 100644 --- a/configs/_base_/datasets/stare.py +++ b/configs/_base_/datasets/stare.py @@ -26,7 +26,7 @@ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), + dict(type='LoadImageFromFile', backend_args=dict(backend='local')), dict( type='TestTimeAug', transforms=[ diff --git a/configs/_base_/datasets/vaihingen.py b/configs/_base_/datasets/vaihingen.py index ca0ad7915e..2b3fa76093 100644 --- 
a/configs/_base_/datasets/vaihingen.py +++ b/configs/_base_/datasets/vaihingen.py @@ -25,7 +25,7 @@ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), + dict(type='LoadImageFromFile', backend_args=dict(backend='local')), dict( type='TestTimeAug', transforms=[ diff --git a/configs/erfnet/README.md b/configs/erfnet/README.md index 44e4f51c91..4f7d21572a 100644 --- a/configs/erfnet/README.md +++ b/configs/erfnet/README.md @@ -41,12 +41,14 @@ Semantic segmentation is a challenging task that addresses most of the perceptio ### Cityscapes -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| ERFNet | ERFNet | 512x1024 | 160000 | 6.04 | 15.26 | 71.08 | 72.6 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/erfnet/erfnet_fcn_4xb4-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes/erfnet_fcn_4x4_512x1024_160k_cityscapes_20211126_082056-03d333ed.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes/erfnet_fcn_4x4_512x1024_160k_cityscapes_20211126_082056.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | 
-------- | --------- | ------: | -------- | -------------- | ---: | ------------- | ------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| ERFNet | ERFNet | 512x1024 | 160000 | 6.04 | 15.26 | 72.5 | 74.75 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/erfnet/erfnet_fcn_4xb4-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes/erfnet_fcn_4x4_512x1024_160k_cityscapes_20220704_162145-dc90157a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes/erfnet_fcn_4x4_512x1024_160k_cityscapes_20220704_162145.log.json) | Note: - The model is trained from scratch. - Last deconvolution layer in the [original paper](https://github.com/Eromera/erfnet_pytorch/blob/master/train/erfnet.py#L123) is replaced by a naive `FCNHead` decoder head and a bilinear upsampling layer, found more effective and efficient. + +- This model performance is sensitive to the seed values used, please refer to the log file for the specific settings of the seed. If you choose a different seed, the results might differ from the table results. 
diff --git a/configs/erfnet/erfnet.yml b/configs/erfnet/erfnet.yml index aeb454cb50..5f87f020cf 100644 --- a/configs/erfnet/erfnet.yml +++ b/configs/erfnet/erfnet.yml @@ -31,7 +31,7 @@ Models: - Task: Semantic Segmentation Dataset: Cityscapes Metrics: - mIoU: 71.08 - mIoU(ms+flip): 72.6 + mIoU: 72.5 + mIoU(ms+flip): 74.75 Config: configs/erfnet/erfnet_fcn_4xb4-160k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes/erfnet_fcn_4x4_512x1024_160k_cityscapes_20211126_082056-03d333ed.pth + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes/erfnet_fcn_4x4_512x1024_160k_cityscapes_20220704_162145-dc90157a.pth diff --git a/configs/mask2former/README.md b/configs/mask2former/README.md index 8881b0d66c..1861fec3b1 100644 --- a/configs/mask2former/README.md +++ b/configs/mask2former/README.md @@ -45,24 +45,24 @@ pip install "mmdet>=3.0.0rc4" | Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | | ----------- | -------------- | --------- | ------- | -------: | -------------- | ----- | ------------: | -----------------------------------------------------------------------------------------------------------------------------------------------------------: | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| Mask2Former | R-50-D32 | 512x1024 | 90000 | 5806 | 9.17 | 80.44 | - | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024/mask2former_r50_8xb2-90k_cityscapes-512x1024_20221202_140802-2ff5ffa0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024/mask2former_r50_8xb2-90k_cityscapes-512x1024_20221202_140802.json) | -| Mask2Former | R-101-D32 | 512x1024 | 90000 | 6971 | 7.11 | 80.80 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024/mask2former_r101_8xb2-90k_cityscapes-512x1024_20221130_031628-8ad528ea.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024/mask2former_r101_8xb2-90k_cityscapes-512x1024_20221130_031628.json)) | -| Mask2Former | Swin-T | 512x1024 | 90000 | 6511 | 7.18 | 81.71 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024/mask2former_swin-t_8xb2-90k_cityscapes-512x1024_20221127_144501-290b34af.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024/mask2former_swin-t_8xb2-90k_cityscapes-512x1024_20221127_144501.json)) | -| Mask2Former | Swin-S | 512x1024 | 90000 | 8282 | 5.57 | 82.57 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024/mask2former_swin-s_8xb2-90k_cityscapes-512x1024_20221127_143802-7c98854a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024/mask2former_swin-s_8xb2-90k_cityscapes-512x1024_20221127_143802.json)) | -| Mask2Former | Swin-B (in22k) | 512x1024 | 90000 | 11152 | 4.32 | 83.52 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221203_045030-59a4379a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221203_045030.json)) | -| Mask2Former | Swin-L (in22k) | 512x1024 | 90000 | 16207 | 2.86 | 83.65 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221202_141901-dc2c2ddd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221202_141901.json)) | +| Mask2Former | R-50-D32 | 512x1024 | 90000 | 5806 | 9.17 | 80.44 | - | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024/mask2former_r50_8xb2-90k_cityscapes-512x1024_20221202_140802-ffd9d750.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024/mask2former_r50_8xb2-90k_cityscapes-512x1024_20221202_140802.json) | +| Mask2Former | R-101-D32 | 512x1024 | 90000 | 6971 | 7.11 | 80.80 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024/mask2former_r101_8xb2-90k_cityscapes-512x1024_20221130_031628-43e68666.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024/mask2former_r101_8xb2-90k_cityscapes-512x1024_20221130_031628.json) | +| Mask2Former | Swin-T | 512x1024 | 90000 | 6511 | 7.18 | 81.71 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024/mask2former_swin-t_8xb2-90k_cityscapes-512x1024_20221127_144501-36c59341.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024/mask2former_swin-t_8xb2-90k_cityscapes-512x1024_20221127_144501.json) | +| Mask2Former | Swin-S | 512x1024 | 90000 | 8282 | 5.57 | 82.57 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024/mask2former_swin-s_8xb2-90k_cityscapes-512x1024_20221127_143802-9ab177f6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024/mask2former_swin-s_8xb2-90k_cityscapes-512x1024_20221127_143802.json) | +| Mask2Former | Swin-B (in22k) | 512x1024 | 90000 | 11152 | 4.32 | 83.52 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221203_045030-9a86a225.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221203_045030.json) | +| Mask2Former | Swin-L (in22k) | 512x1024 | 90000 | 16207 | 2.86 | 83.65 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221202_141901-28ad20f1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221202_141901.json) | ### ADE20K | Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | | ----------- | -------------- | --------- | ------- | -------: | -------------- 
| ----- | ------------: | -------------------------------------------------------------------------------------------------------------------------------------------------------: | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| Mask2Former | R-50-D32 | 512x512 | 160000 | 3385 | 26.59 | 47.87 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512/mask2former_r50_8xb2-160k_ade20k-512x512_20221204_000055-4c62652d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512/mask2former_r50_8xb2-160k_ade20k-512x512_20221204_000055.json)) | -| Mask2Former | R-101-D32 | 512x512 | 160000 | 4190 | 22.97 | 48.60 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512/mask2former_r101_8xb2-160k_ade20k-512x512_20221203_233905-b1169bc0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512/mask2former_r101_8xb2-160k_ade20k-512x512_20221203_233905.json)) | -| Mask2Former | Swin-T | 512x512 | 160000 | 3826 | 23.82 | 48.66 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512/mask2former_swin-t_8xb2-160k_ade20k-512x512_20221203_234230-4341520b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512/mask2former_swin-t_8xb2-160k_ade20k-512x512_20221203_234230.json)) | -| Mask2Former | Swin-S | 512x512 | 160000 | 5034 | 19.69 | 51.24 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512/mask2former_swin-s_8xb2-160k_ade20k-512x512_20221204_143905-ab263c11.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512/mask2former_swin-s_8xb2-160k_ade20k-512x512_20221204_143905.json)) | -| Mask2Former | Swin-B | 640x640 | 160000 | 5795 | 12.48 | 52.44 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640_20221129_125118-35e3a2c7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640_20221129_125118.json)) | -| Mask2Former | Swin-B (in22k) | 640x640 | 160000 | 5795 | 12.43 | 53.90 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235230-622e093b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235230.json)) | -| Mask2Former | Swin-L (in22k) | 640x640 | 160000 | 9077 | 8.81 | 56.01 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235933-5cc76a78.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235933.json)) | +| Mask2Former | R-50-D32 | 512x512 | 160000 | 3385 | 26.59 | 47.87 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512/mask2former_r50_8xb2-160k_ade20k-512x512_20221204_000055-2d1f55f1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512/mask2former_r50_8xb2-160k_ade20k-512x512_20221204_000055.json) | +| Mask2Former | R-101-D32 | 512x512 | 160000 | 4190 | 22.97 | 48.60 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512/mask2former_r101_8xb2-160k_ade20k-512x512_20221203_233905-b7135890.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512/mask2former_r101_8xb2-160k_ade20k-512x512_20221203_233905.json) | +| Mask2Former | Swin-T | 512x512 | 160000 | 3826 | 23.82 | 48.66 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512/mask2former_swin-t_8xb2-160k_ade20k-512x512_20221203_234230-7d64e5dd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512/mask2former_swin-t_8xb2-160k_ade20k-512x512_20221203_234230.json) | +| Mask2Former | Swin-S | 512x512 | 160000 | 5034 | 19.69 | 51.24 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512/mask2former_swin-s_8xb2-160k_ade20k-512x512_20221204_143905-e715144e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512/mask2former_swin-s_8xb2-160k_ade20k-512x512_20221204_143905.json) | +| Mask2Former | Swin-B | 640x640 | 160000 | 5795 | 12.48 | 52.44 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640_20221129_125118-a4a086d2.pth) \|
[log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640_20221129_125118.json) | +| Mask2Former | Swin-B (in22k) | 640x640 | 160000 | 5795 | 12.43 | 53.90 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235230-7ec0f569.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235230.json) | +| Mask2Former | Swin-L (in22k) | 640x640 | 160000 | 9077 | 8.81 | 56.01 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235933-7120c214.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235933.json) | Note: diff --git a/configs/mask2former/mask2former.yml b/configs/mask2former/mask2former.yml index 78655fc52f..4e33766c70 100644 --- a/configs/mask2former/mask2former.yml +++ b/configs/mask2former/mask2former.yml @@ -35,7 +35,7 @@ Models: Metrics: mIoU: 80.44 Config: configs/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024.py - Weights:
https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024/mask2former_r50_8xb2-90k_cityscapes-512x1024_20221202_140802-2ff5ffa0.pth + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024/mask2former_r50_8xb2-90k_cityscapes-512x1024_20221202_140802-ffd9d750.pth - Name: mask2former_r101_8xb2-90k_cityscapes-512x1024 In Collection: Mask2Former Metadata: @@ -56,7 +56,7 @@ Models: Metrics: mIoU: 80.8 Config: configs/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024/mask2former_r101_8xb2-90k_cityscapes-512x1024_20221130_031628-8ad528ea.pth + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024/mask2former_r101_8xb2-90k_cityscapes-512x1024_20221130_031628-43e68666.pth - Name: mask2former_swin-t_8xb2-90k_cityscapes-512x1024 In Collection: Mask2Former Metadata: @@ -77,7 +77,7 @@ Models: Metrics: mIoU: 81.71 Config: configs/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024/mask2former_swin-t_8xb2-90k_cityscapes-512x1024_20221127_144501-290b34af.pth + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024/mask2former_swin-t_8xb2-90k_cityscapes-512x1024_20221127_144501-36c59341.pth - Name: mask2former_swin-s_8xb2-90k_cityscapes-512x1024 In Collection: Mask2Former Metadata: @@ -98,7 +98,7 @@ Models: Metrics: mIoU: 82.57 Config: configs/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024.py - Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024/mask2former_swin-s_8xb2-90k_cityscapes-512x1024_20221127_143802-7c98854a.pth + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024/mask2former_swin-s_8xb2-90k_cityscapes-512x1024_20221127_143802-9ab177f6.pth - Name: mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024 In Collection: Mask2Former Metadata: @@ -119,7 +119,7 @@ Models: Metrics: mIoU: 83.52 Config: configs/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221203_045030-59a4379a.pth + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221203_045030-9a86a225.pth - Name: mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024 In Collection: Mask2Former Metadata: @@ -140,7 +140,7 @@ Models: Metrics: mIoU: 83.65 Config: configs/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221202_141901-dc2c2ddd.pth + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221202_141901-28ad20f1.pth - Name: mask2former_r50_8xb2-160k_ade20k-512x512 In Collection: Mask2Former Metadata: @@ -161,7 +161,7 @@ Models: Metrics: mIoU: 47.87 Config: 
configs/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512/mask2former_r50_8xb2-160k_ade20k-512x512_20221204_000055-4c62652d.pth + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512/mask2former_r50_8xb2-160k_ade20k-512x512_20221204_000055-2d1f55f1.pth - Name: mask2former_r101_8xb2-160k_ade20k-512x512 In Collection: Mask2Former Metadata: @@ -182,7 +182,7 @@ Models: Metrics: mIoU: 48.6 Config: configs/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512/mask2former_r101_8xb2-160k_ade20k-512x512_20221203_233905-b1169bc0.pth + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512/mask2former_r101_8xb2-160k_ade20k-512x512_20221203_233905-b7135890.pth - Name: mask2former_swin-t_8xb2-160k_ade20k-512x512 In Collection: Mask2Former Metadata: @@ -203,7 +203,7 @@ Models: Metrics: mIoU: 48.66 Config: configs/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512/mask2former_swin-t_8xb2-160k_ade20k-512x512_20221203_234230-4341520b.pth + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512/mask2former_swin-t_8xb2-160k_ade20k-512x512_20221203_234230-7d64e5dd.pth - Name: mask2former_swin-s_8xb2-160k_ade20k-512x512 In Collection: Mask2Former Metadata: @@ -224,7 +224,7 @@ Models: Metrics: mIoU: 51.24 Config: configs/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512.py - Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512/mask2former_swin-s_8xb2-160k_ade20k-512x512_20221204_143905-ab263c11.pth + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512/mask2former_swin-s_8xb2-160k_ade20k-512x512_20221204_143905-e715144e.pth - Name: mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640 In Collection: Mask2Former Metadata: @@ -245,7 +245,7 @@ Models: Metrics: mIoU: 52.44 Config: configs/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640_20221129_125118-35e3a2c7.pth + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640_20221129_125118-a4a086d2.pth - Name: mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640 In Collection: Mask2Former Metadata: @@ -266,7 +266,7 @@ Models: Metrics: mIoU: 53.9 Config: configs/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235230-622e093b.pth + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235230-7ec0f569.pth - Name: mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640 In Collection: Mask2Former Metadata: @@ -287,4 +287,4 @@ Models: Metrics: mIoU: 56.01 Config: 
configs/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235933-5cc76a78.pth + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235933-7120c214.pth diff --git a/configs/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512.py b/configs/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512.py index 598cabfb6d..78cf60510c 100644 --- a/configs/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512.py +++ b/configs/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512.py @@ -41,65 +41,58 @@ num_outs=3, norm_cfg=dict(type='GN', num_groups=32), act_cfg=dict(type='ReLU'), - encoder=dict( - type='mmdet.DetrTransformerEncoder', + encoder=dict( # DeformableDetrTransformerEncoder num_layers=6, - transformerlayers=dict( - type='mmdet.BaseTransformerLayer', - attn_cfgs=dict( - type='mmdet.MultiScaleDeformableAttention', + layer_cfg=dict( # DeformableDetrTransformerEncoderLayer + self_attn_cfg=dict( # MultiScaleDeformableAttention embed_dims=256, num_heads=8, num_levels=3, num_points=4, im2col_step=64, dropout=0.0, - batch_first=False, + batch_first=True, norm_cfg=None, init_cfg=None), - ffn_cfgs=dict( - type='FFN', + ffn_cfg=dict( embed_dims=256, feedforward_channels=1024, num_fcs=2, ffn_drop=0.0, - act_cfg=dict(type='ReLU', inplace=True)), - operation_order=('self_attn', 'norm', 'ffn', 'norm')), + act_cfg=dict(type='ReLU', inplace=True))), init_cfg=None), - positional_encoding=dict( - type='mmdet.SinePositionalEncoding', - num_feats=128, - normalize=True), + positional_encoding=dict( # SinePositionalEncoding + num_feats=128, normalize=True), init_cfg=None), enforce_decoder_input_project=False, - 
positional_encoding=dict( - type='mmdet.SinePositionalEncoding', num_feats=128, - normalize=True), - transformer_decoder=dict( - type='mmdet.DetrTransformerDecoder', + positional_encoding=dict( # SinePositionalEncoding + num_feats=128, normalize=True), + transformer_decoder=dict( # Mask2FormerTransformerDecoder return_intermediate=True, num_layers=9, - transformerlayers=dict( - type='mmdet.DetrTransformerDecoderLayer', - attn_cfgs=dict( - type='mmdet.MultiheadAttention', + layer_cfg=dict( # Mask2FormerTransformerDecoderLayer + self_attn_cfg=dict( # MultiheadAttention embed_dims=256, num_heads=8, attn_drop=0.0, proj_drop=0.0, dropout_layer=None, - batch_first=False), - ffn_cfgs=dict( + batch_first=True), + cross_attn_cfg=dict( # MultiheadAttention + embed_dims=256, + num_heads=8, + attn_drop=0.0, + proj_drop=0.0, + dropout_layer=None, + batch_first=True), + ffn_cfg=dict( embed_dims=256, feedforward_channels=2048, num_fcs=2, act_cfg=dict(type='ReLU', inplace=True), ffn_drop=0.0, dropout_layer=None, - add_identity=True), - feedforward_channels=2048, - operation_order=('cross_attn', 'norm', 'self_attn', 'norm', - 'ffn', 'norm')), + add_identity=True)), init_cfg=None), loss_cls=dict( type='mmdet.CrossEntropyLoss', diff --git a/configs/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024.py b/configs/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024.py index f92dda98a6..fc132a698f 100644 --- a/configs/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024.py +++ b/configs/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024.py @@ -41,65 +41,58 @@ num_outs=3, norm_cfg=dict(type='GN', num_groups=32), act_cfg=dict(type='ReLU'), - encoder=dict( - type='mmdet.DetrTransformerEncoder', + encoder=dict( # DeformableDetrTransformerEncoder num_layers=6, - transformerlayers=dict( - type='mmdet.BaseTransformerLayer', - attn_cfgs=dict( - type='mmdet.MultiScaleDeformableAttention', + layer_cfg=dict( # DeformableDetrTransformerEncoderLayer + self_attn_cfg=dict( # 
MultiScaleDeformableAttention embed_dims=256, num_heads=8, num_levels=3, num_points=4, im2col_step=64, dropout=0.0, - batch_first=False, + batch_first=True, norm_cfg=None, init_cfg=None), - ffn_cfgs=dict( - type='FFN', + ffn_cfg=dict( embed_dims=256, feedforward_channels=1024, num_fcs=2, ffn_drop=0.0, - act_cfg=dict(type='ReLU', inplace=True)), - operation_order=('self_attn', 'norm', 'ffn', 'norm')), + act_cfg=dict(type='ReLU', inplace=True))), init_cfg=None), - positional_encoding=dict( - type='mmdet.SinePositionalEncoding', - num_feats=128, - normalize=True), + positional_encoding=dict( # SinePositionalEncoding + num_feats=128, normalize=True), init_cfg=None), enforce_decoder_input_project=False, - positional_encoding=dict( - type='mmdet.SinePositionalEncoding', num_feats=128, - normalize=True), - transformer_decoder=dict( - type='mmdet.DetrTransformerDecoder', + positional_encoding=dict( # SinePositionalEncoding + num_feats=128, normalize=True), + transformer_decoder=dict( # Mask2FormerTransformerDecoder return_intermediate=True, num_layers=9, - transformerlayers=dict( - type='mmdet.DetrTransformerDecoderLayer', - attn_cfgs=dict( - type='mmdet.MultiheadAttention', + layer_cfg=dict( # Mask2FormerTransformerDecoderLayer + self_attn_cfg=dict( # MultiheadAttention embed_dims=256, num_heads=8, attn_drop=0.0, proj_drop=0.0, dropout_layer=None, - batch_first=False), - ffn_cfgs=dict( + batch_first=True), + cross_attn_cfg=dict( # MultiheadAttention + embed_dims=256, + num_heads=8, + attn_drop=0.0, + proj_drop=0.0, + dropout_layer=None, + batch_first=True), + ffn_cfg=dict( embed_dims=256, feedforward_channels=2048, num_fcs=2, act_cfg=dict(type='ReLU', inplace=True), ffn_drop=0.0, dropout_layer=None, - add_identity=True), - feedforward_channels=2048, - operation_order=('cross_attn', 'norm', 'self_attn', 'norm', - 'ffn', 'norm')), + add_identity=True)), init_cfg=None), loss_cls=dict( type='mmdet.CrossEntropyLoss', diff --git 
a/configs/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640.py b/configs/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640.py index 56112dfa3e..4e4036db3a 100644 --- a/configs/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640.py +++ b/configs/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640.py @@ -53,65 +53,58 @@ num_outs=3, norm_cfg=dict(type='GN', num_groups=32), act_cfg=dict(type='ReLU'), - encoder=dict( - type='mmdet.DetrTransformerEncoder', + encoder=dict( # DeformableDetrTransformerEncoder num_layers=6, - transformerlayers=dict( - type='mmdet.BaseTransformerLayer', - attn_cfgs=dict( - type='mmdet.MultiScaleDeformableAttention', + layer_cfg=dict( # DeformableDetrTransformerEncoderLayer + self_attn_cfg=dict( # MultiScaleDeformableAttention embed_dims=256, num_heads=8, num_levels=3, num_points=4, im2col_step=64, dropout=0.0, - batch_first=False, + batch_first=True, norm_cfg=None, init_cfg=None), - ffn_cfgs=dict( - type='FFN', + ffn_cfg=dict( embed_dims=256, feedforward_channels=1024, num_fcs=2, ffn_drop=0.0, - act_cfg=dict(type='ReLU', inplace=True)), - operation_order=('self_attn', 'norm', 'ffn', 'norm')), + act_cfg=dict(type='ReLU', inplace=True))), init_cfg=None), - positional_encoding=dict( - type='mmdet.SinePositionalEncoding', - num_feats=128, - normalize=True), + positional_encoding=dict( # SinePositionalEncoding + num_feats=128, normalize=True), init_cfg=None), enforce_decoder_input_project=False, - positional_encoding=dict( - type='mmdet.SinePositionalEncoding', num_feats=128, - normalize=True), - transformer_decoder=dict( - type='mmdet.DetrTransformerDecoder', + positional_encoding=dict( # SinePositionalEncoding + num_feats=128, normalize=True), + transformer_decoder=dict( # Mask2FormerTransformerDecoder return_intermediate=True, num_layers=9, - transformerlayers=dict( - type='mmdet.DetrTransformerDecoderLayer', - attn_cfgs=dict( - 
type='mmdet.MultiheadAttention', + layer_cfg=dict( # Mask2FormerTransformerDecoderLayer + self_attn_cfg=dict( # MultiheadAttention embed_dims=256, num_heads=8, attn_drop=0.0, proj_drop=0.0, dropout_layer=None, - batch_first=False), - ffn_cfgs=dict( + batch_first=True), + cross_attn_cfg=dict( # MultiheadAttention + embed_dims=256, + num_heads=8, + attn_drop=0.0, + proj_drop=0.0, + dropout_layer=None, + batch_first=True), + ffn_cfg=dict( embed_dims=256, feedforward_channels=2048, num_fcs=2, act_cfg=dict(type='ReLU', inplace=True), ffn_drop=0.0, dropout_layer=None, - add_identity=True), - feedforward_channels=2048, - operation_order=('cross_attn', 'norm', 'self_attn', 'norm', - 'ffn', 'norm')), + add_identity=True)), init_cfg=None), loss_cls=dict( type='mmdet.CrossEntropyLoss', diff --git a/configs/maskformer/README.md b/configs/maskformer/README.md index 5e33d17afb..0248dbb63c 100644 --- a/configs/maskformer/README.md +++ b/configs/maskformer/README.md @@ -47,10 +47,10 @@ pip install "mmdet>=3.0.0rc4" | Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | | ---------- | --------- | --------- | ------- | -------- | -------------- | ----- | ------------- | -------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| MaskFormer | R-50-D32 | 512x512 | 160000 | 3.29 | 42.20 | 44.29 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512/maskformer_r50-d32_8xb2-160k_ade20k-512x512_20221030_182724-cbd39cc1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512/maskformer_r50-d32_8xb2-160k_ade20k-512x512_20221030_182724.json) | -| MaskFormer | R-101-D32 | 512x512 | 160000 | 4.12 | 34.90 | 45.11 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512/maskformer_r101-d32_8xb2-160k_ade20k-512x512_20221031_223053-c8e0931d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512/maskformer_r101-d32_8xb2-160k_ade20k-512x512_20221031_223053.json) | -| MaskFormer | Swin-T | 512x512 | 160000 | 3.73 | 40.53 | 46.69 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512_20221114_232813-03550716.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512_20221114_232813.json) | -| MaskFormer | Swin-S | 512x512 | 160000 | 5.33 | 26.98 | 49.36 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512_20221115_114710-5ab67e58.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512_20221115_114710.json) | +| MaskFormer | R-50-D32 | 512x512 | 160000 | 3.29 | 42.20 | 44.29 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512/maskformer_r50-d32_8xb2-160k_ade20k-512x512_20221030_182724-3a9cfe45.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512/maskformer_r50-d32_8xb2-160k_ade20k-512x512_20221030_182724.json) | +| MaskFormer | R-101-D32 | 512x512 | 160000 | 4.12 | 34.90 | 45.11 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512/maskformer_r101-d32_8xb2-160k_ade20k-512x512_20221031_223053-84adbfcb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512/maskformer_r101-d32_8xb2-160k_ade20k-512x512_20221031_223053.json) | +| MaskFormer | Swin-T | 512x512 | 160000 | 3.73 | 40.53 | 46.69 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512_20221114_232813-f14e7ce0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512_20221114_232813.json) | +| MaskFormer | Swin-S | 
512x512 | 160000 | 5.33 | 26.98 | 49.36 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512_20221115_114710-723512c7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512_20221115_114710.json) | Note: diff --git a/configs/maskformer/maskformer.yml b/configs/maskformer/maskformer.yml index 1b3d398e34..b499476a50 100644 --- a/configs/maskformer/maskformer.yml +++ b/configs/maskformer/maskformer.yml @@ -35,7 +35,7 @@ Models: Metrics: mIoU: 44.29 Config: configs/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512/maskformer_r50-d32_8xb2-160k_ade20k-512x512_20221030_182724-cbd39cc1.pth + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512/maskformer_r50-d32_8xb2-160k_ade20k-512x512_20221030_182724-3a9cfe45.pth - Name: maskformer_r101-d32_8xb2-160k_ade20k-512x512 In Collection: MaskFormer Metadata: @@ -56,7 +56,7 @@ Models: Metrics: mIoU: 45.11 Config: configs/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512/maskformer_r101-d32_8xb2-160k_ade20k-512x512_20221031_223053-c8e0931d.pth + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512/maskformer_r101-d32_8xb2-160k_ade20k-512x512_20221031_223053-84adbfcb.pth - Name: maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512 In Collection: MaskFormer Metadata: @@ -77,7 +77,7 
@@ Models: Metrics: mIoU: 46.69 Config: configs/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512_20221114_232813-03550716.pth + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512_20221114_232813-f14e7ce0.pth - Name: maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512 In Collection: MaskFormer Metadata: @@ -98,4 +98,4 @@ Models: Metrics: mIoU: 49.36 Config: configs/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512_20221115_114710-5ab67e58.pth + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512_20221115_114710-723512c7.pth diff --git a/configs/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512.py b/configs/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512.py index 7d8f657221..2a83746171 100644 --- a/configs/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512.py +++ b/configs/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512.py @@ -43,36 +43,34 @@ norm_cfg=dict(type='GN', num_groups=32), act_cfg=dict(type='ReLU')), enforce_decoder_input_project=False, - positional_encoding=dict( - type='mmdet.SinePositionalEncoding', num_feats=128, - normalize=True), - transformer_decoder=dict( - type='mmdet.DetrTransformerDecoder', + positional_encoding=dict( # SinePositionalEncoding + num_feats=128, normalize=True), + transformer_decoder=dict( # DetrTransformerDecoder return_intermediate=True, num_layers=6, - transformerlayers=dict( - 
type='mmdet.DetrTransformerDecoderLayer', - attn_cfgs=dict( - type='mmdet.MultiheadAttention', + layer_cfg=dict( # DetrTransformerDecoderLayer + self_attn_cfg=dict( # MultiheadAttention embed_dims=256, num_heads=8, attn_drop=0.1, proj_drop=0.1, dropout_layer=None, - batch_first=False), - ffn_cfgs=dict( + batch_first=True), + cross_attn_cfg=dict( # MultiheadAttention + embed_dims=256, + num_heads=8, + attn_drop=0.1, + proj_drop=0.1, + dropout_layer=None, + batch_first=True), + ffn_cfg=dict( embed_dims=256, feedforward_channels=2048, num_fcs=2, act_cfg=dict(type='ReLU', inplace=True), ffn_drop=0.1, dropout_layer=None, - add_identity=True), - # the following parameter was not used, - # just make current api happy - feedforward_channels=2048, - operation_order=('self_attn', 'norm', 'cross_attn', 'norm', - 'ffn', 'norm')), + add_identity=True)), init_cfg=None), loss_cls=dict( type='mmdet.CrossEntropyLoss', diff --git a/docker/Dockerfile b/docker/Dockerfile index 9ee49ab35c..73a0fac121 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,7 +1,7 @@ ARG PYTORCH="1.11.0" ARG CUDA="11.3" ARG CUDNN="8" -ARG MMCV="2.0.0rc3" +ARG MMCV="2.0.0rc4" FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel diff --git a/docker/serve/Dockerfile b/docker/serve/Dockerfile index 2dddc6cdf3..5ae1eb607d 100644 --- a/docker/serve/Dockerfile +++ b/docker/serve/Dockerfile @@ -3,8 +3,8 @@ ARG CUDA="11.3" ARG CUDNN="8" FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel -ARG MMCV="2.0.0rc3" -ARG MMSEG="1.0.0rc4" +ARG MMCV="2.0.0rc4" +ARG MMSEG="1.0.0rc5" ENV PYTHONUNBUFFERED TRUE diff --git a/docs/en/migration/interface.md b/docs/en/migration/interface.md index c816fceafe..1bc3d206e2 100644 --- a/docs/en/migration/interface.md +++ b/docs/en/migration/interface.md @@ -237,7 +237,7 @@ test_pipeline = [ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), + 
dict(type='LoadImageFromFile', backend_args=dict(backend='local')), dict( type='TestTimeAug', transforms=[ diff --git a/docs/en/notes/changelog.md b/docs/en/notes/changelog.md index ae9e565333..963cd6945b 100644 --- a/docs/en/notes/changelog.md +++ b/docs/en/notes/changelog.md @@ -1,5 +1,15 @@ # Changelog of v1.x +## v1.0.0rc5(02/01/2023) + +### Bug fix + +- Fix MaskFormer and Mask2Former when mmdet is installed from source ([#2532](https://github.com/open-mmlab/mmsegmentation/pull/2532)) +- Support new fileio interface in `MMCV>=2.0.0rc4` ([#2543](https://github.com/open-mmlab/mmsegmentation/pull/2543)) +- Fix ERFNet URL in dev-1.x branch ([#2537](https://github.com/open-mmlab/mmsegmentation/pull/2537)) +- Fix misleading `List[Tensor]` types ([#2546](https://github.com/open-mmlab/mmsegmentation/pull/2546)) +- Rename typing.py to typing_utils.py ([#2548](https://github.com/open-mmlab/mmsegmentation/pull/2548)) + ## v1.0.0rc4(01/30/2023) ### Highlights diff --git a/docs/en/notes/faq.md b/docs/en/notes/faq.md index 48e97429c1..bb09873cf0 100644 --- a/docs/en/notes/faq.md +++ b/docs/en/notes/faq.md @@ -4,37 +4,20 @@ We list some common troubles faced by many users and their corresponding solutio ## Installation -The compatible MMSegmentation and MMCV versions are as below. Please install the correct version of MMCV to avoid installation issues. +The compatible MMSegmentation, MMCV and MMEngine versions are as below. Please install the correct versions of them to avoid installation issues. 
-| MMSegmentation version | MMCV version | MMClassification (optional) version | MMDetection (optional) version | -| :--------------------: | :----------------------------: | :---------------------------------: | :----------------------------: | -| 1.x/dev-1.x branch | mmcv == 2.0.0rc3 | mmcls>=1.0.0rc0 | mmdet>=3.0.0rc4, \<=3.0.0rc5> | -| 1.0.0rc4 | mmcv == 2.0.0rc3 | mmcls>=1.0.0rc0 | mmdet>=3.0.0rc4, \<=3.0.0rc5> | -| 1.0.0rc3 | mmcv == 2.0.0rc3 | mmcls>=1.0.0rc0 | mmdet>=3.0.0rc4 \<=3.0.0rc5> | -| 1.0.0rc2 | mmcv == 2.0.0rc3 | mmcls>=1.0.0rc0 | mmdet>=3.0.0rc4 \<=3.0.0rc5> | -| 1.0.0rc1 | mmcv >= 2.0.0rc1, \<=2.0.0rc3> | mmcls>=1.0.0rc0 | Not required | -| 1.0.0rc0 | mmcv >= 2.0.0rc1, \<=2.0.0rc3> | mmcls>=1.0.0rc0 | Not required | -| master | mmcv-full>=1.4.4, \<=1.6.0 | mmcls>=0.20.1, \<=1.0.0 | Not required | -| 0.24.1 | mmcv-full>=1.4.4, \<=1.6.0 | mmcls>=0.20.1, \<=1.0.0 | Not required | -| 0.23.0 | mmcv-full>=1.4.4, \<=1.6.0 | mmcls>=0.20.1, \<=1.0.0 | Not required | -| 0.22.0 | mmcv-full>=1.4.4, \<=1.6.0 | mmcls>=0.20.1, \<=1.0.0 | Not required | -| 0.21.1 | mmcv-full>=1.4.4, \<=1.6.0 | Not required | Not required | -| 0.20.2 | mmcv-full>=1.3.13, \<=1.6.0 | Not required | Not required | -| 0.19.0 | mmcv-full>=1.3.13, \<1.3.17 | Not required | Not required | -| 0.18.0 | mmcv-full>=1.3.13, \<1.3.17 | Not required | Not required | -| 0.17.0 | mmcv-full>=1.3.7, \<1.3.17 | Not required | Not required | -| 0.16.0 | mmcv-full>=1.3.7, \<1.3.17 | Not required | Not required | -| 0.15.0 | mmcv-full>=1.3.7, \<1.3.17 | Not required | Not required | -| 0.14.1 | mmcv-full>=1.3.7, \<1.3.17 | Not required | Not required | -| 0.14.0 | mmcv-full>=1.3.1, \<1.3.2 | Not required | Not required | -| 0.13.0 | mmcv-full>=1.3.1, \<1.3.2 | Not required | Not required | -| 0.12.0 | mmcv-full>=1.1.4, \<1.3.2 | Not required | Not required | -| 0.11.0 | mmcv-full>=1.1.4, \<1.3.0 | Not required | Not required | -| 0.10.0 | mmcv-full>=1.1.4, \<1.3.0 | Not required | Not required | -| 
0.9.0 | mmcv-full>=1.1.4, \<1.3.0 | Not required | Not required | -| 0.8.0 | mmcv-full>=1.1.4, \<1.2.0 | Not required | Not required | -| 0.7.0 | mmcv-full>=1.1.2, \<1.2.0 | Not required | Not required | -| 0.6.0 | mmcv-full>=1.1.2, \<1.2.0 | Not required | Not required | +| MMSegmentation version | MMCV version | MMEngine version | MMClassification (optional) version | MMDetection (optional) version | +| :--------------------: | :----------------------------: | :---------------: | :---------------------------------: | :----------------------------: | +| dev-1.x branch | mmcv >= 2.0.0rc4 | MMEngine >= 0.2.0 | mmcls>=1.0.0rc0 | mmdet>3.0.0rc5 | +| 1.x branch | mmcv >= 2.0.0rc4 | MMEngine >= 0.2.0 | mmcls>=1.0.0rc0 | mmdet>3.0.0rc5 | +| 1.0.0rc5 | mmcv >= 2.0.0rc4 | MMEngine >= 0.2.0 | mmcls>=1.0.0rc0 | mmdet>3.0.0rc5 | +| 1.0.0rc4 | mmcv == 2.0.0rc3 | MMEngine >= 0.1.0 | mmcls>=1.0.0rc0 | mmdet>=3.0.0rc4, \<=3.0.0rc5 | +| 1.0.0rc3 | mmcv == 2.0.0rc3 | MMEngine >= 0.1.0 | mmcls>=1.0.0rc0 | mmdet>=3.0.0rc4, \<=3.0.0rc5 | +| 1.0.0rc2 | mmcv == 2.0.0rc3 | MMEngine >= 0.1.0 | mmcls>=1.0.0rc0 | mmdet>=3.0.0rc4, \<=3.0.0rc5 | +| 1.0.0rc1 | mmcv >= 2.0.0rc1, \<=2.0.0rc3 | MMEngine >= 0.1.0 | mmcls>=1.0.0rc0 | Not required | +| 1.0.0rc0 | mmcv >= 2.0.0rc1, \<=2.0.0rc3 | MMEngine >= 0.1.0 | mmcls>=1.0.0rc0 | Not required | + +Note: To install MMSegmentation 0.x or the master branch, please refer to [the 0.x FAQ document](https://mmsegmentation.readthedocs.io/en/latest/faq.html#installation) to check the compatible versions of MMCV. 
## How to know the number of GPUs needed to train the model diff --git a/mmseg/__init__.py b/mmseg/__init__.py index 59380655a2..765ff4a042 100644 --- a/mmseg/__init__.py +++ b/mmseg/__init__.py @@ -7,9 +7,9 @@ from .version import __version__, version_info -MMCV_MIN = '2.0.0rc3' -MMCV_MAX = '2.0.0rc3' -MMENGINE_MIN = '0.1.0' +MMCV_MIN = '2.0.0rc4' +MMCV_MAX = '2.1.0' +MMENGINE_MIN = '0.2.0' MMENGINE_MAX = '1.0.0' @@ -58,9 +58,9 @@ def digit_version(version_str: str, length: int = 4): mmcv_version = digit_version(mmcv.__version__) -assert (mmcv_min_version <= mmcv_version <= mmcv_max_version), \ +assert (mmcv_min_version <= mmcv_version < mmcv_max_version), \ f'MMCV=={mmcv.__version__} is used but incompatible. ' \ - f'Please install mmcv==2.0.0rc3.' + f'Please install mmcv>=2.0.0rc4.' mmengine_min_version = digit_version(MMENGINE_MIN) mmengine_max_version = digit_version(MMENGINE_MAX) diff --git a/mmseg/datasets/basesegdataset.py b/mmseg/datasets/basesegdataset.py index e7f96f7d2c..bf433b2094 100644 --- a/mmseg/datasets/basesegdataset.py +++ b/mmseg/datasets/basesegdataset.py @@ -4,6 +4,7 @@ from typing import Callable, Dict, List, Optional, Sequence, Union import mmengine +import mmengine.fileio as fileio import numpy as np from mmengine.dataset import BaseDataset, Compose @@ -72,9 +73,10 @@ class BaseSegDataset(BaseDataset): ignore_index (int): The label index to be ignored. Default: 255 reduce_zero_label (bool): Whether to mark label zero as ignored. Default to False. - file_client_args (dict): Arguments to instantiate a FileClient. - See :class:`mmengine.fileio.FileClient` for details. - Defaults to ``dict(backend='disk')``. + backend_args (dict): Arguments to instantiate a file backend. + See https://mmengine.readthedocs.io/en/latest/api/fileio.htm + for details. Defaults to ``dict(backend='local')`` + Notes: mmcv>=2.0.0rc4, mmengine>=0.2.0 required. 
""" METAINFO: dict = dict() @@ -95,16 +97,14 @@ def __init__( max_refetch: int = 1000, ignore_index: int = 255, reduce_zero_label: bool = False, - file_client_args: dict = dict(backend='disk') + backend_args: dict = dict(backend='local') ) -> None: self.img_suffix = img_suffix self.seg_map_suffix = seg_map_suffix self.ignore_index = ignore_index self.reduce_zero_label = reduce_zero_label - self.file_client_args = file_client_args - self.file_client = mmengine.FileClient.infer_client( - self.file_client_args) + self.backend_args = backend_args.copy() self.data_root = data_root self.data_prefix = copy.copy(data_prefix) @@ -239,7 +239,7 @@ def load_data_list(self) -> List[dict]: ann_dir = self.data_prefix.get('seg_map_path', None) if osp.isfile(self.ann_file): lines = mmengine.list_from_file( - self.ann_file, file_client_args=self.file_client_args) + self.ann_file, backend_args=self.backend_args) for line in lines: img_name = line.strip() data_info = dict( @@ -252,11 +252,12 @@ def load_data_list(self) -> List[dict]: data_info['seg_fields'] = [] data_list.append(data_info) else: - for img in self.file_client.list_dir_or_file( + for img in fileio.list_dir_or_file( dir_path=img_dir, list_dir=False, suffix=self.img_suffix, - recursive=True): + recursive=True, + backend_args=self.backend_args): data_info = dict(img_path=osp.join(img_dir, img)) if ann_dir is not None: seg_map = img.replace(self.img_suffix, self.seg_map_suffix) diff --git a/mmseg/datasets/isaid.py b/mmseg/datasets/isaid.py index d75cfcb7ea..61942ec1ea 100644 --- a/mmseg/datasets/isaid.py +++ b/mmseg/datasets/isaid.py @@ -1,4 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. 
+import mmengine.fileio as fileio + from mmseg.registry import DATASETS from .basesegdataset import BaseSegDataset @@ -33,4 +35,5 @@ def __init__(self, seg_map_suffix=seg_map_suffix, ignore_index=ignore_index, **kwargs) - assert self.file_client.exists(self.data_prefix['img_path']) + assert fileio.exists( + self.data_prefix['img_path'], backend_args=self.backend_args) diff --git a/mmseg/datasets/transforms/loading.py b/mmseg/datasets/transforms/loading.py index 65c0dfec47..5a413717b6 100644 --- a/mmseg/datasets/transforms/loading.py +++ b/mmseg/datasets/transforms/loading.py @@ -3,7 +3,7 @@ from typing import Dict import mmcv -import mmengine +import mmengine.fileio as fileio import numpy as np from mmcv.transforms import BaseTransform from mmcv.transforms import LoadAnnotations as MMCV_LoadAnnotations @@ -54,15 +54,16 @@ class LoadAnnotations(MMCV_LoadAnnotations): argument for :func:``mmcv.imfrombytes``. See :fun:``mmcv.imfrombytes`` for details. Defaults to 'pillow'. - file_client_args (dict): Arguments to instantiate a FileClient. - See :class:``mmcv.fileio.FileClient`` for details. - Defaults to ``dict(backend='disk')``. + backend_args (dict): Arguments to instantiate a file backend. + See https://mmengine.readthedocs.io/en/latest/api/fileio.htm + for details. Defaults to ``dict(backend='local')`` + Notes: mmcv>=2.0.0rc4, mmengine>=0.2.0 required. 
""" def __init__( self, reduce_zero_label=None, - file_client_args=dict(backend='disk'), + backend_args=dict(backend='local'), imdecode_backend='pillow', ) -> None: super().__init__( @@ -71,14 +72,13 @@ def __init__( with_seg=True, with_keypoints=False, imdecode_backend=imdecode_backend, - file_client_args=file_client_args) + backend_args=backend_args) self.reduce_zero_label = reduce_zero_label if self.reduce_zero_label is not None: warnings.warn('`reduce_zero_label` will be deprecated, ' 'if you would like to ignore the zero label, please ' 'set `reduce_zero_label=True` when dataset ' 'initialized') - self.file_client_args = file_client_args.copy() self.imdecode_backend = imdecode_backend def _load_seg_map(self, results: dict) -> None: @@ -91,7 +91,8 @@ def _load_seg_map(self, results: dict) -> None: dict: The dict contains loaded semantic segmentation annotations. """ - img_bytes = self.file_client.get(results['seg_map_path']) + img_bytes = fileio.get( + results['seg_map_path'], backend_args=self.backend_args) gt_semantic_seg = mmcv.imfrombytes( img_bytes, flag='unchanged', backend=self.imdecode_backend).squeeze().astype(np.uint8) @@ -121,9 +122,9 @@ def _load_seg_map(self, results: dict) -> None: def __repr__(self) -> str: repr_str = self.__class__.__name__ - repr_str += f'(reduce_zero_label={self.reduce_zero_label},' - repr_str += f"imdecode_backend='{self.imdecode_backend}')" - repr_str += f'file_client_args={self.file_client_args})' + repr_str += f'(reduce_zero_label={self.reduce_zero_label}, ' + repr_str += f"imdecode_backend='{self.imdecode_backend}', " + repr_str += f'backend_args={self.backend_args})' return repr_str @@ -202,9 +203,10 @@ class LoadBiomedicalImageFromFile(BaseTransform): to_float32 (bool): Whether to convert the loaded image to a float32 numpy array. If set to False, the loaded image is an float64 array. Defaults to True. - file_client_args (dict): Arguments to instantiate a FileClient. 
- See :class:`mmengine.fileio.FileClient` for details. - Defaults to ``dict(backend='disk')``. + backend_args (dict): Arguments to instantiate a file backend. + See https://mmengine.readthedocs.io/en/latest/api/fileio.htm + for details. Defaults to ``dict(backend='local')`` + Notes: mmcv>=2.0.0rc4, mmengine>=0.2.0 required. """ def __init__( @@ -212,13 +214,12 @@ def __init__( decode_backend: str = 'nifti', to_xyz: bool = False, to_float32: bool = True, - file_client_args: dict = dict(backend='disk') + backend_args: dict = dict(backend='local') ) -> None: self.decode_backend = decode_backend self.to_xyz = to_xyz self.to_float32 = to_float32 - self.file_client_args = file_client_args.copy() - self.file_client = mmengine.FileClient(**self.file_client_args) + self.backend_args = backend_args.copy() def transform(self, results: Dict) -> Dict: """Functions to load image. @@ -232,7 +233,7 @@ def transform(self, results: Dict) -> Dict: filename = results['img_path'] - data_bytes = self.file_client.get(filename) + data_bytes = fileio.get(filename, self.backend_args) img = datafrombytes(data_bytes, backend=self.decode_backend) if self.to_float32: @@ -257,7 +258,7 @@ def __repr__(self): f"decode_backend='{self.decode_backend}', " f'to_xyz={self.to_xyz}, ' f'to_float32={self.to_float32}, ' - f'file_client_args={self.file_client_args})') + f'backend_args={self.backend_args})') return repr_str @@ -294,9 +295,10 @@ class LoadBiomedicalAnnotation(BaseTransform): to_float32 (bool): Whether to convert the loaded seg map to a float32 numpy array. If set to False, the loaded image is an float64 array. Defaults to True. - file_client_args (dict): Arguments to instantiate a FileClient. - See :class:`mmengine.fileio.FileClient` for details. - Defaults to ``dict(backend='disk')``. + backend_args (dict): Arguments to instantiate a file backend. + See :class:`mmengine.fileio` for details. + Defaults to ``dict(backend='local')``. + Notes: mmcv>=2.0.0rc4, mmengine>=0.2.0 required. 
""" def __init__( @@ -304,14 +306,13 @@ def __init__( decode_backend: str = 'nifti', to_xyz: bool = False, to_float32: bool = True, - file_client_args: dict = dict(backend='disk') + backend_args: dict = dict(backend='local') ) -> None: super().__init__() self.decode_backend = decode_backend self.to_xyz = to_xyz self.to_float32 = to_float32 - self.file_client_args = file_client_args.copy() - self.file_client = mmengine.FileClient(**self.file_client_args) + self.backend_args = backend_args.copy() def transform(self, results: Dict) -> Dict: """Functions to load image. @@ -322,7 +323,7 @@ def transform(self, results: Dict) -> Dict: Returns: dict: The dict contains loaded image and meta information. """ - data_bytes = self.file_client.get(results['seg_map_path']) + data_bytes = fileio.get(results['seg_map_path'], self.backend_args) gt_seg_map = datafrombytes(data_bytes, backend=self.decode_backend) if self.to_float32: @@ -342,7 +343,7 @@ def __repr__(self): f"decode_backend='{self.decode_backend}', " f'to_xyz={self.to_xyz}, ' f'to_float32={self.to_float32}, ' - f'file_client_args={self.file_client_args})') + f'backend_args={self.backend_args})') return repr_str @@ -383,9 +384,10 @@ class LoadBiomedicalData(BaseTransform): backend is 'nifti'. Defaults to 'nifti'. to_xyz (bool): Whether transpose data from Z, Y, X to X, Y, Z. Defaults to False. - file_client_args (dict): Arguments to instantiate a FileClient. - See :class:`mmengine.fileio.FileClient` for details. - Defaults to ``dict(backend='disk')``. + backend_args (dict): Arguments to instantiate a file backend. + See https://mmengine.readthedocs.io/en/latest/api/fileio.htm + for details. Defaults to ``dict(backend='local')`` + Notes: mmcv>=2.0.0rc4, mmengine>=0.2.0 required. 
""" def __init__( @@ -393,13 +395,12 @@ def __init__( with_seg=False, decode_backend: str = 'numpy', to_xyz: bool = False, - file_client_args: dict = dict(backend='disk') - ) -> None: + backend_args: dict = dict(backend='local') + ) -> None: # noqa self.with_seg = with_seg self.decode_backend = decode_backend self.to_xyz = to_xyz - self.file_client_args = file_client_args.copy() - self.file_client = mmengine.FileClient(**self.file_client_args) + self.backend_args = backend_args.copy() def transform(self, results: Dict) -> Dict: """Functions to load image. @@ -410,7 +411,7 @@ def transform(self, results: Dict) -> Dict: Returns: dict: The dict contains loaded image and meta information. """ - data_bytes = self.file_client.get(results['img_path']) + data_bytes = fileio.get(results['img_path'], self.backend_args) data = datafrombytes(data_bytes, backend=self.decode_backend) # img is 4D data (N, X, Y, Z), N is the number of protocol img = data[:-1, :] @@ -440,5 +441,5 @@ def __repr__(self) -> str: f'with_seg={self.with_seg}, ' f"decode_backend='{self.decode_backend}', " f'to_xyz={self.to_xyz}, ' - f'file_client_args={self.file_client_args})') + f'backend_args={self.backend_args})') return repr_str diff --git a/mmseg/datasets/voc.py b/mmseg/datasets/voc.py index 66f2230788..5e5d6025c0 100644 --- a/mmseg/datasets/voc.py +++ b/mmseg/datasets/voc.py @@ -1,6 +1,8 @@ # Copyright (c) OpenMMLab. All rights reserved. 
import os.path as osp +import mmengine.fileio as fileio + from mmseg.registry import DATASETS from .basesegdataset import BaseSegDataset @@ -34,5 +36,5 @@ def __init__(self, seg_map_suffix=seg_map_suffix, ann_file=ann_file, **kwargs) - assert self.file_client.exists( - self.data_prefix['img_path']) and osp.isfile(self.ann_file) + assert fileio.exists(self.data_prefix['img_path'], + self.backend_args) and osp.isfile(self.ann_file) diff --git a/mmseg/engine/hooks/visualization_hook.py b/mmseg/engine/hooks/visualization_hook.py index 5388a659a8..25aa1cf8b5 100644 --- a/mmseg/engine/hooks/visualization_hook.py +++ b/mmseg/engine/hooks/visualization_hook.py @@ -4,7 +4,7 @@ from typing import Sequence import mmcv -from mmengine.fileio import FileClient +import mmengine.fileio as fileio from mmengine.hooks import Hook from mmengine.runner import Runner @@ -30,9 +30,10 @@ class SegVisualizationHook(Hook): interval (int): The interval of visualization. Defaults to 50. show (bool): Whether to display the drawn image. Default to False. wait_time (float): The interval of show (s). Defaults to 0. - file_client_args (dict): Arguments to instantiate a FileClient. - See :class:`mmengine.fileio.FileClient` for details. - Defaults to ``dict(backend='disk')``. + backend_args (dict): Arguments to instantiate a file backend. + See https://mmengine.readthedocs.io/en/latest/api/fileio.htm + for details. Defaults to ``dict(backend='local')`` + Notes: mmcv>=2.0.0rc4, mmengine>=0.2.0 required. 
""" def __init__(self, @@ -40,7 +41,7 @@ def __init__(self, interval: int = 50, show: bool = False, wait_time: float = 0., - file_client_args: dict = dict(backend='disk')): + backend_args: dict = dict(backend='local')): self._visualizer: SegLocalVisualizer = \ SegLocalVisualizer.get_current_instance() self.interval = interval @@ -54,8 +55,7 @@ def __init__(self, 'needs to be excluded.') self.wait_time = wait_time - self.file_client_args = file_client_args.copy() - self.file_client = None + self.backend_args = backend_args.copy() self.draw = draw if not self.draw: warnings.warn('The draw is False, it means that the ' @@ -81,13 +81,11 @@ def _after_iter(self, if self.draw is False or mode == 'train': return - if self.file_client is None: - self.file_client = FileClient(**self.file_client_args) - if self.every_n_inner_iters(batch_idx, self.interval): for output in outputs: img_path = output.img_path - img_bytes = self.file_client.get(img_path) + img_bytes = fileio.get( + img_path, backend_args=self.backend_args) img = mmcv.imfrombytes(img_bytes, channel_order='rgb') window_name = f'{mode}_{osp.basename(img_path)}' diff --git a/mmseg/models/decode_heads/decode_head.py b/mmseg/models/decode_heads/decode_head.py index 0803715f82..8bdbb24a1c 100644 --- a/mmseg/models/decode_heads/decode_head.py +++ b/mmseg/models/decode_heads/decode_head.py @@ -263,7 +263,7 @@ def loss(self, inputs: Tuple[Tensor], batch_data_samples: SampleList, return losses def predict(self, inputs: Tuple[Tensor], batch_img_metas: List[dict], - test_cfg: ConfigType) -> List[Tensor]: + test_cfg: ConfigType) -> Tensor: """Forward function for prediction. Args: @@ -276,7 +276,7 @@ def predict(self, inputs: Tuple[Tensor], batch_img_metas: List[dict], test_cfg (dict): The testing config. Returns: - List[Tensor]: Outputs segmentation logits map. + Tensor: Outputs segmentation logits map. 
""" seg_logits = self.forward(inputs) diff --git a/mmseg/models/segmentors/base.py b/mmseg/models/segmentors/base.py index d9ffeceb39..25487de5ab 100644 --- a/mmseg/models/segmentors/base.py +++ b/mmseg/models/segmentors/base.py @@ -126,7 +126,7 @@ def _forward(self, def postprocess_result(self, seg_logits: Tensor, - data_samples: OptSampleList = None) -> list: + data_samples: OptSampleList = None) -> SampleList: """ Convert results list to `SegDataSample`. Args: seg_logits (Tensor): The segmentation results, seg_logits from diff --git a/mmseg/models/segmentors/cascade_encoder_decoder.py b/mmseg/models/segmentors/cascade_encoder_decoder.py index f76e66f931..c932b43069 100644 --- a/mmseg/models/segmentors/cascade_encoder_decoder.py +++ b/mmseg/models/segmentors/cascade_encoder_decoder.py @@ -70,7 +70,7 @@ def _init_decode_head(self, decode_head: ConfigType) -> None: self.num_classes = self.decode_head[-1].num_classes def encode_decode(self, inputs: Tensor, - batch_img_metas: List[dict]) -> List[Tensor]: + batch_img_metas: List[dict]) -> Tensor: """Encode images with backbone and decode into a semantic segmentation map of the same size as input.""" x = self.extract_feat(inputs) diff --git a/mmseg/models/segmentors/encoder_decoder.py b/mmseg/models/segmentors/encoder_decoder.py index c4f44ba005..0a8db3ec7d 100644 --- a/mmseg/models/segmentors/encoder_decoder.py +++ b/mmseg/models/segmentors/encoder_decoder.py @@ -120,7 +120,7 @@ def extract_feat(self, inputs: Tensor) -> List[Tensor]: return x def encode_decode(self, inputs: Tensor, - batch_img_metas: List[dict]) -> List[Tensor]: + batch_img_metas: List[dict]) -> Tensor: """Encode images with backbone and decode into a semantic segmentation map of the same size as input.""" x = self.extract_feat(inputs) diff --git a/mmseg/utils/__init__.py b/mmseg/utils/__init__.py index 661796147d..cb1436c198 100644 --- a/mmseg/utils/__init__.py +++ b/mmseg/utils/__init__.py @@ -13,9 +13,9 @@ from .io import datafrombytes from .misc 
import add_prefix, stack_batch from .set_env import register_all_modules -from .typing import (ConfigType, ForwardResults, MultiConfig, OptConfigType, - OptMultiConfig, OptSampleList, SampleList, TensorDict, - TensorList) +from .typing_utils import (ConfigType, ForwardResults, MultiConfig, + OptConfigType, OptMultiConfig, OptSampleList, + SampleList, TensorDict, TensorList) __all__ = [ 'collect_env', 'register_all_modules', 'stack_batch', 'add_prefix', diff --git a/mmseg/utils/misc.py b/mmseg/utils/misc.py index 09d2349c15..0a561732e9 100644 --- a/mmseg/utils/misc.py +++ b/mmseg/utils/misc.py @@ -5,7 +5,7 @@ import torch import torch.nn.functional as F -from .typing import SampleList +from .typing_utils import SampleList def add_prefix(inputs, prefix): diff --git a/mmseg/utils/typing.py b/mmseg/utils/typing_utils.py similarity index 100% rename from mmseg/utils/typing.py rename to mmseg/utils/typing_utils.py diff --git a/mmseg/version.py b/mmseg/version.py index ae61f8bf7b..10ceca8120 100644 --- a/mmseg/version.py +++ b/mmseg/version.py @@ -1,6 +1,6 @@ # Copyright (c) Open-MMLab. All rights reserved. 
-__version__ = '1.0.0rc4' +__version__ = '1.0.0rc5' def parse_version_info(version_str): diff --git a/requirements/mminstall.txt b/requirements/mminstall.txt index 2c8e9d6a22..11a6d5a57f 100644 --- a/requirements/mminstall.txt +++ b/requirements/mminstall.txt @@ -1,4 +1,4 @@ mmcls>=1.0.0rc0 -mmcv==2.0.0rc3 -mmdet==3.0.0rc5 -mmengine>=0.1.0,<1.0.0 +mmcv>=2.0.0rc4 +-e git+https://github.com/open-mmlab/mmdetection.git@dev-3.x#egg=mmdet +mmengine>=0.2.0,<1.0.0 diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py index c768f09ade..7c37204a6c 100644 --- a/tests/test_datasets/test_dataset.py +++ b/tests/test_datasets/test_dataset.py @@ -2,7 +2,6 @@ import os import os.path as osp import tempfile -from unittest.mock import MagicMock import pytest @@ -300,17 +299,15 @@ def test_lip(): def test_custom_classes_override_default(dataset, classes): dataset_class = DATASETS.get(dataset) - if isinstance(dataset_class, PascalVOCDataset): - tmp_file = tempfile.NamedTemporaryFile() - ann_file = f'{tmp_file.name}.txt' - else: - ann_file = MagicMock() - original_classes = dataset_class.METAINFO.get('classes', None) + tmp_file = tempfile.NamedTemporaryFile() + ann_file = tmp_file.name + img_path = tempfile.mkdtemp() + # Test setting classes as a tuple custom_dataset = dataset_class( - data_prefix=dict(img_path=MagicMock()), + data_prefix=dict(img_path=img_path), ann_file=ann_file, metainfo=dict(classes=classes), test_mode=True, @@ -323,7 +320,7 @@ def test_custom_classes_override_default(dataset, classes): # Test setting classes as a list custom_dataset = dataset_class( - data_prefix=dict(img_path=MagicMock()), + data_prefix=dict(img_path=img_path), ann_file=ann_file, metainfo=dict(classes=list(classes)), test_mode=True, @@ -337,7 +334,7 @@ def test_custom_classes_override_default(dataset, classes): # Test overriding not a subset custom_dataset = dataset_class( ann_file=ann_file, - data_prefix=dict(img_path=MagicMock()), + 
data_prefix=dict(img_path=img_path), metainfo=dict(classes=[classes[0]]), test_mode=True, lazy_init=True) @@ -352,13 +349,13 @@ def test_custom_classes_override_default(dataset, classes): with pytest.raises(AssertionError): custom_dataset = dataset_class( ann_file=ann_file, - data_prefix=dict(img_path=MagicMock()), + data_prefix=dict(img_path=img_path), metainfo=None, test_mode=True, lazy_init=True) else: custom_dataset = dataset_class( - data_prefix=dict(img_path=MagicMock()), + data_prefix=dict(img_path=img_path), ann_file=ann_file, metainfo=None, test_mode=True, @@ -371,8 +368,8 @@ def test_custom_classes_override_default(dataset, classes): def test_custom_dataset_random_palette_is_generated(): dataset = BaseSegDataset( pipeline=[], - data_prefix=dict(img_path=MagicMock()), - ann_file=MagicMock(), + data_prefix=dict(img_path=tempfile.mkdtemp()), + ann_file=tempfile.mkdtemp(), metainfo=dict(classes=('bus', 'car')), lazy_init=True, test_mode=True) @@ -384,8 +381,8 @@ def test_custom_dataset_random_palette_is_generated(): def test_custom_dataset_custom_palette(): dataset = BaseSegDataset( - data_prefix=dict(img_path=MagicMock()), - ann_file=MagicMock(), + data_prefix=dict(img_path=tempfile.mkdtemp()), + ann_file=tempfile.mkdtemp(), metainfo=dict( classes=('bus', 'car'), palette=[[100, 100, 100], [200, 200, 200]]), @@ -396,7 +393,7 @@ def test_custom_dataset_custom_palette(): # test custom class and palette don't match with pytest.raises(ValueError): dataset = BaseSegDataset( - data_prefix=dict(img_path=MagicMock()), - ann_file=MagicMock(), + data_prefix=dict(img_path=tempfile.mkdtemp()), + ann_file=tempfile.mkdtemp(), metainfo=dict(classes=('bus', 'car'), palette=[[200, 200, 200]]), lazy_init=True) diff --git a/tests/test_datasets/test_loading.py b/tests/test_datasets/test_loading.py index 3d5569682a..100eb042e2 100644 --- a/tests/test_datasets/test_loading.py +++ b/tests/test_datasets/test_loading.py @@ -30,7 +30,7 @@ def test_load_img(self): assert 
results['ori_shape'] == results['img'].shape[:2] assert repr(transform) == transform.__class__.__name__ + \ "(ignore_empty=False, to_float32=False, color_type='color'," + \ - " imdecode_backend='cv2', file_client_args={'backend': 'disk'})" + " imdecode_backend='cv2', backend_args=None)" # to_float32 transform = LoadImageFromFile(to_float32=True) @@ -57,9 +57,9 @@ def test_load_seg(self): results = transform(copy.deepcopy(results)) assert results['gt_seg_map'].shape == (288, 512) assert results['gt_seg_map'].dtype == np.uint8 - assert repr(transform) == transform.__class__.__name__ + \ - "(reduce_zero_label=True,imdecode_backend='pillow')" + \ - "file_client_args={'backend': 'disk'})" + # assert repr(transform) == transform.__class__.__name__ + \ + # "(reduce_zero_label=True, imdecode_backend='pillow', " + \ + # "backend_args={'backend': 'local'})" # reduce_zero_label transform = LoadAnnotations(reduce_zero_label=True) @@ -225,7 +225,7 @@ def test_load_image_from_ndarray(self): 'to_float32=False, ' "color_type='color', " "imdecode_backend='cv2', " - "file_client_args={'backend': 'disk'})") + 'backend_args=None)') def test_load_biomedical_img(self): results = dict( @@ -241,7 +241,7 @@ def test_load_biomedical_img(self): "decode_backend='nifti', " 'to_xyz=False, ' 'to_float32=True, ' - "file_client_args={'backend': 'disk'})") + "backend_args={'backend': 'local'})") def test_load_biomedical_annotation(self): results = dict( @@ -265,7 +265,7 @@ def test_load_biomedical_data(self): 'with_seg=True, ' "decode_backend='numpy', " 'to_xyz=False, ' - "file_client_args={'backend': 'disk'})") + "backend_args={'backend': 'local'})") transform = LoadBiomedicalData(with_seg=False) results = transform(copy.deepcopy(input_results)) @@ -275,4 +275,4 @@ def test_load_biomedical_data(self): 'with_seg=False, ' "decode_backend='numpy', " 'to_xyz=False, ' - "file_client_args={'backend': 'disk'})") + "backend_args={'backend': 'local'})") diff --git 
a/tests/test_models/test_heads/test_mask2former_head.py b/tests/test_models/test_heads/test_mask2former_head.py index 079e94ed97..45b353d441 100644 --- a/tests/test_models/test_heads/test_mask2former_head.py +++ b/tests/test_models/test_heads/test_mask2former_head.py @@ -25,65 +25,58 @@ def test_mask2former_head(): num_outs=3, norm_cfg=dict(type='GN', num_groups=32), act_cfg=dict(type='ReLU'), - encoder=dict( - type='mmdet.DetrTransformerEncoder', + encoder=dict( # DeformableDetrTransformerEncoder num_layers=6, - transformerlayers=dict( - type='mmdet.BaseTransformerLayer', - attn_cfgs=dict( - type='mmdet.MultiScaleDeformableAttention', + layer_cfg=dict( # DeformableDetrTransformerEncoderLayer + self_attn_cfg=dict( # MultiScaleDeformableAttention embed_dims=256, num_heads=8, num_levels=3, num_points=4, im2col_step=64, dropout=0.0, - batch_first=False, + batch_first=True, norm_cfg=None, init_cfg=None), - ffn_cfgs=dict( - type='FFN', + ffn_cfg=dict( embed_dims=256, feedforward_channels=1024, num_fcs=2, ffn_drop=0.0, - act_cfg=dict(type='ReLU', inplace=True)), - operation_order=('self_attn', 'norm', 'ffn', 'norm')), + act_cfg=dict(type='ReLU', inplace=True))), init_cfg=None), - positional_encoding=dict( - type='mmdet.SinePositionalEncoding', - num_feats=128, - normalize=True), + positional_encoding=dict( # SinePositionalEncoding + num_feats=128, normalize=True), init_cfg=None), enforce_decoder_input_project=False, - positional_encoding=dict( - type='mmdet.SinePositionalEncoding', num_feats=128, - normalize=True), - transformer_decoder=dict( - type='mmdet.DetrTransformerDecoder', + positional_encoding=dict( # SinePositionalEncoding + num_feats=128, normalize=True), + transformer_decoder=dict( # Mask2FormerTransformerDecoder return_intermediate=True, num_layers=9, - transformerlayers=dict( - type='mmdet.DetrTransformerDecoderLayer', - attn_cfgs=dict( - type='mmdet.MultiheadAttention', + layer_cfg=dict( # Mask2FormerTransformerDecoderLayer + self_attn_cfg=dict( # 
MultiheadAttention embed_dims=256, num_heads=8, attn_drop=0.0, proj_drop=0.0, dropout_layer=None, - batch_first=False), - ffn_cfgs=dict( + batch_first=True), + cross_attn_cfg=dict( # MultiheadAttention + embed_dims=256, + num_heads=8, + attn_drop=0.0, + proj_drop=0.0, + dropout_layer=None, + batch_first=True), + ffn_cfg=dict( embed_dims=256, feedforward_channels=2048, num_fcs=2, act_cfg=dict(type='ReLU', inplace=True), ffn_drop=0.0, dropout_layer=None, - add_identity=True), - feedforward_channels=2048, - operation_order=('cross_attn', 'norm', 'self_attn', 'norm', - 'ffn', 'norm')), + add_identity=True)), init_cfg=None), loss_cls=dict( type='mmdet.CrossEntropyLoss',