diff --git a/.dev/md2yml.py b/.dev/md2yml.py
deleted file mode 100755
index fc9c67e470..0000000000
--- a/.dev/md2yml.py
+++ /dev/null
@@ -1,317 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright (c) OpenMMLab. All rights reserved.
-# This tool is used to update model-index.yml which is required by MIM, and
-# will be automatically called as a pre-commit hook. The update is
-# triggered if any change to model information (.md files in configs/) is
-# detected before a commit.
-
-import glob
-import os
-import os.path as osp
-import re
-import sys
-
-from lxml import etree
-from mmengine.fileio import dump
-
-MMSEG_ROOT = osp.dirname(osp.dirname(osp.dirname(__file__)))
-
-COLLECTIONS = [
-    'ANN', 'APCNet', 'BiSeNetV1', 'BiSeNetV2', 'CCNet', 'CGNet', 'DANet',
-    'DeepLabV3', 'DeepLabV3+', 'DMNet', 'DNLNet', 'DPT', 'EMANet', 'EncNet',
-    'ERFNet', 'FastFCN', 'FastSCNN', 'FCN', 'GCNet', 'ICNet', 'ISANet', 'KNet',
-    'NonLocalNet', 'OCRNet', 'PointRend', 'PSANet', 'PSPNet', 'Segformer',
-    'Segmenter', 'FPN', 'SETR', 'STDC', 'UNet', 'UPerNet'
-]
-COLLECTIONS_TEMP = []
-
-
-def dump_yaml_and_check_difference(obj, filename, sort_keys=False):
-    """Dump object to a yaml file, and check if the file content is different
-    from the original.
-
-    Args:
-        obj (any): The python object to be dumped.
-        filename (str): YAML filename to dump the object to.
-        sort_keys (bool): Sort keys by dictionary order.
-    Returns:
-        bool: If the target YAML file is different from the original.
-    """
-
-    str_dump = dump(obj, None, file_format='yaml', sort_keys=sort_keys)
-    if osp.isfile(filename):
-        file_exists = True
-        with open(filename, encoding='utf-8') as f:
-            str_orig = f.read()
-    else:
-        file_exists = False
-        str_orig = None
-
-    if file_exists and str_orig == str_dump:
-        is_different = False
-    else:
-        is_different = True
-        with open(filename, 'w', encoding='utf-8') as f:
-            f.write(str_dump)
-
-    return is_different
-
-
-def parse_md(md_file):
-    """Parse .md file and convert it to a .yml file which can be used for MIM.
-
-    Args:
-        md_file (str): Path to .md file.
-    Returns:
-        bool: If the target YAML file is different from the original.
-    """
-    collection_name = osp.split(osp.dirname(md_file))[1]
-    configs = os.listdir(osp.dirname(md_file))
-
-    collection = dict(
-        Name=collection_name,
-        Metadata={'Training Data': []},
-        Paper={
-            'URL': '',
-            'Title': ''
-        },
-        README=md_file,
-        Code={
-            'URL': '',
-            'Version': ''
-        })
-    collection.update({'Converted From': {'Weights': '', 'Code': ''}})
-    models = []
-    datasets = []
-    paper_url = None
-    paper_title = None
-    code_url = None
-    code_version = None
-    repo_url = None
-
-    # To avoid re-counting backbone models in OpenMMLab: if a model in the
-    # configs folder is a backbone whose name is already recorded in
-    # MMClassification, then the `COLLECTION` dict of this model in
-    # MMSegmentation should be deleted, and `In Collection` in `Models`
-    # should be set to the head or neck of this config file.
-    is_backbone = None
-
-    with open(md_file, encoding='UTF-8') as md:
-        lines = md.readlines()
-        i = 0
-        current_dataset = ''
-        while i < len(lines):
-            line = lines[i].strip()
-            # In the latest README.md the title and url are in the third line.
-            if i == 2:
-                paper_url = lines[i].split('](')[1].split(')')[0]
-                paper_title = lines[i].split('](')[0].split('[')[1]
-            if len(line) == 0:
-                i += 1
-                continue
-            elif line[:3] == '
+def get_collection_name_list(md_file_list: List[str]) -> List[str]:
+    """Get the list of collection names."""
+    collection_name_list: List[str] = []
+    for md_file in md_file_list:
+        with open(md_file) as f:
+            lines = f.readlines()
+            collection_name = lines[0].split('#')[1].strip()
+            collection_name_list.append(collection_name)
+    return collection_name_list
+
+
+def get_md_file_list() -> Tuple[List[str], List[str]]:
+    """Get the list of md files."""
+    md_file_list: List[str] = []
+    md_dir_list: List[str] = []
+    for root, _, files in os.walk(osp.join(MMSEG_ROOT, 'configs')):
+        for file in files:
+            if file.endswith('.md'):
+                md_file_list.append(osp.join(root, file))
+                md_dir_list.append(root)
+                break
+    return md_file_list, md_dir_list
+
+
+def get_model_info(md_file: str, config_dir: str,
+                   collection_name_list: List[str]) -> Tuple[dict, str]:
+    """Get model information from md file."""
+    datasets: List[str] = []
+    models: List[dict] = []
+    current_dataset: str = ''
+    paper_name: str = ''
+    paper_url: str = ''
+    code_url: str = ''
+    is_backbone: bool = False
+    is_dataset: bool = False
+    collection_name: str = ''
+    with open(md_file) as f:
+        lines: List[str] = f.readlines()
+        i: int = 0
+
+        while i < len(lines):
+            line: str = lines[i].strip()
+            if len(line) == 0:
+                i += 1
+                continue
+            # get paper name and url
+            if re.match(r'> \[.*\]+\([a-zA-Z]+://[^\s]*\)', line):
+                paper_info = line.split('](')
+                paper_name = paper_info[0][paper_info[0].index('[') + 1:]
+                paper_url = paper_info[1][:len(paper_info[1]) - 1]
+
+            # get code info
+            if 'Code Snippet' in line:
+                code_url = line.split('"')[1].split('"')[0]
+
+            if line.startswith('
+[figure: DEEPLABv3_ResNet-D8 model structure]
-## Citation - -```bibtext -@article{chen2017rethinking, - title={Rethinking atrous convolution for semantic image segmentation}, - author={Chen, Liang-Chieh and Papandreou, George and Schroff, Florian and Adam, Hartwig}, - journal={arXiv preprint arXiv:1706.05587}, - year={2017} -} -``` - ## Results and models ### Cityscapes -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ---------------- | --------------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| DeepLabV3 | R-50-D8 | 512x1024 | 40000 | 6.1 | 2.57 | 79.09 | 80.45 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes/deeplabv3_r50-d8_512x1024_40k_cityscapes_20200605_022449-acadc2f8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes/deeplabv3_r50-d8_512x1024_40k_cityscapes_20200605_022449.log.json) | -| DeepLabV3 | R-101-D8 | 512x1024 | 40000 | 9.6 | 1.92 | 77.12 | 79.61 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes/deeplabv3_r101-d8_512x1024_40k_cityscapes_20200605_012241-7fd3f799.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes/deeplabv3_r101-d8_512x1024_40k_cityscapes_20200605_012241.log.json) | -| DeepLabV3 | R-50-D8 | 769x769 | 40000 | 6.9 | 1.11 | 78.58 | 79.89 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes/deeplabv3_r50-d8_769x769_40k_cityscapes_20200606_113723-7eda553c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes/deeplabv3_r50-d8_769x769_40k_cityscapes_20200606_113723.log.json) | -| DeepLabV3 | R-101-D8 | 769x769 | 40000 | 10.9 | 0.83 | 79.27 | 80.11 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes/deeplabv3_r101-d8_769x769_40k_cityscapes_20200606_113809-c64f889f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes/deeplabv3_r101-d8_769x769_40k_cityscapes_20200606_113809.log.json) | -| DeepLabV3 | R-18-D8 | 512x1024 | 80000 | 1.7 | 13.78 | 76.70 | 78.27 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r18-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_512x1024_80k_cityscapes/deeplabv3_r18-d8_512x1024_80k_cityscapes_20201225_021506-23dffbe2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_512x1024_80k_cityscapes/deeplabv3_r18-d8_512x1024_80k_cityscapes-20201225_021506.log.json) | -| DeepLabV3 | R-50-D8 | 512x1024 | 80000 | - | - | 79.32 | 80.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes/deeplabv3_r50-d8_512x1024_80k_cityscapes_20200606_113404-b92cfdd4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes/deeplabv3_r50-d8_512x1024_80k_cityscapes_20200606_113404.log.json) | -| DeepLabV3 | R-101-D8 | 512x1024 | 80000 | - | - | 80.20 | 81.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes/deeplabv3_r101-d8_512x1024_80k_cityscapes_20200606_113503-9e428899.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes/deeplabv3_r101-d8_512x1024_80k_cityscapes_20200606_113503.log.json) | -| DeepLabV3 (FP16) | R-101-D8 | 512x1024 | 80000 | 5.75 | 3.86 | 80.48 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230920-774d9cec.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230920.log.json) | -| DeepLabV3 | R-18-D8 | 769x769 | 80000 | 1.9 | 5.55 | 76.60 | 78.26 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r18-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_769x769_80k_cityscapes/deeplabv3_r18-d8_769x769_80k_cityscapes_20201225_021506-6452126a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_769x769_80k_cityscapes/deeplabv3_r18-d8_769x769_80k_cityscapes-20201225_021506.log.json) | -| DeepLabV3 | R-50-D8 | 769x769 | 80000 | - | - | 79.89 | 81.06 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes/deeplabv3_r50-d8_769x769_80k_cityscapes_20200606_221338-788d6228.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes/deeplabv3_r50-d8_769x769_80k_cityscapes_20200606_221338.log.json) | -| DeepLabV3 | R-101-D8 | 769x769 | 80000 | - | - | 79.67 | 80.81 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes/deeplabv3_r101-d8_769x769_80k_cityscapes_20200607_013353-60e95418.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes/deeplabv3_r101-d8_769x769_80k_cityscapes_20200607_013353.log.json) | -| DeepLabV3 | R-101-D16-MG124 | 512x1024 | 40000 | 4.7 | - 6.96 | 76.71 | 78.63 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r101-d16-mg124_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes_20200908_005644-67b0c992.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes-20200908_005644.log.json) | -| DeepLabV3 | R-101-D16-MG124 | 512x1024 | 80000 | - | - | 78.36 | 79.84 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r101-d16-mg124_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes_20200908_005644-57bb8425.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes-20200908_005644.log.json) | -| DeepLabV3 | R-18b-D8 | 512x1024 | 80000 | 1.6 | 13.93 | 76.26 | 77.88 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r18b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes/deeplabv3_r18b-d8_512x1024_80k_cityscapes_20201225_094144-46040cef.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes/deeplabv3_r18b-d8_512x1024_80k_cityscapes-20201225_094144.log.json) | -| DeepLabV3 | R-50b-D8 | 512x1024 | 80000 | 6.0 | 2.74 | 79.63 | 80.98 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r50b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes/deeplabv3_r50b-d8_512x1024_80k_cityscapes_20201225_155148-ec368954.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes/deeplabv3_r50b-d8_512x1024_80k_cityscapes-20201225_155148.log.json) | -| DeepLabV3 | R-101b-D8 | 512x1024 | 80000 | 9.5 | 1.81 | 80.01 | 81.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r101b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes/deeplabv3_r101b-d8_512x1024_80k_cityscapes_20201226_171821-8fd49503.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes/deeplabv3_r101b-d8_512x1024_80k_cityscapes-20201226_171821.log.json) | -| DeepLabV3 | R-18b-D8 | 769x769 | 80000 | 1.8 | 5.79 | 75.63 | 77.51 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r18b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes/deeplabv3_r18b-d8_769x769_80k_cityscapes_20201225_094144-fdc985d9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes/deeplabv3_r18b-d8_769x769_80k_cityscapes-20201225_094144.log.json) | -| DeepLabV3 | R-50b-D8 | 769x769 | 80000 | 6.8 | 1.16 | 78.80 | 80.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r50b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes/deeplabv3_r50b-d8_769x769_80k_cityscapes_20201225_155404-87fb0cf4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes/deeplabv3_r50b-d8_769x769_80k_cityscapes-20201225_155404.log.json) | -| DeepLabV3 | R-101b-D8 | 769x769 | 80000 | 10.7 | 0.82 | 79.41 | 80.73 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r101b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes/deeplabv3_r101b-d8_769x769_80k_cityscapes_20201226_190843-9142ee57.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes/deeplabv3_r101b-d8_769x769_80k_cityscapes-20201226_190843.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------------- | --------------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| DeepLabV3 | R-50-D8 | 512x1024 | 40000 | 6.1 | 2.57 | V100 | 79.09 | 80.45 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes/deeplabv3_r50-d8_512x1024_40k_cityscapes_20200605_022449-acadc2f8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes/deeplabv3_r50-d8_512x1024_40k_cityscapes_20200605_022449.log.json) | +| DeepLabV3 | R-101-D8 | 512x1024 | 40000 | 9.6 | 1.92 | V100 | 77.12 | 79.61 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes/deeplabv3_r101-d8_512x1024_40k_cityscapes_20200605_012241-7fd3f799.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes/deeplabv3_r101-d8_512x1024_40k_cityscapes_20200605_012241.log.json) | +| DeepLabV3 | R-50-D8 | 769x769 | 40000 | 6.9 | 1.11 | V100 | 78.58 | 79.89 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes/deeplabv3_r50-d8_769x769_40k_cityscapes_20200606_113723-7eda553c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes/deeplabv3_r50-d8_769x769_40k_cityscapes_20200606_113723.log.json) | +| DeepLabV3 | R-101-D8 | 769x769 | 40000 | 10.9 | 0.83 | V100 | 79.27 | 80.11 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes/deeplabv3_r101-d8_769x769_40k_cityscapes_20200606_113809-c64f889f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes/deeplabv3_r101-d8_769x769_40k_cityscapes_20200606_113809.log.json) | +| DeepLabV3 | R-18-D8 | 512x1024 | 80000 | 1.7 | 13.78 | V100 | 76.70 | 78.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r18-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_512x1024_80k_cityscapes/deeplabv3_r18-d8_512x1024_80k_cityscapes_20201225_021506-23dffbe2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_512x1024_80k_cityscapes/deeplabv3_r18-d8_512x1024_80k_cityscapes-20201225_021506.log.json) | +| DeepLabV3 | R-50-D8 | 512x1024 | 80000 | - | - | V100 | 79.32 | 80.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes/deeplabv3_r50-d8_512x1024_80k_cityscapes_20200606_113404-b92cfdd4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes/deeplabv3_r50-d8_512x1024_80k_cityscapes_20200606_113404.log.json) | +| DeepLabV3 | R-101-D8 | 512x1024 | 80000 | - | - | V100 | 80.20 | 81.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes/deeplabv3_r101-d8_512x1024_80k_cityscapes_20200606_113503-9e428899.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes/deeplabv3_r101-d8_512x1024_80k_cityscapes_20200606_113503.log.json) | +| DeepLabV3 (FP16) | R-101-D8 | 512x1024 | 80000 | 5.75 | 3.86 | V100 | 80.48 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230920-774d9cec.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230920.log.json) | +| DeepLabV3 | R-18-D8 | 769x769 | 80000 | 1.9 | 5.55 | V100 | 76.60 | 78.26 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r18-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_769x769_80k_cityscapes/deeplabv3_r18-d8_769x769_80k_cityscapes_20201225_021506-6452126a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_769x769_80k_cityscapes/deeplabv3_r18-d8_769x769_80k_cityscapes-20201225_021506.log.json) | +| DeepLabV3 | R-50-D8 | 769x769 | 80000 | - | - | V100 | 79.89 | 81.06 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes/deeplabv3_r50-d8_769x769_80k_cityscapes_20200606_221338-788d6228.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes/deeplabv3_r50-d8_769x769_80k_cityscapes_20200606_221338.log.json) | +| DeepLabV3 | R-101-D8 | 769x769 | 80000 | - | - | V100 | 79.67 | 80.81 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes/deeplabv3_r101-d8_769x769_80k_cityscapes_20200607_013353-60e95418.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes/deeplabv3_r101-d8_769x769_80k_cityscapes_20200607_013353.log.json) | +| DeepLabV3 | R-101-D16-MG124 | 512x1024 | 40000 | 4.7 | 6.96 | V100 | 76.71 | 78.63 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d16-mg124_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes_20200908_005644-67b0c992.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes-20200908_005644.log.json) | +| DeepLabV3 | R-101-D16-MG124 | 512x1024 | 80000 | - | - | V100 | 78.36 | 79.84 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d16-mg124_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes_20200908_005644-57bb8425.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes-20200908_005644.log.json) | +| DeepLabV3 | R-18b-D8 | 512x1024 | 80000 | 1.6 | 13.93 | V100 | 76.26 | 77.88 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r18b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes/deeplabv3_r18b-d8_512x1024_80k_cityscapes_20201225_094144-46040cef.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes/deeplabv3_r18b-d8_512x1024_80k_cityscapes-20201225_094144.log.json) | +| DeepLabV3 | R-50b-D8 | 512x1024 | 80000 | 6.0 | 2.74 | V100 | 79.63 | 80.98 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r50b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes/deeplabv3_r50b-d8_512x1024_80k_cityscapes_20201225_155148-ec368954.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes/deeplabv3_r50b-d8_512x1024_80k_cityscapes-20201225_155148.log.json) | +| DeepLabV3 | R-101b-D8 | 512x1024 | 80000 | 9.5 | 1.81 | V100 | 80.01 | 81.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes/deeplabv3_r101b-d8_512x1024_80k_cityscapes_20201226_171821-8fd49503.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes/deeplabv3_r101b-d8_512x1024_80k_cityscapes-20201226_171821.log.json) | +| DeepLabV3 | R-18b-D8 | 769x769 | 80000 | 1.8 | 5.79 | V100 | 75.63 | 77.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r18b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes/deeplabv3_r18b-d8_769x769_80k_cityscapes_20201225_094144-fdc985d9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes/deeplabv3_r18b-d8_769x769_80k_cityscapes-20201225_094144.log.json) | +| DeepLabV3 | R-50b-D8 | 769x769 | 80000 | 6.8 | 1.16 | V100 | 78.80 | 80.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r50b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes/deeplabv3_r50b-d8_769x769_80k_cityscapes_20201225_155404-87fb0cf4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes/deeplabv3_r50b-d8_769x769_80k_cityscapes-20201225_155404.log.json) | +| DeepLabV3 | R-101b-D8 | 769x769 | 80000 | 10.7 | 0.82 | V100 | 79.41 | 80.73 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes/deeplabv3_r101b-d8_769x769_80k_cityscapes_20201226_190843-9142ee57.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes/deeplabv3_r101b-d8_769x769_80k_cityscapes-20201226_190843.log.json) | ### ADE20K -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| --------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------------------- | 
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| DeepLabV3 | R-50-D8 | 512x512 | 80000 | 8.9 | 14.76 | 42.42 | 43.28 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k/deeplabv3_r50-d8_512x512_80k_ade20k_20200614_185028-0bb3f844.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k/deeplabv3_r50-d8_512x512_80k_ade20k_20200614_185028.log.json) | -| DeepLabV3 | R-101-D8 | 512x512 | 80000 | 12.4 | 10.14 | 44.08 | 45.19 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k/deeplabv3_r101-d8_512x512_80k_ade20k_20200615_021256-d89c7fa4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k/deeplabv3_r101-d8_512x512_80k_ade20k_20200615_021256.log.json) | -| DeepLabV3 | R-50-D8 | 512x512 | 160000 | - | - | 42.66 | 44.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r50-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k/deeplabv3_r50-d8_512x512_160k_ade20k_20200615_123227-5d0ee427.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k/deeplabv3_r50-d8_512x512_160k_ade20k_20200615_123227.log.json) | -| DeepLabV3 | R-101-D8 | 512x512 | 160000 | - | - | 45.00 | 46.66 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k/deeplabv3_r101-d8_512x512_160k_ade20k_20200615_105816-b1f72b3b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k/deeplabv3_r101-d8_512x512_160k_ade20k_20200615_105816.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3 | R-50-D8 | 512x512 | 80000 | 8.9 | 14.76 | V100 | 42.42 | 43.28 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r50-d8_4xb4-80k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k/deeplabv3_r50-d8_512x512_80k_ade20k_20200614_185028-0bb3f844.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k/deeplabv3_r50-d8_512x512_80k_ade20k_20200614_185028.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 80000 | 12.4 | 10.14 | V100 | 44.08 | 45.19 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k/deeplabv3_r101-d8_512x512_80k_ade20k_20200615_021256-d89c7fa4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k/deeplabv3_r101-d8_512x512_80k_ade20k_20200615_021256.log.json) | +| DeepLabV3 | R-50-D8 | 512x512 | 160000 | - | - | V100 | 42.66 | 44.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r50-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k/deeplabv3_r50-d8_512x512_160k_ade20k_20200615_123227-5d0ee427.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k/deeplabv3_r50-d8_512x512_160k_ade20k_20200615_123227.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 160000 | - | - | V100 | 45.00 | 46.66 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k/deeplabv3_r101-d8_512x512_160k_ade20k_20200615_105816-b1f72b3b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k/deeplabv3_r101-d8_512x512_160k_ade20k_20200615_105816.log.json) | ### Pascal VOC 2012 + Aug -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| --------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| DeepLabV3 | R-50-D8 | 512x512 | 20000 | 6.1 | 13.88 | 76.17 | 77.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r50-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug/deeplabv3_r50-d8_512x512_20k_voc12aug_20200617_010906-596905ef.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug/deeplabv3_r50-d8_512x512_20k_voc12aug_20200617_010906.log.json) | -| DeepLabV3 | R-101-D8 | 512x512 | 20000 | 9.6 | 9.81 | 78.70 | 79.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r101-d8_4xb4-20k_voc12aug-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug/deeplabv3_r101-d8_512x512_20k_voc12aug_20200617_010932-8d13832f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug/deeplabv3_r101-d8_512x512_20k_voc12aug_20200617_010932.log.json) | -| DeepLabV3 | R-50-D8 | 512x512 | 40000 | - | - | 77.68 | 78.78 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r50-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug/deeplabv3_r50-d8_512x512_40k_voc12aug_20200613_161546-2ae96e7e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug/deeplabv3_r50-d8_512x512_40k_voc12aug_20200613_161546.log.json) | -| DeepLabV3 | R-101-D8 | 512x512 | 40000 | - | - | 77.92 | 79.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug/deeplabv3_r101-d8_512x512_40k_voc12aug_20200613_161432-0017d784.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug/deeplabv3_r101-d8_512x512_40k_voc12aug_20200613_161432.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3 | R-50-D8 | 512x512 | 20000 | 6.1 | 13.88 | V100 | 76.17 | 77.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r50-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug/deeplabv3_r50-d8_512x512_20k_voc12aug_20200617_010906-596905ef.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug/deeplabv3_r50-d8_512x512_20k_voc12aug_20200617_010906.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 20000 | 9.6 | 9.81 | V100 | 78.70 | 79.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug/deeplabv3_r101-d8_512x512_20k_voc12aug_20200617_010932-8d13832f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug/deeplabv3_r101-d8_512x512_20k_voc12aug_20200617_010932.log.json) | +| DeepLabV3 | R-50-D8 | 512x512 | 40000 | - | - | V100 | 77.68 | 78.78 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r50-d8_4xb4-40k_voc12aug-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug/deeplabv3_r50-d8_512x512_40k_voc12aug_20200613_161546-2ae96e7e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug/deeplabv3_r50-d8_512x512_40k_voc12aug_20200613_161546.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 40000 | - | - | V100 | 77.92 | 79.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug/deeplabv3_r101-d8_512x512_40k_voc12aug_20200613_161432-0017d784.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug/deeplabv3_r101-d8_512x512_40k_voc12aug_20200613_161432.log.json) | ### Pascal Context -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| --------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| DeepLabV3 | R-101-D8 | 480x480 | 40000 | 9.2 | 7.09 | 46.55 | 47.81 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_pascal-context-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context/deeplabv3_r101-d8_480x480_40k_pascal_context_20200911_204118-1aa27336.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context/deeplabv3_r101-d8_480x480_40k_pascal_context-20200911_204118.log.json) | -| DeepLabV3 | R-101-D8 | 480x480 | 80000 | - | - | 46.42 | 47.53 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_pascal-context-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context/deeplabv3_r101-d8_480x480_80k_pascal_context_20200911_170155-2a21fff3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context/deeplabv3_r101-d8_480x480_80k_pascal_context-20200911_170155.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------------------- | 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3 | R-101-D8 | 480x480 | 40000 | 9.2 | 7.09 | V100 | 46.55 | 47.81 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_pascal-context-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context/deeplabv3_r101-d8_480x480_40k_pascal_context_20200911_204118-1aa27336.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context/deeplabv3_r101-d8_480x480_40k_pascal_context-20200911_204118.log.json) | +| DeepLabV3 | R-101-D8 | 480x480 | 80000 | - | - | V100 | 46.42 | 47.53 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_pascal-context-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context/deeplabv3_r101-d8_480x480_80k_pascal_context_20200911_170155-2a21fff3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context/deeplabv3_r101-d8_480x480_80k_pascal_context-20200911_170155.log.json) | ### Pascal Context 59 -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| --------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| DeepLabV3 | R-101-D8 | 480x480 | 40000 | - | - | 52.61 | 54.28 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_pascal-context-59-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context_59/deeplabv3_r101-d8_480x480_40k_pascal_context_59_20210416_110332-cb08ea46.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context_59/deeplabv3_r101-d8_480x480_40k_pascal_context_59-20210416_110332.log.json) | -| DeepLabV3 | R-101-D8 | 480x480 | 80000 | - | - | 52.46 | 54.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_pascal-context-59-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context_59/deeplabv3_r101-d8_480x480_80k_pascal_context_59_20210416_113002-26303993.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context_59/deeplabv3_r101-d8_480x480_80k_pascal_context_59-20210416_113002.log.json) | +| Method | 
Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3 | R-101-D8 | 480x480 | 40000 | - | - | V100 | 52.61 | 54.28 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_pascal-context-59-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context_59/deeplabv3_r101-d8_480x480_40k_pascal_context_59_20210416_110332-cb08ea46.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context_59/deeplabv3_r101-d8_480x480_40k_pascal_context_59-20210416_110332.log.json) | +| DeepLabV3 | R-101-D8 | 480x480 | 80000 | - | - | V100 | 52.46 | 54.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_pascal-context-59-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context_59/deeplabv3_r101-d8_480x480_80k_pascal_context_59_20210416_113002-26303993.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context_59/deeplabv3_r101-d8_480x480_80k_pascal_context_59-20210416_113002.log.json) | ### COCO-Stuff 10k -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| --------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| DeepLabV3 | R-50-D8 | 512x512 | 20000 | 9.6 | 10.8 | 34.66 | 36.08 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r50-d8_4xb4-20k_coco-stuff10k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k_20210821_043025-b35f789d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k_20210821_043025.log.json) | -| DeepLabV3 | R-101-D8 | 512x512 | 20000 | 13.2 | 8.7 | 37.30 | 38.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r101-d8_4xb4-20k_coco-stuff10k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k/deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k_20210821_043025-c49752cb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k/deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k_20210821_043025.log.json) | -| DeepLabV3 | R-50-D8 | 512x512 | 40000 | - | - | 35.73 | 37.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r50-d8_4xb4-40k_coco-stuff10k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k_20210821_043305-dc76f3ff.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k_20210821_043305.log.json) | -| DeepLabV3 | R-101-D8 | 512x512 | 40000 | - | - | 37.81 | 38.80 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_coco-stuff10k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k/deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k_20210821_043305-636cb433.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k/deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k_20210821_043305.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3 | R-50-D8 | 512x512 | 20000 | 9.6 | 10.8 | V100 | 34.66 | 36.08 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r50-d8_4xb4-20k_coco-stuff10k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k_20210821_043025-b35f789d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k_20210821_043025.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 20000 | 13.2 | 8.7 | V100 | 37.30 | 38.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb4-20k_coco-stuff10k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k/deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k_20210821_043025-c49752cb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k/deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k_20210821_043025.log.json) | +| 
DeepLabV3 | R-50-D8 | 512x512 | 40000 | - | - | V100 | 35.73 | 37.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r50-d8_4xb4-40k_coco-stuff10k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k_20210821_043305-dc76f3ff.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k_20210821_043305.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 40000 | - | - | V100 | 37.81 | 38.80 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_coco-stuff10k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k/deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k_20210821_043305-636cb433.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k/deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k_20210821_043305.log.json) | ### COCO-Stuff 164k -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| --------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| DeepLabV3 | R-50-D8 | 512x512 | 80000 | 9.6 | 10.8 | 39.38 | 40.03 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r50-d8_4xb4-80k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k_20210709_163016-88675c24.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k_20210709_163016.log.json) | -| DeepLabV3 | R-101-D8 | 512x512 | 80000 | 13.2 | 8.7 | 40.87 | 41.50 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k_20210709_201252-13600dc2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k_20210709_201252.log.json) | -| DeepLabV3 | R-50-D8 | 512x512 | 160000 | - | - | 41.09 | 41.69 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r50-d8_4xb4-160k_coco-stuff164k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k_20210709_163016-49f2812b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k_20210709_163016.log.json) | -| DeepLabV3 | R-101-D8 | 512x512 | 160000 | - | - | 41.82 | 42.49 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r101-d8_4xb4-160k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k_20210709_155402-f035acfd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k_20210709_155402.log.json) | -| DeepLabV3 | R-50-D8 | 512x512 | 320000 | - | - | 41.37 | 42.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r50-d8_4xb4-320k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k_20210709_155403-51b21115.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k_20210709_155403.log.json) | -| DeepLabV3 | R-101-D8 | 512x512 | 320000 | - | - | 42.61 | 43.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3/deeplabv3_r101-d8_4xb4-320k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_320k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_320k_coco-stuff164k_20210709_155402-3cbca14d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_320k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_320k_coco-stuff164k_20210709_155402.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3 | R-50-D8 | 512x512 | 80000 | 9.6 | 10.8 | V100 | 39.38 | 40.03 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r50-d8_4xb4-80k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k_20210709_163016-88675c24.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k_20210709_163016.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 80000 | 13.2 | 8.7 | V100 | 40.87 | 41.50 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k_20210709_201252-13600dc2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k_20210709_201252.log.json) | +| DeepLabV3 | R-50-D8 | 512x512 | 160000 | - | - | V100 | 41.09 | 41.69 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r50-d8_4xb4-160k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k_20210709_163016-49f2812b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k_20210709_163016.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 160000 | - | - | V100 | 41.82 | 42.49 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb4-160k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k_20210709_155402-f035acfd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k_20210709_155402.log.json) | +| DeepLabV3 | R-50-D8 | 512x512 | 320000 | - | - | V100 | 41.37 | 42.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r50-d8_4xb4-320k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k_20210709_155403-51b21115.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k_20210709_155403.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 320000 | - | - | V100 | 42.61 | 43.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3/deeplabv3_r101-d8_4xb4-320k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_320k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_320k_coco-stuff164k_20210709_155402-3cbca14d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_320k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_320k_coco-stuff164k_20210709_155402.log.json) | Note: - `D-8` here corresponds to the output stride 8 setting for the DeepLab series. - `FP16` means that Mixed Precision (FP16) is adopted in training.
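The `config`/`model` pairs in the rows above can be exercised directly with MMSegmentation's high-level inference API. A minimal sketch, assuming an MMSegmentation 1.x installation run from the repository root; the config/checkpoint pair below is copied from the R-50-D8 80k COCO-Stuff 164k row, and `demo.png` is a placeholder input image:

```python
from mmseg.apis import inference_model, init_model

# Config and checkpoint taken from one row of the COCO-Stuff 164k table above.
config = 'configs/deeplabv3/deeplabv3_r50-d8_4xb4-80k_coco-stuff164k-512x512.py'
checkpoint = (
    'https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/'
    'deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k/'
    'deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k_20210709_163016-88675c24.pth')

# Build the model and load the pretrained weights (the .pth is fetched on first use).
model = init_model(config, checkpoint, device='cuda:0')

# Single-image inference; the returned data sample holds the predicted segmentation map.
result = inference_model(model, 'demo.png')
print(result.pred_sem_seg.data.shape)  # (1, H, W) tensor of class indices
```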
+ +## Citation + +```bibtex +@article{chen2017rethinking, + title={Rethinking atrous convolution for semantic image segmentation}, + author={Chen, Liang-Chieh and Papandreou, George and Schroff, Florian and Adam, Hartwig}, + journal={arXiv preprint arXiv:1706.05587}, + year={2017} +} +``` diff --git a/configs/deeplabv3/deeplabv3.yml b/configs/deeplabv3/deeplabv3.yml deleted file mode 100644 index 6196212992..0000000000 --- a/configs/deeplabv3/deeplabv3.yml +++ /dev/null @@ -1,756 +0,0 @@ -Collections: -- Name: DeepLabV3 - Metadata: - Training Data: - - Cityscapes - - ADE20K - - Pascal VOC 2012 + Aug - - Pascal Context - - Pascal Context 59 - - COCO-Stuff 10k - - COCO-Stuff 164k - Paper: - URL: https://arxiv.org/abs/1706.05587 - Title: Rethinking atrous convolution for semantic image segmentation - README: configs/deeplabv3/README.md - Code: - URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 - Version: v0.17.0 - Converted From: - Code: https://github.com/tensorflow/models/tree/master/research/deeplab -Models: -- Name: deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024 - In Collection: DeepLabV3 - Metadata: - backbone: R-50-D8 - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 389.11 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 6.1 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.09 - mIoU(ms+flip): 80.45 - Config: configs/deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes/deeplabv3_r50-d8_512x1024_40k_cityscapes_20200605_022449-acadc2f8.pth -- Name: deeplabv3_r101-d8_4xb2-40k_cityscapes-512x1024 - In Collection: DeepLabV3 - Metadata: - backbone: R-101-D8 - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 520.83 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 9.6 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 77.12 - mIoU(ms+flip): 79.61 - Config: configs/deeplabv3/deeplabv3_r101-d8_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes/deeplabv3_r101-d8_512x1024_40k_cityscapes_20200605_012241-7fd3f799.pth -- Name: deeplabv3_r50-d8_4xb2-40k_cityscapes-769x769 - In Collection: DeepLabV3 - Metadata: - backbone: R-50-D8 - crop size: (769,769) - lr schd: 40000 - inference time (ms/im): - - value: 900.9 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 6.9 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.58 - mIoU(ms+flip): 79.89 - Config: configs/deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes/deeplabv3_r50-d8_769x769_40k_cityscapes_20200606_113723-7eda553c.pth -- Name: deeplabv3_r101-d8_4xb2-40k_cityscapes-769x769 - In Collection: DeepLabV3 - Metadata: - backbone: R-101-D8 - crop size: (769,769) - lr schd: 40000 - inference time (ms/im): - - value: 1204.82 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 10.9 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU:
79.27 - mIoU(ms+flip): 80.11 - Config: configs/deeplabv3/deeplabv3_r101-d8_4xb2-40k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes/deeplabv3_r101-d8_769x769_40k_cityscapes_20200606_113809-c64f889f.pth -- Name: deeplabv3_r18-d8_4xb2-80k_cityscapes-512x1024 - In Collection: DeepLabV3 - Metadata: - backbone: R-18-D8 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 72.57 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 1.7 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 76.7 - mIoU(ms+flip): 78.27 - Config: configs/deeplabv3/deeplabv3_r18-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_512x1024_80k_cityscapes/deeplabv3_r18-d8_512x1024_80k_cityscapes_20201225_021506-23dffbe2.pth -- Name: deeplabv3_r50-d8_4xb2-80k_cityscapes-512x1024 - In Collection: DeepLabV3 - Metadata: - backbone: R-50-D8 - crop size: (512,1024) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.32 - mIoU(ms+flip): 80.57 - Config: configs/deeplabv3/deeplabv3_r50-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes/deeplabv3_r50-d8_512x1024_80k_cityscapes_20200606_113404-b92cfdd4.pth -- Name: deeplabv3_r101-d8_4xb2-80k_cityscapes-512x1024 - In Collection: DeepLabV3 - Metadata: - backbone: R-101-D8 - crop size: (512,1024) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 80.2 - mIoU(ms+flip): 81.21 - Config: configs/deeplabv3/deeplabv3_r101-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes/deeplabv3_r101-d8_512x1024_80k_cityscapes_20200606_113503-9e428899.pth -- Name: deeplabv3_r101-d8_4xb2-amp-80k_cityscapes-512x1024 - In Collection: DeepLabV3 - Metadata: - backbone: R-101-D8 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 259.07 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: AMP - resolution: (512,1024) - Training Memory (GB): 5.75 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 80.48 - Config: configs/deeplabv3/deeplabv3_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230920-774d9cec.pth -- Name: deeplabv3_r18-d8_4xb2-80k_cityscapes-769x769 - In Collection: DeepLabV3 - Metadata: - backbone: R-18-D8 - crop size: (769,769) - lr schd: 80000 - inference time (ms/im): - - value: 180.18 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 1.9 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 76.6 - mIoU(ms+flip): 78.26 - Config: configs/deeplabv3/deeplabv3_r18-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_769x769_80k_cityscapes/deeplabv3_r18-d8_769x769_80k_cityscapes_20201225_021506-6452126a.pth -- Name: deeplabv3_r50-d8_4xb2-80k_cityscapes-769x769 - In Collection: DeepLabV3 - Metadata: - backbone: R-50-D8 - crop size: (769,769) - lr 
schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.89 - mIoU(ms+flip): 81.06 - Config: configs/deeplabv3/deeplabv3_r50-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes/deeplabv3_r50-d8_769x769_80k_cityscapes_20200606_221338-788d6228.pth -- Name: deeplabv3_r101-d8_4xb2-80k_cityscapes-769x769 - In Collection: DeepLabV3 - Metadata: - backbone: R-101-D8 - crop size: (769,769) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.67 - mIoU(ms+flip): 80.81 - Config: configs/deeplabv3/deeplabv3_r101-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes/deeplabv3_r101-d8_769x769_80k_cityscapes_20200607_013353-60e95418.pth -- Name: deeplabv3_r101-d16-mg124_4xb2-80k_cityscapes-512x1024 - In Collection: DeepLabV3 - Metadata: - backbone: R-101-D16-MG124 - crop size: (512,1024) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.36 - mIoU(ms+flip): 79.84 - Config: configs/deeplabv3/deeplabv3_r101-d16-mg124_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes_20200908_005644-57bb8425.pth -- Name: deeplabv3_r18b-d8_4xb2-80k_cityscapes-512x1024 - In Collection: DeepLabV3 - Metadata: - backbone: R-18b-D8 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 71.79 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 1.6 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 76.26 - mIoU(ms+flip): 77.88 - Config: configs/deeplabv3/deeplabv3_r18b-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes/deeplabv3_r18b-d8_512x1024_80k_cityscapes_20201225_094144-46040cef.pth -- Name: deeplabv3_r50b-d8_4xb2-80k_cityscapes-512x1024 - In Collection: DeepLabV3 - Metadata: - backbone: R-50b-D8 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 364.96 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 6.0 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.63 - mIoU(ms+flip): 80.98 - Config: configs/deeplabv3/deeplabv3_r50b-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes/deeplabv3_r50b-d8_512x1024_80k_cityscapes_20201225_155148-ec368954.pth -- Name: deeplabv3_r101b-d8_4xb2-80k_cityscapes-512x1024 - In Collection: DeepLabV3 - Metadata: - backbone: R-101b-D8 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 552.49 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 9.5 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 80.01 - mIoU(ms+flip): 81.21 - Config: configs/deeplabv3/deeplabv3_r101b-d8_4xb2-80k_cityscapes-512x1024.py - Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes/deeplabv3_r101b-d8_512x1024_80k_cityscapes_20201226_171821-8fd49503.pth -- Name: deeplabv3_r18b-d8_4xb2-80k_cityscapes-769x769 - In Collection: DeepLabV3 - Metadata: - backbone: R-18b-D8 - crop size: (769,769) - lr schd: 80000 - inference time (ms/im): - - value: 172.71 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 1.8 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 75.63 - mIoU(ms+flip): 77.51 - Config: configs/deeplabv3/deeplabv3_r18b-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes/deeplabv3_r18b-d8_769x769_80k_cityscapes_20201225_094144-fdc985d9.pth -- Name: deeplabv3_r50b-d8_4xb2-80k_cityscapes-769x769 - In Collection: DeepLabV3 - Metadata: - backbone: R-50b-D8 - crop size: (769,769) - lr schd: 80000 - inference time (ms/im): - - value: 862.07 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 6.8 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.8 - mIoU(ms+flip): 80.27 - Config: configs/deeplabv3/deeplabv3_r50b-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes/deeplabv3_r50b-d8_769x769_80k_cityscapes_20201225_155404-87fb0cf4.pth -- Name: deeplabv3_r101b-d8_4xb2-80k_cityscapes-769x769 - In Collection: DeepLabV3 - Metadata: - backbone: R-101b-D8 - crop size: (769,769) - lr schd: 80000 - inference time (ms/im): - - value: 1219.51 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 10.7 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.41 - mIoU(ms+flip): 80.73 - Config: configs/deeplabv3/deeplabv3_r101b-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes/deeplabv3_r101b-d8_769x769_80k_cityscapes_20201226_190843-9142ee57.pth -- Name: deeplabv3_r50-d8_4xb4-80k_ade20k-512x512 - In Collection: DeepLabV3 - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 67.75 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 8.9 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 42.42 - mIoU(ms+flip): 43.28 - Config: configs/deeplabv3/deeplabv3_r50-d8_4xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k/deeplabv3_r50-d8_512x512_80k_ade20k_20200614_185028-0bb3f844.pth -- Name: deeplabv3_r101-d8_4xb4-80k_ade20k-512x512 - In Collection: DeepLabV3 - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 98.62 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 12.4 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 44.08 - mIoU(ms+flip): 45.19 - Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_ade20k-512x512.py - Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k/deeplabv3_r101-d8_512x512_80k_ade20k_20200615_021256-d89c7fa4.pth -- Name: deeplabv3_r50-d8_4xb4-160k_ade20k-512x512 - In Collection: DeepLabV3 - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 42.66 - mIoU(ms+flip): 44.09 - Config: configs/deeplabv3/deeplabv3_r50-d8_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k/deeplabv3_r50-d8_512x512_160k_ade20k_20200615_123227-5d0ee427.pth -- Name: deeplabv3_r101-d8_4xb4-160k_ade20k-512x512 - In Collection: DeepLabV3 - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 45.0 - mIoU(ms+flip): 46.66 - Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k/deeplabv3_r101-d8_512x512_160k_ade20k_20200615_105816-b1f72b3b.pth -- Name: deeplabv3_r50-d8_4xb4-20k_voc12aug-512x512 - In Collection: DeepLabV3 - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 20000 - inference time (ms/im): - - value: 72.05 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 6.1 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 76.17 - mIoU(ms+flip): 77.42 - Config: configs/deeplabv3/deeplabv3_r50-d8_4xb4-20k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug/deeplabv3_r50-d8_512x512_20k_voc12aug_20200617_010906-596905ef.pth -- Name: deeplabv3_r101-d8_4xb4-20k_voc12aug-512x512 - In Collection: DeepLabV3 - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 20000 - inference time (ms/im): - - value: 101.94 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 9.6 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 78.7 - mIoU(ms+flip): 79.95 - Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-20k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug/deeplabv3_r101-d8_512x512_20k_voc12aug_20200617_010932-8d13832f.pth -- Name: deeplabv3_r50-d8_4xb4-40k_voc12aug-512x512 - In Collection: DeepLabV3 - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 40000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 77.68 - mIoU(ms+flip): 78.78 - Config: configs/deeplabv3/deeplabv3_r50-d8_4xb4-40k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug/deeplabv3_r50-d8_512x512_40k_voc12aug_20200613_161546-2ae96e7e.pth -- Name: deeplabv3_r101-d8_4xb4-40k_voc12aug-512x512 - In Collection: DeepLabV3 - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 40000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 77.92 - mIoU(ms+flip): 79.18 - Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_voc12aug-512x512.py - Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug/deeplabv3_r101-d8_512x512_40k_voc12aug_20200613_161432-0017d784.pth -- Name: deeplabv3_r101-d8_4xb4-40k_pascal-context-480x480 - In Collection: DeepLabV3 - Metadata: - backbone: R-101-D8 - crop size: (480,480) - lr schd: 40000 - inference time (ms/im): - - value: 141.04 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (480,480) - Training Memory (GB): 9.2 - Results: - - Task: Semantic Segmentation - Dataset: Pascal Context - Metrics: - mIoU: 46.55 - mIoU(ms+flip): 47.81 - Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_pascal-context-480x480.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context/deeplabv3_r101-d8_480x480_40k_pascal_context_20200911_204118-1aa27336.pth -- Name: deeplabv3_r101-d8_4xb4-80k_pascal-context-480x480 - In Collection: DeepLabV3 - Metadata: - backbone: R-101-D8 - crop size: (480,480) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal Context - Metrics: - mIoU: 46.42 - mIoU(ms+flip): 47.53 - Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_pascal-context-480x480.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context/deeplabv3_r101-d8_480x480_80k_pascal_context_20200911_170155-2a21fff3.pth -- Name: deeplabv3_r101-d8_4xb4-40k_pascal-context-59-480x480 - In Collection: DeepLabV3 - Metadata: - backbone: R-101-D8 - crop size: (480,480) - lr schd: 40000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal Context 59 - Metrics: - mIoU: 52.61 - mIoU(ms+flip): 54.28 - Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_pascal-context-59-480x480.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context_59/deeplabv3_r101-d8_480x480_40k_pascal_context_59_20210416_110332-cb08ea46.pth -- Name: deeplabv3_r101-d8_4xb4-80k_pascal-context-59-480x480 - In Collection: DeepLabV3 - Metadata: - backbone: R-101-D8 - crop size: (480,480) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal Context 59 - Metrics: - mIoU: 52.46 - mIoU(ms+flip): 54.09 - Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_pascal-context-59-480x480.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context_59/deeplabv3_r101-d8_480x480_80k_pascal_context_59_20210416_113002-26303993.pth -- Name: deeplabv3_r50-d8_4xb4-20k_coco-stuff10k-512x512 - In Collection: DeepLabV3 - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 20000 - inference time (ms/im): - - value: 92.59 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 9.6 - Results: - - Task: Semantic Segmentation - Dataset: COCO-Stuff 10k - Metrics: - mIoU: 34.66 - mIoU(ms+flip): 36.08 - Config: configs/deeplabv3/deeplabv3_r50-d8_4xb4-20k_coco-stuff10k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k_20210821_043025-b35f789d.pth -- Name: deeplabv3_r101-d8_4xb4-20k_coco-stuff10k-512x512 - In Collection: DeepLabV3 - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 20000 - inference time (ms/im): - - value: 114.94 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - 
Training Memory (GB): 13.2 - Results: - - Task: Semantic Segmentation - Dataset: COCO-Stuff 10k - Metrics: - mIoU: 37.3 - mIoU(ms+flip): 38.42 - Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-20k_coco-stuff10k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k/deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k_20210821_043025-c49752cb.pth -- Name: deeplabv3_r50-d8_4xb4-40k_coco-stuff10k-512x512 - In Collection: DeepLabV3 - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 40000 - Results: - - Task: Semantic Segmentation - Dataset: COCO-Stuff 10k - Metrics: - mIoU: 35.73 - mIoU(ms+flip): 37.09 - Config: configs/deeplabv3/deeplabv3_r50-d8_4xb4-40k_coco-stuff10k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k_20210821_043305-dc76f3ff.pth -- Name: deeplabv3_r101-d8_4xb4-40k_coco-stuff10k-512x512 - In Collection: DeepLabV3 - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 40000 - Results: - - Task: Semantic Segmentation - Dataset: COCO-Stuff 10k - Metrics: - mIoU: 37.81 - mIoU(ms+flip): 38.8 - Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_coco-stuff10k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k/deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k_20210821_043305-636cb433.pth -- Name: deeplabv3_r50-d8_4xb4-80k_coco-stuff164k-512x512 - In Collection: DeepLabV3 - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 92.59 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 9.6 - Results: - - Task: Semantic Segmentation - Dataset: COCO-Stuff 164k - Metrics: - mIoU: 39.38 - mIoU(ms+flip): 40.03 - Config: configs/deeplabv3/deeplabv3_r50-d8_4xb4-80k_coco-stuff164k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k_20210709_163016-88675c24.pth -- Name: deeplabv3_r101-d8_4xb4-80k_coco-stuff164k-512x512 - In Collection: DeepLabV3 - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 114.94 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 13.2 - Results: - - Task: Semantic Segmentation - Dataset: COCO-Stuff 164k - Metrics: - mIoU: 40.87 - mIoU(ms+flip): 41.5 - Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_coco-stuff164k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k_20210709_201252-13600dc2.pth -- Name: deeplabv3_r50-d8_4xb4-160k_coco-stuff164k-512x512 - In Collection: DeepLabV3 - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: COCO-Stuff 164k - Metrics: - mIoU: 41.09 - mIoU(ms+flip): 41.69 - Config: configs/deeplabv3/deeplabv3_r50-d8_4xb4-160k_coco-stuff164k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k_20210709_163016-49f2812b.pth -- Name: 
deeplabv3_r101-d8_4xb4-160k_coco-stuff164k-512x512 - In Collection: DeepLabV3 - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: COCO-Stuff 164k - Metrics: - mIoU: 41.82 - mIoU(ms+flip): 42.49 - Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-160k_coco-stuff164k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k_20210709_155402-f035acfd.pth -- Name: deeplabv3_r50-d8_4xb4-320k_coco-stuff164k-512x512 - In Collection: DeepLabV3 - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 320000 - Results: - - Task: Semantic Segmentation - Dataset: COCO-Stuff 164k - Metrics: - mIoU: 41.37 - mIoU(ms+flip): 42.22 - Config: configs/deeplabv3/deeplabv3_r50-d8_4xb4-320k_coco-stuff164k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k_20210709_155403-51b21115.pth -- Name: deeplabv3_r101-d8_4xb4-320k_coco-stuff164k-512x512 - In Collection: DeepLabV3 - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 320000 - Results: - - Task: Semantic Segmentation - Dataset: COCO-Stuff 164k - Metrics: - mIoU: 42.61 - mIoU(ms+flip): 43.42 - Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-320k_coco-stuff164k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_320k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_320k_coco-stuff164k_20210709_155402-3cbca14d.pth diff --git a/configs/deeplabv3/metafile.yaml b/configs/deeplabv3/metafile.yaml new file mode 100644 index 0000000000..650f7d695d --- /dev/null +++ b/configs/deeplabv3/metafile.yaml @@ -0,0 +1,985 @@ +Collections: +- Name: DeepLabV3 + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + - Pascal Context + - Pascal Context 59 + - COCO-Stuff 10k + - COCO-Stuff 164k + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + README: configs/deeplabv3/README.md + Frameworks: + - PyTorch +Models: +- Name: deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.09 + mIoU(ms+flip): 80.45 + Config: configs/deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 6.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes/deeplabv3_r50-d8_512x1024_40k_cityscapes_20200605_022449-acadc2f8.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes/deeplabv3_r50-d8_512x1024_40k_cityscapes_20200605_022449.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb2-40k_cityscapes-512x1024 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.12 + mIoU(ms+flip): 79.61 + Config: 
configs/deeplabv3/deeplabv3_r101-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 9.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes/deeplabv3_r101-d8_512x1024_40k_cityscapes_20200605_012241-7fd3f799.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes/deeplabv3_r101-d8_512x1024_40k_cityscapes_20200605_012241.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r50-d8_4xb2-40k_cityscapes-769x769 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.58 + mIoU(ms+flip): 79.89 + Config: configs/deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 6.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes/deeplabv3_r50-d8_769x769_40k_cityscapes_20200606_113723-7eda553c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes/deeplabv3_r50-d8_769x769_40k_cityscapes_20200606_113723.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb2-40k_cityscapes-769x769 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.27 + mIoU(ms+flip): 80.11 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 10.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes/deeplabv3_r101-d8_769x769_40k_cityscapes_20200606_113809-c64f889f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes/deeplabv3_r101-d8_769x769_40k_cityscapes_20200606_113809.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r18-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.7 + mIoU(ms+flip): 78.27 + Config: configs/deeplabv3/deeplabv3_r18-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 1.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_512x1024_80k_cityscapes/deeplabv3_r18-d8_512x1024_80k_cityscapes_20201225_021506-23dffbe2.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_512x1024_80k_cityscapes/deeplabv3_r18-d8_512x1024_80k_cityscapes-20201225_021506.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r50-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.32 + mIoU(ms+flip): 80.57 + Config: configs/deeplabv3/deeplabv3_r50-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes/deeplabv3_r50-d8_512x1024_80k_cityscapes_20200606_113404-b92cfdd4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes/deeplabv3_r50-d8_512x1024_80k_cityscapes_20200606_113404.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.2 + mIoU(ms+flip): 81.21 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes/deeplabv3_r101-d8_512x1024_80k_cityscapes_20200606_113503-9e428899.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes/deeplabv3_r101-d8_512x1024_80k_cityscapes_20200606_113503.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb2-amp-80k_cityscapes-512x1024 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.48 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DeepLabV3 + - (FP16) + Training Resources: 4x V100 GPUS + Memory (GB): 5.75 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230920-774d9cec.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230920.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: 
deeplabv3_r18-d8_4xb2-80k_cityscapes-769x769 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.6 + mIoU(ms+flip): 78.26 + Config: configs/deeplabv3/deeplabv3_r18-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 1.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_769x769_80k_cityscapes/deeplabv3_r18-d8_769x769_80k_cityscapes_20201225_021506-6452126a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_769x769_80k_cityscapes/deeplabv3_r18-d8_769x769_80k_cityscapes-20201225_021506.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r50-d8_4xb2-80k_cityscapes-769x769 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.89 + mIoU(ms+flip): 81.06 + Config: configs/deeplabv3/deeplabv3_r50-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes/deeplabv3_r50-d8_769x769_80k_cityscapes_20200606_221338-788d6228.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes/deeplabv3_r50-d8_769x769_80k_cityscapes_20200606_221338.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb2-80k_cityscapes-769x769 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.67 + mIoU(ms+flip): 80.81 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes/deeplabv3_r101-d8_769x769_80k_cityscapes_20200607_013353-60e95418.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes/deeplabv3_r101-d8_769x769_80k_cityscapes_20200607_013353.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d16-mg124_4xb2-40k_cityscapes-512x1024 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.71 + mIoU(ms+flip): 78.63 + Config: configs/deeplabv3/deeplabv3_r101-d16-mg124_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D16-MG124 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 4.7 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes_20200908_005644-67b0c992.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes-20200908_005644.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d16-mg124_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.36 + mIoU(ms+flip): 79.84 + Config: configs/deeplabv3/deeplabv3_r101-d16-mg124_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D16-MG124 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes_20200908_005644-57bb8425.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes-20200908_005644.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r18b-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.26 + mIoU(ms+flip): 77.88 + Config: configs/deeplabv3/deeplabv3_r18b-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18b-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 1.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes/deeplabv3_r18b-d8_512x1024_80k_cityscapes_20201225_094144-46040cef.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes/deeplabv3_r18b-d8_512x1024_80k_cityscapes-20201225_094144.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r50b-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.63 + mIoU(ms+flip): 80.98 + Config: configs/deeplabv3/deeplabv3_r50b-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50b-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 6.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes/deeplabv3_r50b-d8_512x1024_80k_cityscapes_20201225_155148-ec368954.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes/deeplabv3_r50b-d8_512x1024_80k_cityscapes-20201225_155148.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101b-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.01 + mIoU(ms+flip): 81.21 + Config: configs/deeplabv3/deeplabv3_r101b-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101b-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 9.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes/deeplabv3_r101b-d8_512x1024_80k_cityscapes_20201226_171821-8fd49503.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes/deeplabv3_r101b-d8_512x1024_80k_cityscapes-20201226_171821.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r18b-d8_4xb2-80k_cityscapes-769x769 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.63 + mIoU(ms+flip): 77.51 + Config: configs/deeplabv3/deeplabv3_r18b-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18b-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 1.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes/deeplabv3_r18b-d8_769x769_80k_cityscapes_20201225_094144-fdc985d9.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes/deeplabv3_r18b-d8_769x769_80k_cityscapes-20201225_094144.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r50b-d8_4xb2-80k_cityscapes-769x769 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.8 + mIoU(ms+flip): 80.27 + Config: configs/deeplabv3/deeplabv3_r50b-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50b-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 6.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes/deeplabv3_r50b-d8_769x769_80k_cityscapes_20201225_155404-87fb0cf4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes/deeplabv3_r50b-d8_769x769_80k_cityscapes-20201225_155404.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- 
Name: deeplabv3_r101b-d8_4xb2-80k_cityscapes-769x769 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.41 + mIoU(ms+flip): 80.73 + Config: configs/deeplabv3/deeplabv3_r101b-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101b-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 10.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes/deeplabv3_r101b-d8_769x769_80k_cityscapes_20201226_190843-9142ee57.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes/deeplabv3_r101b-d8_769x769_80k_cityscapes-20201226_190843.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r50-d8_4xb4-80k_ade20k-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.42 + mIoU(ms+flip): 43.28 + Config: configs/deeplabv3/deeplabv3_r50-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 8.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k/deeplabv3_r50-d8_512x512_80k_ade20k_20200614_185028-0bb3f844.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k/deeplabv3_r50-d8_512x512_80k_ade20k_20200614_185028.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb4-80k_ade20k-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 44.08 + mIoU(ms+flip): 45.19 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 12.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k/deeplabv3_r101-d8_512x512_80k_ade20k_20200615_021256-d89c7fa4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k/deeplabv3_r101-d8_512x512_80k_ade20k_20200615_021256.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r50-d8_4xb4-160k_ade20k-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.66 + mIoU(ms+flip): 44.09 + Config: configs/deeplabv3/deeplabv3_r50-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k/deeplabv3_r50-d8_512x512_160k_ade20k_20200615_123227-5d0ee427.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k/deeplabv3_r50-d8_512x512_160k_ade20k_20200615_123227.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb4-160k_ade20k-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.0 + mIoU(ms+flip): 46.66 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k/deeplabv3_r101-d8_512x512_160k_ade20k_20200615_105816-b1f72b3b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k/deeplabv3_r101-d8_512x512_160k_ade20k_20200615_105816.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r50-d8_4xb4-20k_voc12aug-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.17 + mIoU(ms+flip): 77.42 + Config: configs/deeplabv3/deeplabv3_r50-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 6.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug/deeplabv3_r50-d8_512x512_20k_voc12aug_20200617_010906-596905ef.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug/deeplabv3_r50-d8_512x512_20k_voc12aug_20200617_010906.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb4-20k_voc12aug-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 78.7 + mIoU(ms+flip): 79.95 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 9.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug/deeplabv3_r101-d8_512x512_20k_voc12aug_20200617_010932-8d13832f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug/deeplabv3_r101-d8_512x512_20k_voc12aug_20200617_010932.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + 
Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r50-d8_4xb4-40k_voc12aug-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.68 + mIoU(ms+flip): 78.78 + Config: configs/deeplabv3/deeplabv3_r50-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug/deeplabv3_r50-d8_512x512_40k_voc12aug_20200613_161546-2ae96e7e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug/deeplabv3_r50-d8_512x512_40k_voc12aug_20200613_161546.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb4-40k_voc12aug-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.92 + mIoU(ms+flip): 79.18 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug/deeplabv3_r101-d8_512x512_40k_voc12aug_20200613_161432-0017d784.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug/deeplabv3_r101-d8_512x512_40k_voc12aug_20200613_161432.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb4-40k_pascal-context-480x480 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Pascal Context + Metrics: + mIoU: 46.55 + mIoU(ms+flip): 47.81 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_pascal-context-480x480.py + Metadata: + Training Data: Pascal Context + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 9.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context/deeplabv3_r101-d8_480x480_40k_pascal_context_20200911_204118-1aa27336.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context/deeplabv3_r101-d8_480x480_40k_pascal_context-20200911_204118.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb4-80k_pascal-context-480x480 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Pascal Context + Metrics: + mIoU: 46.42 + mIoU(ms+flip): 47.53 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_pascal-context-480x480.py + Metadata: + 
Training Data: Pascal Context + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context/deeplabv3_r101-d8_480x480_80k_pascal_context_20200911_170155-2a21fff3.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context/deeplabv3_r101-d8_480x480_80k_pascal_context-20200911_170155.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb4-40k_pascal-context-59-480x480 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Pascal Context 59 + Metrics: + mIoU: 52.61 + mIoU(ms+flip): 54.28 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_pascal-context-59-480x480.py + Metadata: + Training Data: Pascal Context 59 + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context_59/deeplabv3_r101-d8_480x480_40k_pascal_context_59_20210416_110332-cb08ea46.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context_59/deeplabv3_r101-d8_480x480_40k_pascal_context_59-20210416_110332.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb4-80k_pascal-context-59-480x480 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Pascal Context 59 + Metrics: + mIoU: 52.46 + mIoU(ms+flip): 54.09 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_pascal-context-59-480x480.py + Metadata: + Training Data: Pascal Context 59 + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context_59/deeplabv3_r101-d8_480x480_80k_pascal_context_59_20210416_113002-26303993.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context_59/deeplabv3_r101-d8_480x480_80k_pascal_context_59-20210416_113002.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r50-d8_4xb4-20k_coco-stuff10k-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 10k + Metrics: + mIoU: 34.66 + mIoU(ms+flip): 36.08 + Config: configs/deeplabv3/deeplabv3_r50-d8_4xb4-20k_coco-stuff10k-512x512.py + Metadata: + Training Data: COCO-Stuff 10k + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 9.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k_20210821_043025-b35f789d.pth + Training 
log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k_20210821_043025.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb4-20k_coco-stuff10k-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 10k + Metrics: + mIoU: 37.3 + mIoU(ms+flip): 38.42 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-20k_coco-stuff10k-512x512.py + Metadata: + Training Data: COCO-Stuff 10k + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 13.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k/deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k_20210821_043025-c49752cb.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k/deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k_20210821_043025.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r50-d8_4xb4-40k_coco-stuff10k-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 10k + Metrics: + mIoU: 35.73 + mIoU(ms+flip): 37.09 + Config: configs/deeplabv3/deeplabv3_r50-d8_4xb4-40k_coco-stuff10k-512x512.py + Metadata: + Training Data: COCO-Stuff 10k + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k_20210821_043305-dc76f3ff.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k_20210821_043305.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb4-40k_coco-stuff10k-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 10k + Metrics: + mIoU: 37.81 + mIoU(ms+flip): 38.8 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-40k_coco-stuff10k-512x512.py + Metadata: + Training Data: COCO-Stuff 10k + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k/deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k_20210821_043305-636cb433.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k/deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k_20210821_043305.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: 
https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r50-d8_4xb4-80k_coco-stuff164k-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 39.38 + mIoU(ms+flip): 40.03 + Config: configs/deeplabv3/deeplabv3_r50-d8_4xb4-80k_coco-stuff164k-512x512.py + Metadata: + Training Data: COCO-Stuff 164k + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 9.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k_20210709_163016-88675c24.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k_20210709_163016.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb4-80k_coco-stuff164k-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 40.87 + mIoU(ms+flip): 41.5 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-80k_coco-stuff164k-512x512.py + Metadata: + Training Data: COCO-Stuff 164k + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 13.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k_20210709_201252-13600dc2.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k_20210709_201252.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r50-d8_4xb4-160k_coco-stuff164k-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 41.09 + mIoU(ms+flip): 41.69 + Config: configs/deeplabv3/deeplabv3_r50-d8_4xb4-160k_coco-stuff164k-512x512.py + Metadata: + Training Data: COCO-Stuff 164k + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k_20210709_163016-49f2812b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k_20210709_163016.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb4-160k_coco-stuff164k-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 41.82 + 
mIoU(ms+flip): 42.49 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-160k_coco-stuff164k-512x512.py + Metadata: + Training Data: COCO-Stuff 164k + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k_20210709_155402-f035acfd.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k_20210709_155402.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r50-d8_4xb4-320k_coco-stuff164k-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 41.37 + mIoU(ms+flip): 42.22 + Config: configs/deeplabv3/deeplabv3_r50-d8_4xb4-320k_coco-stuff164k-512x512.py + Metadata: + Training Data: COCO-Stuff 164k + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k_20210709_155403-51b21115.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k_20210709_155403.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch +- Name: deeplabv3_r101-d8_4xb4-320k_coco-stuff164k-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 42.61 + mIoU(ms+flip): 43.42 + Config: configs/deeplabv3/deeplabv3_r101-d8_4xb4-320k_coco-stuff164k-512x512.py + Metadata: + Training Data: COCO-Stuff 164k + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_320k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_320k_coco-stuff164k_20210709_155402-3cbca14d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_320k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_320k_coco-stuff164k_20210709_155402.log.json + Paper: + Title: Rethinking atrous convolution for semantic image segmentation + URL: https://arxiv.org/abs/1706.05587 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Framework: PyTorch diff --git a/configs/deeplabv3plus/README.md b/configs/deeplabv3plus/README.md index b3d3ce7678..04d01fa512 100644 --- a/configs/deeplabv3plus/README.md +++ b/configs/deeplabv3plus/README.md @@ -1,6 +1,6 @@ # DeepLabV3+ -[Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation](https://arxiv.org/abs/1802.02611) +> [Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation](https://arxiv.org/abs/1802.02611) ## Introduction @@ -22,107 +22,102 @@ Spatial 
pyramid pooling module or encode-decoder structure are used in deep neur
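Each metafile entry above pairs a training recipe (`Config`, `Metadata`) with its released artifacts (`Weights`, `Training log`) and benchmark numbers (`Results`), which is what lets MIM and other downstream tools look models up by name or dataset. A minimal sketch of querying such a file programmatically, assuming the OpenMMLab convention of a top-level `Models:` list; the metafile path and dataset string below are illustrative, not part of this diff:

```python
# Minimal sketch: query a metafile shaped like the entries above.
# Assumes a top-level `Models:` list (OpenMMLab metafile convention);
# the path passed in under __main__ is hypothetical.
import yaml


def models_for_dataset(metafile_path, dataset):
    """Return (mIoU, name, weights_url) tuples for one dataset, best first."""
    with open(metafile_path, encoding='utf-8') as f:
        meta = yaml.safe_load(f)
    rows = []
    for model in meta.get('Models', []):
        results = model.get('Results') or []
        if isinstance(results, dict):  # tolerate a single mapping
            results = [results]
        for result in results:
            if result.get('Dataset') == dataset:
                miou = result.get('Metrics', {}).get('mIoU')
                rows.append((miou, model['Name'], model.get('Weights')))
    return sorted(rows, key=lambda row: row[0] or 0.0, reverse=True)


if __name__ == '__main__':
    for miou, name, url in models_for_dataset(
            'configs/deeplabv3/deeplabv3.yml', 'COCO-Stuff 164k'):
        print(f'{miou:5.2f}  {name}')
```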
-## Citation - -```bibtex -@inproceedings{deeplabv3plus2018, - title={Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation}, - author={Liang-Chieh Chen and Yukun Zhu and George Papandreou and Florian Schroff and Hartwig Adam}, - booktitle={ECCV}, - year={2018} -} -``` - ## Results and models ### Cityscapes -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ----------------- | --------------- | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| DeepLabV3+ | R-50-D8 | 512x1024 | 40000 | 7.5 | 3.94 | 79.61 | 81.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes/deeplabv3plus_r50-d8_512x1024_40k_cityscapes_20200605_094610-d222ffcd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes/deeplabv3plus_r50-d8_512x1024_40k_cityscapes_20200605_094610.log.json) | -| DeepLabV3+ | R-101-D8 | 512x1024 | 40000 | 11 | 2.60 | 80.21 | 81.82 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes/deeplabv3plus_r101-d8_512x1024_40k_cityscapes_20200605_094614-3769eecf.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes/deeplabv3plus_r101-d8_512x1024_40k_cityscapes_20200605_094614.log.json) | -| DeepLabV3+ | R-50-D8 | 769x769 | 40000 | 8.5 | 1.72 | 78.97 | 80.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes/deeplabv3plus_r50-d8_769x769_40k_cityscapes_20200606_114143-1dcb0e3c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes/deeplabv3plus_r50-d8_769x769_40k_cityscapes_20200606_114143.log.json) | -| DeepLabV3+ | R-101-D8 | 769x769 | 40000 | 12.5 | 1.15 | 79.46 | 80.50 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes/deeplabv3plus_r101-d8_769x769_40k_cityscapes_20200606_114304-ff414b9e.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes/deeplabv3plus_r101-d8_769x769_40k_cityscapes_20200606_114304.log.json) | -| DeepLabV3+ | R-18-D8 | 512x1024 | 80000 | 2.2 | 14.27 | 76.89 | 78.76 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes/deeplabv3plus_r18-d8_512x1024_80k_cityscapes_20201226_080942-cff257fe.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes/deeplabv3plus_r18-d8_512x1024_80k_cityscapes-20201226_080942.log.json) | -| DeepLabV3+ | R-50-D8 | 512x1024 | 80000 | - | - | 80.09 | 81.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes/deeplabv3plus_r50-d8_512x1024_80k_cityscapes_20200606_114049-f9fb496d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes/deeplabv3plus_r50-d8_512x1024_80k_cityscapes_20200606_114049.log.json) | -| DeepLabV3+ | R-101-D8 | 512x1024 | 80000 | - | - | 80.97 | 82.03 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_512x1024_80k_cityscapes_20200606_114143-068fcfe9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_512x1024_80k_cityscapes_20200606_114143.log.json) | -| DeepLabV3+ (FP16) | R-101-D8 | 512x1024 | 80000 | 6.35 | 7.87 | 80.46 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230920-f1104f4b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230920.log.json) | -| DeepLabV3+ | R-18-D8 | 769x769 | 80000 | 2.5 | 5.74 | 76.26 | 77.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_769x769_80k_cityscapes/deeplabv3plus_r18-d8_769x769_80k_cityscapes_20201226_083346-f326e06a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_769x769_80k_cityscapes/deeplabv3plus_r18-d8_769x769_80k_cityscapes-20201226_083346.log.json) | -| DeepLabV3+ | R-50-D8 | 769x769 | 80000 | - | - | 79.83 | 81.48 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-80k_cityscapes-769x769.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes/deeplabv3plus_r50-d8_769x769_80k_cityscapes_20200606_210233-0e9dfdc4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes/deeplabv3plus_r50-d8_769x769_80k_cityscapes_20200606_210233.log.json) | -| DeepLabV3+ | R-101-D8 | 769x769 | 80000 | - | - | 80.65 | 81.47 | [config\[1\]](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes/deeplabv3plus_r101-d8_769x769_80k_cityscapes_20220406_154720-dfcc0b68.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes/deeplabv3plus_r101-d8_769x769_80k_cityscapes_20220406_154720.log.json) | -| DeepLabV3+ | R-101-D16-MG124 | 512x1024 | 40000 | 5.8 | 7.48 | 79.09 | 80.36 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/ddeeplabv3plus_r101-d16-mg124_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes_20200908_005644-cf9ce186.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes-20200908_005644.log.json) | -| DeepLabV3+ | R-101-D16-MG124 | 512x1024 | 80000 | 9.9 | - | 79.90 | 81.33 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes_20200908_005644-ee6158e0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes-20200908_005644.log.json) | -| DeepLabV3+ | R-18b-D8 | 512x1024 | 80000 | 2.1 | 14.95 | 75.87 | 77.52 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r18b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes_20201226_090828-e451abd9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes-20201226_090828.log.json) | -| DeepLabV3+ | R-50b-D8 | 512x1024 | 80000 | 7.4 | 3.94 | 80.28 | 81.44 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r50b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes_20201225_213645-a97e4e43.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes-20201225_213645.log.json) | -| DeepLabV3+ | R-101b-D8 | 512x1024 | 80000 | 
10.9 | 2.60 | 80.16 | 81.41 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r101b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes_20201226_190843-9c3c93a4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes-20201226_190843.log.json) | -| DeepLabV3+ | R-18b-D8 | 769x769 | 80000 | 2.4 | 5.96 | 76.36 | 78.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r18b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_769x769_80k_cityscapes/deeplabv3plus_r18b-d8_769x769_80k_cityscapes_20201226_151312-2c868aff.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_769x769_80k_cityscapes/deeplabv3plus_r18b-d8_769x769_80k_cityscapes-20201226_151312.log.json) | -| DeepLabV3+ | R-50b-D8 | 769x769 | 80000 | 8.4 | 1.72 | 79.41 | 80.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r50b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_769x769_80k_cityscapes/deeplabv3plus_r50b-d8_769x769_80k_cityscapes_20201225_224655-8b596d1c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_769x769_80k_cityscapes/deeplabv3plus_r50b-d8_769x769_80k_cityscapes-20201225_224655.log.json) | -| DeepLabV3+ | R-101b-D8 | 769x769 | 80000 | 12.3 | 1.10 | 79.88 | 81.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r101b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_769x769_80k_cityscapes/deeplabv3plus_r101b-d8_769x769_80k_cityscapes_20201226_205041-227cdf7c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_769x769_80k_cityscapes/deeplabv3plus_r101b-d8_769x769_80k_cityscapes-20201226_205041.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ----------------- | --------------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| DeepLabV3+ | R-50-D8 | 512x1024 | 40000 | 7.5 | 3.94 | V100 | 79.61 | 81.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes/deeplabv3plus_r50-d8_512x1024_40k_cityscapes_20200605_094610-d222ffcd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes/deeplabv3plus_r50-d8_512x1024_40k_cityscapes_20200605_094610.log.json) | +| DeepLabV3+ | R-101-D8 | 512x1024 | 40000 | 11 | 2.60 | V100 | 80.21 | 81.82 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes/deeplabv3plus_r101-d8_512x1024_40k_cityscapes_20200605_094614-3769eecf.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes/deeplabv3plus_r101-d8_512x1024_40k_cityscapes_20200605_094614.log.json) | +| DeepLabV3+ | R-50-D8 | 769x769 | 40000 | 8.5 | 1.72 | V100 | 78.97 | 80.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes/deeplabv3plus_r50-d8_769x769_40k_cityscapes_20200606_114143-1dcb0e3c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes/deeplabv3plus_r50-d8_769x769_40k_cityscapes_20200606_114143.log.json) | +| DeepLabV3+ | R-101-D8 | 769x769 | 40000 | 12.5 | 1.15 | V100 | 79.46 | 80.50 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes/deeplabv3plus_r101-d8_769x769_40k_cityscapes_20200606_114304-ff414b9e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes/deeplabv3plus_r101-d8_769x769_40k_cityscapes_20200606_114304.log.json) | +| DeepLabV3+ | R-18-D8 | 512x1024 | 80000 | 2.2 | 14.27 | V100 | 76.89 | 78.76 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes/deeplabv3plus_r18-d8_512x1024_80k_cityscapes_20201226_080942-cff257fe.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes/deeplabv3plus_r18-d8_512x1024_80k_cityscapes-20201226_080942.log.json) | +| DeepLabV3+ | R-50-D8 | 512x1024 | 80000 | - | - | V100 | 80.09 | 81.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes/deeplabv3plus_r50-d8_512x1024_80k_cityscapes_20200606_114049-f9fb496d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes/deeplabv3plus_r50-d8_512x1024_80k_cityscapes_20200606_114049.log.json) | +| DeepLabV3+ | R-101-D8 | 512x1024 | 80000 | - | - | V100 | 80.97 | 82.03 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_512x1024_80k_cityscapes_20200606_114143-068fcfe9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_512x1024_80k_cityscapes_20200606_114143.log.json) | +| DeepLabV3+ (FP16) | R-101-D8 | 512x1024 | 80000 | 6.35 | 7.87 | V100 | 80.46 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230920-f1104f4b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230920.log.json) | +| DeepLabV3+ | R-18-D8 | 769x769 | 80000 | 2.5 | 5.74 | V100 | 76.26 | 77.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_769x769_80k_cityscapes/deeplabv3plus_r18-d8_769x769_80k_cityscapes_20201226_083346-f326e06a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_769x769_80k_cityscapes/deeplabv3plus_r18-d8_769x769_80k_cityscapes-20201226_083346.log.json) | +| DeepLabV3+ | R-50-D8 | 769x769 | 80000 | - | - | V100 | 79.83 | 81.48 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes/deeplabv3plus_r50-d8_769x769_80k_cityscapes_20200606_210233-0e9dfdc4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes/deeplabv3plus_r50-d8_769x769_80k_cityscapes_20200606_210233.log.json) | +| DeepLabV3+ | R-101-D8 | 769x769 | 80000 | - | - | V100 | 80.65 | 81.47 | [config\[1\]](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes/deeplabv3plus_r101-d8_769x769_80k_cityscapes_20220406_154720-dfcc0b68.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes/deeplabv3plus_r101-d8_769x769_80k_cityscapes_20220406_154720.log.json) | +| DeepLabV3+ | R-101-D16-MG124 | 512x1024 | 40000 | 5.8 | 7.48 | V100 | 79.09 | 80.36 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes_20200908_005644-cf9ce186.pth) \|
[log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes-20200908_005644.log.json) | +| DeepLabV3+ | R-101-D16-MG124 | 512x1024 | 80000 | 9.9 | - | V100 | 79.90 | 81.33 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes_20200908_005644-ee6158e0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes-20200908_005644.log.json) | +| DeepLabV3+ | R-18b-D8 | 512x1024 | 80000 | 2.1 | 14.95 | V100 | 75.87 | 77.52 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r18b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes_20201226_090828-e451abd9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes-20201226_090828.log.json) | +| DeepLabV3+ | R-50b-D8 | 512x1024 | 80000 | 7.4 | 3.94 | V100 | 80.28 | 81.44 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r50b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes_20201225_213645-a97e4e43.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes-20201225_213645.log.json) | +| DeepLabV3+ | R-101b-D8 | 512x1024 | 80000 | 10.9 | 2.60 | V100 | 80.16 | 81.41 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes_20201226_190843-9c3c93a4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes-20201226_190843.log.json) | +| DeepLabV3+ | R-18b-D8 | 769x769 | 80000 | 2.4 | 5.96 | V100 | 76.36 | 78.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r18b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_769x769_80k_cityscapes/deeplabv3plus_r18b-d8_769x769_80k_cityscapes_20201226_151312-2c868aff.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_769x769_80k_cityscapes/deeplabv3plus_r18b-d8_769x769_80k_cityscapes-20201226_151312.log.json) | +| DeepLabV3+ | R-50b-D8 | 769x769 | 80000 | 8.4 | 1.72 | V100 | 79.41 | 80.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r50b-d8_4xb2-80k_cityscapes-769x769.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_769x769_80k_cityscapes/deeplabv3plus_r50b-d8_769x769_80k_cityscapes_20201225_224655-8b596d1c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_769x769_80k_cityscapes/deeplabv3plus_r50b-d8_769x769_80k_cityscapes-20201225_224655.log.json) | +| DeepLabV3+ | R-101b-D8 | 769x769 | 80000 | 12.3 | 1.10 | V100 | 79.88 | 81.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_769x769_80k_cityscapes/deeplabv3plus_r101b-d8_769x769_80k_cityscapes_20201226_205041-227cdf7c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_769x769_80k_cityscapes/deeplabv3plus_r101b-d8_769x769_80k_cityscapes-20201226_205041.log.json) | \[1\] The training of the model is sensitive to random seed, and the seed to train it is 1111. ### ADE20K -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ---------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| DeepLabV3+ | R-50-D8 | 512x512 | 80000 | 10.6 | 21.01 | 42.72 | 43.75 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k/deeplabv3plus_r50-d8_512x512_80k_ade20k_20200614_185028-bf1400d8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k/deeplabv3plus_r50-d8_512x512_80k_ade20k_20200614_185028.log.json) | -| DeepLabV3+ | R-101-D8 | 512x512 | 80000 | 14.1 | 14.16 | 44.60 | 46.06 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k/deeplabv3plus_r101-d8_512x512_80k_ade20k_20200615_014139-d5730af7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k/deeplabv3plus_r101-d8_512x512_80k_ade20k_20200615_014139.log.json) | -| DeepLabV3+ | R-50-D8 | 512x512 | 160000 | - | - | 43.95 | 44.93 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k/deeplabv3plus_r50-d8_512x512_160k_ade20k_20200615_124504-6135c7e0.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k/deeplabv3plus_r50-d8_512x512_160k_ade20k_20200615_124504.log.json) | -| DeepLabV3+ | R-101-D8 | 512x512 | 160000 | - | - | 45.47 | 46.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232-38ed86bb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3+ | R-50-D8 | 512x512 | 80000 | 10.6 | 21.01 | V100 | 42.72 | 43.75 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k/deeplabv3plus_r50-d8_512x512_80k_ade20k_20200614_185028-bf1400d8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k/deeplabv3plus_r50-d8_512x512_80k_ade20k_20200614_185028.log.json) | +| DeepLabV3+ | R-101-D8 | 512x512 | 80000 | 14.1 | 14.16 | V100 | 44.60 | 46.06 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k/deeplabv3plus_r101-d8_512x512_80k_ade20k_20200615_014139-d5730af7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k/deeplabv3plus_r101-d8_512x512_80k_ade20k_20200615_014139.log.json) | +| DeepLabV3+ | R-50-D8 | 512x512 | 160000 | - | - | V100 | 43.95 | 44.93 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k/deeplabv3plus_r50-d8_512x512_160k_ade20k_20200615_124504-6135c7e0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k/deeplabv3plus_r50-d8_512x512_160k_ade20k_20200615_124504.log.json) | +| DeepLabV3+ | R-101-D8 | 512x512 | 160000 | - | - | V100 | 45.47 | 46.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-160k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232-38ed86bb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232.log.json) | ### Pascal VOC 2012 + Aug -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ---------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| DeepLabV3+ | R-50-D8 | 512x512 | 20000 | 7.6 | 21 | 75.93 | 77.50 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug/deeplabv3plus_r50-d8_512x512_20k_voc12aug_20200617_102323-aad58ef1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug/deeplabv3plus_r50-d8_512x512_20k_voc12aug_20200617_102323.log.json) | -| DeepLabV3+ | R-101-D8 | 512x512 | 20000 | 11 | 13.88 | 77.22 | 78.59 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug/deeplabv3plus_r101-d8_512x512_20k_voc12aug_20200617_102345-c7ff3d56.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug/deeplabv3plus_r101-d8_512x512_20k_voc12aug_20200617_102345.log.json) | -| DeepLabV3+ | R-50-D8 | 512x512 | 40000 | - | - | 76.81 | 77.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug/deeplabv3plus_r50-d8_512x512_40k_voc12aug_20200613_161759-e1b43aa9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug/deeplabv3plus_r50-d8_512x512_40k_voc12aug_20200613_161759.log.json) | -| DeepLabV3+ | R-101-D8 | 512x512 | 40000 | - | - | 78.62 | 79.53 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug/deeplabv3plus_r101-d8_512x512_40k_voc12aug_20200613_205333-faf03387.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug/deeplabv3plus_r101-d8_512x512_40k_voc12aug_20200613_205333.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time 
(fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3+ | R-50-D8 | 512x512 | 20000 | 7.6 | 21 | V100 | 75.93 | 77.50 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug/deeplabv3plus_r50-d8_512x512_20k_voc12aug_20200617_102323-aad58ef1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug/deeplabv3plus_r50-d8_512x512_20k_voc12aug_20200617_102323.log.json) | +| DeepLabV3+ | R-101-D8 | 512x512 | 20000 | 11 | 13.88 | V100 | 77.22 | 78.59 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug/deeplabv3plus_r101-d8_512x512_20k_voc12aug_20200617_102345-c7ff3d56.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug/deeplabv3plus_r101-d8_512x512_20k_voc12aug_20200617_102345.log.json) | +| DeepLabV3+ | R-50-D8 | 512x512 | 40000 | - | - | V100 | 76.81 | 77.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug/deeplabv3plus_r50-d8_512x512_40k_voc12aug_20200613_161759-e1b43aa9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug/deeplabv3plus_r50-d8_512x512_40k_voc12aug_20200613_161759.log.json) | +| DeepLabV3+ | R-101-D8 | 512x512 | 40000 | - | - | V100 | 78.62 | 79.53 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug/deeplabv3plus_r101-d8_512x512_40k_voc12aug_20200613_205333-faf03387.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug/deeplabv3plus_r101-d8_512x512_40k_voc12aug_20200613_205333.log.json) | ### Pascal Context -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ---------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------------------- | 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| DeepLabV3+ | R-101-D8 | 480x480 | 40000 | - | 9.09 | 47.30 | 48.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-40k_pascal-context-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context/deeplabv3plus_r101-d8_480x480_40k_pascal_context_20200911_165459-d3c8a29e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context/deeplabv3plus_r101-d8_480x480_40k_pascal_context-20200911_165459.log.json) | -| DeepLabV3+ | R-101-D8 | 480x480 | 80000 | - | - | 47.23 | 48.26 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_pascal-context-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context/deeplabv3plus_r101-d8_480x480_80k_pascal_context_20200911_155322-145d3ee8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context/deeplabv3plus_r101-d8_480x480_80k_pascal_context-20200911_155322.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3+ | R-101-D8 | 480x480 | 40000 | - | 9.09 | V100 | 47.30 | 48.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-40k_pascal-context-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context/deeplabv3plus_r101-d8_480x480_40k_pascal_context_20200911_165459-d3c8a29e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context/deeplabv3plus_r101-d8_480x480_40k_pascal_context-20200911_165459.log.json) | +| DeepLabV3+ | R-101-D8 | 480x480 | 80000 | - | - | V100 | 47.23 | 48.26 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_pascal-context-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context/deeplabv3plus_r101-d8_480x480_80k_pascal_context_20200911_155322-145d3ee8.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context/deeplabv3plus_r101-d8_480x480_80k_pascal_context-20200911_155322.log.json) | ### Pascal Context 59 -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ---------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| DeepLabV3+ | R-101-D8 | 480x480 | 40000 | - | - | 52.86 | 54.54 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-40k_pascal-context-59-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59_20210416_111233-ed937f15.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59-20210416_111233.log.json) | -| DeepLabV3+ | R-101-D8 | 480x480 | 80000 | - | - | 53.2 | 54.67 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_pascal-context-59-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59_20210416_111127-7ca0331d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59-20210416_111127.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3+ | R-101-D8 | 480x480 | 40000 | - | - | V100 | 52.86 | 54.54 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-40k_pascal-context-59-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59_20210416_111233-ed937f15.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59-20210416_111233.log.json) | +| DeepLabV3+ | R-101-D8 | 480x480 | 80000 | - | - | V100 | 53.2 | 54.67 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_pascal-context-59-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59_20210416_111127-7ca0331d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59-20210416_111127.log.json) | ### LoveDA -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ---------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| DeepLabV3+ | R-18-D8 | 512x512 | 80000 | 1.93 | 25.57 | 50.28 | 50.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_loveda-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_loveda/deeplabv3plus_r18-d8_512x512_80k_loveda_20211104_132800-ce0fa0ca.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_loveda/deeplabv3plus_r18-d8_512x512_80k_loveda_20211104_132800.log.json) | -| DeepLabV3+ | R-50-D8 | 512x512 | 80000 | 7.37 | 6.00 | 50.99 | 50.65 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_loveda-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_loveda/deeplabv3plus_r50-d8_512x512_80k_loveda_20211105_080442-f0720392.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_loveda/deeplabv3plus_r50-d8_512x512_80k_loveda_20211105_080442.log.json) | -| DeepLabV3+ | R-101-D8 | 512x512 | 80000 | 10.84 | 4.33 | 51.47 | 51.32 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_loveda-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_loveda/deeplabv3plus_r101-d8_512x512_80k_loveda_20211105_110759-4c1f297e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_loveda/deeplabv3plus_r101-d8_512x512_80k_loveda_20211105_110759.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | 
------------: | --------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| DeepLabV3+ | R-18-D8 | 512x512 | 80000 | 1.93 | 25.57 | V100 | 50.28 | 50.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_loveda-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_loveda/deeplabv3plus_r18-d8_512x512_80k_loveda_20211104_132800-ce0fa0ca.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_loveda/deeplabv3plus_r18-d8_512x512_80k_loveda_20211104_132800.log.json) | +| DeepLabV3+ | R-50-D8 | 512x512 | 80000 | 7.37 | 6.00 | V100 | 50.99 | 50.65 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_loveda-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_loveda/deeplabv3plus_r50-d8_512x512_80k_loveda_20211105_080442-f0720392.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_loveda/deeplabv3plus_r50-d8_512x512_80k_loveda_20211105_080442.log.json) | +| DeepLabV3+ | R-101-D8 | 512x512 | 80000 | 10.84 | 4.33 | V100 | 51.47 | 51.32 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_loveda-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_loveda/deeplabv3plus_r101-d8_512x512_80k_loveda_20211105_110759-4c1f297e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_loveda/deeplabv3plus_r101-d8_512x512_80k_loveda_20211105_110759.log.json) | ### Potsdam -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ---------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| DeepLabV3+ | R-18-D8 | 512x512 | 80000 | 1.91 | 81.68 | 77.09 | 78.44 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_potsdam-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_potsdam/deeplabv3plus_r18-d8_512x512_80k_potsdam_20211219_020601-75fd5bc3.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_potsdam/deeplabv3plus_r18-d8_512x512_80k_potsdam_20211219_020601.log.json) | -| DeepLabV3+ | R-50-D8 | 512x512 | 80000 | 7.36 | 26.44 | 78.33 | 79.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_potsdam-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_potsdam/deeplabv3plus_r50-d8_512x512_80k_potsdam_20211219_031508-7e7a2b24.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_potsdam/deeplabv3plus_r50-d8_512x512_80k_potsdam_20211219_031508.log.json) | -| DeepLabV3+ | R-101-D8 | 512x512 | 80000 | 10.83 | 17.56 | 78.7 | 79.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_potsdam-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_potsdam/deeplabv3plus_r101-d8_512x512_80k_potsdam_20211219_031508-8b112708.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_potsdam/deeplabv3plus_r101-d8_512x512_80k_potsdam_20211219_031508.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3+ | R-18-D8 | 512x512 | 80000 | 1.91 | 81.68 | V100 | 77.09 | 78.44 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_potsdam-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_potsdam/deeplabv3plus_r18-d8_512x512_80k_potsdam_20211219_020601-75fd5bc3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_potsdam/deeplabv3plus_r18-d8_512x512_80k_potsdam_20211219_020601.log.json) | +| DeepLabV3+ | R-50-D8 | 512x512 | 80000 | 7.36 | 26.44 | V100 | 78.33 | 79.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_potsdam-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_potsdam/deeplabv3plus_r50-d8_512x512_80k_potsdam_20211219_031508-7e7a2b24.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_potsdam/deeplabv3plus_r50-d8_512x512_80k_potsdam_20211219_031508.log.json) | +| DeepLabV3+ | R-101-D8 | 512x512 | 80000 | 10.83 | 17.56 | V100 | 78.7 | 79.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_potsdam-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_potsdam/deeplabv3plus_r101-d8_512x512_80k_potsdam_20211219_031508-8b112708.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_potsdam/deeplabv3plus_r101-d8_512x512_80k_potsdam_20211219_031508.log.json) | ### Vaihingen -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ---------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| DeepLabV3+ | R-18-D8 | 512x512 | 80000 | 1.91 | 72.79 | 72.50 | 74.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_vaihingen-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen_20211231_230805-7626a263.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen_20211231_230805.log.json) | -| DeepLabV3+ | R-50-D8 | 512x512 | 80000 | 7.36 | 26.91 | 73.97 | 75.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_vaihingen-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen_20211231_230816-5040938d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen_20211231_230816.log.json) | -| DeepLabV3+ | R-101-D8 | 512x512 | 80000 | 10.83 | 18.59 | 73.06 | 74.14 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_vaihingen-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen_20211231_230816-8a095afa.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen_20211231_230816.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------------ | 
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3+ | R-18-D8 | 512x512 | 80000 | 1.91 | 72.79 | V100 | 72.50 | 74.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_vaihingen-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen_20211231_230805-7626a263.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen_20211231_230805.log.json) | +| DeepLabV3+ | R-50-D8 | 512x512 | 80000 | 7.36 | 26.91 | V100 | 73.97 | 75.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_vaihingen-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen_20211231_230816-5040938d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen_20211231_230816.log.json) | +| DeepLabV3+ | R-101-D8 | 512x512 | 80000 | 10.83 | 18.59 | V100 | 73.06 | 74.14 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_vaihingen-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen_20211231_230816-8a095afa.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen_20211231_230816.log.json) | ### iSAID -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ---------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| DeepLabV3+ | R-18-D8 | 896x896 | 80000 | 6.19 | 24.81 | 61.35 | 62.61 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_isaid-896x896.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_4x4_896x896_80k_isaid/deeplabv3plus_r18-d8_4x4_896x896_80k_isaid_20220110_180526-7059991d.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_4x4_896x896_80k_isaid/deeplabv3plus_r18-d8_4x4_896x896_80k_isaid_20220110_180526.log.json) | -| DeepLabV3+ | R-50-D8 | 896x896 | 80000 | 21.45 | 8.42 | 67.06 | 68.02 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_isaid-896x896.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid_20220110_180526-598be439.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid_20220110_180526.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3+ | R-18-D8 | 896x896 | 80000 | 6.19 | 24.81 | V100 | 61.35 | 62.61 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_isaid-896x896.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_4x4_896x896_80k_isaid/deeplabv3plus_r18-d8_4x4_896x896_80k_isaid_20220110_180526-7059991d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_4x4_896x896_80k_isaid/deeplabv3plus_r18-d8_4x4_896x896_80k_isaid_20220110_180526.log.json) | +| DeepLabV3+ | R-50-D8 | 896x896 | 80000 | 21.45 | 8.42 | V100 | 67.06 | 68.02 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_isaid-896x896.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid_20220110_180526-598be439.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid_20220110_180526.log.json) | + +### Mapillary Vistas v1.2 + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 
+| DeepLabV3+ | R-50-D8 | 1280x1280 | 300000 | 24.04 | 17.92 | A100 | 47.35 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-300k_mapillay_v1_65-1280x1280.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-300k_mapillay_v1_65-1280x1280/deeplabv3plus_r50-d8_4xb2-300k_mapillay_v1_65-1280x1280_20230301_110504-655f8e43.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-300k_mapillay_v1_65-1280x1280/deeplabv3plus_r50-d8_4xb2-300k_mapillay_v1_65-1280x1280_20230301_110504.json) | Note: @@ -130,3 +125,14 @@ Note: - `MG-124` stands for multi-grid dilation in the last stage of ResNet. - `FP16` means Mixed Precision (FP16) is adopted in training. - `896x896` is the Crop Size of the iSAID dataset, following the implementation of [PointFlow: Flowing Semantics Through Points for Aerial Image Segmentation](https://arxiv.org/pdf/2103.06564.pdf) + +## Citation + +```bibtex +@inproceedings{deeplabv3plus2018, + title={Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation}, + author={Liang-Chieh Chen and Yukun Zhu and George Papandreou and Florian Schroff and Hartwig Adam}, + booktitle={ECCV}, + year={2018} +} +``` diff --git a/configs/deeplabv3plus/deeplabv3plus.yml b/configs/deeplabv3plus/deeplabv3plus.yml deleted file mode 100644 index 755c1fd4be..0000000000 --- a/configs/deeplabv3plus/deeplabv3plus.yml +++ /dev/null @@ -1,850 +0,0 @@ -Collections: -- Name: DeepLabV3+ - Metadata: - Training Data: - - Cityscapes - - ADE20K - - Pascal VOC 2012 + Aug - - Pascal Context - - Pascal Context 59 - - LoveDA - - Potsdam - - Vaihingen - - iSAID - Paper: - URL: https://arxiv.org/abs/1802.02611 - Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation - README: configs/deeplabv3plus/README.md - Code: - URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 - Version: v0.17.0 - Converted From: - Code: https://github.com/tensorflow/models/tree/master/research/deeplab -Models: -- Name: deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024 - In Collection: DeepLabV3+ - Metadata: - backbone: R-50-D8 - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 253.81 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 7.5 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.61 - mIoU(ms+flip): 81.01 - Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes/deeplabv3plus_r50-d8_512x1024_40k_cityscapes_20200605_094610-d222ffcd.pth -- Name: deeplabv3plus_r101-d8_4xb2-40k_cityscapes-512x1024 - In Collection: DeepLabV3+ - Metadata: - backbone: R-101-D8 - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 384.62 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 11.0 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 80.21 - mIoU(ms+flip): 81.82 - Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-40k_cityscapes-512x1024.py - Weights:
https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes/deeplabv3plus_r101-d8_512x1024_40k_cityscapes_20200605_094614-3769eecf.pth -- Name: deeplabv3plus_r50-d8_4xb2-40k_cityscapes-769x769 - In Collection: DeepLabV3+ - Metadata: - backbone: R-50-D8 - crop size: (769,769) - lr schd: 40000 - inference time (ms/im): - - value: 581.4 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 8.5 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.97 - mIoU(ms+flip): 80.46 - Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes/deeplabv3plus_r50-d8_769x769_40k_cityscapes_20200606_114143-1dcb0e3c.pth -- Name: deeplabv3plus_r101-d8_4xb2-40k_cityscapes-769x769 - In Collection: DeepLabV3+ - Metadata: - backbone: R-101-D8 - crop size: (769,769) - lr schd: 40000 - inference time (ms/im): - - value: 869.57 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 12.5 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.46 - mIoU(ms+flip): 80.5 - Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-40k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes/deeplabv3plus_r101-d8_769x769_40k_cityscapes_20200606_114304-ff414b9e.pth -- Name: deeplabv3plus_r18-d8_4xb2-80k_cityscapes-512x1024 - In Collection: DeepLabV3+ - Metadata: - backbone: R-18-D8 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 70.08 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 2.2 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 76.89 - mIoU(ms+flip): 78.76 - Config: configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes/deeplabv3plus_r18-d8_512x1024_80k_cityscapes_20201226_080942-cff257fe.pth -- Name: deeplabv3plus_r50-d8_4xb2-80k_cityscapes-512x1024 - In Collection: DeepLabV3+ - Metadata: - backbone: R-50-D8 - crop size: (512,1024) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 80.09 - mIoU(ms+flip): 81.13 - Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes/deeplabv3plus_r50-d8_512x1024_80k_cityscapes_20200606_114049-f9fb496d.pth -- Name: deeplabv3plus_r101-d8_4xb2-80k_cityscapes-512x1024 - In Collection: DeepLabV3+ - Metadata: - backbone: R-101-D8 - crop size: (512,1024) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 80.97 - mIoU(ms+flip): 82.03 - Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_512x1024_80k_cityscapes_20200606_114143-068fcfe9.pth -- Name: deeplabv3plus_r101-d8_4xb2-amp-80k_cityscapes-512x1024 - In Collection: DeepLabV3+ - Metadata: - backbone: 
R-101-D8 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 127.06 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: AMP - resolution: (512,1024) - Training Memory (GB): 6.35 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 80.46 - Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230920-f1104f4b.pth -- Name: deeplabv3plus_r18-d8_4xb2-80k_cityscapes-769x769 - In Collection: DeepLabV3+ - Metadata: - backbone: R-18-D8 - crop size: (769,769) - lr schd: 80000 - inference time (ms/im): - - value: 174.22 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 2.5 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 76.26 - mIoU(ms+flip): 77.91 - Config: configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_769x769_80k_cityscapes/deeplabv3plus_r18-d8_769x769_80k_cityscapes_20201226_083346-f326e06a.pth -- Name: deeplabv3plus_r50-d8_4xb2-80k_cityscapes-769x769 - In Collection: DeepLabV3+ - Metadata: - backbone: R-50-D8 - crop size: (769,769) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.83 - mIoU(ms+flip): 81.48 - Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes/deeplabv3plus_r50-d8_769x769_80k_cityscapes_20200606_210233-0e9dfdc4.pth -- Name: deeplabv3plus_r101-d8_4xb2-80k_cityscapes-769x769 - In Collection: DeepLabV3+ - Metadata: - backbone: R-101-D8 - crop size: (769,769) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 80.65 - mIoU(ms+flip): 81.47 - Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes/deeplabv3plus_r101-d8_769x769_80k_cityscapes_20220406_154720-dfcc0b68.pth -- Name: deeplabv3plus_r101-d16-mg124_4xb2-40k_cityscapes-512x1024 - In Collection: DeepLabV3+ - Metadata: - backbone: R-101-D16-MG124 - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 133.69 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 5.8 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.09 - mIoU(ms+flip): 80.36 - Config: configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes_20200908_005644-cf9ce186.pth -- Name: deeplabv3plus_r101-d16-mg124_4xb2-80k_cityscapes-512x1024 - In Collection: DeepLabV3+ - Metadata: - backbone: R-101-D16-MG124 - crop size: (512,1024) - lr schd: 80000 - Training Memory (GB): 9.9 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.9 - mIoU(ms+flip): 81.33 - Config: 
configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes_20200908_005644-ee6158e0.pth -- Name: deeplabv3plus_r18b-d8_4xb2-80k_cityscapes-512x1024 - In Collection: DeepLabV3+ - Metadata: - backbone: R-18b-D8 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 66.89 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 2.1 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 75.87 - mIoU(ms+flip): 77.52 - Config: configs/deeplabv3plus/deeplabv3plus_r18b-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes_20201226_090828-e451abd9.pth -- Name: deeplabv3plus_r50b-d8_4xb2-80k_cityscapes-512x1024 - In Collection: DeepLabV3+ - Metadata: - backbone: R-50b-D8 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 253.81 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 7.4 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 80.28 - mIoU(ms+flip): 81.44 - Config: configs/deeplabv3plus/deeplabv3plus_r50b-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes_20201225_213645-a97e4e43.pth -- Name: deeplabv3plus_r101b-d8_4xb2-80k_cityscapes-512x1024 - In Collection: DeepLabV3+ - Metadata: - backbone: R-101b-D8 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 384.62 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 10.9 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 80.16 - mIoU(ms+flip): 81.41 - Config: configs/deeplabv3plus/deeplabv3plus_r101b-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes_20201226_190843-9c3c93a4.pth -- Name: deeplabv3plus_r18b-d8_4xb2-80k_cityscapes-769x769 - In Collection: DeepLabV3+ - Metadata: - backbone: R-18b-D8 - crop size: (769,769) - lr schd: 80000 - inference time (ms/im): - - value: 167.79 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 2.4 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 76.36 - mIoU(ms+flip): 78.24 - Config: configs/deeplabv3plus/deeplabv3plus_r18b-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_769x769_80k_cityscapes/deeplabv3plus_r18b-d8_769x769_80k_cityscapes_20201226_151312-2c868aff.pth -- Name: deeplabv3plus_r50b-d8_4xb2-80k_cityscapes-769x769 - In Collection: DeepLabV3+ - Metadata: - backbone: R-50b-D8 - crop size: (769,769) - lr schd: 80000 - inference time (ms/im): - - value: 581.4 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 8.4 - Results: - - Task: Semantic Segmentation - Dataset: 
Cityscapes - Metrics: - mIoU: 79.41 - mIoU(ms+flip): 80.56 - Config: configs/deeplabv3plus/deeplabv3plus_r50b-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_769x769_80k_cityscapes/deeplabv3plus_r50b-d8_769x769_80k_cityscapes_20201225_224655-8b596d1c.pth -- Name: deeplabv3plus_r101b-d8_4xb2-80k_cityscapes-769x769 - In Collection: DeepLabV3+ - Metadata: - backbone: R-101b-D8 - crop size: (769,769) - lr schd: 80000 - inference time (ms/im): - - value: 909.09 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 12.3 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.88 - mIoU(ms+flip): 81.46 - Config: configs/deeplabv3plus/deeplabv3plus_r101b-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_769x769_80k_cityscapes/deeplabv3plus_r101b-d8_769x769_80k_cityscapes_20201226_205041-227cdf7c.pth -- Name: deeplabv3plus_r50-d8_4xb4-80k_ade20k-512x512 - In Collection: DeepLabV3+ - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 47.6 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 10.6 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 42.72 - mIoU(ms+flip): 43.75 - Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k/deeplabv3plus_r50-d8_512x512_80k_ade20k_20200614_185028-bf1400d8.pth -- Name: deeplabv3plus_r101-d8_4xb4-160k_ade20k-512x512 - In Collection: DeepLabV3+ - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 70.62 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 14.1 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 44.6 - mIoU(ms+flip): 46.06 - Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k/deeplabv3plus_r101-d8_512x512_80k_ade20k_20200615_014139-d5730af7.pth -- Name: deeplabv3plus_r50-d8_4xb4-160k_ade20k-512x512 - In Collection: DeepLabV3+ - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 43.95 - mIoU(ms+flip): 44.93 - Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k/deeplabv3plus_r50-d8_512x512_160k_ade20k_20200615_124504-6135c7e0.pth -- Name: deeplabv3plus_r101-d8_4xb4-160k_ade20k-512x512 - In Collection: DeepLabV3+ - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 45.47 - mIoU(ms+flip): 46.35 - Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232-38ed86bb.pth -- Name: 
deeplabv3plus_r50-d8_4xb4-20k_voc12aug-512x512 - In Collection: DeepLabV3+ - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 20000 - inference time (ms/im): - - value: 47.62 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 7.6 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 75.93 - mIoU(ms+flip): 77.5 - Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-20k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug/deeplabv3plus_r50-d8_512x512_20k_voc12aug_20200617_102323-aad58ef1.pth -- Name: deeplabv3plus_r101-d8_4xb4-20k_voc12aug-512x512 - In Collection: DeepLabV3+ - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 20000 - inference time (ms/im): - - value: 72.05 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 11.0 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 77.22 - mIoU(ms+flip): 78.59 - Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-20k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug/deeplabv3plus_r101-d8_512x512_20k_voc12aug_20200617_102345-c7ff3d56.pth -- Name: deeplabv3plus_r50-d8_4xb4-40k_voc12aug-512x512 - In Collection: DeepLabV3+ - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 40000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 76.81 - mIoU(ms+flip): 77.57 - Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-40k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug/deeplabv3plus_r50-d8_512x512_40k_voc12aug_20200613_161759-e1b43aa9.pth -- Name: deeplabv3plus_r101-d8_4xb4-40k_voc12aug-512x512 - In Collection: DeepLabV3+ - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 40000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 78.62 - mIoU(ms+flip): 79.53 - Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-40k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug/deeplabv3plus_r101-d8_512x512_40k_voc12aug_20200613_205333-faf03387.pth -- Name: deeplabv3plus_r50-d8_4xb4-40k_pascal-context-480x480 - In Collection: DeepLabV3+ - Metadata: - backbone: R-101-D8 - crop size: (480,480) - lr schd: 40000 - inference time (ms/im): - - value: 110.01 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (480,480) - Results: - - Task: Semantic Segmentation - Dataset: Pascal Context - Metrics: - mIoU: 47.3 - mIoU(ms+flip): 48.47 - Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-40k_pascal-context-480x480.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context/deeplabv3plus_r101-d8_480x480_40k_pascal_context_20200911_165459-d3c8a29e.pth -- Name: deeplabv3plus_r50-d8_4xb4-80k_pascal-context-480x480 - In Collection: DeepLabV3+ - Metadata: - backbone: R-101-D8 - crop size: (480,480) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal Context - Metrics: - mIoU: 47.23 - mIoU(ms+flip): 48.26 - Config: 
configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_pascal-context-480x480.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context/deeplabv3plus_r101-d8_480x480_80k_pascal_context_20200911_155322-145d3ee8.pth -- Name: deeplabv3plus_r101-d8_4xb4-40k_pascal-context-59-480x480 - In Collection: DeepLabV3+ - Metadata: - backbone: R-101-D8 - crop size: (480,480) - lr schd: 40000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal Context 59 - Metrics: - mIoU: 52.86 - mIoU(ms+flip): 54.54 - Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-40k_pascal-context-59-480x480.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59_20210416_111233-ed937f15.pth -- Name: deeplabv3plus_r101-d8_4xb4-80k_pascal-context-59-480x480 - In Collection: DeepLabV3+ - Metadata: - backbone: R-101-D8 - crop size: (480,480) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal Context 59 - Metrics: - mIoU: 53.2 - mIoU(ms+flip): 54.67 - Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_pascal-context-59-480x480.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59_20210416_111127-7ca0331d.pth -- Name: deeplabv3plus_r18-d8_4xb4-80k_loveda-512x512 - In Collection: DeepLabV3+ - Metadata: - backbone: R-18-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 39.11 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 1.93 - Results: - - Task: Semantic Segmentation - Dataset: LoveDA - Metrics: - mIoU: 50.28 - mIoU(ms+flip): 50.47 - Config: configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_loveda-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_loveda/deeplabv3plus_r18-d8_512x512_80k_loveda_20211104_132800-ce0fa0ca.pth -- Name: deeplabv3plus_r50-d8_4xb4-80k_loveda-512x512 - In Collection: DeepLabV3+ - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 166.67 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 7.37 - Results: - - Task: Semantic Segmentation - Dataset: LoveDA - Metrics: - mIoU: 50.99 - mIoU(ms+flip): 50.65 - Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_loveda-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_loveda/deeplabv3plus_r50-d8_512x512_80k_loveda_20211105_080442-f0720392.pth -- Name: deeplabv3plus_r101-d8_4xb4-80k_loveda-512x512 - In Collection: DeepLabV3+ - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 230.95 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 10.84 - Results: - - Task: Semantic Segmentation - Dataset: LoveDA - Metrics: - mIoU: 51.47 - mIoU(ms+flip): 51.32 - Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_loveda-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_loveda/deeplabv3plus_r101-d8_512x512_80k_loveda_20211105_110759-4c1f297e.pth -- Name: 
deeplabv3plus_r18-d8_4xb4-80k_potsdam-512x512 - In Collection: DeepLabV3+ - Metadata: - backbone: R-18-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 12.24 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 1.91 - Results: - - Task: Semantic Segmentation - Dataset: Potsdam - Metrics: - mIoU: 77.09 - mIoU(ms+flip): 78.44 - Config: configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_potsdam-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_potsdam/deeplabv3plus_r18-d8_512x512_80k_potsdam_20211219_020601-75fd5bc3.pth -- Name: deeplabv3plus_r50-d8_4xb4-80k_potsdam-512x512 - In Collection: DeepLabV3+ - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 37.82 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 7.36 - Results: - - Task: Semantic Segmentation - Dataset: Potsdam - Metrics: - mIoU: 78.33 - mIoU(ms+flip): 79.27 - Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_potsdam-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_potsdam/deeplabv3plus_r50-d8_512x512_80k_potsdam_20211219_031508-7e7a2b24.pth -- Name: deeplabv3plus_r101-d8_4xb4-80k_potsdam-512x512 - In Collection: DeepLabV3+ - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 56.95 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 10.83 - Results: - - Task: Semantic Segmentation - Dataset: Potsdam - Metrics: - mIoU: 78.7 - mIoU(ms+flip): 79.47 - Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_potsdam-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_potsdam/deeplabv3plus_r101-d8_512x512_80k_potsdam_20211219_031508-8b112708.pth -- Name: deeplabv3plus_r18-d8_4xb4-80k_vaihingen-512x512 - In Collection: DeepLabV3+ - Metadata: - backbone: R-18-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 13.74 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 1.91 - Results: - - Task: Semantic Segmentation - Dataset: Vaihingen - Metrics: - mIoU: 72.5 - mIoU(ms+flip): 74.13 - Config: configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_vaihingen-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen_20211231_230805-7626a263.pth -- Name: deeplabv3plus_r50-d8_4xb4-80k_vaihingen-512x512 - In Collection: DeepLabV3+ - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 37.16 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 7.36 - Results: - - Task: Semantic Segmentation - Dataset: Vaihingen - Metrics: - mIoU: 73.97 - mIoU(ms+flip): 75.05 - Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_vaihingen-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen_20211231_230816-5040938d.pth -- Name: 
deeplabv3plus_r101-d8_4xb4-80k_vaihingen-512x512 - In Collection: DeepLabV3+ - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 53.79 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 10.83 - Results: - - Task: Semantic Segmentation - Dataset: Vaihingen - Metrics: - mIoU: 73.06 - mIoU(ms+flip): 74.14 - Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_vaihingen-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen_20211231_230816-8a095afa.pth -- Name: deeplabv3plus_r18-d8_4xb4-80k_isaid-896x896 - In Collection: DeepLabV3+ - Metadata: - backbone: R-18-D8 - crop size: (896,896) - lr schd: 80000 - inference time (ms/im): - - value: 40.31 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (896,896) - Training Memory (GB): 6.19 - Results: - - Task: Semantic Segmentation - Dataset: iSAID - Metrics: - mIoU: 61.35 - mIoU(ms+flip): 62.61 - Config: configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_isaid-896x896.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_4x4_896x896_80k_isaid/deeplabv3plus_r18-d8_4x4_896x896_80k_isaid_20220110_180526-7059991d.pth -- Name: deeplabv3plus_r50-d8_4xb4-80k_isaid-896x896 - In Collection: DeepLabV3+ - Metadata: - backbone: R-50-D8 - crop size: (896,896) - lr schd: 80000 - inference time (ms/im): - - value: 118.76 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (896,896) - Training Memory (GB): 21.45 - Results: - - Task: Semantic Segmentation - Dataset: iSAID - Metrics: - mIoU: 67.06 - mIoU(ms+flip): 68.02 - Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_isaid-896x896.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid_20220110_180526-598be439.pth diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-300k_mapillay_v1_65-1280x1280.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-300k_mapillay_v1_65-1280x1280.py new file mode 100644 index 0000000000..133c45ae1d --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-300k_mapillay_v1_65-1280x1280.py @@ -0,0 +1,58 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/mapillary_v1_65.py', + '../_base_/default_runtime.py', +] + +crop_size = (1280, 1280) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict(depth=50), + decode_head=dict(num_classes=65), + auxiliary_head=dict(num_classes=65)) + +iters = 300000 +# optimizer +optimizer = dict( + type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.0001) +# optimizer wrapper +optim_wrapper = dict( + type='OptimWrapper', + optimizer=optimizer, + clip_grad=dict(max_norm=0.01, norm_type=2), + paramwise_cfg=dict( + custom_keys={'backbone': dict(lr_mult=0.1, decay_mult=1.0)})) +param_scheduler = [ + dict( + type='PolyLR', + eta_min=0, + power=0.9, + begin=0, + end=iters, + by_epoch=False) +] + +# training schedule for 300k +train_cfg = dict( + type='IterBasedTrainLoop', max_iters=iters, val_interval=iters // 10) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') + +default_hooks = dict( + timer=dict(type='IterTimerHook'),
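+ # The hooks below follow the iteration-based 300k schedule: LoggerHook
+ # reports every 50 iterations (log_metric_by_epoch=False keeps logging
+ # iteration-indexed), and CheckpointHook saves a checkpoint every
+ # iters // 10 = 30000 iterations, matching val_interval in train_cfg.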
logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict( + type='CheckpointHook', by_epoch=False, interval=iters // 10), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) + +train_dataloader = dict(batch_size=2) + +# Default setting for scaling LR automatically +# - `enable` means enable scaling LR automatically +# or not by default. +# - `base_batch_size` = (4 GPUs) x (2 samples per GPU). +auto_scale_lr = dict(enable=False, base_batch_size=8) diff --git a/configs/deeplabv3plus/metafile.yaml b/configs/deeplabv3plus/metafile.yaml new file mode 100644 index 0000000000..b41de4dee2 --- /dev/null +++ b/configs/deeplabv3plus/metafile.yaml @@ -0,0 +1,1041 @@ +Collections: +- Name: DeepLabV3+ + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + - Pascal Context + - Pascal Context 59 + - LoveDA + - Potsdam + - Vaihingen + - iSAID + - Mapillary Vistas v1.2 + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + README: configs/deeplabv3plus/README.md + Frameworks: + - PyTorch +Models: +- Name: deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.61 + mIoU(ms+flip): 81.01 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 7.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes/deeplabv3plus_r50-d8_512x1024_40k_cityscapes_20200605_094610-d222ffcd.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes/deeplabv3plus_r50-d8_512x1024_40k_cityscapes_20200605_094610.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d8_4xb2-40k_cityscapes-512x1024 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.21 + mIoU(ms+flip): 81.82 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 11.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes/deeplabv3plus_r101-d8_512x1024_40k_cityscapes_20200605_094614-3769eecf.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes/deeplabv3plus_r101-d8_512x1024_40k_cityscapes_20200605_094614.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r50-d8_4xb2-40k_cityscapes-769x769 + In 
Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.97 + mIoU(ms+flip): 80.46 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 8.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes/deeplabv3plus_r50-d8_769x769_40k_cityscapes_20200606_114143-1dcb0e3c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes/deeplabv3plus_r50-d8_769x769_40k_cityscapes_20200606_114143.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d8_4xb2-40k_cityscapes-769x769 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.46 + mIoU(ms+flip): 80.5 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 12.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes/deeplabv3plus_r101-d8_769x769_40k_cityscapes_20200606_114304-ff414b9e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes/deeplabv3plus_r101-d8_769x769_40k_cityscapes_20200606_114304.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r18-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.89 + mIoU(ms+flip): 78.76 + Config: configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 2.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes/deeplabv3plus_r18-d8_512x1024_80k_cityscapes_20201226_080942-cff257fe.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes/deeplabv3plus_r18-d8_512x1024_80k_cityscapes-20201226_080942.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r50-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.09 + mIoU(ms+flip): 81.13 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: 
Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes/deeplabv3plus_r50-d8_512x1024_80k_cityscapes_20200606_114049-f9fb496d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes/deeplabv3plus_r50-d8_512x1024_80k_cityscapes_20200606_114049.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.97 + mIoU(ms+flip): 82.03 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_512x1024_80k_cityscapes_20200606_114143-068fcfe9.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_512x1024_80k_cityscapes_20200606_114143.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d8_4xb2-amp-80k_cityscapes-512x1024 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.46 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DeepLabV3+ + - (FP16) + Training Resources: 4x V100 GPUS + Memory (GB): 6.35 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230920-f1104f4b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230920.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r18-d8_4xb2-80k_cityscapes-769x769 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.26 + mIoU(ms+flip): 77.91 + Config: configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 2.5 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_769x769_80k_cityscapes/deeplabv3plus_r18-d8_769x769_80k_cityscapes_20201226_083346-f326e06a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_769x769_80k_cityscapes/deeplabv3plus_r18-d8_769x769_80k_cityscapes-20201226_083346.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r50-d8_4xb2-80k_cityscapes-769x769 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.83 + mIoU(ms+flip): 81.48 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes/deeplabv3plus_r50-d8_769x769_80k_cityscapes_20200606_210233-0e9dfdc4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes/deeplabv3plus_r50-d8_769x769_80k_cityscapes_20200606_210233.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d8_4xb2-80k_cityscapes-769x769 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.65 + mIoU(ms+flip): 81.47 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes/deeplabv3plus_r101-d8_769x769_80k_cityscapes_20220406_154720-dfcc0b68.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes/deeplabv3plus_r101-d8_769x769_80k_cityscapes_20220406_154720.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d16-mg124_4xb2-40k_cityscapes-512x1024 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.09 + mIoU(ms+flip): 80.36 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D16-MG124 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 5.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes_20200908_005644-cf9ce186.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes-20200908_005644.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d16-mg124_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.9 + mIoU(ms+flip): 81.33 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D16-MG124 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 9.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes_20200908_005644-ee6158e0.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes-20200908_005644.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r18b-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.87 + mIoU(ms+flip): 77.52 + Config: configs/deeplabv3plus/deeplabv3plus_r18b-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18b-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 2.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes_20201226_090828-e451abd9.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes-20201226_090828.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r50b-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.28 + mIoU(ms+flip): 81.44 + Config: configs/deeplabv3plus/deeplabv3plus_r50b-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50b-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 7.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes_20201225_213645-a97e4e43.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes-20201225_213645.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101b-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.16 + mIoU(ms+flip): 81.41 + Config: configs/deeplabv3plus/deeplabv3plus_r101b-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101b-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 10.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes_20201226_190843-9c3c93a4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes-20201226_190843.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r18b-d8_4xb2-80k_cityscapes-769x769 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.36 + mIoU(ms+flip): 78.24 + Config: configs/deeplabv3plus/deeplabv3plus_r18b-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18b-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 2.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_769x769_80k_cityscapes/deeplabv3plus_r18b-d8_769x769_80k_cityscapes_20201226_151312-2c868aff.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_769x769_80k_cityscapes/deeplabv3plus_r18b-d8_769x769_80k_cityscapes-20201226_151312.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r50b-d8_4xb2-80k_cityscapes-769x769 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.41 + mIoU(ms+flip): 80.56 + Config: configs/deeplabv3plus/deeplabv3plus_r50b-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50b-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 8.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_769x769_80k_cityscapes/deeplabv3plus_r50b-d8_769x769_80k_cityscapes_20201225_224655-8b596d1c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_769x769_80k_cityscapes/deeplabv3plus_r50b-d8_769x769_80k_cityscapes-20201225_224655.log.json + Paper: + Title: Encoder-Decoder with Atrous 
Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101b-d8_4xb2-80k_cityscapes-769x769 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.88 + mIoU(ms+flip): 81.46 + Config: configs/deeplabv3plus/deeplabv3plus_r101b-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101b-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 12.3 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_769x769_80k_cityscapes/deeplabv3plus_r101b-d8_769x769_80k_cityscapes_20201226_205041-227cdf7c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_769x769_80k_cityscapes/deeplabv3plus_r101b-d8_769x769_80k_cityscapes-20201226_205041.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r50-d8_4xb4-80k_ade20k-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.72 + mIoU(ms+flip): 43.75 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 10.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k/deeplabv3plus_r50-d8_512x512_80k_ade20k_20200614_185028-bf1400d8.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k/deeplabv3plus_r50-d8_512x512_80k_ade20k_20200614_185028.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d8_4xb4-80k_ade20k-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 44.6 + mIoU(ms+flip): 46.06 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 14.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k/deeplabv3plus_r101-d8_512x512_80k_ade20k_20200615_014139-d5730af7.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k/deeplabv3plus_r101-d8_512x512_80k_ade20k_20200615_014139.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r50-d8_4xb4-160k_ade20k-512x512 + In Collection: 
DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.95 + mIoU(ms+flip): 44.93 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k/deeplabv3plus_r50-d8_512x512_160k_ade20k_20200615_124504-6135c7e0.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k/deeplabv3plus_r50-d8_512x512_160k_ade20k_20200615_124504.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d8_4xb4-160k_ade20k-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.47 + mIoU(ms+flip): 46.35 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232-38ed86bb.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r50-d8_4xb4-20k_voc12aug-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 75.93 + mIoU(ms+flip): 77.5 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 7.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug/deeplabv3plus_r50-d8_512x512_20k_voc12aug_20200617_102323-aad58ef1.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug/deeplabv3plus_r50-d8_512x512_20k_voc12aug_20200617_102323.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d8_4xb4-20k_voc12aug-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.22 + mIoU(ms+flip): 78.59 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3+ + Training 
Resources: 4x V100 GPUS + Memory (GB): 11.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug/deeplabv3plus_r101-d8_512x512_20k_voc12aug_20200617_102345-c7ff3d56.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug/deeplabv3plus_r101-d8_512x512_20k_voc12aug_20200617_102345.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r50-d8_4xb4-40k_voc12aug-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.81 + mIoU(ms+flip): 77.57 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug/deeplabv3plus_r50-d8_512x512_40k_voc12aug_20200613_161759-e1b43aa9.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug/deeplabv3plus_r50-d8_512x512_40k_voc12aug_20200613_161759.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d8_4xb4-40k_voc12aug-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 78.62 + mIoU(ms+flip): 79.53 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug/deeplabv3plus_r101-d8_512x512_40k_voc12aug_20200613_205333-faf03387.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug/deeplabv3plus_r101-d8_512x512_40k_voc12aug_20200613_205333.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d8_4xb4-40k_pascal-context-480x480 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Pascal Context + Metrics: + mIoU: 47.3 + mIoU(ms+flip): 48.47 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-40k_pascal-context-480x480.py + Metadata: + Training Data: Pascal Context + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context/deeplabv3plus_r101-d8_480x480_40k_pascal_context_20200911_165459-d3c8a29e.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context/deeplabv3plus_r101-d8_480x480_40k_pascal_context-20200911_165459.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d8_4xb4-80k_pascal-context-480x480 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Pascal Context + Metrics: + mIoU: 47.23 + mIoU(ms+flip): 48.26 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_pascal-context-480x480.py + Metadata: + Training Data: Pascal Context + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context/deeplabv3plus_r101-d8_480x480_80k_pascal_context_20200911_155322-145d3ee8.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context/deeplabv3plus_r101-d8_480x480_80k_pascal_context-20200911_155322.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d8_4xb4-40k_pascal-context-59-480x480 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Pascal Context 59 + Metrics: + mIoU: 52.86 + mIoU(ms+flip): 54.54 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-40k_pascal-context-59-480x480.py + Metadata: + Training Data: Pascal Context 59 + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59_20210416_111233-ed937f15.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59-20210416_111233.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d8_4xb4-80k_pascal-context-59-480x480 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Pascal Context 59 + Metrics: + mIoU: 53.2 + mIoU(ms+flip): 54.67 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_pascal-context-59-480x480.py + Metadata: + Training Data: Pascal Context 59 + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59_20210416_111127-7ca0331d.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59-20210416_111127.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r18-d8_4xb4-80k_loveda-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: LoveDA + Metrics: + mIoU: 50.28 + mIoU(ms+flip): 50.47 + Config: configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_loveda-512x512.py + Metadata: + Training Data: LoveDA + Batch Size: 16 + Architecture: + - R-18-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 1.93 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_loveda/deeplabv3plus_r18-d8_512x512_80k_loveda_20211104_132800-ce0fa0ca.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_loveda/deeplabv3plus_r18-d8_512x512_80k_loveda_20211104_132800.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r50-d8_4xb4-80k_loveda-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: LoveDA + Metrics: + mIoU: 50.99 + mIoU(ms+flip): 50.65 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_loveda-512x512.py + Metadata: + Training Data: LoveDA + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 7.37 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_loveda/deeplabv3plus_r50-d8_512x512_80k_loveda_20211105_080442-f0720392.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_loveda/deeplabv3plus_r50-d8_512x512_80k_loveda_20211105_080442.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d8_4xb4-80k_loveda-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: LoveDA + Metrics: + mIoU: 51.47 + mIoU(ms+flip): 51.32 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_loveda-512x512.py + Metadata: + Training Data: LoveDA + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 10.84 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_loveda/deeplabv3plus_r101-d8_512x512_80k_loveda_20211105_110759-4c1f297e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_loveda/deeplabv3plus_r101-d8_512x512_80k_loveda_20211105_110759.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: 
https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r18-d8_4xb4-80k_potsdam-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Potsdam + Metrics: + mIoU: 77.09 + mIoU(ms+flip): 78.44 + Config: configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_potsdam-512x512.py + Metadata: + Training Data: Potsdam + Batch Size: 16 + Architecture: + - R-18-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 1.91 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_potsdam/deeplabv3plus_r18-d8_512x512_80k_potsdam_20211219_020601-75fd5bc3.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_potsdam/deeplabv3plus_r18-d8_512x512_80k_potsdam_20211219_020601.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r50-d8_4xb4-80k_potsdam-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Potsdam + Metrics: + mIoU: 78.33 + mIoU(ms+flip): 79.27 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_potsdam-512x512.py + Metadata: + Training Data: Potsdam + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 7.36 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_potsdam/deeplabv3plus_r50-d8_512x512_80k_potsdam_20211219_031508-7e7a2b24.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_potsdam/deeplabv3plus_r50-d8_512x512_80k_potsdam_20211219_031508.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d8_4xb4-80k_potsdam-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Potsdam + Metrics: + mIoU: 78.7 + mIoU(ms+flip): 79.47 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_potsdam-512x512.py + Metadata: + Training Data: Potsdam + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 10.83 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_potsdam/deeplabv3plus_r101-d8_512x512_80k_potsdam_20211219_031508-8b112708.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_potsdam/deeplabv3plus_r101-d8_512x512_80k_potsdam_20211219_031508.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r18-d8_4xb4-80k_vaihingen-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Vaihingen + Metrics: + mIoU: 72.5 + mIoU(ms+flip): 74.13 + 
Config: configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_vaihingen-512x512.py + Metadata: + Training Data: Vaihingen + Batch Size: 16 + Architecture: + - R-18-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 1.91 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen_20211231_230805-7626a263.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen_20211231_230805.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r50-d8_4xb4-80k_vaihingen-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Vaihingen + Metrics: + mIoU: 73.97 + mIoU(ms+flip): 75.05 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_vaihingen-512x512.py + Metadata: + Training Data: Vaihingen + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 7.36 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen_20211231_230816-5040938d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen_20211231_230816.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r101-d8_4xb4-80k_vaihingen-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Vaihingen + Metrics: + mIoU: 73.06 + mIoU(ms+flip): 74.14 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb4-80k_vaihingen-512x512.py + Metadata: + Training Data: Vaihingen + Batch Size: 16 + Architecture: + - R-101-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 10.83 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen_20211231_230816-8a095afa.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen_20211231_230816.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r18-d8_4xb4-80k_isaid-896x896 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: iSAID + Metrics: + mIoU: 61.35 + mIoU(ms+flip): 62.61 + Config: configs/deeplabv3plus/deeplabv3plus_r18-d8_4xb4-80k_isaid-896x896.py + Metadata: + Training Data: iSAID + Batch Size: 16 + Architecture: + - R-18-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 6.19 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_4x4_896x896_80k_isaid/deeplabv3plus_r18-d8_4x4_896x896_80k_isaid_20220110_180526-7059991d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_4x4_896x896_80k_isaid/deeplabv3plus_r18-d8_4x4_896x896_80k_isaid_20220110_180526.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r50-d8_4xb4-80k_isaid-896x896 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: iSAID + Metrics: + mIoU: 67.06 + mIoU(ms+flip): 68.02 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb4-80k_isaid-896x896.py + Metadata: + Training Data: iSAID + Batch Size: 16 + Architecture: + - R-50-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 21.45 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid_20220110_180526-598be439.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid_20220110_180526.log.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch +- Name: deeplabv3plus_r50-d8_4xb2-300k_mapillay_v1_65-1280x1280 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Mapillary Vistas v1.2 + Metrics: + mIoU: 47.35 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-300k_mapillay_v1_65-1280x1280.py + Metadata: + Training Data: Mapillary Vistas v1.2 + Batch Size: 8 + Architecture: + - R-50-D8 + - DeepLabV3+ + Training Resources: 4x A100 GPUS + Memory (GB): 24.04 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-300k_mapillay_v1_65-1280x1280/deeplabv3plus_r50-d8_4xb2-300k_mapillay_v1_65-1280x1280_20230301_110504-655f8e43.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-300k_mapillay_v1_65-1280x1280/deeplabv3plus_r50-d8_4xb2-300k_mapillay_v1_65-1280x1280_20230301_110504.json + Paper: + Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation + URL: https://arxiv.org/abs/1802.02611 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30 + Framework: PyTorch diff --git a/configs/dmnet/README.md b/configs/dmnet/README.md index 535740ddd3..b0cf94455e 100644 --- a/configs/dmnet/README.md +++ b/configs/dmnet/README.md @@ -1,6 +1,6 @@ # DMNet -[Dynamic Multi-scale Filters for Semantic Segmentation](https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf) +> [Dynamic Multi-scale Filters for Semantic Segmentation](https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf) ## Introduction @@ -22,6 +22,30 @@ Multi-scale representation provides an effective way to address scale variation o 
+## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DMNet | R-50-D8 | 512x1024 | 40000 | 7.0 | 3.66 | V100 | 77.78 | 79.14 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dmnet/dmnet_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes/dmnet_r50-d8_512x1024_40k_cityscapes_20201215_042326-615373cf.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes/dmnet_r50-d8_512x1024_40k_cityscapes-20201215_042326.log.json) | +| DMNet | R-101-D8 | 512x1024 | 40000 | 10.6 | 2.54 | V100 | 78.37 | 79.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dmnet/dmnet_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_40k_cityscapes/dmnet_r101-d8_512x1024_40k_cityscapes_20201215_043100-8291e976.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_40k_cityscapes/dmnet_r101-d8_512x1024_40k_cityscapes-20201215_043100.log.json) | +| DMNet | R-50-D8 | 769x769 | 40000 | 7.9 | 1.57 | V100 | 78.49 | 80.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dmnet/dmnet_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_40k_cityscapes/dmnet_r50-d8_769x769_40k_cityscapes_20201215_093706-e7f0e23e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_40k_cityscapes/dmnet_r50-d8_769x769_40k_cityscapes-20201215_093706.log.json) | +| DMNet | R-101-D8 | 769x769 | 40000 | 12.0 | 1.01 | V100 | 77.62 | 78.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dmnet/dmnet_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_40k_cityscapes/dmnet_r101-d8_769x769_40k_cityscapes_20201215_081348-a74261f6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_40k_cityscapes/dmnet_r101-d8_769x769_40k_cityscapes-20201215_081348.log.json) | +| DMNet | R-50-D8 | 512x1024 | 80000 | - | - | V100 | 79.07 | 80.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dmnet/dmnet_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes/dmnet_r50-d8_512x1024_80k_cityscapes_20201215_053728-3c8893b9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes/dmnet_r50-d8_512x1024_80k_cityscapes-20201215_053728.log.json) | +| DMNet | R-101-D8 | 512x1024 | 80000 | - | - | V100 | 79.64 | 80.67 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dmnet/dmnet_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_80k_cityscapes/dmnet_r101-d8_512x1024_80k_cityscapes_20201215_031718-fa081cb8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_80k_cityscapes/dmnet_r101-d8_512x1024_80k_cityscapes-20201215_031718.log.json) | +| DMNet | R-50-D8 | 769x769 | 80000 | - | - | V100 | 79.22 | 80.55 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dmnet/dmnet_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_80k_cityscapes/dmnet_r50-d8_769x769_80k_cityscapes_20201215_034006-6060840e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_80k_cityscapes/dmnet_r50-d8_769x769_80k_cityscapes-20201215_034006.log.json) | +| DMNet | R-101-D8 | 769x769 | 80000 | - | - | V100 | 79.19 | 80.65 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dmnet/dmnet_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_80k_cityscapes/dmnet_r101-d8_769x769_80k_cityscapes_20201215_082810-7f0de59a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_80k_cityscapes/dmnet_r101-d8_769x769_80k_cityscapes-20201215_082810.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DMNet | R-50-D8 | 512x512 | 80000 | 9.4 | 20.95 | V100 | 42.37 | 43.62 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dmnet/dmnet_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_80k_ade20k/dmnet_r50-d8_512x512_80k_ade20k_20201215_144744-f89092a6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_80k_ade20k/dmnet_r50-d8_512x512_80k_ade20k-20201215_144744.log.json) | +| DMNet | R-101-D8 | 512x512 | 80000 | 13.0 | 13.88 | V100 | 45.34 | 46.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dmnet/dmnet_r101-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_80k_ade20k/dmnet_r101-d8_512x512_80k_ade20k_20201215_104812-bfa45311.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_80k_ade20k/dmnet_r101-d8_512x512_80k_ade20k-20201215_104812.log.json) | +| DMNet | R-50-D8 | 512x512 | 160000 | - | - | V100 | 43.15 | 44.17 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dmnet/dmnet_r50-d8_4xb4-160k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_160k_ade20k/dmnet_r50-d8_512x512_160k_ade20k_20201215_115313-025ab3f9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_160k_ade20k/dmnet_r50-d8_512x512_160k_ade20k-20201215_115313.log.json) | +| DMNet | R-101-D8 | 512x512 | 160000 | - | - | V100 | 45.42 | 46.76 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dmnet/dmnet_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_160k_ade20k/dmnet_r101-d8_512x512_160k_ade20k_20201215_111145-a0bc02ef.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_160k_ade20k/dmnet_r101-d8_512x512_160k_ade20k-20201215_111145.log.json) | + ## Citation ```bibtex @@ -33,27 +57,3 @@ month = {October}, year = {2019} } ``` - -## Results and models - -### Cityscapes - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| DMNet | R-50-D8 | 512x1024 | 40000 | 7.0 | 3.66 | 77.78 | 79.14 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/dmnet/dmnet_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes/dmnet_r50-d8_512x1024_40k_cityscapes_20201215_042326-615373cf.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes/dmnet_r50-d8_512x1024_40k_cityscapes-20201215_042326.log.json) | -| DMNet | R-101-D8 | 512x1024 | 40000 | 10.6 | 2.54 | 78.37 | 79.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/dmnet/dmnet_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_40k_cityscapes/dmnet_r101-d8_512x1024_40k_cityscapes_20201215_043100-8291e976.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_40k_cityscapes/dmnet_r101-d8_512x1024_40k_cityscapes-20201215_043100.log.json) | -| DMNet | R-50-D8 | 769x769 | 40000 | 7.9 | 1.57 | 78.49 | 80.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/dmnet/dmnet_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_40k_cityscapes/dmnet_r50-d8_769x769_40k_cityscapes_20201215_093706-e7f0e23e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_40k_cityscapes/dmnet_r50-d8_769x769_40k_cityscapes-20201215_093706.log.json) | -| DMNet | R-101-D8 | 769x769 | 40000 | 12.0 | 1.01 | 77.62 | 78.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/dmnet/dmnet_r101-d8_4xb2-40k_cityscapes-769x769.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_40k_cityscapes/dmnet_r101-d8_769x769_40k_cityscapes_20201215_081348-a74261f6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_40k_cityscapes/dmnet_r101-d8_769x769_40k_cityscapes-20201215_081348.log.json) | -| DMNet | R-50-D8 | 512x1024 | 80000 | - | - | 79.07 | 80.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/dmnet/dmnet_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes/dmnet_r50-d8_512x1024_80k_cityscapes_20201215_053728-3c8893b9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes/dmnet_r50-d8_512x1024_80k_cityscapes-20201215_053728.log.json) | -| DMNet | R-101-D8 | 512x1024 | 80000 | - | - | 79.64 | 80.67 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/dmnet/dmnet_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_80k_cityscapes/dmnet_r101-d8_512x1024_80k_cityscapes_20201215_031718-fa081cb8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_80k_cityscapes/dmnet_r101-d8_512x1024_80k_cityscapes-20201215_031718.log.json) | -| DMNet | R-50-D8 | 769x769 | 80000 | - | - | 79.22 | 80.55 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/dmnet/dmnet_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_80k_cityscapes/dmnet_r50-d8_769x769_80k_cityscapes_20201215_034006-6060840e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_80k_cityscapes/dmnet_r50-d8_769x769_80k_cityscapes-20201215_034006.log.json) | -| DMNet | R-101-D8 | 769x769 | 80000 | - | - | 79.19 | 80.65 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/dmnet/dmnet_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_80k_cityscapes/dmnet_r101-d8_769x769_80k_cityscapes_20201215_082810-7f0de59a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_80k_cityscapes/dmnet_r101-d8_769x769_80k_cityscapes-20201215_082810.log.json) | - -### ADE20K - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| DMNet | R-50-D8 | 512x512 | 80000 | 9.4 | 20.95 | 42.37 | 43.62 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/dmnet/dmnet_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_80k_ade20k/dmnet_r50-d8_512x512_80k_ade20k_20201215_144744-f89092a6.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_80k_ade20k/dmnet_r50-d8_512x512_80k_ade20k-20201215_144744.log.json) | -| DMNet | R-101-D8 | 512x512 | 80000 | 13.0 | 13.88 | 45.34 | 46.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/dmnet/dmnet_r101-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_80k_ade20k/dmnet_r101-d8_512x512_80k_ade20k_20201215_104812-bfa45311.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_80k_ade20k/dmnet_r101-d8_512x512_80k_ade20k-20201215_104812.log.json) | -| DMNet | R-50-D8 | 512x512 | 160000 | - | - | 43.15 | 44.17 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/dmnet/dmnet_r50-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_160k_ade20k/dmnet_r50-d8_512x512_160k_ade20k_20201215_115313-025ab3f9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_160k_ade20k/dmnet_r50-d8_512x512_160k_ade20k-20201215_115313.log.json) | -| DMNet | R-101-D8 | 512x512 | 160000 | - | - | 45.42 | 46.76 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/dmnet/dmnet_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_160k_ade20k/dmnet_r101-d8_512x512_160k_ade20k_20201215_111145-a0bc02ef.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_160k_ade20k/dmnet_r101-d8_512x512_160k_ade20k-20201215_111145.log.json) | diff --git a/configs/dmnet/dmnet.yml b/configs/dmnet/dmnet.yml deleted file mode 100644 index dfb80ba7e1..0000000000 --- a/configs/dmnet/dmnet.yml +++ /dev/null @@ -1,232 +0,0 @@ -Collections: -- Name: DMNet - Metadata: - Training Data: - - Cityscapes - - ADE20K - Paper: - URL: https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf - Title: Dynamic Multi-scale Filters for Semantic Segmentation - README: configs/dmnet/README.md - Code: - URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dm_head.py#L93 - Version: v0.17.0 - Converted From: - Code: https://github.com/Junjun2016/DMNet -Models: -- Name: dmnet_r50-d8_4xb2-40k_cityscapes-512x1024 - In Collection: DMNet - Metadata: - backbone: R-50-D8 - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 273.22 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 7.0 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 77.78 - mIoU(ms+flip): 79.14 - Config: configs/dmnet/dmnet_r50-d8_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes/dmnet_r50-d8_512x1024_40k_cityscapes_20201215_042326-615373cf.pth -- Name: dmnet_r101-d8_4xb2-40k_cityscapes-512x1024 - In Collection: DMNet - Metadata: - backbone: R-101-D8 - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 393.7 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 10.6 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.37 - mIoU(ms+flip): 79.72 - Config: 
configs/dmnet/dmnet_r101-d8_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_40k_cityscapes/dmnet_r101-d8_512x1024_40k_cityscapes_20201215_043100-8291e976.pth -- Name: dmnet_r50-d8_4xb2-40k_cityscapes-769x769 - In Collection: DMNet - Metadata: - backbone: R-50-D8 - crop size: (769,769) - lr schd: 40000 - inference time (ms/im): - - value: 636.94 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 7.9 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.49 - mIoU(ms+flip): 80.27 - Config: configs/dmnet/dmnet_r50-d8_4xb2-40k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_40k_cityscapes/dmnet_r50-d8_769x769_40k_cityscapes_20201215_093706-e7f0e23e.pth -- Name: dmnet_r101-d8_4xb2-40k_cityscapes-769x769 - In Collection: DMNet - Metadata: - backbone: R-101-D8 - crop size: (769,769) - lr schd: 40000 - inference time (ms/im): - - value: 990.1 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 12.0 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 77.62 - mIoU(ms+flip): 78.94 - Config: configs/dmnet/dmnet_r101-d8_4xb2-40k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_40k_cityscapes/dmnet_r101-d8_769x769_40k_cityscapes_20201215_081348-a74261f6.pth -- Name: dmnet_r50-d8_4xb2-80k_cityscapes-512x1024 - In Collection: DMNet - Metadata: - backbone: R-50-D8 - crop size: (512,1024) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.07 - mIoU(ms+flip): 80.22 - Config: configs/dmnet/dmnet_r50-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes/dmnet_r50-d8_512x1024_80k_cityscapes_20201215_053728-3c8893b9.pth -- Name: dmnet_r101-d8_4xb2-80k_cityscapes-512x1024 - In Collection: DMNet - Metadata: - backbone: R-101-D8 - crop size: (512,1024) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.64 - mIoU(ms+flip): 80.67 - Config: configs/dmnet/dmnet_r101-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_80k_cityscapes/dmnet_r101-d8_512x1024_80k_cityscapes_20201215_031718-fa081cb8.pth -- Name: dmnet_r50-d8_4xb2-80k_cityscapes-769x769 - In Collection: DMNet - Metadata: - backbone: R-50-D8 - crop size: (769,769) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.22 - mIoU(ms+flip): 80.55 - Config: configs/dmnet/dmnet_r50-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_80k_cityscapes/dmnet_r50-d8_769x769_80k_cityscapes_20201215_034006-6060840e.pth -- Name: dmnet_r101-d8_4xb2-80k_cityscapes-769x769 - In Collection: DMNet - Metadata: - backbone: R-101-D8 - crop size: (769,769) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.19 - mIoU(ms+flip): 80.65 - Config: configs/dmnet/dmnet_r101-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_80k_cityscapes/dmnet_r101-d8_769x769_80k_cityscapes_20201215_082810-7f0de59a.pth -- 
Name: dmnet_r50-d8_4xb4-80k_ade20k-512x512 - In Collection: DMNet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 47.73 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 9.4 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 42.37 - mIoU(ms+flip): 43.62 - Config: configs/dmnet/dmnet_r50-d8_4xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_80k_ade20k/dmnet_r50-d8_512x512_80k_ade20k_20201215_144744-f89092a6.pth -- Name: dmnet_r101-d8_4xb4-80k_ade20k-512x512 - In Collection: DMNet - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 72.05 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 13.0 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 45.34 - mIoU(ms+flip): 46.13 - Config: configs/dmnet/dmnet_r101-d8_4xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_80k_ade20k/dmnet_r101-d8_512x512_80k_ade20k_20201215_104812-bfa45311.pth -- Name: dmnet_r50-d8_4xb4-160k_ade20k-512x512 - In Collection: DMNet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 43.15 - mIoU(ms+flip): 44.17 - Config: configs/dmnet/dmnet_r50-d8_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_160k_ade20k/dmnet_r50-d8_512x512_160k_ade20k_20201215_115313-025ab3f9.pth -- Name: dmnet_r101-d8_4xb4-160k_ade20k-512x512 - In Collection: DMNet - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 45.42 - mIoU(ms+flip): 46.76 - Config: configs/dmnet/dmnet_r101-d8_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_160k_ade20k/dmnet_r101-d8_512x512_160k_ade20k_20201215_111145-a0bc02ef.pth diff --git a/configs/dmnet/metafile.yaml b/configs/dmnet/metafile.yaml new file mode 100644 index 0000000000..7f5e536753 --- /dev/null +++ b/configs/dmnet/metafile.yaml @@ -0,0 +1,296 @@ +Collections: +- Name: DMNet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + Paper: + Title: Dynamic Multi-scale Filters for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf + README: configs/dmnet/README.md + Frameworks: + - PyTorch +Models: +- Name: dmnet_r50-d8_4xb2-40k_cityscapes-512x1024 + In Collection: DMNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.78 + mIoU(ms+flip): 79.14 + Config: configs/dmnet/dmnet_r50-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DMNet + Training Resources: 4x V100 GPUS + Memory (GB): 7.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes/dmnet_r50-d8_512x1024_40k_cityscapes_20201215_042326-615373cf.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes/dmnet_r50-d8_512x1024_40k_cityscapes-20201215_042326.log.json + Paper: + Title: Dynamic Multi-scale Filters for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dm_head.py#L93 + Framework: PyTorch +- Name: dmnet_r101-d8_4xb2-40k_cityscapes-512x1024 + In Collection: DMNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.37 + mIoU(ms+flip): 79.72 + Config: configs/dmnet/dmnet_r101-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DMNet + Training Resources: 4x V100 GPUS + Memory (GB): 10.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_40k_cityscapes/dmnet_r101-d8_512x1024_40k_cityscapes_20201215_043100-8291e976.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_40k_cityscapes/dmnet_r101-d8_512x1024_40k_cityscapes-20201215_043100.log.json + Paper: + Title: Dynamic Multi-scale Filters for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dm_head.py#L93 + Framework: PyTorch +- Name: dmnet_r50-d8_4xb2-40k_cityscapes-769x769 + In Collection: DMNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.49 + mIoU(ms+flip): 80.27 + Config: configs/dmnet/dmnet_r50-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DMNet + Training Resources: 4x V100 GPUS + Memory (GB): 7.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_40k_cityscapes/dmnet_r50-d8_769x769_40k_cityscapes_20201215_093706-e7f0e23e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_40k_cityscapes/dmnet_r50-d8_769x769_40k_cityscapes-20201215_093706.log.json + Paper: + Title: Dynamic Multi-scale Filters for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dm_head.py#L93 + Framework: PyTorch +- Name: dmnet_r101-d8_4xb2-40k_cityscapes-769x769 + In Collection: DMNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.62 + mIoU(ms+flip): 78.94 + Config: configs/dmnet/dmnet_r101-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DMNet + Training Resources: 4x V100 GPUS + Memory (GB): 12.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_40k_cityscapes/dmnet_r101-d8_769x769_40k_cityscapes_20201215_081348-a74261f6.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_40k_cityscapes/dmnet_r101-d8_769x769_40k_cityscapes-20201215_081348.log.json + Paper: + Title: Dynamic Multi-scale Filters for Semantic Segmentation + URL: 
https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dm_head.py#L93 + Framework: PyTorch +- Name: dmnet_r50-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DMNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.07 + mIoU(ms+flip): 80.22 + Config: configs/dmnet/dmnet_r50-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DMNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes/dmnet_r50-d8_512x1024_80k_cityscapes_20201215_053728-3c8893b9.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes/dmnet_r50-d8_512x1024_80k_cityscapes-20201215_053728.log.json + Paper: + Title: Dynamic Multi-scale Filters for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dm_head.py#L93 + Framework: PyTorch +- Name: dmnet_r101-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DMNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.64 + mIoU(ms+flip): 80.67 + Config: configs/dmnet/dmnet_r101-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DMNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_80k_cityscapes/dmnet_r101-d8_512x1024_80k_cityscapes_20201215_031718-fa081cb8.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_80k_cityscapes/dmnet_r101-d8_512x1024_80k_cityscapes-20201215_031718.log.json + Paper: + Title: Dynamic Multi-scale Filters for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dm_head.py#L93 + Framework: PyTorch +- Name: dmnet_r50-d8_4xb2-80k_cityscapes-769x769 + In Collection: DMNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.22 + mIoU(ms+flip): 80.55 + Config: configs/dmnet/dmnet_r50-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DMNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_80k_cityscapes/dmnet_r50-d8_769x769_80k_cityscapes_20201215_034006-6060840e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_80k_cityscapes/dmnet_r50-d8_769x769_80k_cityscapes-20201215_034006.log.json + Paper: + Title: Dynamic Multi-scale Filters for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dm_head.py#L93 + Framework: PyTorch +- Name: dmnet_r101-d8_4xb2-80k_cityscapes-769x769 + In Collection: DMNet + Results: + Task: 
Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.19 + mIoU(ms+flip): 80.65 + Config: configs/dmnet/dmnet_r101-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DMNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_80k_cityscapes/dmnet_r101-d8_769x769_80k_cityscapes_20201215_082810-7f0de59a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_80k_cityscapes/dmnet_r101-d8_769x769_80k_cityscapes-20201215_082810.log.json + Paper: + Title: Dynamic Multi-scale Filters for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dm_head.py#L93 + Framework: PyTorch +- Name: dmnet_r50-d8_4xb4-80k_ade20k-512x512 + In Collection: DMNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.37 + mIoU(ms+flip): 43.62 + Config: configs/dmnet/dmnet_r50-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - DMNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_80k_ade20k/dmnet_r50-d8_512x512_80k_ade20k_20201215_144744-f89092a6.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_80k_ade20k/dmnet_r50-d8_512x512_80k_ade20k-20201215_144744.log.json + Paper: + Title: Dynamic Multi-scale Filters for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dm_head.py#L93 + Framework: PyTorch +- Name: dmnet_r101-d8_4xb4-80k_ade20k-512x512 + In Collection: DMNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.34 + mIoU(ms+flip): 46.13 + Config: configs/dmnet/dmnet_r101-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - DMNet + Training Resources: 4x V100 GPUS + Memory (GB): 13.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_80k_ade20k/dmnet_r101-d8_512x512_80k_ade20k_20201215_104812-bfa45311.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_80k_ade20k/dmnet_r101-d8_512x512_80k_ade20k-20201215_104812.log.json + Paper: + Title: Dynamic Multi-scale Filters for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dm_head.py#L93 + Framework: PyTorch +- Name: dmnet_r50-d8_4xb4-160k_ade20k-512x512 + In Collection: DMNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.15 + mIoU(ms+flip): 44.17 + Config: configs/dmnet/dmnet_r50-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - DMNet + Training Resources: 4x V100 GPUS + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_160k_ade20k/dmnet_r50-d8_512x512_160k_ade20k_20201215_115313-025ab3f9.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_160k_ade20k/dmnet_r50-d8_512x512_160k_ade20k-20201215_115313.log.json + Paper: + Title: Dynamic Multi-scale Filters for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dm_head.py#L93 + Framework: PyTorch +- Name: dmnet_r101-d8_4xb4-160k_ade20k-512x512 + In Collection: DMNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.42 + mIoU(ms+flip): 46.76 + Config: configs/dmnet/dmnet_r101-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - DMNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_160k_ade20k/dmnet_r101-d8_512x512_160k_ade20k_20201215_111145-a0bc02ef.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_160k_ade20k/dmnet_r101-d8_512x512_160k_ade20k-20201215_111145.log.json + Paper: + Title: Dynamic Multi-scale Filters for Semantic Segmentation + URL: https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dm_head.py#L93 + Framework: PyTorch diff --git a/configs/dnlnet/README.md b/configs/dnlnet/README.md index ab24549ed6..6835ffd1ed 100644 --- a/configs/dnlnet/README.md +++ b/configs/dnlnet/README.md @@ -1,6 +1,6 @@ # DNLNet -[Disentangled Non-Local Neural Networks](https://arxiv.org/abs/2006.06668) +> [Disentangled Non-Local Neural Networks](https://arxiv.org/abs/2006.06668) ## Introduction @@ -22,7 +22,31 @@ The non-local block is a popular module for strengthening the context modeling a -## Citation +## Results and models (in progress) + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------: | -------------- | ------ | ----: | ------------- | --------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DNLNet | R-50-D8 | 512x1024 | 40000 | 7.3 | 2.56 | V100 | 78.61 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dnlnet/dnl_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes/dnl_r50-d8_512x1024_40k_cityscapes_20200904_233629-53d4ea93.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes/dnl_r50-d8_512x1024_40k_cityscapes-20200904_233629.log.json) | +| DNLNet | R-101-D8 | 512x1024 | 40000 | 10.9 | 1.96 | V100 | 78.31 | - | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dnlnet/dnl_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_40k_cityscapes/dnl_r101-d8_512x1024_40k_cityscapes_20200904_233629-9928ffef.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_40k_cityscapes/dnl_r101-d8_512x1024_40k_cityscapes-20200904_233629.log.json) | +| DNLNet | R-50-D8 | 769x769 | 40000 | 9.2 | 1.50 | V100 | 78.44 | 80.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dnlnet/dnl_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_40k_cityscapes/dnl_r50-d8_769x769_40k_cityscapes_20200820_232206-0f283785.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_40k_cityscapes/dnl_r50-d8_769x769_40k_cityscapes-20200820_232206.log.json) | +| DNLNet | R-101-D8 | 769x769 | 40000 | 12.6 | 1.02 | V100 | 76.39 | 77.77 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dnlnet/dnl_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_40k_cityscapes/dnl_r101-d8_769x769_40k_cityscapes_20200820_171256-76c596df.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_40k_cityscapes/dnl_r101-d8_769x769_40k_cityscapes-20200820_171256.log.json) | +| DNLNet | R-50-D8 | 512x1024 | 80000 | - | - | V100 | 79.33 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dnlnet/dnl_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes/dnl_r50-d8_512x1024_80k_cityscapes_20200904_233629-58b2f778.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes/dnl_r50-d8_512x1024_80k_cityscapes-20200904_233629.log.json) | +| DNLNet | R-101-D8 | 512x1024 | 80000 | - | - | V100 | 80.41 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dnlnet/dnl_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_80k_cityscapes/dnl_r101-d8_512x1024_80k_cityscapes_20200904_233629-758e2dd4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_80k_cityscapes/dnl_r101-d8_512x1024_80k_cityscapes-20200904_233629.log.json) | +| DNLNet | R-50-D8 | 769x769 | 80000 | - | - | V100 | 79.36 | 80.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dnlnet/dnl_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_80k_cityscapes/dnl_r50-d8_769x769_80k_cityscapes_20200820_011925-366bc4c7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_80k_cityscapes/dnl_r50-d8_769x769_80k_cityscapes-20200820_011925.log.json) | +| DNLNet | R-101-D8 | 769x769 | 80000 | - | - | V100 | 79.41 | 80.68 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dnlnet/dnl_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_80k_cityscapes/dnl_r101-d8_769x769_80k_cityscapes_20200821_051111-95ff84ab.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_80k_cityscapes/dnl_r101-d8_769x769_80k_cityscapes-20200821_051111.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------: | -------------- | ------ | ----: | ------------- | ----------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DNLNet | R-50-D8 | 512x512 | 80000 | 8.8 | 20.66 | V100 | 41.76 | 42.99 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dnlnet/dnl_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_80k_ade20k/dnl_r50-d8_512x512_80k_ade20k_20200826_183354-1cf6e0c1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_80k_ade20k/dnl_r50-d8_512x512_80k_ade20k-20200826_183354.log.json) | +| DNLNet | R-101-D8 | 512x512 | 80000 | 12.8 | 12.54 | V100 | 43.76 | 44.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dnlnet/dnl_r101-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_80k_ade20k/dnl_r101-d8_512x512_80k_ade20k_20200826_183354-d820d6ea.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_80k_ade20k/dnl_r101-d8_512x512_80k_ade20k-20200826_183354.log.json) | +| DNLNet | R-50-D8 | 512x512 | 160000 | - | - | V100 | 41.87 | 43.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dnlnet/dnl_r50-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_160k_ade20k/dnl_r50-d8_512x512_160k_ade20k_20200826_183350-37837798.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_160k_ade20k/dnl_r50-d8_512x512_160k_ade20k-20200826_183350.log.json) | +| DNLNet | R-101-D8 | 512x512 | 160000 | - | - | V100 | 44.25 | 45.78 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dnlnet/dnl_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_160k_ade20k/dnl_r101-d8_512x512_160k_ade20k_20200826_183350-ed522c61.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_160k_ade20k/dnl_r101-d8_512x512_160k_ade20k-20200826_183350.log.json) | + +## Notes This example is to reproduce ["Disentangled Non-Local Neural Networks"](https://arxiv.org/abs/2006.06668) for semantic segmentation. It is still in progress. 
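The `mIoU(ms+flip)` columns in the tables above come from multi-scale and horizontal-flip test-time augmentation rather than single-scale inference. As a rough illustration (not part of this patch), the protocol can be expressed with MMSegmentation 1.x's `SegTTAModel` and `TestTimeAug` wrappers along these lines; the exact scale ratios vary per dataset config, so treat the values below as an assumption:

```python
# Sketch of the ms+flip evaluation protocol behind the "mIoU(ms+flip)"
# columns; mirrors the tta_pipeline style of configs/_base_/datasets/*.py.
# The scale ratios here are assumed, not taken from this patch.
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_model = dict(type='SegTTAModel')  # merges predictions from all views
tta_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='TestTimeAug',
        transforms=[
            # one branch per inference scale
            [dict(type='Resize', scale_factor=r, keep_ratio=True)
             for r in img_ratios],
            # unflipped and horizontally flipped view of every scale
            [dict(type='RandomFlip', prob=0., direction='horizontal'),
             dict(type='RandomFlip', prob=1., direction='horizontal')],
            [dict(type='LoadAnnotations')],
            [dict(type='PackSegInputs')],
        ])
]
```

Running `python tools/test.py ${CONFIG} ${CHECKPOINT} --tta` then reports the augmented score, while the plain `mIoU` column corresponds to single-scale testing.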
@@ -36,27 +60,3 @@ This example is to reproduce ["Disentangled Non-Local Neural Networks"](https:// booktitle={ECCV} } ``` - -## Results and models (in progress) - -### Cityscapes - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| DNLNet | R-50-D8 | 512x1024 | 40000 | 7.3 | 2.56 | 78.61 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/dnlnet/dnl_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes/dnl_r50-d8_512x1024_40k_cityscapes_20200904_233629-53d4ea93.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes/dnl_r50-d8_512x1024_40k_cityscapes-20200904_233629.log.json) | -| DNLNet | R-101-D8 | 512x1024 | 40000 | 10.9 | 1.96 | 78.31 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/dnlnet/dnl_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_40k_cityscapes/dnl_r101-d8_512x1024_40k_cityscapes_20200904_233629-9928ffef.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_40k_cityscapes/dnl_r101-d8_512x1024_40k_cityscapes-20200904_233629.log.json) | -| DNLNet | R-50-D8 | 769x769 | 40000 | 9.2 | 1.50 | 78.44 | 80.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/dnlnet/dnl_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_40k_cityscapes/dnl_r50-d8_769x769_40k_cityscapes_20200820_232206-0f283785.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_40k_cityscapes/dnl_r50-d8_769x769_40k_cityscapes-20200820_232206.log.json) | -| DNLNet | R-101-D8 | 769x769 | 40000 | 12.6 | 1.02 | 76.39 | 77.77 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/dnlnet/dnl_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_40k_cityscapes/dnl_r101-d8_769x769_40k_cityscapes_20200820_171256-76c596df.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_40k_cityscapes/dnl_r101-d8_769x769_40k_cityscapes-20200820_171256.log.json) | -| DNLNet | R-50-D8 | 512x1024 | 80000 | - | - | 79.33 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/dnlnet/dnl_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes/dnl_r50-d8_512x1024_80k_cityscapes_20200904_233629-58b2f778.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes/dnl_r50-d8_512x1024_80k_cityscapes-20200904_233629.log.json) | -| DNLNet | R-101-D8 | 512x1024 | 80000 | - | - | 80.41 | - | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/dnlnet/dnl_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_80k_cityscapes/dnl_r101-d8_512x1024_80k_cityscapes_20200904_233629-758e2dd4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_80k_cityscapes/dnl_r101-d8_512x1024_80k_cityscapes-20200904_233629.log.json) | -| DNLNet | R-50-D8 | 769x769 | 80000 | - | - | 79.36 | 80.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/dnlnet/dnl_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_80k_cityscapes/dnl_r50-d8_769x769_80k_cityscapes_20200820_011925-366bc4c7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_80k_cityscapes/dnl_r50-d8_769x769_80k_cityscapes-20200820_011925.log.json) | -| DNLNet | R-101-D8 | 769x769 | 80000 | - | - | 79.41 | 80.68 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/dnlnet/dnl_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_80k_cityscapes/dnl_r101-d8_769x769_80k_cityscapes_20200821_051111-95ff84ab.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_80k_cityscapes/dnl_r101-d8_769x769_80k_cityscapes-20200821_051111.log.json) | - -### ADE20K - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | -------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| DNLNet | R-50-D8 | 512x512 | 80000 | 8.8 | 20.66 | 41.76 | 42.99 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/dnlnet/dnl_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_80k_ade20k/dnl_r50-d8_512x512_80k_ade20k_20200826_183354-1cf6e0c1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_80k_ade20k/dnl_r50-d8_512x512_80k_ade20k-20200826_183354.log.json) | -| DNLNet | R-101-D8 | 512x512 | 80000 | 12.8 | 12.54 | 43.76 | 44.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/dnlnet/dnl_r101-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_80k_ade20k/dnl_r101-d8_512x512_80k_ade20k_20200826_183354-d820d6ea.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_80k_ade20k/dnl_r101-d8_512x512_80k_ade20k-20200826_183354.log.json) | -| DNLNet | R-50-D8 | 512x512 | 160000 | - | - | 41.87 | 43.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/dnlnet/dnl_r50-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_160k_ade20k/dnl_r50-d8_512x512_160k_ade20k_20200826_183350-37837798.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_160k_ade20k/dnl_r50-d8_512x512_160k_ade20k-20200826_183350.log.json) | -| DNLNet | R-101-D8 | 512x512 | 160000 | - | - | 44.25 | 45.78 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/dnlnet/dnl_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_160k_ade20k/dnl_r101-d8_512x512_160k_ade20k_20200826_183350-ed522c61.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_160k_ade20k/dnl_r101-d8_512x512_160k_ade20k-20200826_183350.log.json) | diff --git a/configs/dnlnet/dnlnet.yml b/configs/dnlnet/dnlnet.yml deleted file mode 100644 index ae65dbbaca..0000000000 --- a/configs/dnlnet/dnlnet.yml +++ /dev/null @@ -1,228 +0,0 @@ -Collections: -- Name: DNLNet - Metadata: - Training Data: - - Cityscapes - - ADE20K - Paper: - URL: https://arxiv.org/abs/2006.06668 - Title: Disentangled Non-Local Neural Networks - README: configs/dnlnet/README.md - Code: - URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dnl_head.py#L88 - Version: v0.17.0 - Converted From: - Code: https://github.com/yinmh17/DNL-Semantic-Segmentation -Models: -- Name: dnl_r50-d8_4xb2-40k_cityscapes-512x1024 - In Collection: DNLNet - Metadata: - backbone: R-50-D8 - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 390.62 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 7.3 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.61 - Config: configs/dnlnet/dnl_r50-d8_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes/dnl_r50-d8_512x1024_40k_cityscapes_20200904_233629-53d4ea93.pth -- Name: dnl_r101-d8_4xb2-40k_cityscapes-512x1024 - In Collection: DNLNet - Metadata: - backbone: R-101-D8 - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 510.2 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 10.9 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.31 - Config: configs/dnlnet/dnl_r101-d8_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_40k_cityscapes/dnl_r101-d8_512x1024_40k_cityscapes_20200904_233629-9928ffef.pth -- Name: dnl_r50-d8_4xb2-40k_cityscapes-769x769 - In Collection: DNLNet - Metadata: - backbone: R-50-D8 - crop size: (769,769) - lr schd: 40000 - inference time (ms/im): - - value: 666.67 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 9.2 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.44 - mIoU(ms+flip): 80.27 - Config: configs/dnlnet/dnl_r50-d8_4xb2-40k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_40k_cityscapes/dnl_r50-d8_769x769_40k_cityscapes_20200820_232206-0f283785.pth -- Name: dnl_r101-d8_4xb2-40k_cityscapes-769x769 - In Collection: DNLNet - Metadata: - backbone: R-101-D8 - crop size: (769,769) - lr schd: 40000 - inference time (ms/im): - - value: 980.39 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 12.6 - Results: - - Task: Semantic 
Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 76.39 - mIoU(ms+flip): 77.77 - Config: configs/dnlnet/dnl_r101-d8_4xb2-40k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_40k_cityscapes/dnl_r101-d8_769x769_40k_cityscapes_20200820_171256-76c596df.pth -- Name: dnl_r50-d8_4xb2-80k_cityscapes-512x1024 - In Collection: DNLNet - Metadata: - backbone: R-50-D8 - crop size: (512,1024) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.33 - Config: configs/dnlnet/dnl_r50-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes/dnl_r50-d8_512x1024_80k_cityscapes_20200904_233629-58b2f778.pth -- Name: dnl_r101-d8_4xb2-80k_cityscapes-512x1024 - In Collection: DNLNet - Metadata: - backbone: R-101-D8 - crop size: (512,1024) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 80.41 - Config: configs/dnlnet/dnl_r101-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_80k_cityscapes/dnl_r101-d8_512x1024_80k_cityscapes_20200904_233629-758e2dd4.pth -- Name: dnl_r50-d8_4xb2-80k_cityscapes-769x769 - In Collection: DNLNet - Metadata: - backbone: R-50-D8 - crop size: (769,769) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.36 - mIoU(ms+flip): 80.7 - Config: configs/dnlnet/dnl_r50-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_80k_cityscapes/dnl_r50-d8_769x769_80k_cityscapes_20200820_011925-366bc4c7.pth -- Name: dnl_r101-d8_4xb2-80k_cityscapes-769x769 - In Collection: DNLNet - Metadata: - backbone: R-101-D8 - crop size: (769,769) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.41 - mIoU(ms+flip): 80.68 - Config: configs/dnlnet/dnl_r101-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_80k_cityscapes/dnl_r101-d8_769x769_80k_cityscapes_20200821_051111-95ff84ab.pth -- Name: dnl_r50-d8_4xb4-80k_ade20k-512x512 - In Collection: DNLNet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 48.4 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 8.8 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 41.76 - mIoU(ms+flip): 42.99 - Config: configs/dnlnet/dnl_r50-d8_4xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_80k_ade20k/dnl_r50-d8_512x512_80k_ade20k_20200826_183354-1cf6e0c1.pth -- Name: dnl_r101-d8_4xb4-80k_ade20k-512x512 - In Collection: DNLNet - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 79.74 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 12.8 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 43.76 - mIoU(ms+flip): 44.91 - Config: configs/dnlnet/dnl_r101-d8_4xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_80k_ade20k/dnl_r101-d8_512x512_80k_ade20k_20200826_183354-d820d6ea.pth -- Name: 
dnl_r50-d8_4xb4-160k_ade20k-512x512 - In Collection: DNLNet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 41.87 - mIoU(ms+flip): 43.01 - Config: configs/dnlnet/dnl_r50-d8_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_160k_ade20k/dnl_r50-d8_512x512_160k_ade20k_20200826_183350-37837798.pth -- Name: dnl_r101-d8_4xb4-160k_ade20k-512x512 - In Collection: DNLNet - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 44.25 - mIoU(ms+flip): 45.78 - Config: configs/dnlnet/dnl_r101-d8_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_160k_ade20k/dnl_r101-d8_512x512_160k_ade20k_20200826_183350-ed522c61.pth diff --git a/configs/dnlnet/metafile.yaml b/configs/dnlnet/metafile.yaml new file mode 100644 index 0000000000..22e48d3dc5 --- /dev/null +++ b/configs/dnlnet/metafile.yaml @@ -0,0 +1,292 @@ +Collections: +- Name: DNLNet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + Paper: + Title: Disentangled Non-Local Neural Networks + URL: https://arxiv.org/abs/2006.06668 + README: configs/dnlnet/README.md + Frameworks: + - PyTorch +Models: +- Name: dnl_r50-d8_4xb2-40k_cityscapes-512x1024 + In Collection: DNLNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.61 + Config: configs/dnlnet/dnl_r50-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DNLNet + Training Resources: 4x V100 GPUS + Memory (GB): 7.3 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes/dnl_r50-d8_512x1024_40k_cityscapes_20200904_233629-53d4ea93.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes/dnl_r50-d8_512x1024_40k_cityscapes-20200904_233629.log.json + Paper: + Title: Disentangled Non-Local Neural Networks + URL: https://arxiv.org/abs/2006.06668 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dnl_head.py#L88 + Framework: PyTorch +- Name: dnl_r101-d8_4xb2-40k_cityscapes-512x1024 + In Collection: DNLNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.31 + Config: configs/dnlnet/dnl_r101-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DNLNet + Training Resources: 4x V100 GPUS + Memory (GB): 10.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_40k_cityscapes/dnl_r101-d8_512x1024_40k_cityscapes_20200904_233629-9928ffef.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_40k_cityscapes/dnl_r101-d8_512x1024_40k_cityscapes-20200904_233629.log.json + Paper: + Title: Disentangled Non-Local Neural Networks + URL: https://arxiv.org/abs/2006.06668 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dnl_head.py#L88 + Framework: PyTorch +- Name: dnl_r50-d8_4xb2-40k_cityscapes-769x769 + In Collection: DNLNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.44 + mIoU(ms+flip): 80.27 + Config: 
configs/dnlnet/dnl_r50-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DNLNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_40k_cityscapes/dnl_r50-d8_769x769_40k_cityscapes_20200820_232206-0f283785.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_40k_cityscapes/dnl_r50-d8_769x769_40k_cityscapes-20200820_232206.log.json + Paper: + Title: Disentangled Non-Local Neural Networks + URL: https://arxiv.org/abs/2006.06668 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dnl_head.py#L88 + Framework: PyTorch +- Name: dnl_r101-d8_4xb2-40k_cityscapes-769x769 + In Collection: DNLNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.39 + mIoU(ms+flip): 77.77 + Config: configs/dnlnet/dnl_r101-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DNLNet + Training Resources: 4x V100 GPUS + Memory (GB): 12.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_40k_cityscapes/dnl_r101-d8_769x769_40k_cityscapes_20200820_171256-76c596df.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_40k_cityscapes/dnl_r101-d8_769x769_40k_cityscapes-20200820_171256.log.json + Paper: + Title: Disentangled Non-Local Neural Networks + URL: https://arxiv.org/abs/2006.06668 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dnl_head.py#L88 + Framework: PyTorch +- Name: dnl_r50-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DNLNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.33 + Config: configs/dnlnet/dnl_r50-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DNLNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes/dnl_r50-d8_512x1024_80k_cityscapes_20200904_233629-58b2f778.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes/dnl_r50-d8_512x1024_80k_cityscapes-20200904_233629.log.json + Paper: + Title: Disentangled Non-Local Neural Networks + URL: https://arxiv.org/abs/2006.06668 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dnl_head.py#L88 + Framework: PyTorch +- Name: dnl_r101-d8_4xb2-80k_cityscapes-512x1024 + In Collection: DNLNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.41 + Config: configs/dnlnet/dnl_r101-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DNLNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_80k_cityscapes/dnl_r101-d8_512x1024_80k_cityscapes_20200904_233629-758e2dd4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_80k_cityscapes/dnl_r101-d8_512x1024_80k_cityscapes-20200904_233629.log.json + Paper: + Title: Disentangled Non-Local Neural Networks + URL: https://arxiv.org/abs/2006.06668 + Code: 
https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dnl_head.py#L88 + Framework: PyTorch +- Name: dnl_r50-d8_4xb2-80k_cityscapes-769x769 + In Collection: DNLNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.36 + mIoU(ms+flip): 80.7 + Config: configs/dnlnet/dnl_r50-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - DNLNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_80k_cityscapes/dnl_r50-d8_769x769_80k_cityscapes_20200820_011925-366bc4c7.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_80k_cityscapes/dnl_r50-d8_769x769_80k_cityscapes-20200820_011925.log.json + Paper: + Title: Disentangled Non-Local Neural Networks + URL: https://arxiv.org/abs/2006.06668 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dnl_head.py#L88 + Framework: PyTorch +- Name: dnl_r101-d8_4xb2-80k_cityscapes-769x769 + In Collection: DNLNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.41 + mIoU(ms+flip): 80.68 + Config: configs/dnlnet/dnl_r101-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - DNLNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_80k_cityscapes/dnl_r101-d8_769x769_80k_cityscapes_20200821_051111-95ff84ab.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_80k_cityscapes/dnl_r101-d8_769x769_80k_cityscapes-20200821_051111.log.json + Paper: + Title: Disentangled Non-Local Neural Networks + URL: https://arxiv.org/abs/2006.06668 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dnl_head.py#L88 + Framework: PyTorch +- Name: dnl_r50-d8_4xb4-80k_ade20k-512x512 + In Collection: DNLNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.76 + mIoU(ms+flip): 42.99 + Config: configs/dnlnet/dnl_r50-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - DNLNet + Training Resources: 4x V100 GPUS + Memory (GB): 8.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_80k_ade20k/dnl_r50-d8_512x512_80k_ade20k_20200826_183354-1cf6e0c1.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_80k_ade20k/dnl_r50-d8_512x512_80k_ade20k-20200826_183354.log.json + Paper: + Title: Disentangled Non-Local Neural Networks + URL: https://arxiv.org/abs/2006.06668 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dnl_head.py#L88 + Framework: PyTorch +- Name: dnl_r101-d8_4xb4-80k_ade20k-512x512 + In Collection: DNLNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.76 + mIoU(ms+flip): 44.91 + Config: configs/dnlnet/dnl_r101-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - DNLNet + Training Resources: 4x V100 GPUS + Memory (GB): 12.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_80k_ade20k/dnl_r101-d8_512x512_80k_ade20k_20200826_183354-d820d6ea.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_80k_ade20k/dnl_r101-d8_512x512_80k_ade20k-20200826_183354.log.json + Paper: + Title: Disentangled Non-Local Neural Networks + URL: https://arxiv.org/abs/2006.06668 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dnl_head.py#L88 + Framework: PyTorch +- Name: dnl_r50-d8_4xb4-160k_ade20k-512x512 + In Collection: DNLNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.87 + mIoU(ms+flip): 43.01 + Config: configs/dnlnet/dnl_r50-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - DNLNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_160k_ade20k/dnl_r50-d8_512x512_160k_ade20k_20200826_183350-37837798.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_160k_ade20k/dnl_r50-d8_512x512_160k_ade20k-20200826_183350.log.json + Paper: + Title: Disentangled Non-Local Neural Networks + URL: https://arxiv.org/abs/2006.06668 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dnl_head.py#L88 + Framework: PyTorch +- Name: dnl_r101-d8_4xb4-160k_ade20k-512x512 + In Collection: DNLNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 44.25 + mIoU(ms+flip): 45.78 + Config: configs/dnlnet/dnl_r101-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - DNLNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_160k_ade20k/dnl_r101-d8_512x512_160k_ade20k_20200826_183350-ed522c61.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_160k_ade20k/dnl_r101-d8_512x512_160k_ade20k-20200826_183350.log.json + Paper: + Title: Disentangled Non-Local Neural Networks + URL: https://arxiv.org/abs/2006.06668 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dnl_head.py#L88 + Framework: PyTorch diff --git a/configs/dpt/README.md b/configs/dpt/README.md index 41d73ea57a..b3a5573a65 100644 --- a/configs/dpt/README.md +++ b/configs/dpt/README.md @@ -1,6 +1,6 @@ # DPT -[Vision Transformer for Dense Prediction](https://arxiv.org/abs/2103.13413) +> [Vision Transformer for Dense Prediction](https://arxiv.org/abs/2103.13413) ## Introduction @@ -22,24 +22,6 @@ We introduce dense vision transformers, an architecture that leverages vision tr -## Citation - -```bibtex -@article{dosoViTskiy2020, - title={An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale}, - author={DosoViTskiy, Alexey and Beyer, Lucas and Kolesnikov, Alexander and Weissenborn, Dirk and Zhai, Xiaohua and Unterthiner, Thomas and Dehghani, Mostafa and Minderer, Matthias and Heigold, Georg and Gelly, Sylvain and Uszkoreit, Jakob and Houlsby, Neil}, - journal={arXiv preprint arXiv:2010.11929}, - year={2020} -} - -@article{Ranftl2021, - author = {Ren\'{e} Ranftl and Alexey Bochkovskiy and Vladlen Koltun}, - title = {Vision Transformers for Dense Prediction}, - journal = {ArXiv preprint}, - year = {2021}, -} -``` - ## Usage To use other repositories' pre-trained models, it is necessary to convert keys. 
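Key conversion here means remapping checkpoint parameter names to the layout MMSegmentation expects before loading; the hunk below notes that the repository's converter script reads from `PRETRAIN_PATH` and writes to `STORE_PATH`. A minimal sketch of the idea, assuming a timm-style ViT checkpoint; the specific rename rules are hypothetical stand-ins, not the script's actual mapping:

```python
# Illustrative checkpoint key conversion; the rename rules below are
# hypothetical examples, not the real mapping used by the repo's converter.
import argparse
from collections import OrderedDict

import torch


def convert_keys(state_dict):
    new_state = OrderedDict()
    for key, value in state_dict.items():
        # e.g. timm 'blocks.0.norm1.weight' -> mmseg 'layers.0.ln1.weight'
        new_key = key.replace('blocks.', 'layers.').replace('norm', 'ln')
        new_state[new_key] = value
    return new_state


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('pretrain_path')  # PRETRAIN_PATH in the README
    parser.add_argument('store_path')  # STORE_PATH in the README
    args = parser.parse_args()
    checkpoint = torch.load(args.pretrain_path, map_location='cpu')
    # some checkpoints nest the weights under 'state_dict' or 'model'
    state_dict = checkpoint.get('state_dict',
                                checkpoint.get('model', checkpoint))
    torch.save(convert_keys(state_dict), args.store_path)
```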
@@ -62,6 +44,24 @@ This script converts the model from `PRETRAIN_PATH` and stores the converted model in ### ADE20K -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| DPT | ViT-B | 512x512 | 160000 | 8.09 | 10.41 | 46.97 | 48.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/dpt/dpt_vit-b16_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dpt/dpt_vit-b16_512x512_160k_ade20k/dpt_vit-b16_512x512_160k_ade20k-db31cf52.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dpt/dpt_vit-b16_512x512_160k_ade20k/dpt_vit-b16_512x512_160k_ade20k-20210809_172025.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| DPT | ViT-B | 512x512 | 160000 | 8.09 | 10.41 | V100 | 46.97 | 48.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dpt/dpt_vit-b16_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dpt/dpt_vit-b16_512x512_160k_ade20k/dpt_vit-b16_512x512_160k_ade20k-db31cf52.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dpt/dpt_vit-b16_512x512_160k_ade20k/dpt_vit-b16_512x512_160k_ade20k-20210809_172025.log.json) | + +## Citation + +```bibtex +@article{dosovitskiy2020, + title={An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale}, + author={Dosovitskiy, Alexey and Beyer, Lucas and Kolesnikov, Alexander and Weissenborn, Dirk and Zhai, Xiaohua and Unterthiner, Thomas and Dehghani, Mostafa and Minderer, Matthias and Heigold, Georg and Gelly, Sylvain and Uszkoreit, Jakob and Houlsby, Neil}, + journal={arXiv preprint arXiv:2010.11929}, + year={2020} +} + +@article{Ranftl2021, + author = {Ren\'{e} Ranftl and Alexey Bochkovskiy and Vladlen Koltun}, + title = {Vision Transformers for Dense Prediction}, + journal = {ArXiv preprint}, + year = {2021}, +} +``` diff --git a/configs/dpt/dpt.yml b/configs/dpt/dpt.yml deleted file mode 100644 index 32324d3459..0000000000 --- a/configs/dpt/dpt.yml +++ /dev/null @@ -1,37 +0,0 @@ -Collections: -- Name: DPT - Metadata: - Training Data: - - ADE20K - Paper: - URL: https://arxiv.org/abs/2103.13413 - Title: Vision Transformer for Dense Prediction - README: configs/dpt/README.md - Code: - URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dpt_head.py#L215 - Version: v0.17.0 - Converted From: - 
Code: https://github.com/isl-org/DPT -Models: -- Name: dpt_vit-b16_8xb2-160k_ade20k-512x512 - In Collection: DPT - Metadata: - backbone: ViT-B - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 96.06 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 8.09 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 46.97 - mIoU(ms+flip): 48.34 - Config: configs/dpt/dpt_vit-b16_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dpt/dpt_vit-b16_512x512_160k_ade20k/dpt_vit-b16_512x512_160k_ade20k-db31cf52.pth diff --git a/configs/dpt/metafile.yaml b/configs/dpt/metafile.yaml new file mode 100644 index 0000000000..b721e041b6 --- /dev/null +++ b/configs/dpt/metafile.yaml @@ -0,0 +1,37 @@ +Collections: +- Name: DPT + License: Apache License 2.0 + Metadata: + Training Data: + - ADE20K + Paper: + Title: Vision Transformers for Dense Prediction + URL: https://arxiv.org/abs/2103.13413 + README: configs/dpt/README.md + Frameworks: + - PyTorch +Models: +- Name: dpt_vit-b16_8xb2-160k_ade20k-512x512 + In Collection: DPT + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 46.97 + mIoU(ms+flip): 48.34 + Config: configs/dpt/dpt_vit-b16_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - ViT-B + - DPT + Training Resources: 8x V100 GPUS + Memory (GB): 8.09 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dpt/dpt_vit-b16_512x512_160k_ade20k/dpt_vit-b16_512x512_160k_ade20k-db31cf52.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/dpt/dpt_vit-b16_512x512_160k_ade20k/dpt_vit-b16_512x512_160k_ade20k-20210809_172025.log.json + Paper: + Title: Vision Transformers for Dense Prediction + URL: https://arxiv.org/abs/2103.13413 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dpt_head.py#L215 + Framework: PyTorch diff --git a/configs/emanet/README.md b/configs/emanet/README.md index 5a9bfc326a..8ffaf471ca 100644 --- a/configs/emanet/README.md +++ b/configs/emanet/README.md @@ -1,6 +1,6 @@ # EMANet -[Expectation-Maximization Attention Networks for Semantic Segmentation](https://arxiv.org/abs/1907.13426) +> [Expectation-Maximization Attention Networks for Semantic Segmentation](https://arxiv.org/abs/1907.13426) ## Introduction @@ -22,6 +22,17 @@ Self-attention mechanism has been widely used for various tasks. 
It is designed +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------: | -------------- | ------ | ----: | ------------- | -------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| EMANet | R-50-D8 | 512x1024 | 80000 | 5.4 | 4.58 | V100 | 77.59 | 79.44 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/emanet/emanet_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_512x1024_80k_cityscapes/emanet_r50-d8_512x1024_80k_cityscapes_20200901_100301-c43fcef1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_512x1024_80k_cityscapes/emanet_r50-d8_512x1024_80k_cityscapes-20200901_100301.log.json) | +| EMANet | R-101-D8 | 512x1024 | 80000 | 6.2 | 2.87 | V100 | 79.10 | 81.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/emanet/emanet_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_512x1024_80k_cityscapes/emanet_r101-d8_512x1024_80k_cityscapes_20200901_100301-2d970745.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_512x1024_80k_cityscapes/emanet_r101-d8_512x1024_80k_cityscapes-20200901_100301.log.json) | +| EMANet | R-50-D8 | 769x769 | 80000 | 8.9 | 1.97 | V100 | 79.33 | 80.49 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/emanet/emanet_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_769x769_80k_cityscapes/emanet_r50-d8_769x769_80k_cityscapes_20200901_100301-16f8de52.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_769x769_80k_cityscapes/emanet_r50-d8_769x769_80k_cityscapes-20200901_100301.log.json) | +| EMANet | R-101-D8 | 769x769 | 80000 | 10.1 | 1.22 | V100 | 79.62 | 81.00 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/emanet/emanet_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_769x769_80k_cityscapes/emanet_r101-d8_769x769_80k_cityscapes_20200901_100301-47a324ce.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_769x769_80k_cityscapes/emanet_r101-d8_769x769_80k_cityscapes-20200901_100301.log.json) | + ## Citation ```bibtex @@ -33,14 +44,3 @@ Self-attention mechanism has been widely used for various tasks. 
It is designed year={2019} } ``` - -## Results and models - -### Cityscapes - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | -------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| EMANet | R-50-D8 | 512x1024 | 80000 | 5.4 | 4.58 | 77.59 | 79.44 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/emanet/eemanet_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_512x1024_80k_cityscapes/emanet_r50-d8_512x1024_80k_cityscapes_20200901_100301-c43fcef1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_512x1024_80k_cityscapes/emanet_r50-d8_512x1024_80k_cityscapes-20200901_100301.log.json) | -| EMANet | R-101-D8 | 512x1024 | 80000 | 6.2 | 2.87 | 79.10 | 81.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/emanet/emanet_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_512x1024_80k_cityscapes/emanet_r101-d8_512x1024_80k_cityscapes_20200901_100301-2d970745.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_512x1024_80k_cityscapes/emanet_r101-d8_512x1024_80k_cityscapes-20200901_100301.log.json) | -| EMANet | R-50-D8 | 769x769 | 80000 | 8.9 | 1.97 | 79.33 | 80.49 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/emanet/emanet_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_769x769_80k_cityscapes/emanet_r50-d8_769x769_80k_cityscapes_20200901_100301-16f8de52.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_769x769_80k_cityscapes/emanet_r50-d8_769x769_80k_cityscapes-20200901_100301.log.json) | -| EMANet | R-101-D8 | 769x769 | 80000 | 10.1 | 1.22 | 79.62 | 81.00 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/emanet/emanet_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_769x769_80k_cityscapes/emanet_r101-d8_769x769_80k_cityscapes_20200901_100301-47a324ce.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_769x769_80k_cityscapes/emanet_r101-d8_769x769_80k_cityscapes-20200901_100301.log.json) | diff --git a/configs/emanet/emanet.yml b/configs/emanet/emanet.yml deleted file mode 100644 index ac194f2a0f..0000000000 --- a/configs/emanet/emanet.yml +++ /dev/null @@ -1,103 +0,0 @@ -Collections: -- Name: EMANet - Metadata: - Training Data: - - Cityscapes - Paper: - URL: https://arxiv.org/abs/1907.13426 - Title: Expectation-Maximization Attention Networks for Semantic Segmentation - README: configs/emanet/README.md - Code: - URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ema_head.py#L80 - Version: v0.17.0 - Converted From: - Code: 
https://xialipku.github.io/EMANet -Models: -- Name: emanet_r50-d8_4xb2-80k_cityscapes-512x1024 - In Collection: EMANet - Metadata: - backbone: R-50-D8 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 218.34 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 5.4 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 77.59 - mIoU(ms+flip): 79.44 - Config: configs/emanet/emanet_r50-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_512x1024_80k_cityscapes/emanet_r50-d8_512x1024_80k_cityscapes_20200901_100301-c43fcef1.pth -- Name: emanet_r101-d8_4xb2-80k_cityscapes-512x1024 - In Collection: EMANet - Metadata: - backbone: R-101-D8 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 348.43 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 6.2 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.1 - mIoU(ms+flip): 81.21 - Config: configs/emanet/emanet_r101-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_512x1024_80k_cityscapes/emanet_r101-d8_512x1024_80k_cityscapes_20200901_100301-2d970745.pth -- Name: emanet_r50-d8_4xb2-80k_cityscapes-769x769 - In Collection: EMANet - Metadata: - backbone: R-50-D8 - crop size: (769,769) - lr schd: 80000 - inference time (ms/im): - - value: 507.61 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 8.9 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.33 - mIoU(ms+flip): 80.49 - Config: configs/emanet/emanet_r50-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_769x769_80k_cityscapes/emanet_r50-d8_769x769_80k_cityscapes_20200901_100301-16f8de52.pth -- Name: emanet_r101-d8_4xb2-80k_cityscapes-769x769 - In Collection: EMANet - Metadata: - backbone: R-101-D8 - crop size: (769,769) - lr schd: 80000 - inference time (ms/im): - - value: 819.67 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 10.1 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.62 - mIoU(ms+flip): 81.0 - Config: configs/emanet/emanet_r101-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_769x769_80k_cityscapes/emanet_r101-d8_769x769_80k_cityscapes_20200901_100301-47a324ce.pth diff --git a/configs/emanet/metafile.yaml b/configs/emanet/metafile.yaml new file mode 100644 index 0000000000..b2a6b09ed7 --- /dev/null +++ b/configs/emanet/metafile.yaml @@ -0,0 +1,109 @@ +Collections: +- Name: EMANet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + Paper: + Title: Expectation-Maximization Attention Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1907.13426 + README: configs/emanet/README.md + Frameworks: + - PyTorch +Models: +- Name: emanet_r50-d8_4xb2-80k_cityscapes-512x1024 + In Collection: EMANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.59 + mIoU(ms+flip): 79.44 + Config: configs/emanet/emanet_r50-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + 
- EMANet + Training Resources: 4x V100 GPUS + Memory (GB): 5.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_512x1024_80k_cityscapes/emanet_r50-d8_512x1024_80k_cityscapes_20200901_100301-c43fcef1.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_512x1024_80k_cityscapes/emanet_r50-d8_512x1024_80k_cityscapes-20200901_100301.log.json + Paper: + Title: Expectation-Maximization Attention Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1907.13426 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ema_head.py#L80 + Framework: PyTorch +- Name: emanet_r101-d8_4xb2-80k_cityscapes-512x1024 + In Collection: EMANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.1 + mIoU(ms+flip): 81.21 + Config: configs/emanet/emanet_r101-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - EMANet + Training Resources: 4x V100 GPUS + Memory (GB): 6.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_512x1024_80k_cityscapes/emanet_r101-d8_512x1024_80k_cityscapes_20200901_100301-2d970745.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_512x1024_80k_cityscapes/emanet_r101-d8_512x1024_80k_cityscapes-20200901_100301.log.json + Paper: + Title: Expectation-Maximization Attention Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1907.13426 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ema_head.py#L80 + Framework: PyTorch +- Name: emanet_r50-d8_4xb2-80k_cityscapes-769x769 + In Collection: EMANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.33 + mIoU(ms+flip): 80.49 + Config: configs/emanet/emanet_r50-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - EMANet + Training Resources: 4x V100 GPUS + Memory (GB): 8.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_769x769_80k_cityscapes/emanet_r50-d8_769x769_80k_cityscapes_20200901_100301-16f8de52.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_769x769_80k_cityscapes/emanet_r50-d8_769x769_80k_cityscapes-20200901_100301.log.json + Paper: + Title: Expectation-Maximization Attention Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1907.13426 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ema_head.py#L80 + Framework: PyTorch +- Name: emanet_r101-d8_4xb2-80k_cityscapes-769x769 + In Collection: EMANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.62 + mIoU(ms+flip): 81.0 + Config: configs/emanet/emanet_r101-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - EMANet + Training Resources: 4x V100 GPUS + Memory (GB): 10.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_769x769_80k_cityscapes/emanet_r101-d8_769x769_80k_cityscapes_20200901_100301-47a324ce.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_769x769_80k_cityscapes/emanet_r101-d8_769x769_80k_cityscapes-20200901_100301.log.json + Paper: + Title: Expectation-Maximization Attention Networks for Semantic Segmentation + URL: 
https://arxiv.org/abs/1907.13426 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ema_head.py#L80 + Framework: PyTorch diff --git a/configs/encnet/README.md b/configs/encnet/README.md index 7be0c6d926..ff09bc32f8 100644 --- a/configs/encnet/README.md +++ b/configs/encnet/README.md @@ -1,6 +1,6 @@ # EncNet -[Context Encoding for Semantic Segmentation](https://arxiv.org/abs/1803.08904) +> [Context Encoding for Semantic Segmentation](https://arxiv.org/abs/1803.08904) ## Introduction @@ -22,6 +22,30 @@ Recent work has made significant progress in improving spatial resolution for pi +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| EncNet | R-50-D8 | 512x1024 | 40000 | 8.6 | 4.58 | V100 | 75.67 | 77.08 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/encnet/encnet_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_40k_cityscapes/encnet_r50-d8_512x1024_40k_cityscapes_20200621_220958-68638a47.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_40k_cityscapes/encnet_r50-d8_512x1024_40k_cityscapes-20200621_220958.log.json) | +| EncNet | R-101-D8 | 512x1024 | 40000 | 12.1 | 2.66 | V100 | 75.81 | 77.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/encnet/encnet_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_40k_cityscapes/encnet_r101-d8_512x1024_40k_cityscapes_20200621_220933-35e0a3e8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_40k_cityscapes/encnet_r101-d8_512x1024_40k_cityscapes-20200621_220933.log.json) | +| EncNet | R-50-D8 | 769x769 | 40000 | 9.8 | 1.82 | V100 | 76.24 | 77.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/encnet/encnet_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_40k_cityscapes/encnet_r50-d8_769x769_40k_cityscapes_20200621_220958-3bcd2884.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_40k_cityscapes/encnet_r50-d8_769x769_40k_cityscapes-20200621_220958.log.json) | +| EncNet | R-101-D8 | 769x769 | 40000 | 13.7 | 1.26 | V100 | 74.25 | 76.25 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/encnet/encnet_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_40k_cityscapes/encnet_r101-d8_769x769_40k_cityscapes_20200621_220933-2fafed55.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_40k_cityscapes/encnet_r101-d8_769x769_40k_cityscapes-20200621_220933.log.json) | +| EncNet | R-50-D8 | 512x1024 | 80000 | - | - | V100 | 77.94 | 79.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/encnet/encnet_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_80k_cityscapes/encnet_r50-d8_512x1024_80k_cityscapes_20200622_003554-fc5c5624.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_80k_cityscapes/encnet_r50-d8_512x1024_80k_cityscapes-20200622_003554.log.json) | +| EncNet | R-101-D8 | 512x1024 | 80000 | - | - | V100 | 78.55 | 79.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/encnet/encnet_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_80k_cityscapes/encnet_r101-d8_512x1024_80k_cityscapes_20200622_003555-1de64bec.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_80k_cityscapes/encnet_r101-d8_512x1024_80k_cityscapes-20200622_003555.log.json) | +| EncNet | R-50-D8 | 769x769 | 80000 | - | - | V100 | 77.44 | 78.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/encnet/encnet_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_80k_cityscapes/encnet_r50-d8_769x769_80k_cityscapes_20200622_003554-55096dcb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_80k_cityscapes/encnet_r50-d8_769x769_80k_cityscapes-20200622_003554.log.json) | +| EncNet | R-101-D8 | 769x769 | 80000 | - | - | V100 | 76.10 | 76.97 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/encnet/encnet_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_80k_cityscapes/encnet_r101-d8_769x769_80k_cityscapes_20200622_003555-470ef79d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_80k_cityscapes/encnet_r101-d8_769x769_80k_cityscapes-20200622_003555.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| EncNet | R-50-D8 | 512x512 | 80000 | 10.1 | 22.81 | V100 | 39.53 | 41.17 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/encnet/encnet_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_80k_ade20k/encnet_r50-d8_512x512_80k_ade20k_20200622_042412-44b46b04.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_80k_ade20k/encnet_r50-d8_512x512_80k_ade20k-20200622_042412.log.json) | +| EncNet | R-101-D8 | 512x512 | 80000 | 13.6 | 14.87 | V100 | 42.11 | 43.61 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/encnet/encnet_r101-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_80k_ade20k/encnet_r101-d8_512x512_80k_ade20k_20200622_101128-dd35e237.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_80k_ade20k/encnet_r101-d8_512x512_80k_ade20k-20200622_101128.log.json) | +| EncNet | R-50-D8 | 512x512 | 160000 | - | - | V100 | 40.10 | 41.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/encnet/encnet_r50-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_160k_ade20k/encnet_r50-d8_512x512_160k_ade20k_20200622_101059-b2db95e0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_160k_ade20k/encnet_r50-d8_512x512_160k_ade20k-20200622_101059.log.json) | +| EncNet | R-101-D8 | 512x512 | 160000 | - | - | V100 | 42.61 | 44.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/encnet/encnet_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_160k_ade20k/encnet_r101-d8_512x512_160k_ade20k_20200622_073348-7989641f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_160k_ade20k/encnet_r101-d8_512x512_160k_ade20k-20200622_073348.log.json) | + ## Citation ```bibtex @@ -33,27 +57,3 @@ month = {June}, year = {2018} } ``` - -## Results and models - -### Cityscapes - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| EncNet | R-50-D8 | 512x1024 | 40000 | 8.6 | 4.58 | 75.67 | 77.08 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/encnet/encnet_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_40k_cityscapes/encnet_r50-d8_512x1024_40k_cityscapes_20200621_220958-68638a47.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_40k_cityscapes/encnet_r50-d8_512x1024_40k_cityscapes-20200621_220958.log.json) | -| EncNet | R-101-D8 | 512x1024 | 40000 | 12.1 | 2.66 | 75.81 | 77.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/encnet/encnet_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_40k_cityscapes/encnet_r101-d8_512x1024_40k_cityscapes_20200621_220933-35e0a3e8.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_40k_cityscapes/encnet_r101-d8_512x1024_40k_cityscapes-20200621_220933.log.json) | -| EncNet | R-50-D8 | 769x769 | 40000 | 9.8 | 1.82 | 76.24 | 77.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/encnet/encnet_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_40k_cityscapes/encnet_r50-d8_769x769_40k_cityscapes_20200621_220958-3bcd2884.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_40k_cityscapes/encnet_r50-d8_769x769_40k_cityscapes-20200621_220958.log.json) | -| EncNet | R-101-D8 | 769x769 | 40000 | 13.7 | 1.26 | 74.25 | 76.25 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/encnet/encnet_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_40k_cityscapes/encnet_r101-d8_769x769_40k_cityscapes_20200621_220933-2fafed55.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_40k_cityscapes/encnet_r101-d8_769x769_40k_cityscapes-20200621_220933.log.json) | -| EncNet | R-50-D8 | 512x1024 | 80000 | - | - | 77.94 | 79.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/encnet/encnet_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_80k_cityscapes/encnet_r50-d8_512x1024_80k_cityscapes_20200622_003554-fc5c5624.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_80k_cityscapes/encnet_r50-d8_512x1024_80k_cityscapes-20200622_003554.log.json) | -| EncNet | R-101-D8 | 512x1024 | 80000 | - | - | 78.55 | 79.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/encnet/encnet_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_80k_cityscapes/encnet_r101-d8_512x1024_80k_cityscapes_20200622_003555-1de64bec.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_80k_cityscapes/encnet_r101-d8_512x1024_80k_cityscapes-20200622_003555.log.json) | -| EncNet | R-50-D8 | 769x769 | 80000 | - | - | 77.44 | 78.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/encnet/encnet_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_80k_cityscapes/encnet_r50-d8_769x769_80k_cityscapes_20200622_003554-55096dcb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_80k_cityscapes/encnet_r50-d8_769x769_80k_cityscapes-20200622_003554.log.json) | -| EncNet | R-101-D8 | 769x769 | 80000 | - | - | 76.10 | 76.97 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/encnet/encnet_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_80k_cityscapes/encnet_r101-d8_769x769_80k_cityscapes_20200622_003555-470ef79d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_80k_cityscapes/encnet_r101-d8_769x769_80k_cityscapes-20200622_003555.log.json) | - -### ADE20K - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | 
-------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| EncNet | R-50-D8 | 512x512 | 80000 | 10.1 | 22.81 | 39.53 | 41.17 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/encnet/encnet_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_80k_ade20k/encnet_r50-d8_512x512_80k_ade20k_20200622_042412-44b46b04.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_80k_ade20k/encnet_r50-d8_512x512_80k_ade20k-20200622_042412.log.json) | -| EncNet | R-101-D8 | 512x512 | 80000 | 13.6 | 14.87 | 42.11 | 43.61 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/encnet/encnet_r101-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_80k_ade20k/encnet_r101-d8_512x512_80k_ade20k_20200622_101128-dd35e237.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_80k_ade20k/encnet_r101-d8_512x512_80k_ade20k-20200622_101128.log.json) | -| EncNet | R-50-D8 | 512x512 | 160000 | - | - | 40.10 | 41.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/encnet/encnet_r50-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_160k_ade20k/encnet_r50-d8_512x512_160k_ade20k_20200622_101059-b2db95e0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_160k_ade20k/encnet_r50-d8_512x512_160k_ade20k-20200622_101059.log.json) | -| EncNet | R-101-D8 | 512x512 | 160000 | - | - | 42.61 | 44.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/encnet/encnet_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_160k_ade20k/encnet_r101-d8_512x512_160k_ade20k_20200622_073348-7989641f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_160k_ade20k/encnet_r101-d8_512x512_160k_ade20k-20200622_073348.log.json) | diff --git a/configs/encnet/encnet.yml b/configs/encnet/encnet.yml deleted file mode 100644 index bea147b286..0000000000 --- a/configs/encnet/encnet.yml +++ /dev/null @@ -1,232 +0,0 @@ -Collections: -- Name: EncNet - Metadata: - Training Data: - - Cityscapes - - ADE20K - Paper: - URL: https://arxiv.org/abs/1803.08904 - Title: Context Encoding for Semantic Segmentation - README: configs/encnet/README.md - Code: - URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/enc_head.py#L63 - Version: v0.17.0 - Converted From: - Code: https://github.com/zhanghang1989/PyTorch-Encoding -Models: -- Name: encnet_r50-d8_4xb2-40k_cityscapes-512x1024 - In Collection: EncNet - Metadata: - backbone: R-50-D8 - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 218.34 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training 
Memory (GB): 8.6 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 75.67 - mIoU(ms+flip): 77.08 - Config: configs/encnet/encnet_r50-d8_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_40k_cityscapes/encnet_r50-d8_512x1024_40k_cityscapes_20200621_220958-68638a47.pth -- Name: encnet_r101-d8_4xb2-40k_cityscapes-512x1024 - In Collection: EncNet - Metadata: - backbone: R-101-D8 - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 375.94 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 12.1 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 75.81 - mIoU(ms+flip): 77.21 - Config: configs/encnet/encnet_r101-d8_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_40k_cityscapes/encnet_r101-d8_512x1024_40k_cityscapes_20200621_220933-35e0a3e8.pth -- Name: encnet_r50-d8_4xb2-40k_cityscapes-769x769 - In Collection: EncNet - Metadata: - backbone: R-50-D8 - crop size: (769,769) - lr schd: 40000 - inference time (ms/im): - - value: 549.45 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 9.8 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 76.24 - mIoU(ms+flip): 77.85 - Config: configs/encnet/encnet_r50-d8_4xb2-40k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_40k_cityscapes/encnet_r50-d8_769x769_40k_cityscapes_20200621_220958-3bcd2884.pth -- Name: encnet_r101-d8_4xb2-40k_cityscapes-769x769 - In Collection: EncNet - Metadata: - backbone: R-101-D8 - crop size: (769,769) - lr schd: 40000 - inference time (ms/im): - - value: 793.65 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 13.7 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 74.25 - mIoU(ms+flip): 76.25 - Config: configs/encnet/encnet_r101-d8_4xb2-40k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_40k_cityscapes/encnet_r101-d8_769x769_40k_cityscapes_20200621_220933-2fafed55.pth -- Name: encnet_r50-d8_4xb2-80k_cityscapes-512x1024 - In Collection: EncNet - Metadata: - backbone: R-50-D8 - crop size: (512,1024) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 77.94 - mIoU(ms+flip): 79.13 - Config: configs/encnet/encnet_r50-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_80k_cityscapes/encnet_r50-d8_512x1024_80k_cityscapes_20200622_003554-fc5c5624.pth -- Name: encnet_r101-d8_4xb2-80k_cityscapes-512x1024 - In Collection: EncNet - Metadata: - backbone: R-101-D8 - crop size: (512,1024) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.55 - mIoU(ms+flip): 79.47 - Config: configs/encnet/encnet_r101-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_80k_cityscapes/encnet_r101-d8_512x1024_80k_cityscapes_20200622_003555-1de64bec.pth -- Name: encnet_r50-d8_4xb2-80k_cityscapes-769x769 - In Collection: EncNet - Metadata: - backbone: R-50-D8 - crop size: (769,769) - lr schd: 80000 - 
Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 77.44 - mIoU(ms+flip): 78.72 - Config: configs/encnet/encnet_r50-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_80k_cityscapes/encnet_r50-d8_769x769_80k_cityscapes_20200622_003554-55096dcb.pth -- Name: encnet_r101-d8_4xb2-80k_cityscapes-769x769 - In Collection: EncNet - Metadata: - backbone: R-101-D8 - crop size: (769,769) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 76.1 - mIoU(ms+flip): 76.97 - Config: configs/encnet/encnet_r101-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_80k_cityscapes/encnet_r101-d8_769x769_80k_cityscapes_20200622_003555-470ef79d.pth -- Name: encnet_r50-d8_4xb4-80k_ade20k-512x512 - In Collection: EncNet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 43.84 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 10.1 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 39.53 - mIoU(ms+flip): 41.17 - Config: configs/encnet/encnet_r50-d8_4xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_80k_ade20k/encnet_r50-d8_512x512_80k_ade20k_20200622_042412-44b46b04.pth -- Name: encnet_r101-d8_4xb4-80k_ade20k-512x512 - In Collection: EncNet - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 67.25 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 13.6 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 42.11 - mIoU(ms+flip): 43.61 - Config: configs/encnet/encnet_r101-d8_4xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_80k_ade20k/encnet_r101-d8_512x512_80k_ade20k_20200622_101128-dd35e237.pth -- Name: encnet_r50-d8_4xb4-160k_ade20k-512x512 - In Collection: EncNet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 40.1 - mIoU(ms+flip): 41.71 - Config: configs/encnet/encnet_r50-d8_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_160k_ade20k/encnet_r50-d8_512x512_160k_ade20k_20200622_101059-b2db95e0.pth -- Name: encnet_r101-d8_4xb4-160k_ade20k-512x512 - In Collection: EncNet - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 42.61 - mIoU(ms+flip): 44.01 - Config: configs/encnet/encnet_r101-d8_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_160k_ade20k/encnet_r101-d8_512x512_160k_ade20k_20200622_073348-7989641f.pth diff --git a/configs/encnet/metafile.yaml b/configs/encnet/metafile.yaml new file mode 100644 index 0000000000..0dbdcfaab3 --- /dev/null +++ b/configs/encnet/metafile.yaml @@ -0,0 +1,296 @@ +Collections: +- Name: EncNet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + Paper: + Title: Context Encoding for Semantic Segmentation + URL: https://arxiv.org/abs/1803.08904 + README: 
configs/encnet/README.md + Frameworks: + - PyTorch +Models: +- Name: encnet_r50-d8_4xb2-40k_cityscapes-512x1024 + In Collection: EncNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.67 + mIoU(ms+flip): 77.08 + Config: configs/encnet/encnet_r50-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - EncNet + Training Resources: 4x V100 GPUS + Memory (GB): 8.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_40k_cityscapes/encnet_r50-d8_512x1024_40k_cityscapes_20200621_220958-68638a47.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_40k_cityscapes/encnet_r50-d8_512x1024_40k_cityscapes-20200621_220958.log.json + Paper: + Title: Context Encoding for Semantic Segmentation + URL: https://arxiv.org/abs/1803.08904 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/enc_head.py#L63 + Framework: PyTorch +- Name: encnet_r101-d8_4xb2-40k_cityscapes-512x1024 + In Collection: EncNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.81 + mIoU(ms+flip): 77.21 + Config: configs/encnet/encnet_r101-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - EncNet + Training Resources: 4x V100 GPUS + Memory (GB): 12.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_40k_cityscapes/encnet_r101-d8_512x1024_40k_cityscapes_20200621_220933-35e0a3e8.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_40k_cityscapes/encnet_r101-d8_512x1024_40k_cityscapes-20200621_220933.log.json + Paper: + Title: Context Encoding for Semantic Segmentation + URL: https://arxiv.org/abs/1803.08904 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/enc_head.py#L63 + Framework: PyTorch +- Name: encnet_r50-d8_4xb2-40k_cityscapes-769x769 + In Collection: EncNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.24 + mIoU(ms+flip): 77.85 + Config: configs/encnet/encnet_r50-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - EncNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_40k_cityscapes/encnet_r50-d8_769x769_40k_cityscapes_20200621_220958-3bcd2884.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_40k_cityscapes/encnet_r50-d8_769x769_40k_cityscapes-20200621_220958.log.json + Paper: + Title: Context Encoding for Semantic Segmentation + URL: https://arxiv.org/abs/1803.08904 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/enc_head.py#L63 + Framework: PyTorch +- Name: encnet_r101-d8_4xb2-40k_cityscapes-769x769 + In Collection: EncNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.25 + mIoU(ms+flip): 76.25 + Config: configs/encnet/encnet_r101-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - EncNet + Training Resources: 4x V100 GPUS + Memory (GB): 13.7 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_40k_cityscapes/encnet_r101-d8_769x769_40k_cityscapes_20200621_220933-2fafed55.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_40k_cityscapes/encnet_r101-d8_769x769_40k_cityscapes-20200621_220933.log.json + Paper: + Title: Context Encoding for Semantic Segmentation + URL: https://arxiv.org/abs/1803.08904 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/enc_head.py#L63 + Framework: PyTorch +- Name: encnet_r50-d8_4xb2-80k_cityscapes-512x1024 + In Collection: EncNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.94 + mIoU(ms+flip): 79.13 + Config: configs/encnet/encnet_r50-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - EncNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_80k_cityscapes/encnet_r50-d8_512x1024_80k_cityscapes_20200622_003554-fc5c5624.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_80k_cityscapes/encnet_r50-d8_512x1024_80k_cityscapes-20200622_003554.log.json + Paper: + Title: Context Encoding for Semantic Segmentation + URL: https://arxiv.org/abs/1803.08904 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/enc_head.py#L63 + Framework: PyTorch +- Name: encnet_r101-d8_4xb2-80k_cityscapes-512x1024 + In Collection: EncNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.55 + mIoU(ms+flip): 79.47 + Config: configs/encnet/encnet_r101-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - EncNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_80k_cityscapes/encnet_r101-d8_512x1024_80k_cityscapes_20200622_003555-1de64bec.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_80k_cityscapes/encnet_r101-d8_512x1024_80k_cityscapes-20200622_003555.log.json + Paper: + Title: Context Encoding for Semantic Segmentation + URL: https://arxiv.org/abs/1803.08904 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/enc_head.py#L63 + Framework: PyTorch +- Name: encnet_r50-d8_4xb2-80k_cityscapes-769x769 + In Collection: EncNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.44 + mIoU(ms+flip): 78.72 + Config: configs/encnet/encnet_r50-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - EncNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_80k_cityscapes/encnet_r50-d8_769x769_80k_cityscapes_20200622_003554-55096dcb.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_80k_cityscapes/encnet_r50-d8_769x769_80k_cityscapes-20200622_003554.log.json + Paper: + Title: Context Encoding for Semantic Segmentation + URL: https://arxiv.org/abs/1803.08904 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/enc_head.py#L63 + Framework: PyTorch +- Name: encnet_r101-d8_4xb2-80k_cityscapes-769x769 + In Collection: EncNet + 
Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.1 + mIoU(ms+flip): 76.97 + Config: configs/encnet/encnet_r101-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - EncNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_80k_cityscapes/encnet_r101-d8_769x769_80k_cityscapes_20200622_003555-470ef79d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_80k_cityscapes/encnet_r101-d8_769x769_80k_cityscapes-20200622_003555.log.json + Paper: + Title: Context Encoding for Semantic Segmentation + URL: https://arxiv.org/abs/1803.08904 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/enc_head.py#L63 + Framework: PyTorch +- Name: encnet_r50-d8_4xb4-80k_ade20k-512x512 + In Collection: EncNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 39.53 + mIoU(ms+flip): 41.17 + Config: configs/encnet/encnet_r50-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - EncNet + Training Resources: 4x V100 GPUS + Memory (GB): 10.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_80k_ade20k/encnet_r50-d8_512x512_80k_ade20k_20200622_042412-44b46b04.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_80k_ade20k/encnet_r50-d8_512x512_80k_ade20k-20200622_042412.log.json + Paper: + Title: Context Encoding for Semantic Segmentation + URL: https://arxiv.org/abs/1803.08904 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/enc_head.py#L63 + Framework: PyTorch +- Name: encnet_r101-d8_4xb4-80k_ade20k-512x512 + In Collection: EncNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.11 + mIoU(ms+flip): 43.61 + Config: configs/encnet/encnet_r101-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - EncNet + Training Resources: 4x V100 GPUS + Memory (GB): 13.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_80k_ade20k/encnet_r101-d8_512x512_80k_ade20k_20200622_101128-dd35e237.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_80k_ade20k/encnet_r101-d8_512x512_80k_ade20k-20200622_101128.log.json + Paper: + Title: Context Encoding for Semantic Segmentation + URL: https://arxiv.org/abs/1803.08904 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/enc_head.py#L63 + Framework: PyTorch +- Name: encnet_r50-d8_4xb4-160k_ade20k-512x512 + In Collection: EncNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 40.1 + mIoU(ms+flip): 41.71 + Config: configs/encnet/encnet_r50-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - EncNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_160k_ade20k/encnet_r50-d8_512x512_160k_ade20k_20200622_101059-b2db95e0.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_160k_ade20k/encnet_r50-d8_512x512_160k_ade20k-20200622_101059.log.json + Paper: + Title: Context Encoding for Semantic 
Segmentation + URL: https://arxiv.org/abs/1803.08904 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/enc_head.py#L63 + Framework: PyTorch +- Name: encnet_r101-d8_4xb4-160k_ade20k-512x512 + In Collection: EncNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.61 + mIoU(ms+flip): 44.01 + Config: configs/encnet/encnet_r101-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - EncNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_160k_ade20k/encnet_r101-d8_512x512_160k_ade20k_20200622_073348-7989641f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_160k_ade20k/encnet_r101-d8_512x512_160k_ade20k-20200622_073348.log.json + Paper: + Title: Context Encoding for Semantic Segmentation + URL: https://arxiv.org/abs/1803.08904 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/enc_head.py#L63 + Framework: PyTorch diff --git a/configs/erfnet/README.md b/configs/erfnet/README.md index 4f7d21572a..55d71973a3 100644 --- a/configs/erfnet/README.md +++ b/configs/erfnet/README.md @@ -1,6 +1,6 @@ # ERFNet -[ERFNet: Efficient Residual Factorized ConvNet for Real-time Semantic Segmentation](http://www.robesafe.uah.es/personal/eduardo.romera/pdfs/Romera17tits.pdf) +> [ERFNet: Efficient Residual Factorized ConvNet for Real-time Semantic Segmentation](http://www.robesafe.uah.es/personal/eduardo.romera/pdfs/Romera17tits.pdf) ## Introduction @@ -22,6 +22,22 @@ Semantic segmentation is a challenging task that addresses most of the perceptio +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ---: | ------------- | --------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| ERFNet | ERFNet | 512x1024 | 160000 | 6.04 | 15.26 | V100 | 72.5 | 74.75 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/erfnet/erfnet_fcn_4xb4-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes/erfnet_fcn_4x4_512x1024_160k_cityscapes_20220704_162145-dc90157a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes/erfnet_fcn_4x4_512x1024_160k_cityscapes_20220704_162145.log.json) | + +Note: + +- The model is trained from scratch. + +- The last deconvolution layer in the [original paper](https://github.com/Eromera/erfnet_pytorch/blob/master/train/erfnet.py#L123) is replaced by a naive `FCNHead` decoder head and a bilinear upsampling layer, which we found more effective and efficient (see the config sketch below). + +- The performance of this model is sensitive to the seed used; please refer to the log file for the specific seed settings. 
If you choose a different seed, the results might differ from the table results. + ## Citation ```bibtex @@ -36,19 +52,3 @@ Semantic segmentation is a challenging task that addresses most of the perceptio publisher={IEEE} } ``` - -## Results and models - -### Cityscapes - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------: | -------- | -------------- | ---: | ------------- | ------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| ERFNet | ERFNet | 512x1024 | 160000 | 6.04 | 15.26 | 72.5 | 74.75 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/erfnet/erfnet_fcn_4xb4-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes/erfnet_fcn_4x4_512x1024_160k_cityscapes_20220704_162145-dc90157a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes/erfnet_fcn_4x4_512x1024_160k_cityscapes_20220704_162145.log.json) | - -Note: - -- The model is trained from scratch. - -- Last deconvolution layer in the [original paper](https://github.com/Eromera/erfnet_pytorch/blob/master/train/erfnet.py#L123) is replaced by a naive `FCNHead` decoder head and a bilinear upsampling layer, found more effective and efficient. - -- This model performance is sensitive to the seed values used, please refer to the log file for the specific settings of the seed. If you choose a different seed, the results might differ from the table results. 
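The hunks just below replace `configs/erfnet/erfnet.yml` with a new-style `configs/erfnet/metafile.yaml`, and the same migration is applied to every model zoo entry in this diff. Two schema changes are worth noting: `Results` becomes a single mapping instead of a one-element list, and each model entry now carries its own `Paper`, `Code`, `Training log`, and `Framework` fields. Below is a minimal sketch of how such a file could be sanity-checked, assuming PyYAML is available; the helper name and the required-key set are illustrative assumptions, not part of this PR's tooling.

```python
# Minimal sketch (not part of this PR): sanity-check a new-style
# metafile.yaml. Assumes PyYAML is installed; `check_metafile` and
# REQUIRED_MODEL_KEYS are illustrative, not the PR's actual API.
import yaml

REQUIRED_MODEL_KEYS = {
    'Name', 'In Collection', 'Results', 'Config', 'Metadata',
    'Weights', 'Paper', 'Code', 'Framework'
}


def check_metafile(path: str) -> None:
    """Print model entries that deviate from the new schema."""
    with open(path, encoding='utf-8') as f:
        meta = yaml.safe_load(f)
    for model in meta.get('Models', []):
        name = model.get('Name', '<unnamed>')
        # In the new format, Results is a single mapping, not a list.
        if not isinstance(model.get('Results'), dict):
            print(f'{name}: Results should be a mapping')
        missing = REQUIRED_MODEL_KEYS - model.keys()
        if missing:
            print(f'{name}: missing keys {sorted(missing)}')


check_metafile('configs/erfnet/metafile.yaml')
```

Run against `configs/erfnet/metafile.yaml` as added below, this should print nothing, since every entry carries the full key set.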
diff --git a/configs/erfnet/erfnet.yml b/configs/erfnet/erfnet.yml deleted file mode 100644 index 5f87f020cf..0000000000 --- a/configs/erfnet/erfnet.yml +++ /dev/null @@ -1,37 +0,0 @@ -Collections: -- Name: ERFNet - Metadata: - Training Data: - - Cityscapes - Paper: - URL: http://www.robesafe.uah.es/personal/eduardo.romera/pdfs/Romera17tits.pdf - Title: 'ERFNet: Efficient Residual Factorized ConvNet for Real-time Semantic Segmentation' - README: configs/erfnet/README.md - Code: - URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/erfnet.py#L321 - Version: v0.20.0 - Converted From: - Code: https://github.com/Eromera/erfnet_pytorch -Models: -- Name: erfnet_fcn_4xb4-160k_cityscapes-512x1024 - In Collection: ERFNet - Metadata: - backbone: ERFNet - crop size: (512,1024) - lr schd: 160000 - inference time (ms/im): - - value: 65.53 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 6.04 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 72.5 - mIoU(ms+flip): 74.75 - Config: configs/erfnet/erfnet_fcn_4xb4-160k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes/erfnet_fcn_4x4_512x1024_160k_cityscapes_20220704_162145-dc90157a.pth diff --git a/configs/erfnet/metafile.yaml b/configs/erfnet/metafile.yaml new file mode 100644 index 0000000000..bf514124ee --- /dev/null +++ b/configs/erfnet/metafile.yaml @@ -0,0 +1,37 @@ +Collections: +- Name: ERFNet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + Paper: + Title: 'ERFNet: Efficient Residual Factorized ConvNet for Real-time Semantic Segmentation' + URL: http://www.robesafe.uah.es/personal/eduardo.romera/pdfs/Romera17tits.pdf + README: configs/erfnet/README.md + Frameworks: + - PyTorch +Models: +- Name: erfnet_fcn_4xb4-160k_cityscapes-512x1024 + In Collection: ERFNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 72.5 + mIoU(ms+flip): 74.75 + Config: configs/erfnet/erfnet_fcn_4xb4-160k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - ERFNet + - ERFNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.04 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes/erfnet_fcn_4x4_512x1024_160k_cityscapes_20220704_162145-dc90157a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes/erfnet_fcn_4x4_512x1024_160k_cityscapes_20220704_162145.log.json + Paper: + Title: 'ERFNet: Efficient Residual Factorized ConvNet for Real-time Semantic Segmentation' + URL: http://www.robesafe.uah.es/personal/eduardo.romera/pdfs/Romera17tits.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/erfnet.py#L321 + Framework: PyTorch diff --git a/configs/fastfcn/README.md b/configs/fastfcn/README.md index feedfa33a8..48644e57e3 100644 --- a/configs/fastfcn/README.md +++ b/configs/fastfcn/README.md @@ -1,6 +1,6 @@ # FastFCN -[FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation](https://arxiv.org/abs/1903.11816) +> [FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation](https://arxiv.org/abs/1903.11816) ## Introduction @@ -22,42 +22,42 @@ Modern approaches for semantic segmentation usually employ dilated convolutions -## Citation - -```bibtex 
-@article{wu2019fastfcn, -title={Fastfcn: Rethinking dilated convolution in the backbone for semantic segmentation}, -author={Wu, Huikai and Zhang, Junge and Huang, Kaiqi and Liang, Kongming and Yu, Yizhou}, -journal={arXiv preprint arXiv:1903.11816}, -year={2019} -} -``` - ## Results and models ### Cityscapes -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------------------------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------- | -------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| FastFCN + DeepLabV3 | R-50-D32 | 512x1024 | 80000 | 5.67 | 2.64 | 79.12 | 80.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes_20210928_053722-5d1a2648.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes_20210928_053722.log.json) | -| FastFCN + DeepLabV3 (4x4) | R-50-D32 | 512x1024 | 80000 | 9.79 | - | 79.52 | 80.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes_20210924_214357-72220849.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes_20210924_214357.log.json) | -| FastFCN + PSPNet | R-50-D32 | 512x1024 | 80000 | 5.67 | 4.40 | 79.26 | 80.86 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes_20210928_053722-57749bed.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes_20210928_053722.log.json) | -| FastFCN + PSPNet (4x4) | R-50-D32 | 512x1024 | 80000 | 9.94 | - | 78.76 | 80.03 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes_20210925_061841-77e87b0a.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes_20210925_061841.log.json) | -| FastFCN + EncNet | R-50-D32 | 512x1024 | 80000 | 8.15 | 4.77 | 77.97 | 79.92 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes_20210928_030036-78da5046.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes_20210928_030036.log.json) | -| FastFCN + EncNet (4x4) | R-50-D32 | 512x1024 | 80000 | 15.45 | - | 78.6 | 80.25 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes_20210926_093217-e1eb6dbb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes_20210926_093217.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------------------- | -------------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------- | ----------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| FastFCN + DeepLabV3 | R-50-D32 | 512x1024 | 80000 | 5.67 | 2.64 | V100 | 79.12 | 80.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes_20210928_053722-5d1a2648.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes_20210928_053722.log.json) | +| FastFCN + DeepLabV3 | R-50-D32 (4x4) | 512x1024 | 80000 | 9.79 | - | V100 | 79.52 | 80.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes_20210924_214357-72220849.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes_20210924_214357.log.json) | +| FastFCN + PSPNet | R-50-D32 | 512x1024 | 80000 | 5.67 | 4.40 
| V100 | 79.26 | 80.86 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes_20210928_053722-57749bed.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes_20210928_053722.log.json) | +| FastFCN + PSPNet | R-50-D32 (4x4) | 512x1024 | 80000 | 9.94 | - | V100 | 78.76 | 80.03 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes_20210925_061841-77e87b0a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes_20210925_061841.log.json) | +| FastFCN + EncNet | R-50-D32 | 512x1024 | 80000 | 8.15 | 4.77 | V100 | 77.97 | 79.92 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes_20210928_030036-78da5046.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes_20210928_030036.log.json) | +| FastFCN + EncNet | R-50-D32 (4x4) | 512x1024 | 80000 | 15.45 | - | V100 | 78.6 | 80.25 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes_20210926_093217-e1eb6dbb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes_20210926_093217.log.json) | ### ADE20K -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------------------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------- | ---------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| FastFCN + DeepLabV3 | R-50-D32 | 512x1024 | 80000 | 8.46 | 12.06 | 41.88 | 42.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb4-80k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k/fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k_20211013_190619-3aa40f2d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k/fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k_20211013_190619.log.json) | -| FastFCN + DeepLabV3 | R-50-D32 | 512x1024 | 160000 | - | - | 43.58 | 44.92 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k/fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k_20211008_152246-27036aee.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k/fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k_20211008_152246.log.json) | -| FastFCN + PSPNet | R-50-D32 | 512x1024 | 80000 | 8.02 | 19.21 | 41.40 | 42.12 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k_20210930_225137-993d07c8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k_20210930_225137.log.json) | -| FastFCN + PSPNet | R-50-D32 | 512x1024 | 160000 | - | - | 42.63 | 43.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k_20211008_105455-e8f5a2fd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k_20211008_105455.log.json) | -| FastFCN + EncNet | R-50-D32 | 512x1024 | 80000 | 9.67 | 17.23 | 40.88 | 42.36 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k/fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k_20210930_225214-65aef6dd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k/fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k_20210930_225214.log.json) | -| FastFCN + EncNet | R-50-D32 | 512x1024 | 160000 | - | - | 42.50 | 44.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k/fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k_20211008_105456-d875ce3c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k/fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k_20211008_105456.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------------------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------- | 
------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FastFCN + DeepLabV3 | R-50-D32 | 512x512 | 80000 | 8.46 | 12.06 | V100 | 41.88 | 42.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k/fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k_20211013_190619-3aa40f2d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k/fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k_20211013_190619.log.json) | +| FastFCN + DeepLabV3 | R-50-D32 | 512x512 | 160000 | - | - | V100 | 43.58 | 44.92 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k/fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k_20211008_152246-27036aee.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k/fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k_20211008_152246.log.json) | +| FastFCN + PSPNet | R-50-D32 | 512x512 | 80000 | 8.02 | 19.21 | V100 | 41.40 | 42.12 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k_20210930_225137-993d07c8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k_20210930_225137.log.json) | +| FastFCN + PSPNet | R-50-D32 | 512x512 | 160000 | - | - | V100 | 42.63 | 43.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k_20211008_105455-e8f5a2fd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k_20211008_105455.log.json) | +| FastFCN + EncNet | R-50-D32 | 512x512 | 80000 | 9.67 | 17.23 | V100 | 40.88 | 42.36 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k/fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k_20210930_225214-65aef6dd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k/fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k_20210930_225214.log.json) | +| FastFCN + EncNet | R-50-D32 | 512x512 | 160000 | - | - | V100 | 42.50 | 44.21 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k/fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k_20211008_105456-d875ce3c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k/fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k_20211008_105456.log.json) | Note: - `4x4` means 4 GPUs with 4 samples per GPU in training, default setting is 4 GPUs with 2 samples per GPU in training. -- Results of [DeepLabV3 (mIoU: 79.32)](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/deeplabv3), [PSPNet (mIoU: 78.55)](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet) and [ENCNet (mIoU: 77.94)](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/encnet) can be found in each original repository. +- Results of [DeepLabV3 (mIoU: 79.32)](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3), [PSPNet (mIoU: 78.55)](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet) and [ENCNet (mIoU: 77.94)](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/encnet) can be found in each original repository. + +## Citation + +```bibtex +@article{wu2019fastfcn, +title={Fastfcn: Rethinking dilated convolution in the backbone for semantic segmentation}, +author={Wu, Huikai and Zhang, Junge and Huang, Kaiqi and Liang, Kongming and Yu, Yizhou}, +journal={arXiv preprint arXiv:1903.11816}, +year={2019} +} +``` diff --git a/configs/fastfcn/fastfcn.yml b/configs/fastfcn/fastfcn.yml deleted file mode 100644 index 6b4d3bc121..0000000000 --- a/configs/fastfcn/fastfcn.yml +++ /dev/null @@ -1,235 +0,0 @@ -Collections: -- Name: FastFCN - Metadata: - Training Data: - - Cityscapes - - ADE20K - Paper: - URL: https://arxiv.org/abs/1903.11816 - Title: 'FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation' - README: configs/fastfcn/README.md - Code: - URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/jpu.py#L12 - Version: v0.18.0 - Converted From: - Code: https://github.com/wuhuikai/FastFCN -Models: -- Name: fastfcn_r50-d32_jpu_aspp_4xb2-80k_cityscapes-512x1024 - In Collection: FastFCN - Metadata: - backbone: R-50-D32 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 378.79 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 5.67 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.12 - mIoU(ms+flip): 80.58 - Config: configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes_20210928_053722-5d1a2648.pth -- Name: fastfcn_r50-d32_jpu_aspp_4xb2-80k_cityscapes-512x1024 - In Collection: FastFCN - Metadata: - backbone: R-50-D32 - crop size: (512,1024) - lr schd: 80000 - Training Memory (GB): 9.79 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.52 - mIoU(ms+flip): 80.91 - Config: configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb2-80k_cityscapes-512x1024.py - Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes_20210924_214357-72220849.pth -- Name: fastfcn_r50-d32_jpu_psp_4xb2-80k_cityscapes-512x1024 - In Collection: FastFCN - Metadata: - backbone: R-50-D32 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 227.27 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 5.67 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.26 - mIoU(ms+flip): 80.86 - Config: configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes_20210928_053722-57749bed.pth -- Name: fastfcn_r50-d32_jpu_psp_4xb2-80k_cityscapes-512x1024 - In Collection: FastFCN - Metadata: - backbone: R-50-D32 - crop size: (512,1024) - lr schd: 80000 - Training Memory (GB): 9.94 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.76 - mIoU(ms+flip): 80.03 - Config: configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes_20210925_061841-77e87b0a.pth -- Name: fastfcn_r50-d32_jpu_enc_4xb2-80k_cityscapes-512x1024 - In Collection: FastFCN - Metadata: - backbone: R-50-D32 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 209.64 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 8.15 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 77.97 - mIoU(ms+flip): 79.92 - Config: configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes_20210928_030036-78da5046.pth -- Name: fastfcn_r50-d32_jpu_enc_4xb2-80k_cityscapes-512x1024 - In Collection: FastFCN - Metadata: - backbone: R-50-D32 - crop size: (512,1024) - lr schd: 80000 - Training Memory (GB): 15.45 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.6 - mIoU(ms+flip): 80.25 - Config: configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes_20210926_093217-e1eb6dbb.pth -- Name: fastfcn_r50-d32_jpu_aspp_4xb4-80k_ade20k-512x512 - In Collection: FastFCN - Metadata: - backbone: R-50-D32 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 82.92 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 8.46 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 41.88 - mIoU(ms+flip): 42.91 - Config: configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k/fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k_20211013_190619-3aa40f2d.pth -- Name: fastfcn_r50-d32_jpu_aspp_4xb4-160k_ade20k-512x512 - In 
Collection: FastFCN - Metadata: - backbone: R-50-D32 - crop size: (512,1024) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 43.58 - mIoU(ms+flip): 44.92 - Config: configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k/fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k_20211008_152246-27036aee.pth -- Name: fastfcn_r50-d32_jpu_psp_4xb4-80k_ade20k-512x512 - In Collection: FastFCN - Metadata: - backbone: R-50-D32 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 52.06 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 8.02 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 41.4 - mIoU(ms+flip): 42.12 - Config: configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k_20210930_225137-993d07c8.pth -- Name: fastfcn_r50-d32_jpu_psp_4xb4-160k_ade20k-512x512 - In Collection: FastFCN - Metadata: - backbone: R-50-D32 - crop size: (512,1024) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 42.63 - mIoU(ms+flip): 43.71 - Config: configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k_20211008_105455-e8f5a2fd.pth -- Name: fastfcn_r50-d32_jpu_enc_4xb4-80k_ade20k-512x512 - In Collection: FastFCN - Metadata: - backbone: R-50-D32 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 58.04 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 9.67 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 40.88 - mIoU(ms+flip): 42.36 - Config: configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k/fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k_20210930_225214-65aef6dd.pth -- Name: fastfcn_r50-d32_jpu_enc_4xb4-160k_ade20k-512x512 - In Collection: FastFCN - Metadata: - backbone: R-50-D32 - crop size: (512,1024) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 42.5 - mIoU(ms+flip): 44.21 - Config: configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k/fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k_20211008_105456-d875ce3c.pth diff --git a/configs/fastfcn/metafile.yaml b/configs/fastfcn/metafile.yaml new file mode 100644 index 0000000000..f5fe03ca45 --- /dev/null +++ b/configs/fastfcn/metafile.yaml @@ -0,0 +1,311 @@ +Collections: +- Name: FastFCN + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + Paper: + Title: 'FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation' + URL: https://arxiv.org/abs/1903.11816 + README: configs/fastfcn/README.md + Frameworks: + - PyTorch +Models: +- Name: fastfcn_r50-d32_jpu_aspp_4xb2-80k_cityscapes-512x1024 + In Collection: FastFCN + Results: + Task: Semantic 
Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.12 + mIoU(ms+flip): 80.58 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D32 + - FastFCN + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 5.67 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes_20210928_053722-5d1a2648.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes_20210928_053722.log.json + Paper: + Title: 'FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation' + URL: https://arxiv.org/abs/1903.11816 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/jpu.py#L12 + Framework: PyTorch +- Name: fastfcn_r50-d32_jpu_aspp_4xb2-80k_cityscapes-512x1024 + In Collection: FastFCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.52 + mIoU(ms+flip): 80.91 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D32 + - FastFCN + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 9.79 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes_20210924_214357-72220849.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes_20210924_214357.log.json + Paper: + Title: 'FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation' + URL: https://arxiv.org/abs/1903.11816 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/jpu.py#L12 + Framework: PyTorch +- Name: fastfcn_r50-d32_jpu_psp_4xb2-80k_cityscapes-512x1024 + In Collection: FastFCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.26 + mIoU(ms+flip): 80.86 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D32 + - FastFCN + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 5.67 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes_20210928_053722-57749bed.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes_20210928_053722.log.json + Paper: + Title: 'FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation' + URL: https://arxiv.org/abs/1903.11816 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/jpu.py#L12 + Framework: PyTorch +- Name: fastfcn_r50-d32_jpu_psp_4xb2-80k_cityscapes-512x1024 + In Collection: FastFCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.76 + mIoU(ms+flip): 80.03 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - 
R-50-D32 + - FastFCN + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.94 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes_20210925_061841-77e87b0a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes_20210925_061841.log.json + Paper: + Title: 'FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation' + URL: https://arxiv.org/abs/1903.11816 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/jpu.py#L12 + Framework: PyTorch +- Name: fastfcn_r50-d32_jpu_enc_4xb2-80k_cityscapes-512x1024 + In Collection: FastFCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.97 + mIoU(ms+flip): 79.92 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D32 + - FastFCN + - EncNet + Training Resources: 4x V100 GPUS + Memory (GB): 8.15 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes_20210928_030036-78da5046.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes_20210928_030036.log.json + Paper: + Title: 'FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation' + URL: https://arxiv.org/abs/1903.11816 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/jpu.py#L12 + Framework: PyTorch +- Name: fastfcn_r50-d32_jpu_enc_4xb2-80k_cityscapes-512x1024 + In Collection: FastFCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.6 + mIoU(ms+flip): 80.25 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D32 + - FastFCN + - EncNet + Training Resources: 4x V100 GPUS + Memory (GB): 15.45 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes_20210926_093217-e1eb6dbb.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes_20210926_093217.log.json + Paper: + Title: 'FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation' + URL: https://arxiv.org/abs/1903.11816 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/jpu.py#L12 + Framework: PyTorch +- Name: fastfcn_r50-d32_jpu_aspp_4xb4-80k_ade20k-512x512 + In Collection: FastFCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.88 + mIoU(ms+flip): 42.91 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D32 + - FastFCN + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 8.46 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k/fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k_20211013_190619-3aa40f2d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k/fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k_20211013_190619.log.json + Paper: + Title: 'FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation' + URL: https://arxiv.org/abs/1903.11816 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/jpu.py#L12 + Framework: PyTorch +- Name: fastfcn_r50-d32_jpu_aspp_4xb4-160k_ade20k-512x512 + In Collection: FastFCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.58 + mIoU(ms+flip): 44.92 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D32 + - FastFCN + - DeepLabV3 + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k/fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k_20211008_152246-27036aee.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k/fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k_20211008_152246.log.json + Paper: + Title: 'FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation' + URL: https://arxiv.org/abs/1903.11816 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/jpu.py#L12 + Framework: PyTorch +- Name: fastfcn_r50-d32_jpu_psp_4xb4-80k_ade20k-512x512 + In Collection: FastFCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.4 + mIoU(ms+flip): 42.12 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D32 + - FastFCN + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 8.02 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k_20210930_225137-993d07c8.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k_20210930_225137.log.json + Paper: + Title: 'FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation' + URL: https://arxiv.org/abs/1903.11816 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/jpu.py#L12 + Framework: PyTorch +- Name: fastfcn_r50-d32_jpu_psp_4xb4-160k_ade20k-512x512 + In Collection: FastFCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.63 + mIoU(ms+flip): 43.71 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_psp_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D32 + - FastFCN + - PSPNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k_20211008_105455-e8f5a2fd.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k_20211008_105455.log.json + Paper: + Title: 'FastFCN: Rethinking 
Dilated Convolution in the Backbone for Semantic Segmentation' + URL: https://arxiv.org/abs/1903.11816 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/jpu.py#L12 + Framework: PyTorch +- Name: fastfcn_r50-d32_jpu_enc_4xb4-80k_ade20k-512x512 + In Collection: FastFCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 40.88 + mIoU(ms+flip): 42.36 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D32 + - FastFCN + - EncNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.67 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k/fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k_20210930_225214-65aef6dd.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k/fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k_20210930_225214.log.json + Paper: + Title: 'FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation' + URL: https://arxiv.org/abs/1903.11816 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/jpu.py#L12 + Framework: PyTorch +- Name: fastfcn_r50-d32_jpu_enc_4xb4-160k_ade20k-512x512 + In Collection: FastFCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.5 + mIoU(ms+flip): 44.21 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_enc_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D32 + - FastFCN + - EncNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k/fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k_20211008_105456-d875ce3c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k/fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k_20211008_105456.log.json + Paper: + Title: 'FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation' + URL: https://arxiv.org/abs/1903.11816 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/jpu.py#L12 + Framework: PyTorch diff --git a/configs/fastscnn/README.md b/configs/fastscnn/README.md index 3e06903ae5..6be981462a 100644 --- a/configs/fastscnn/README.md +++ b/configs/fastscnn/README.md @@ -1,6 +1,6 @@ # Fast-SCNN -[Fast-SCNN for Semantic Segmentation](https://arxiv.org/abs/1902.04502) +> [Fast-SCNN for Semantic Segmentation](https://arxiv.org/abs/1902.04502) ## Introduction @@ -22,6 +22,14 @@ The encoder-decoder framework is state-of-the-art for offline semantic image seg +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| -------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------- | ---------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| FastSCNN | FastSCNN | 
512x1024 | 160000 | 3.3 | 56.45 | V100 | 70.96 | 72.65 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fastscnn/fast_scnn_8xb4-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fast_scnn/fast_scnn_lr0.12_8x4_160k_cityscapes/fast_scnn_lr0.12_8x4_160k_cityscapes_20210630_164853-0cec9937.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fast_scnn/fast_scnn_lr0.12_8x4_160k_cityscapes/fast_scnn_lr0.12_8x4_160k_cityscapes_20210630_164853.log.json) | + ## Citation ```bibtex @@ -32,11 +40,3 @@ The encoder-decoder framework is state-of-the-art for offline semantic image seg year={2019} } ``` - -## Results and models - -### Cityscapes - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| -------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| FastSCNN | FastSCNN | 512x1024 | 160000 | 3.3 | 56.45 | 70.96 | 72.65 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fastscnn/fast_scnn_8xb4-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fast_scnn/fast_scnn_lr0.12_8x4_160k_cityscapes/fast_scnn_lr0.12_8x4_160k_cityscapes_20210630_164853-0cec9937.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fast_scnn/fast_scnn_lr0.12_8x4_160k_cityscapes/fast_scnn_lr0.12_8x4_160k_cityscapes_20210630_164853.log.json) | diff --git a/configs/fastscnn/fastscnn.yml b/configs/fastscnn/metafile.yaml similarity index 50% rename from configs/fastscnn/fastscnn.yml rename to configs/fastscnn/metafile.yaml index 13215c2fb1..9e33c902db 100644 --- a/configs/fastscnn/fastscnn.yml +++ b/configs/fastscnn/metafile.yaml @@ -1,35 +1,37 @@ Collections: - Name: FastSCNN + License: Apache License 2.0 Metadata: Training Data: - Cityscapes Paper: - URL: https://arxiv.org/abs/1902.04502 Title: Fast-SCNN for Semantic Segmentation + URL: https://arxiv.org/abs/1902.04502 README: configs/fastscnn/README.md - Code: - URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/fast_scnn.py#L272 - Version: v0.17.0 + Frameworks: + - PyTorch Models: - Name: fast_scnn_8xb4-160k_cityscapes-512x1024 In Collection: FastSCNN - Metadata: - backbone: FastSCNN - crop size: (512,1024) - lr schd: 160000 - inference time (ms/im): - - value: 17.71 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 3.3 Results: - - Task: Semantic Segmentation + Task: Semantic Segmentation Dataset: Cityscapes Metrics: mIoU: 70.96 mIoU(ms+flip): 72.65 Config: configs/fastscnn/fast_scnn_8xb4-160k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 32 + Architecture: + - FastSCNN + - FastSCNN + Training Resources: 8x V100 GPUS + Memory (GB): 3.3 Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fast_scnn/fast_scnn_lr0.12_8x4_160k_cityscapes/fast_scnn_lr0.12_8x4_160k_cityscapes_20210630_164853-0cec9937.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/fast_scnn/fast_scnn_lr0.12_8x4_160k_cityscapes/fast_scnn_lr0.12_8x4_160k_cityscapes_20210630_164853.log.json + Paper: + Title: Fast-SCNN for Semantic Segmentation + URL: https://arxiv.org/abs/1902.04502 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/fast_scnn.py#L272 + Framework: PyTorch diff --git a/configs/fcn/README.md b/configs/fcn/README.md index 4b887f19e4..cf7379ff3d 100644 --- a/configs/fcn/README.md +++ b/configs/fcn/README.md @@ -1,6 +1,6 @@ # FCN -[Fully Convolutional Networks for Semantic Segmentation](https://arxiv.org/abs/1411.4038) +> [Fully Convolutional Networks for Semantic Segmentation](https://arxiv.org/abs/1411.4038) ## Introduction @@ -22,90 +22,90 @@ Convolutional networks are powerful visual models that yield hierarchies of feat -## Citation - -```bibtex -@article{shelhamer2017fully, - title={Fully convolutional networks for semantic segmentation}, - author={Shelhamer, Evan and Long, Jonathan and Darrell, Trevor}, - journal={IEEE transactions on pattern analysis and machine intelligence}, - volume={39}, - number={4}, - pages={640--651}, - year={2017}, - publisher={IEEE Trans Pattern Anal Mach Intell} -} -``` - ## Results and models ### Cityscapes -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ---------- | ---------- | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| FCN | R-50-D8 | 512x1024 | 40000 | 5.7 | 4.17 | 72.25 | 73.36 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_40k_cityscapes/fcn_r50-d8_512x1024_40k_cityscapes_20200604_192608-efe53f0d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_40k_cityscapes/fcn_r50-d8_512x1024_40k_cityscapes_20200604_192608.log.json) | -| FCN | R-101-D8 | 512x1024 | 40000 | 9.2 | 2.66 | 75.45 | 76.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_40k_cityscapes/fcn_r101-d8_512x1024_40k_cityscapes_20200604_181852-a883d3a1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_40k_cityscapes/fcn_r101-d8_512x1024_40k_cityscapes_20200604_181852.log.json) | -| FCN | R-50-D8 | 769x769 | 40000 | 6.5 | 1.80 | 71.47 | 72.54 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_40k_cityscapes/fcn_r50-d8_769x769_40k_cityscapes_20200606_113104-977b5d02.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_40k_cityscapes/fcn_r50-d8_769x769_40k_cityscapes_20200606_113104.log.json) | -| FCN | R-101-D8 | 
769x769 | 40000 | 10.4 | 1.19 | 73.93 | 75.14 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_40k_cityscapes/fcn_r101-d8_769x769_40k_cityscapes_20200606_113208-7d4ab69c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_40k_cityscapes/fcn_r101-d8_769x769_40k_cityscapes_20200606_113208.log.json) | -| FCN | R-18-D8 | 512x1024 | 80000 | 1.7 | 14.65 | 71.11 | 72.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn_r18-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_512x1024_80k_cityscapes/fcn_r18-d8_512x1024_80k_cityscapes_20201225_021327-6c50f8b4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_512x1024_80k_cityscapes/fcn_r18-d8_512x1024_80k_cityscapes-20201225_021327.log.json) | -| FCN | R-50-D8 | 512x1024 | 80000 | - | | 73.61 | 74.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_80k_cityscapes/fcn_r50-d8_512x1024_80k_cityscapes_20200606_113019-03aa804d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_80k_cityscapes/fcn_r50-d8_512x1024_80k_cityscapes_20200606_113019.log.json) | -| FCN | R-101-D8 | 512x1024 | 80000 | - | - | 75.13 | 75.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_80k_cityscapes/fcn_r101-d8_512x1024_80k_cityscapes_20200606_113038-3fb937eb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_80k_cityscapes/fcn_r101-d8_512x1024_80k_cityscapes_20200606_113038.log.json) | -| FCN (FP16) | R-101-D8 | 512x1024 | 80000 | 5.37 | 8.64 | 76.80 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_fp16_512x1024_80k_cityscapes/fcn_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230921-fb13e883.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_fp16_512x1024_80k_cityscapes/fcn_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230921.log.json) | -| FCN | R-18-D8 | 769x769 | 80000 | 1.9 | 6.40 | 70.80 | 73.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn_r18-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_769x769_80k_cityscapes/fcn_r18-d8_769x769_80k_cityscapes_20201225_021451-9739d1b8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_769x769_80k_cityscapes/fcn_r18-d8_769x769_80k_cityscapes-20201225_021451.log.json) | -| FCN | R-50-D8 | 769x769 | 80000 | - | - | 72.64 | 73.32 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_80k_cityscapes/fcn_r50-d8_769x769_80k_cityscapes_20200606_195749-f5caeabc.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_80k_cityscapes/fcn_r50-d8_769x769_80k_cityscapes_20200606_195749.log.json) | -| FCN | R-101-D8 | 769x769 | 80000 | - | - | 75.52 | 76.61 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_80k_cityscapes/fcn_r101-d8_769x769_80k_cityscapes_20200606_214354-45cbac68.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_80k_cityscapes/fcn_r101-d8_769x769_80k_cityscapes_20200606_214354.log.json) | -| FCN | R-18b-D8 | 512x1024 | 80000 | 1.6 | 16.74 | 70.24 | 72.77 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn_r18b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_512x1024_80k_cityscapes/fcn_r18b-d8_512x1024_80k_cityscapes_20201225_230143-92c0f445.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_512x1024_80k_cityscapes/fcn_r18b-d8_512x1024_80k_cityscapes-20201225_230143.log.json) | -| FCN | R-50b-D8 | 512x1024 | 80000 | 5.6 | 4.20 | 75.65 | 77.59 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn_r50b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_512x1024_80k_cityscapes/fcn_r50b-d8_512x1024_80k_cityscapes_20201225_094221-82957416.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_512x1024_80k_cityscapes/fcn_r50b-d8_512x1024_80k_cityscapes-20201225_094221.log.json) | -| FCN | R-101b-D8 | 512x1024 | 80000 | 9.1 | 2.73 | 77.37 | 78.77 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn_r101b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_512x1024_80k_cityscapes/fcn_r101b-d8_512x1024_80k_cityscapes_20201226_160213-4543858f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_512x1024_80k_cityscapes/fcn_r101b-d8_512x1024_80k_cityscapes-20201226_160213.log.json) | -| FCN | R-18b-D8 | 769x769 | 80000 | 1.7 | 6.70 | 69.66 | 72.07 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn_r18b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_769x769_80k_cityscapes/fcn_r18b-d8_769x769_80k_cityscapes_20201226_004430-32d504e5.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_769x769_80k_cityscapes/fcn_r18b-d8_769x769_80k_cityscapes-20201226_004430.log.json) | -| FCN | R-50b-D8 | 769x769 | 80000 | 6.3 | 1.82 | 73.83 | 76.60 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn_r50b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_769x769_80k_cityscapes/fcn_r50b-d8_769x769_80k_cityscapes_20201225_094223-94552d38.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_769x769_80k_cityscapes/fcn_r50b-d8_769x769_80k_cityscapes-20201225_094223.log.json) | -| FCN | R-101b-D8 | 769x769 | 80000 | 10.3 | 1.15 | 77.02 | 78.67 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn_r101b-d8_4xb2-80k_cityscapes-769x769.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_769x769_80k_cityscapes/fcn_r101b-d8_769x769_80k_cityscapes_20201226_170012-82be37e2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_769x769_80k_cityscapes/fcn_r101b-d8_769x769_80k_cityscapes-20201226_170012.log.json) | -| FCN (D6) | R-50-D16 | 512x1024 | 40000 | 3.4 | 10.22 | 77.06 | 78.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn-d6_r50-d16_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_512x1024_40k_cityscapes/fcn_d6_r50-d16_512x1024_40k_cityscapes_20210305_130133-98d5d1bc.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_512x1024_40k_cityscapes/fcn_d6_r50-d16_512x1024_40k_cityscapes-20210305_130133.log.json) | -| FCN (D6) | R-50-D16 | 512x1024 | 80000 | - | 10.35 | 77.27 | 78.88 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn-d6_r50-d16_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_512x1024_80k_cityscapes/fcn_d6_r50-d16_512x1024_80k_cityscapes_20210306_115604-133c292f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_512x1024_80k_cityscapes/fcn_d6_r50-d16_512x1024_80k_cityscapes-20210306_115604.log.json) | -| FCN (D6) | R-50-D16 | 769x769 | 40000 | 3.7 | 4.17 | 76.82 | 78.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn-d6_r50-d16_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_769x769_40k_cityscapes/fcn_d6_r50-d16_769x769_40k_cityscapes_20210305_185744-1aab18ed.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_769x769_40k_cityscapes/fcn_d6_r50-d16_769x769_40k_cityscapes-20210305_185744.log.json) | -| FCN (D6) | R-50-D16 | 769x769 | 80000 | - | 4.15 | 77.04 | 78.40 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn-d6_r50-d16_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_769x769_80k_cityscapes/fcn_d6_r50-d16_769x769_80k_cityscapes_20210305_200413-109d88eb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_769x769_80k_cityscapes/fcn_d6_r50-d16_769x769_80k_cityscapes-20210305_200413.log.json) | -| FCN (D6) | R-101-D16 | 512x1024 | 40000 | 4.5 | 8.04 | 77.36 | 79.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn-d6_r101-d16_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_512x1024_40k_cityscapes/fcn_d6_r101-d16_512x1024_40k_cityscapes_20210305_130337-9cf2b450.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_512x1024_40k_cityscapes/fcn_d6_r101-d16_512x1024_40k_cityscapes-20210305_130337.log.json) | -| FCN (D6) | R-101-D16 | 512x1024 | 80000 | - | 8.26 | 78.46 | 80.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn-d6_r101-d16_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_512x1024_80k_cityscapes/fcn_d6_r101-d16_512x1024_80k_cityscapes_20210308_102747-cb336445.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_512x1024_80k_cityscapes/fcn_d6_r101-d16_512x1024_80k_cityscapes-20210308_102747.log.json) | -| FCN (D6) | R-101-D16 | 769x769 | 40000 | 5.0 | 3.12 | 77.28 | 78.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn-d6_r101-d16_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_769x769_40k_cityscapes/fcn_d6_r101-d16_769x769_40k_cityscapes_20210308_102453-60b114e9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_769x769_40k_cityscapes/fcn_d6_r101-d16_769x769_40k_cityscapes-20210308_102453.log.json) | -| FCN (D6) | R-101-D16 | 769x769 | 80000 | - | 3.21 | 78.06 | 79.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn-d6_r101-d16_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_769x769_80k_cityscapes/fcn_d6_r101-d16_769x769_80k_cityscapes_20210306_120016-e33adc4f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_769x769_80k_cityscapes/fcn_d6_r101-d16_769x769_80k_cityscapes-20210306_120016.log.json) | -| FCN (D6) | R-50b-D16 | 512x1024 | 80000 | 3.2 | 10.16 | 76.99 | 79.03 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn-d6_r50b-d16_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50b-d16_512x1024_80k_cityscapes/fcn_d6_r50b-d16_512x1024_80k_cityscapes_20210311_125550-6a0b62e9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50b_d16_512x1024_80k_cityscapes/fcn_d6_r50b_d16_512x1024_80k_cityscapes-20210311_125550.log.json) | -| FCN (D6) | R-50b-D16 | 769x769 | 80000 | 3.6 | 4.17 | 76.86 | 78.52 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn-d6_r50b-d16_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50b-d16_769x769_80k_cityscapes/fcn_d6_r50b-d16_769x769_80k_cityscapes_20210311_131012-d665f231.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50b_d16_769x769_80k_cityscapes/fcn_d6_r50b_d16_769x769_80k_cityscapes-20210311_131012.log.json) | -| FCN (D6) | R-101b-D16 | 512x1024 | 80000 | 4.3 | 8.46 | 77.72 | 79.53 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn-d6_r101b-d16_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101b-d16_512x1024_80k_cityscapes/fcn_d6_r101b-d16_512x1024_80k_cityscapes_20210311_144305-3f2eb5b4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101b_d16_512x1024_80k_cityscapes/fcn_d6_r101b_d16_512x1024_80k_cityscapes-20210311_144305.log.json) | -| FCN (D6) | R-101b-D16 | 769x769 | 80000 | 4.8 | 3.32 | 77.34 | 78.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn-d6_r101b-d16_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101b-d16_769x769_80k_cityscapes/fcn_d6_r101b-d16_769x769_80k_cityscapes_20210311_154527-c4d8bfbc.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101b_d16_769x769_80k_cityscapes/fcn_d6_r101b_d16_769x769_80k_cityscapes-20210311_154527.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | 
config | download | +| ---------- | ---------- | --------- | ------: | -------- | -------------- | -------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | R-50-D8 | 512x1024 | 40000 | 5.7 | 4.17 | V100 | 72.25 | 73.36 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_40k_cityscapes/fcn_r50-d8_512x1024_40k_cityscapes_20200604_192608-efe53f0d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_40k_cityscapes/fcn_r50-d8_512x1024_40k_cityscapes_20200604_192608.log.json) | +| FCN | R-101-D8 | 512x1024 | 40000 | 9.2 | 2.66 | V100 | 75.45 | 76.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_40k_cityscapes/fcn_r101-d8_512x1024_40k_cityscapes_20200604_181852-a883d3a1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_40k_cityscapes/fcn_r101-d8_512x1024_40k_cityscapes_20200604_181852.log.json) | +| FCN | R-50-D8 | 769x769 | 40000 | 6.5 | 1.80 | V100 | 71.47 | 72.54 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_40k_cityscapes/fcn_r50-d8_769x769_40k_cityscapes_20200606_113104-977b5d02.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_40k_cityscapes/fcn_r50-d8_769x769_40k_cityscapes_20200606_113104.log.json) | +| FCN | R-101-D8 | 769x769 | 40000 | 10.4 | 1.19 | V100 | 73.93 | 75.14 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_40k_cityscapes/fcn_r101-d8_769x769_40k_cityscapes_20200606_113208-7d4ab69c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_40k_cityscapes/fcn_r101-d8_769x769_40k_cityscapes_20200606_113208.log.json) | +| FCN | R-18-D8 | 512x1024 | 80000 | 1.7 | 14.65 | V100 | 71.11 | 72.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r18-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_512x1024_80k_cityscapes/fcn_r18-d8_512x1024_80k_cityscapes_20201225_021327-6c50f8b4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_512x1024_80k_cityscapes/fcn_r18-d8_512x1024_80k_cityscapes-20201225_021327.log.json) | +| FCN | R-50-D8 | 512x1024 | 80000 | - | - | V100 | 73.61 | 74.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r50-d8_4xb2-80k_cityscapes-512x1024.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_80k_cityscapes/fcn_r50-d8_512x1024_80k_cityscapes_20200606_113019-03aa804d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_80k_cityscapes/fcn_r50-d8_512x1024_80k_cityscapes_20200606_113019.log.json) | +| FCN | R-101-D8 | 512x1024 | 80000 | - | - | V100 | 75.13 | 75.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_80k_cityscapes/fcn_r101-d8_512x1024_80k_cityscapes_20200606_113038-3fb937eb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_80k_cityscapes/fcn_r101-d8_512x1024_80k_cityscapes_20200606_113038.log.json) | +| FCN (FP16) | R-101-D8 | 512x1024 | 80000 | 5.37 | 8.64 | V100 | 76.80 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_fp16_512x1024_80k_cityscapes/fcn_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230921-fb13e883.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_fp16_512x1024_80k_cityscapes/fcn_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230921.log.json) | +| FCN | R-18-D8 | 769x769 | 80000 | 1.9 | 6.40 | V100 | 70.80 | 73.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r18-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_769x769_80k_cityscapes/fcn_r18-d8_769x769_80k_cityscapes_20201225_021451-9739d1b8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_769x769_80k_cityscapes/fcn_r18-d8_769x769_80k_cityscapes-20201225_021451.log.json) | +| FCN | R-50-D8 | 769x769 | 80000 | - | - | V100 | 72.64 | 73.32 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_80k_cityscapes/fcn_r50-d8_769x769_80k_cityscapes_20200606_195749-f5caeabc.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_80k_cityscapes/fcn_r50-d8_769x769_80k_cityscapes_20200606_195749.log.json) | +| FCN | R-101-D8 | 769x769 | 80000 | - | - | V100 | 75.52 | 76.61 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_80k_cityscapes/fcn_r101-d8_769x769_80k_cityscapes_20200606_214354-45cbac68.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_80k_cityscapes/fcn_r101-d8_769x769_80k_cityscapes_20200606_214354.log.json) | +| FCN | R-18b-D8 | 512x1024 | 80000 | 1.6 | 16.74 | V100 | 70.24 | 72.77 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r18b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_512x1024_80k_cityscapes/fcn_r18b-d8_512x1024_80k_cityscapes_20201225_230143-92c0f445.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_512x1024_80k_cityscapes/fcn_r18b-d8_512x1024_80k_cityscapes-20201225_230143.log.json) | +| FCN | R-50b-D8 | 512x1024 | 80000 | 5.6 | 4.20 | V100 | 75.65 | 77.59 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r50b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_512x1024_80k_cityscapes/fcn_r50b-d8_512x1024_80k_cityscapes_20201225_094221-82957416.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_512x1024_80k_cityscapes/fcn_r50b-d8_512x1024_80k_cityscapes-20201225_094221.log.json) | +| FCN | R-101b-D8 | 512x1024 | 80000 | 9.1 | 2.73 | V100 | 77.37 | 78.77 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r101b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_512x1024_80k_cityscapes/fcn_r101b-d8_512x1024_80k_cityscapes_20201226_160213-4543858f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_512x1024_80k_cityscapes/fcn_r101b-d8_512x1024_80k_cityscapes-20201226_160213.log.json) | +| FCN | R-18b-D8 | 769x769 | 80000 | 1.7 | 6.70 | V100 | 69.66 | 72.07 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r18b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_769x769_80k_cityscapes/fcn_r18b-d8_769x769_80k_cityscapes_20201226_004430-32d504e5.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_769x769_80k_cityscapes/fcn_r18b-d8_769x769_80k_cityscapes-20201226_004430.log.json) | +| FCN | R-50b-D8 | 769x769 | 80000 | 6.3 | 1.82 | V100 | 73.83 | 76.60 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r50b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_769x769_80k_cityscapes/fcn_r50b-d8_769x769_80k_cityscapes_20201225_094223-94552d38.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_769x769_80k_cityscapes/fcn_r50b-d8_769x769_80k_cityscapes-20201225_094223.log.json) | +| FCN | R-101b-D8 | 769x769 | 80000 | 10.3 | 1.15 | V100 | 77.02 | 78.67 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r101b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_769x769_80k_cityscapes/fcn_r101b-d8_769x769_80k_cityscapes_20201226_170012-82be37e2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_769x769_80k_cityscapes/fcn_r101b-d8_769x769_80k_cityscapes-20201226_170012.log.json) | +| FCN (D6) | R-50-D16 | 512x1024 | 40000 | 3.4 | 10.22 | TITAN Xp | 77.06 | 78.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn-d6_r50-d16_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_512x1024_40k_cityscapes/fcn_d6_r50-d16_512x1024_40k_cityscapes_20210305_130133-98d5d1bc.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_512x1024_40k_cityscapes/fcn_d6_r50-d16_512x1024_40k_cityscapes-20210305_130133.log.json) | +| FCN (D6) | R-50-D16 | 512x1024 | 80000 | - | 10.35 | TITAN Xp | 77.27 | 78.88 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn-d6_r50-d16_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_512x1024_80k_cityscapes/fcn_d6_r50-d16_512x1024_80k_cityscapes_20210306_115604-133c292f.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_512x1024_80k_cityscapes/fcn_d6_r50-d16_512x1024_80k_cityscapes-20210306_115604.log.json) | +| FCN (D6) | R-50-D16 | 769x769 | 40000 | 3.7 | 4.17 | TITAN Xp | 76.82 | 78.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn-d6_r50-d16_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_769x769_40k_cityscapes/fcn_d6_r50-d16_769x769_40k_cityscapes_20210305_185744-1aab18ed.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_769x769_40k_cityscapes/fcn_d6_r50-d16_769x769_40k_cityscapes-20210305_185744.log.json) | +| FCN (D6) | R-50-D16 | 769x769 | 80000 | - | 4.15 | TITAN Xp | 77.04 | 78.40 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn-d6_r50-d16_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_769x769_80k_cityscapes/fcn_d6_r50-d16_769x769_80k_cityscapes_20210305_200413-109d88eb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_769x769_80k_cityscapes/fcn_d6_r50-d16_769x769_80k_cityscapes-20210305_200413.log.json) | +| FCN (D6) | R-101-D16 | 512x1024 | 40000 | 4.5 | 8.04 | TITAN Xp | 77.36 | 79.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn-d6_r101-d16_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_512x1024_40k_cityscapes/fcn_d6_r101-d16_512x1024_40k_cityscapes_20210305_130337-9cf2b450.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_512x1024_40k_cityscapes/fcn_d6_r101-d16_512x1024_40k_cityscapes-20210305_130337.log.json) | +| FCN (D6) | R-101-D16 | 512x1024 | 80000 | - | 8.26 | TITAN Xp | 78.46 | 80.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn-d6_r101-d16_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_512x1024_80k_cityscapes/fcn_d6_r101-d16_512x1024_80k_cityscapes_20210308_102747-cb336445.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_512x1024_80k_cityscapes/fcn_d6_r101-d16_512x1024_80k_cityscapes-20210308_102747.log.json) | +| FCN (D6) | R-101-D16 | 769x769 | 40000 | 5.0 | 3.12 | TITAN Xp | 77.28 | 78.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn-d6_r101-d16_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_769x769_40k_cityscapes/fcn_d6_r101-d16_769x769_40k_cityscapes_20210308_102453-60b114e9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_769x769_40k_cityscapes/fcn_d6_r101-d16_769x769_40k_cityscapes-20210308_102453.log.json) | +| FCN (D6) | R-101-D16 | 769x769 | 80000 | - | 3.21 | TITAN Xp | 78.06 | 79.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn-d6_r101-d16_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_769x769_80k_cityscapes/fcn_d6_r101-d16_769x769_80k_cityscapes_20210306_120016-e33adc4f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_769x769_80k_cityscapes/fcn_d6_r101-d16_769x769_80k_cityscapes-20210306_120016.log.json) | +| FCN (D6) | R-50b-D16 | 512x1024 | 80000 | 3.2 | 10.16 | TITAN Xp | 76.99 | 79.03 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn-d6_r50b-d16_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50b-d16_512x1024_80k_cityscapes/fcn_d6_r50b-d16_512x1024_80k_cityscapes_20210311_125550-6a0b62e9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50b_d16_512x1024_80k_cityscapes/fcn_d6_r50b_d16_512x1024_80k_cityscapes-20210311_125550.log.json) | +| FCN (D6) | R-50b-D16 | 769x769 | 80000 | 3.6 | 4.17 | TITAN Xp | 76.86 | 78.52 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn-d6_r50b-d16_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50b-d16_769x769_80k_cityscapes/fcn_d6_r50b-d16_769x769_80k_cityscapes_20210311_131012-d665f231.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50b_d16_769x769_80k_cityscapes/fcn_d6_r50b_d16_769x769_80k_cityscapes-20210311_131012.log.json) | +| FCN (D6) | R-101b-D16 | 512x1024 | 80000 | 4.3 | 8.46 | TITAN Xp | 77.72 | 79.53 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn-d6_r101b-d16_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101b-d16_512x1024_80k_cityscapes/fcn_d6_r101b-d16_512x1024_80k_cityscapes_20210311_144305-3f2eb5b4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101b_d16_512x1024_80k_cityscapes/fcn_d6_r101b_d16_512x1024_80k_cityscapes-20210311_144305.log.json) | +| FCN (D6) | R-101b-D16 | 769x769 | 80000 | 4.8 | 3.32 | TITAN Xp | 77.34 | 78.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn-d6_r101b-d16_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101b-d16_769x769_80k_cityscapes/fcn_d6_r101b-d16_769x769_80k_cityscapes_20210311_154527-c4d8bfbc.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101b_d16_769x769_80k_cityscapes/fcn_d6_r101b_d16_769x769_80k_cityscapes-20210311_154527.log.json) | ### ADE20K -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| FCN | R-50-D8 | 512x512 | 80000 | 8.5 | 23.49 | 35.94 | 37.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_80k_ade20k/fcn_r50-d8_512x512_80k_ade20k_20200614_144016-f8ac5082.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_80k_ade20k/fcn_r50-d8_512x512_80k_ade20k_20200614_144016.log.json) | -| FCN | R-101-D8 | 512x512 | 80000 | 12 | 14.78 | 39.61 | 40.83 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn_r101-d8_4xb4-80k_ade20k-512x512.pyy) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_80k_ade20k/fcn_r101-d8_512x512_80k_ade20k_20200615_014143-bc1809f7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_80k_ade20k/fcn_r101-d8_512x512_80k_ade20k_20200615_014143.log.json) | -| FCN | R-50-D8 | 512x512 | 160000 | - | - | 36.10 | 38.08 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn_r50-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_160k_ade20k/fcn_r50-d8_512x512_160k_ade20k_20200615_100713-4edbc3b4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_160k_ade20k/fcn_r50-d8_512x512_160k_ade20k_20200615_100713.log.json) | -| FCN | R-101-D8 | 512x512 | 160000 | - | - | 39.91 | 41.40 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_160k_ade20k/fcn_r101-d8_512x512_160k_ade20k_20200615_105816-fd192bd5.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_160k_ade20k/fcn_r101-d8_512x512_160k_ade20k_20200615_105816.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | R-50-D8 | 512x512 | 80000 | 8.5 | 23.49 | V100 | 35.94 | 37.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_80k_ade20k/fcn_r50-d8_512x512_80k_ade20k_20200614_144016-f8ac5082.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_80k_ade20k/fcn_r50-d8_512x512_80k_ade20k_20200614_144016.log.json) | +| FCN | R-101-D8 | 512x512 | 80000 | 12 | 14.78 | V100 | 39.61 | 40.83 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r101-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_80k_ade20k/fcn_r101-d8_512x512_80k_ade20k_20200615_014143-bc1809f7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_80k_ade20k/fcn_r101-d8_512x512_80k_ade20k_20200615_014143.log.json) | +| FCN | R-50-D8 | 512x512 | 160000 | - | - | V100 | 36.10 | 38.08 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r50-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_160k_ade20k/fcn_r50-d8_512x512_160k_ade20k_20200615_100713-4edbc3b4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_160k_ade20k/fcn_r50-d8_512x512_160k_ade20k_20200615_100713.log.json) | +| FCN | R-101-D8 | 512x512 | 160000 | - | - | V100 | 39.91 | 41.40 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_160k_ade20k/fcn_r101-d8_512x512_160k_ade20k_20200615_105816-fd192bd5.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_160k_ade20k/fcn_r101-d8_512x512_160k_ade20k_20200615_105816.log.json) | ### Pascal VOC 2012 + Aug -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| FCN | R-50-D8 | 512x512 | 20000 | 5.7 | 23.28 | 67.08 | 69.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn_r50-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_20k_voc12aug/fcn_r50-d8_512x512_20k_voc12aug_20200617_010715-52dc5306.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_20k_voc12aug/fcn_r50-d8_512x512_20k_voc12aug_20200617_010715.log.json) | -| FCN | R-101-D8 | 512x512 | 20000 | 9.2 | 14.81 | 71.16 | 73.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn_r101-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_20k_voc12aug/fcn_r101-d8_512x512_20k_voc12aug_20200617_010842-0bb4e798.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_20k_voc12aug/fcn_r101-d8_512x512_20k_voc12aug_20200617_010842.log.json) | -| FCN | R-50-D8 | 512x512 | 40000 | - | - | 66.97 | 69.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn_r50-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_40k_voc12aug/fcn_r50-d8_512x512_40k_voc12aug_20200613_161222-5e2dbf40.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_40k_voc12aug/fcn_r50-d8_512x512_40k_voc12aug_20200613_161222.log.json) | -| FCN | R-101-D8 | 512x512 | 40000 | - | - | 69.91 | 72.38 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn_r101-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_40k_voc12aug/fcn_r101-d8_512x512_40k_voc12aug_20200613_161240-4c8bcefd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_40k_voc12aug/fcn_r101-d8_512x512_40k_voc12aug_20200613_161240.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------- | 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | R-50-D8 | 512x512 | 20000 | 5.7 | 23.28 | V100 | 67.08 | 69.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r50-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_20k_voc12aug/fcn_r50-d8_512x512_20k_voc12aug_20200617_010715-52dc5306.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_20k_voc12aug/fcn_r50-d8_512x512_20k_voc12aug_20200617_010715.log.json) | +| FCN | R-101-D8 | 512x512 | 20000 | 9.2 | 14.81 | V100 | 71.16 | 73.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r101-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_20k_voc12aug/fcn_r101-d8_512x512_20k_voc12aug_20200617_010842-0bb4e798.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_20k_voc12aug/fcn_r101-d8_512x512_20k_voc12aug_20200617_010842.log.json) | +| FCN | R-50-D8 | 512x512 | 40000 | - | - | V100 | 66.97 | 69.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r50-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_40k_voc12aug/fcn_r50-d8_512x512_40k_voc12aug_20200613_161222-5e2dbf40.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_40k_voc12aug/fcn_r50-d8_512x512_40k_voc12aug_20200613_161222.log.json) | +| FCN | R-101-D8 | 512x512 | 40000 | - | - | V100 | 69.91 | 72.38 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r101-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_40k_voc12aug/fcn_r101-d8_512x512_40k_voc12aug_20200613_161240-4c8bcefd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_40k_voc12aug/fcn_r101-d8_512x512_40k_voc12aug_20200613_161240.log.json) | ### Pascal Context -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| FCN | R-101-D8 | 480x480 | 40000 | - | 9.93 | 44.43 | 45.63 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn_r101-d8_4xb4-40k_pascal-context-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context/fcn_r101-d8_480x480_40k_pascal_context_20210421_154757-b5e97937.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context/fcn_r101-d8_480x480_40k_pascal_context-20210421_154757.log.json) | -| FCN | R-101-D8 | 480x480 | 80000 | - | - | 44.13 | 45.26 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn_r101-d8_4xb4-80k_pascal-context-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context/fcn_r101-d8_480x480_80k_pascal_context_20210421_163310-4711813f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context/fcn_r101-d8_480x480_80k_pascal_context-20210421_163310.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | R-101-D8 | 480x480 | 40000 | - | 9.93 | V100 | 44.43 | 45.63 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r101-d8_4xb4-40k_pascal-context-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context/fcn_r101-d8_480x480_40k_pascal_context_20210421_154757-b5e97937.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context/fcn_r101-d8_480x480_40k_pascal_context-20210421_154757.log.json) | +| FCN | R-101-D8 | 480x480 | 80000 | - | - | V100 | 44.13 | 45.26 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r101-d8_4xb4-80k_pascal-context-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context/fcn_r101-d8_480x480_80k_pascal_context_20210421_163310-4711813f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context/fcn_r101-d8_480x480_80k_pascal_context-20210421_163310.log.json) | ### Pascal Context 59 -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| FCN | R-101-D8 | 480x480 | 40000 | - | - | 48.42 | 50.4 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn_r101-d8_4xb4-40k_pascal-context-59-480x480.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context_59/fcn_r101-d8_480x480_40k_pascal_context_59_20210415_230724-8cf83682.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context_59/fcn_r101-d8_480x480_40k_pascal_context_59-20210415_230724.log.json) | -| FCN | R-101-D8 | 480x480 | 80000 | - | - | 49.35 | 51.38 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn_r101-d8_4xb4-80k_pascal-context-59-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context_59/fcn_r101-d8_480x480_80k_pascal_context_59_20210416_110804-9a6f2c94.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context_59/fcn_r101-d8_480x480_80k_pascal_context_59-20210416_110804.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | R-101-D8 | 480x480 | 40000 | - | - | V100 | 48.42 | 50.4 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r101-d8_4xb4-40k_pascal-context-59-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context_59/fcn_r101-d8_480x480_40k_pascal_context_59_20210415_230724-8cf83682.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context_59/fcn_r101-d8_480x480_40k_pascal_context_59-20210415_230724.log.json) | +| FCN | R-101-D8 | 480x480 | 80000 | - | - | V100 | 49.35 | 51.38 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn_r101-d8_4xb4-80k_pascal-context-59-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context_59/fcn_r101-d8_480x480_80k_pascal_context_59_20210416_110804-9a6f2c94.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context_59/fcn_r101-d8_480x480_80k_pascal_context_59-20210416_110804.log.json) | Note: - `FP16` means Mixed Precision (FP16) is adopted in training. - `FCN D6` means dilation rate of convolution operator in FCN is 6. 
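
For context on the `FP16` and `FCN D6` notes above: in the 1.x-style configs these variants are expressed through the config system rather than separate code paths. Below is a minimal sketch of what an `-amp-` (FP16) config looks like, assuming MMEngine's `AmpOptimWrapper`; the `loss_scale` value and optimizer settings are illustrative assumptions, not taken from this diff.

```python
# Hypothetical minimal AMP (FP16) training config for MMSegmentation 1.x.
# Inherit the plain FP32 config referenced in the Cityscapes table above.
_base_ = './fcn_r101-d8_4xb2-80k_cityscapes-512x1024.py'

# Swap in MMEngine's AmpOptimWrapper so forward/backward run under torch
# autocast with gradient scaling; master weights stay FP32, which is why the
# reported footprint (5.37 GB) is well below the 9.2 GB of the FP32
# R-101-D8 run in the table.
optim_wrapper = dict(
    _delete_=True,  # discard the inherited FP32 OptimWrapper settings
    type='AmpOptimWrapper',
    loss_scale='dynamic',  # assumption: dynamic loss scaling
    optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005))
```

The `FCN (D6)` rows are presumably handled the same way, overriding the head's dilation (e.g. `decode_head=dict(dilation=6)`) on top of a D16 backbone instead of touching the model code.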
+ +## Citation + +```bibtex +@article{shelhamer2017fully, + title={Fully convolutional networks for semantic segmentation}, + author={Shelhamer, Evan and Long, Jonathan and Darrell, Trevor}, + journal={IEEE transactions on pattern analysis and machine intelligence}, + volume={39}, + number={4}, + pages={640--651}, + year={2017}, + publisher={IEEE Trans Pattern Anal Mach Intell} +} +``` diff --git a/configs/fcn/fcn.yml b/configs/fcn/fcn.yml deleted file mode 100644 index 71c4b2d122..0000000000 --- a/configs/fcn/fcn.yml +++ /dev/null @@ -1,827 +0,0 @@ -Collections: -- Name: FCN - Metadata: - Training Data: - - Cityscapes - - ADE20K - - Pascal VOC 2012 + Aug - - Pascal Context - - Pascal Context 59 - Paper: - URL: https://arxiv.org/abs/1411.4038 - Title: Fully Convolutional Networks for Semantic Segmentation - README: configs/fcn/README.md - Code: - URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 - Version: v0.17.0 - Converted From: - Code: https://github.com/BVLC/caffe/wiki/Model-Zoo#fcn -Models: -- Name: fcn_r50-d8_4xb2-40k_cityscapes-512x1024 - In Collection: FCN - Metadata: - backbone: R-50-D8 - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 239.81 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 5.7 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 72.25 - mIoU(ms+flip): 73.36 - Config: configs/fcn/fcn_r50-d8_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_40k_cityscapes/fcn_r50-d8_512x1024_40k_cityscapes_20200604_192608-efe53f0d.pth -- Name: fcn_r101-d8_4xb2-40k_cityscapes-512x1024 - In Collection: FCN - Metadata: - backbone: R-101-D8 - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 375.94 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 9.2 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 75.45 - mIoU(ms+flip): 76.58 - Config: configs/fcn/fcn_r101-d8_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_40k_cityscapes/fcn_r101-d8_512x1024_40k_cityscapes_20200604_181852-a883d3a1.pth -- Name: fcn_r50-d8_4xb2-40k_cityscapes-769x769 - In Collection: FCN - Metadata: - backbone: R-50-D8 - crop size: (769,769) - lr schd: 40000 - inference time (ms/im): - - value: 555.56 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 6.5 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 71.47 - mIoU(ms+flip): 72.54 - Config: configs/fcn/fcn_r50-d8_4xb2-40k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_40k_cityscapes/fcn_r50-d8_769x769_40k_cityscapes_20200606_113104-977b5d02.pth -- Name: fcn_r101-d8_4xb2-40k_cityscapes-769x769 - In Collection: FCN - Metadata: - backbone: R-101-D8 - crop size: (769,769) - lr schd: 40000 - inference time (ms/im): - - value: 840.34 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 10.4 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 73.93 - mIoU(ms+flip): 75.14 - Config: configs/fcn/fcn_r101-d8_4xb2-40k_cityscapes-769x769.py - Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_40k_cityscapes/fcn_r101-d8_769x769_40k_cityscapes_20200606_113208-7d4ab69c.pth -- Name: fcn_r18-d8_4xb2-80k_cityscapes-512x1024 - In Collection: FCN - Metadata: - backbone: R-18-D8 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 68.26 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 1.7 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 71.11 - mIoU(ms+flip): 72.91 - Config: configs/fcn/fcn_r18-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_512x1024_80k_cityscapes/fcn_r18-d8_512x1024_80k_cityscapes_20201225_021327-6c50f8b4.pth -- Name: fcn_r50-d8_4xb2-80k_cityscapes-512x1024 - In Collection: FCN - Metadata: - backbone: R-50-D8 - crop size: (512,1024) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 73.61 - mIoU(ms+flip): 74.24 - Config: configs/fcn/fcn_r50-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_80k_cityscapes/fcn_r50-d8_512x1024_80k_cityscapes_20200606_113019-03aa804d.pth -- Name: fcn_r101-d8_4xb2-80k_cityscapes-512x1024 - In Collection: FCN - Metadata: - backbone: R-101-D8 - crop size: (512,1024) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 75.13 - mIoU(ms+flip): 75.94 - Config: configs/fcn/fcn_r101-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_80k_cityscapes/fcn_r101-d8_512x1024_80k_cityscapes_20200606_113038-3fb937eb.pth -- Name: fcn_r101-d8_4xb2-amp-80k_cityscapes-512x1024 - In Collection: FCN - Metadata: - backbone: R-101-D8 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 115.74 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: AMP - resolution: (512,1024) - Training Memory (GB): 5.37 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 76.8 - Config: configs/fcn/fcn_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_fp16_512x1024_80k_cityscapes/fcn_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230921-fb13e883.pth -- Name: fcn_r18-d8_4xb2-80k_cityscapes-769x769 - In Collection: FCN - Metadata: - backbone: R-18-D8 - crop size: (769,769) - lr schd: 80000 - inference time (ms/im): - - value: 156.25 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 1.9 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 70.8 - mIoU(ms+flip): 73.16 - Config: configs/fcn/fcn_r18-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_769x769_80k_cityscapes/fcn_r18-d8_769x769_80k_cityscapes_20201225_021451-9739d1b8.pth -- Name: fcn_r50-d8_4xb2-80k_cityscapes-769x769 - In Collection: FCN - Metadata: - backbone: R-50-D8 - crop size: (769,769) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 72.64 - mIoU(ms+flip): 73.32 - Config: configs/fcn/fcn_r50-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_80k_cityscapes/fcn_r50-d8_769x769_80k_cityscapes_20200606_195749-f5caeabc.pth -- 
Name: fcn_r101-d8_4xb2-80k_cityscapes-769x769 - In Collection: FCN - Metadata: - backbone: R-101-D8 - crop size: (769,769) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 75.52 - mIoU(ms+flip): 76.61 - Config: configs/fcn/fcn_r101-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_80k_cityscapes/fcn_r101-d8_769x769_80k_cityscapes_20200606_214354-45cbac68.pth -- Name: fcn_r18b-d8_4xb2-80k_cityscapes-512x1024 - In Collection: FCN - Metadata: - backbone: R-18b-D8 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 59.74 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 1.6 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 70.24 - mIoU(ms+flip): 72.77 - Config: configs/fcn/fcn_r18b-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_512x1024_80k_cityscapes/fcn_r18b-d8_512x1024_80k_cityscapes_20201225_230143-92c0f445.pth -- Name: fcn_r50b-d8_4xb2-80k_cityscapes-512x1024 - In Collection: FCN - Metadata: - backbone: R-50b-D8 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 238.1 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 5.6 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 75.65 - mIoU(ms+flip): 77.59 - Config: configs/fcn/fcn_r50b-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_512x1024_80k_cityscapes/fcn_r50b-d8_512x1024_80k_cityscapes_20201225_094221-82957416.pth -- Name: fcn_r101b-d8_4xb2-80k_cityscapes-512x1024 - In Collection: FCN - Metadata: - backbone: R-101b-D8 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 366.3 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 9.1 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 77.37 - mIoU(ms+flip): 78.77 - Config: configs/fcn/fcn_r101b-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_512x1024_80k_cityscapes/fcn_r101b-d8_512x1024_80k_cityscapes_20201226_160213-4543858f.pth -- Name: fcn_r18b-d8_4xb2-80k_cityscapes-769x769 - In Collection: FCN - Metadata: - backbone: R-18b-D8 - crop size: (769,769) - lr schd: 80000 - inference time (ms/im): - - value: 149.25 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 1.7 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 69.66 - mIoU(ms+flip): 72.07 - Config: configs/fcn/fcn_r18b-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_769x769_80k_cityscapes/fcn_r18b-d8_769x769_80k_cityscapes_20201226_004430-32d504e5.pth -- Name: fcn_r50b-d8_4xb2-80k_cityscapes-769x769 - In Collection: FCN - Metadata: - backbone: R-50b-D8 - crop size: (769,769) - lr schd: 80000 - inference time (ms/im): - - value: 549.45 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 6.3 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 73.83 - mIoU(ms+flip): 76.6 - Config: 
configs/fcn/fcn_r50b-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_769x769_80k_cityscapes/fcn_r50b-d8_769x769_80k_cityscapes_20201225_094223-94552d38.pth -- Name: fcn_r101b-d8_4xb2-80k_cityscapes-769x769 - In Collection: FCN - Metadata: - backbone: R-101b-D8 - crop size: (769,769) - lr schd: 80000 - inference time (ms/im): - - value: 869.57 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 10.3 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 77.02 - mIoU(ms+flip): 78.67 - Config: configs/fcn/fcn_r101b-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_769x769_80k_cityscapes/fcn_r101b-d8_769x769_80k_cityscapes_20201226_170012-82be37e2.pth -- Name: fcn-d6_r50-d16_4xb2-40k_cityscapes-512x1024 - In Collection: FCN - Metadata: - backbone: R-50-D16 - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 97.85 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 3.4 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 77.06 - mIoU(ms+flip): 78.85 - Config: configs/fcn/fcn-d6_r50-d16_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_512x1024_40k_cityscapes/fcn_d6_r50-d16_512x1024_40k_cityscapes_20210305_130133-98d5d1bc.pth -- Name: fcn-d6_r50-d16_4xb2-80k_cityscapes-512x1024 - In Collection: FCN - Metadata: - backbone: R-50-D16 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 96.62 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 77.27 - mIoU(ms+flip): 78.88 - Config: configs/fcn/fcn-d6_r50-d16_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_512x1024_80k_cityscapes/fcn_d6_r50-d16_512x1024_80k_cityscapes_20210306_115604-133c292f.pth -- Name: fcn-d6_r50-d16_4xb2-40k_cityscapes-769x769 - In Collection: FCN - Metadata: - backbone: R-50-D16 - crop size: (769,769) - lr schd: 40000 - inference time (ms/im): - - value: 239.81 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 3.7 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 76.82 - mIoU(ms+flip): 78.22 - Config: configs/fcn/fcn-d6_r50-d16_4xb2-40k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_769x769_40k_cityscapes/fcn_d6_r50-d16_769x769_40k_cityscapes_20210305_185744-1aab18ed.pth -- Name: fcn-d6_r50-d16_4xb2-80k_cityscapes-769x769 - In Collection: FCN - Metadata: - backbone: R-50-D16 - crop size: (769,769) - lr schd: 80000 - inference time (ms/im): - - value: 240.96 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 77.04 - mIoU(ms+flip): 78.4 - Config: configs/fcn/fcn-d6_r50-d16_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_769x769_80k_cityscapes/fcn_d6_r50-d16_769x769_80k_cityscapes_20210305_200413-109d88eb.pth -- Name: fcn-d6_r101-d16_4xb2-40k_cityscapes-512x1024 - In Collection: FCN - Metadata: - 
backbone: R-101-D16 - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 124.38 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 4.5 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 77.36 - mIoU(ms+flip): 79.18 - Config: configs/fcn/fcn-d6_r101-d16_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_512x1024_40k_cityscapes/fcn_d6_r101-d16_512x1024_40k_cityscapes_20210305_130337-9cf2b450.pth -- Name: fcn-d6_r101-d16_4xb2-80k_cityscapes-512x1024 - In Collection: FCN - Metadata: - backbone: R-101-D16 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 121.07 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.46 - mIoU(ms+flip): 80.42 - Config: configs/fcn/fcn-d6_r101-d16_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_512x1024_80k_cityscapes/fcn_d6_r101-d16_512x1024_80k_cityscapes_20210308_102747-cb336445.pth -- Name: fcn-d6_r101-d16_4xb2-40k_cityscapes-769x769 - In Collection: FCN - Metadata: - backbone: R-101-D16 - crop size: (769,769) - lr schd: 40000 - inference time (ms/im): - - value: 320.51 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 5.0 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 77.28 - mIoU(ms+flip): 78.95 - Config: configs/fcn/fcn-d6_r101-d16_4xb2-40k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_769x769_40k_cityscapes/fcn_d6_r101-d16_769x769_40k_cityscapes_20210308_102453-60b114e9.pth -- Name: fcn-d6_r101-d16_4xb2-80k_cityscapes-769x769 - In Collection: FCN - Metadata: - backbone: R-101-D16 - crop size: (769,769) - lr schd: 80000 - inference time (ms/im): - - value: 311.53 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.06 - mIoU(ms+flip): 79.58 - Config: configs/fcn/fcn-d6_r101-d16_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_769x769_80k_cityscapes/fcn_d6_r101-d16_769x769_80k_cityscapes_20210306_120016-e33adc4f.pth -- Name: fcn-d6_r50b-d16_4xb2-80k_cityscapes-512x1024 - In Collection: FCN - Metadata: - backbone: R-50b-D16 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 98.43 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 3.2 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 76.99 - mIoU(ms+flip): 79.03 - Config: configs/fcn/fcn-d6_r50b-d16_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50b-d16_512x1024_80k_cityscapes/fcn_d6_r50b-d16_512x1024_80k_cityscapes_20210311_125550-6a0b62e9.pth -- Name: fcn-d6_r50b-d16_4xb2-80k_cityscapes-769x769 - In Collection: FCN - Metadata: - backbone: R-50b-D16 - crop size: (769,769) - lr schd: 80000 - inference time (ms/im): - - value: 239.81 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 3.6 - Results: - - Task: Semantic Segmentation - Dataset: 
Cityscapes - Metrics: - mIoU: 76.86 - mIoU(ms+flip): 78.52 - Config: configs/fcn/fcn-d6_r50b-d16_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50b-d16_769x769_80k_cityscapes/fcn_d6_r50b-d16_769x769_80k_cityscapes_20210311_131012-d665f231.pth -- Name: fcn-d6_r101b-d16_4xb2-80k_cityscapes-512x1024 - In Collection: FCN - Metadata: - backbone: R-101b-D16 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 118.2 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 4.3 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 77.72 - mIoU(ms+flip): 79.53 - Config: configs/fcn/fcn-d6_r101b-d16_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101b-d16_512x1024_80k_cityscapes/fcn_d6_r101b-d16_512x1024_80k_cityscapes_20210311_144305-3f2eb5b4.pth -- Name: fcn-d6_r101b-d16_4xb2-80k_cityscapes-769x769 - In Collection: FCN - Metadata: - backbone: R-101b-D16 - crop size: (769,769) - lr schd: 80000 - inference time (ms/im): - - value: 301.2 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 4.8 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 77.34 - mIoU(ms+flip): 78.91 - Config: configs/fcn/fcn-d6_r101b-d16_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101b-d16_769x769_80k_cityscapes/fcn_d6_r101b-d16_769x769_80k_cityscapes_20210311_154527-c4d8bfbc.pth -- Name: fcn_r50-d8_4xb4-80k_ade20k-512x512 - In Collection: FCN - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 42.57 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 8.5 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 35.94 - mIoU(ms+flip): 37.94 - Config: configs/fcn/fcn_r50-d8_4xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_80k_ade20k/fcn_r50-d8_512x512_80k_ade20k_20200614_144016-f8ac5082.pth -- Name: fcn_r101-d8_4xb4-80k_ade20k-512x512 - In Collection: FCN - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 67.66 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 12.0 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 39.61 - mIoU(ms+flip): 40.83 - Config: configs/fcn/fcn_r101-d8_4xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_80k_ade20k/fcn_r101-d8_512x512_80k_ade20k_20200615_014143-bc1809f7.pth -- Name: fcn_r50-d8_4xb4-160k_ade20k-512x512 - In Collection: FCN - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 36.1 - mIoU(ms+flip): 38.08 - Config: configs/fcn/fcn_r50-d8_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_160k_ade20k/fcn_r50-d8_512x512_160k_ade20k_20200615_100713-4edbc3b4.pth -- Name: fcn_r101-d8_4xb4-160k_ade20k-512x512 - In Collection: FCN - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: 
ADE20K - Metrics: - mIoU: 39.91 - mIoU(ms+flip): 41.4 - Config: configs/fcn/fcn_r101-d8_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_160k_ade20k/fcn_r101-d8_512x512_160k_ade20k_20200615_105816-fd192bd5.pth -- Name: fcn_r50-d8_4xb4-20k_voc12aug-512x512 - In Collection: FCN - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 20000 - inference time (ms/im): - - value: 42.96 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 5.7 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 67.08 - mIoU(ms+flip): 69.94 - Config: configs/fcn/fcn_r50-d8_4xb4-20k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_20k_voc12aug/fcn_r50-d8_512x512_20k_voc12aug_20200617_010715-52dc5306.pth -- Name: fcn_r101-d8_4xb4-20k_voc12aug-512x512 - In Collection: FCN - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 20000 - inference time (ms/im): - - value: 67.52 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 9.2 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 71.16 - mIoU(ms+flip): 73.57 - Config: configs/fcn/fcn_r101-d8_4xb4-20k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_20k_voc12aug/fcn_r101-d8_512x512_20k_voc12aug_20200617_010842-0bb4e798.pth -- Name: fcn_r50-d8_4xb4-40k_voc12aug-512x512 - In Collection: FCN - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 40000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 66.97 - mIoU(ms+flip): 69.04 - Config: configs/fcn/fcn_r50-d8_4xb4-40k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_40k_voc12aug/fcn_r50-d8_512x512_40k_voc12aug_20200613_161222-5e2dbf40.pth -- Name: fcn_r101-d8_4xb4-40k_voc12aug-512x512 - In Collection: FCN - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 40000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 69.91 - mIoU(ms+flip): 72.38 - Config: configs/fcn/fcn_r101-d8_4xb4-40k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_40k_voc12aug/fcn_r101-d8_512x512_40k_voc12aug_20200613_161240-4c8bcefd.pth -- Name: fcn_r101-d8_4xb4-40k_pascal-context-480x480 - In Collection: FCN - Metadata: - backbone: R-101-D8 - crop size: (480,480) - lr schd: 40000 - inference time (ms/im): - - value: 100.7 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (480,480) - Results: - - Task: Semantic Segmentation - Dataset: Pascal Context - Metrics: - mIoU: 44.43 - mIoU(ms+flip): 45.63 - Config: configs/fcn/fcn_r101-d8_4xb4-40k_pascal-context-480x480.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context/fcn_r101-d8_480x480_40k_pascal_context_20210421_154757-b5e97937.pth -- Name: fcn_r101-d8_4xb4-80k_pascal-context-480x480 - In Collection: FCN - Metadata: - backbone: R-101-D8 - crop size: (480,480) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal Context - Metrics: - mIoU: 44.13 - mIoU(ms+flip): 45.26 - Config: configs/fcn/fcn_r101-d8_4xb4-80k_pascal-context-480x480.py - Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context/fcn_r101-d8_480x480_80k_pascal_context_20210421_163310-4711813f.pth -- Name: fcn_r101-d8_4xb4-40k_pascal-context-59-480x480 - In Collection: FCN - Metadata: - backbone: R-101-D8 - crop size: (480,480) - lr schd: 40000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal Context 59 - Metrics: - mIoU: 48.42 - mIoU(ms+flip): 50.4 - Config: configs/fcn/fcn_r101-d8_4xb4-40k_pascal-context-59-480x480.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context_59/fcn_r101-d8_480x480_40k_pascal_context_59_20210415_230724-8cf83682.pth -- Name: fcn_r101-d8_4xb4-80k_pascal-context-59-480x480 - In Collection: FCN - Metadata: - backbone: R-101-D8 - crop size: (480,480) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal Context 59 - Metrics: - mIoU: 49.35 - mIoU(ms+flip): 51.38 - Config: configs/fcn/fcn_r101-d8_4xb4-80k_pascal-context-59-480x480.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context_59/fcn_r101-d8_480x480_80k_pascal_context_59_20210416_110804-9a6f2c94.pth diff --git a/configs/fcn/metafile.yaml b/configs/fcn/metafile.yaml new file mode 100644 index 0000000000..f3d80f652e --- /dev/null +++ b/configs/fcn/metafile.yaml @@ -0,0 +1,997 @@ +Collections: +- Name: FCN + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + - Pascal Context + - Pascal Context 59 + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + README: configs/fcn/README.md + Frameworks: + - PyTorch +Models: +- Name: fcn_r50-d8_4xb2-40k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 72.25 + mIoU(ms+flip): 73.36 + Config: configs/fcn/fcn_r50-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 5.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_40k_cityscapes/fcn_r50-d8_512x1024_40k_cityscapes_20200604_192608-efe53f0d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_40k_cityscapes/fcn_r50-d8_512x1024_40k_cityscapes_20200604_192608.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r101-d8_4xb2-40k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.45 + mIoU(ms+flip): 76.58 + Config: configs/fcn/fcn_r101-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 9.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_40k_cityscapes/fcn_r101-d8_512x1024_40k_cityscapes_20200604_181852-a883d3a1.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_40k_cityscapes/fcn_r101-d8_512x1024_40k_cityscapes_20200604_181852.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + 
Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r50-d8_4xb2-40k_cityscapes-769x769 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 71.47 + mIoU(ms+flip): 72.54 + Config: configs/fcn/fcn_r50-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 6.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_40k_cityscapes/fcn_r50-d8_769x769_40k_cityscapes_20200606_113104-977b5d02.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_40k_cityscapes/fcn_r50-d8_769x769_40k_cityscapes_20200606_113104.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r101-d8_4xb2-40k_cityscapes-769x769 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.93 + mIoU(ms+flip): 75.14 + Config: configs/fcn/fcn_r101-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 10.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_40k_cityscapes/fcn_r101-d8_769x769_40k_cityscapes_20200606_113208-7d4ab69c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_40k_cityscapes/fcn_r101-d8_769x769_40k_cityscapes_20200606_113208.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r18-d8_4xb2-80k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 71.11 + mIoU(ms+flip): 72.91 + Config: configs/fcn/fcn_r18-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 1.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_512x1024_80k_cityscapes/fcn_r18-d8_512x1024_80k_cityscapes_20201225_021327-6c50f8b4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_512x1024_80k_cityscapes/fcn_r18-d8_512x1024_80k_cityscapes-20201225_021327.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r50-d8_4xb2-80k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.61 + mIoU(ms+flip): 74.24 + Config: configs/fcn/fcn_r50-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_80k_cityscapes/fcn_r50-d8_512x1024_80k_cityscapes_20200606_113019-03aa804d.pth + 
Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_80k_cityscapes/fcn_r50-d8_512x1024_80k_cityscapes_20200606_113019.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r101-d8_4xb2-80k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.13 + mIoU(ms+flip): 75.94 + Config: configs/fcn/fcn_r101-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_80k_cityscapes/fcn_r101-d8_512x1024_80k_cityscapes_20200606_113038-3fb937eb.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_80k_cityscapes/fcn_r101-d8_512x1024_80k_cityscapes_20200606_113038.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r101-d8_4xb2-amp-80k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.8 + Config: configs/fcn/fcn_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - FCN + - (FP16) + Training Resources: 4x V100 GPUS + Memory (GB): 5.37 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_fp16_512x1024_80k_cityscapes/fcn_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230921-fb13e883.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_fp16_512x1024_80k_cityscapes/fcn_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230921.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r18-d8_4xb2-80k_cityscapes-769x769 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 70.8 + mIoU(ms+flip): 73.16 + Config: configs/fcn/fcn_r18-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 1.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_769x769_80k_cityscapes/fcn_r18-d8_769x769_80k_cityscapes_20201225_021451-9739d1b8.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_769x769_80k_cityscapes/fcn_r18-d8_769x769_80k_cityscapes-20201225_021451.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r50-d8_4xb2-80k_cityscapes-769x769 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 72.64 + mIoU(ms+flip): 73.32 + Config: configs/fcn/fcn_r50-d8_4xb2-80k_cityscapes-769x769.py + 
Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_80k_cityscapes/fcn_r50-d8_769x769_80k_cityscapes_20200606_195749-f5caeabc.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_80k_cityscapes/fcn_r50-d8_769x769_80k_cityscapes_20200606_195749.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r101-d8_4xb2-80k_cityscapes-769x769 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.52 + mIoU(ms+flip): 76.61 + Config: configs/fcn/fcn_r101-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_80k_cityscapes/fcn_r101-d8_769x769_80k_cityscapes_20200606_214354-45cbac68.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_80k_cityscapes/fcn_r101-d8_769x769_80k_cityscapes_20200606_214354.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r18b-d8_4xb2-80k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 70.24 + mIoU(ms+flip): 72.77 + Config: configs/fcn/fcn_r18b-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18b-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 1.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_512x1024_80k_cityscapes/fcn_r18b-d8_512x1024_80k_cityscapes_20201225_230143-92c0f445.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_512x1024_80k_cityscapes/fcn_r18b-d8_512x1024_80k_cityscapes-20201225_230143.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r50b-d8_4xb2-80k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.65 + mIoU(ms+flip): 77.59 + Config: configs/fcn/fcn_r50b-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50b-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 5.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_512x1024_80k_cityscapes/fcn_r50b-d8_512x1024_80k_cityscapes_20201225_094221-82957416.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_512x1024_80k_cityscapes/fcn_r50b-d8_512x1024_80k_cityscapes-20201225_094221.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: 
https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r101b-d8_4xb2-80k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.37 + mIoU(ms+flip): 78.77 + Config: configs/fcn/fcn_r101b-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101b-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 9.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_512x1024_80k_cityscapes/fcn_r101b-d8_512x1024_80k_cityscapes_20201226_160213-4543858f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_512x1024_80k_cityscapes/fcn_r101b-d8_512x1024_80k_cityscapes-20201226_160213.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r18b-d8_4xb2-80k_cityscapes-769x769 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 69.66 + mIoU(ms+flip): 72.07 + Config: configs/fcn/fcn_r18b-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18b-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 1.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_769x769_80k_cityscapes/fcn_r18b-d8_769x769_80k_cityscapes_20201226_004430-32d504e5.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_769x769_80k_cityscapes/fcn_r18b-d8_769x769_80k_cityscapes-20201226_004430.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r50b-d8_4xb2-80k_cityscapes-769x769 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.83 + mIoU(ms+flip): 76.6 + Config: configs/fcn/fcn_r50b-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50b-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 6.3 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_769x769_80k_cityscapes/fcn_r50b-d8_769x769_80k_cityscapes_20201225_094223-94552d38.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_769x769_80k_cityscapes/fcn_r50b-d8_769x769_80k_cityscapes-20201225_094223.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r101b-d8_4xb2-80k_cityscapes-769x769 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.02 + mIoU(ms+flip): 78.67 + Config: configs/fcn/fcn_r101b-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101b-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 10.3 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_769x769_80k_cityscapes/fcn_r101b-d8_769x769_80k_cityscapes_20201226_170012-82be37e2.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_769x769_80k_cityscapes/fcn_r101b-d8_769x769_80k_cityscapes-20201226_170012.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn-d6_r50-d16_4xb2-40k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.06 + mIoU(ms+flip): 78.85 + Config: configs/fcn/fcn-d6_r50-d16_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D16 + - FCN + - (D6) + Training Resources: 4x TITAN Xp GPUS + Memory (GB): 3.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_512x1024_40k_cityscapes/fcn_d6_r50-d16_512x1024_40k_cityscapes_20210305_130133-98d5d1bc.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_512x1024_40k_cityscapes/fcn_d6_r50-d16_512x1024_40k_cityscapes-20210305_130133.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn-d6_r50-d16_4xb2-80k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.27 + mIoU(ms+flip): 78.88 + Config: configs/fcn/fcn-d6_r50-d16_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D16 + - FCN + - (D6) + Training Resources: 4x TITAN Xp GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_512x1024_80k_cityscapes/fcn_d6_r50-d16_512x1024_80k_cityscapes_20210306_115604-133c292f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_512x1024_80k_cityscapes/fcn_d6_r50-d16_512x1024_80k_cityscapes-20210306_115604.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn-d6_r50-d16_4xb2-40k_cityscapes-769x769 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.82 + mIoU(ms+flip): 78.22 + Config: configs/fcn/fcn-d6_r50-d16_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D16 + - FCN + - (D6) + Training Resources: 4x TITAN Xp GPUS + Memory (GB): 3.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_769x769_40k_cityscapes/fcn_d6_r50-d16_769x769_40k_cityscapes_20210305_185744-1aab18ed.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_769x769_40k_cityscapes/fcn_d6_r50-d16_769x769_40k_cityscapes-20210305_185744.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: 
fcn-d6_r50-d16_4xb2-80k_cityscapes-769x769 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.04 + mIoU(ms+flip): 78.4 + Config: configs/fcn/fcn-d6_r50-d16_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D16 + - FCN + - (D6) + Training Resources: 4x TITAN Xp GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_769x769_80k_cityscapes/fcn_d6_r50-d16_769x769_80k_cityscapes_20210305_200413-109d88eb.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_769x769_80k_cityscapes/fcn_d6_r50-d16_769x769_80k_cityscapes-20210305_200413.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn-d6_r101-d16_4xb2-40k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.36 + mIoU(ms+flip): 79.18 + Config: configs/fcn/fcn-d6_r101-d16_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D16 + - FCN + - (D6) + Training Resources: 4x TITAN Xp GPUS + Memory (GB): 4.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_512x1024_40k_cityscapes/fcn_d6_r101-d16_512x1024_40k_cityscapes_20210305_130337-9cf2b450.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_512x1024_40k_cityscapes/fcn_d6_r101-d16_512x1024_40k_cityscapes-20210305_130337.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn-d6_r101-d16_4xb2-80k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.46 + mIoU(ms+flip): 80.42 + Config: configs/fcn/fcn-d6_r101-d16_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D16 + - FCN + - (D6) + Training Resources: 4x TITAN Xp GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_512x1024_80k_cityscapes/fcn_d6_r101-d16_512x1024_80k_cityscapes_20210308_102747-cb336445.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_512x1024_80k_cityscapes/fcn_d6_r101-d16_512x1024_80k_cityscapes-20210308_102747.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn-d6_r101-d16_4xb2-40k_cityscapes-769x769 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.28 + mIoU(ms+flip): 78.95 + Config: configs/fcn/fcn-d6_r101-d16_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D16 + - FCN + - (D6) + Training Resources: 4x TITAN Xp GPUS + Memory (GB): 5.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_769x769_40k_cityscapes/fcn_d6_r101-d16_769x769_40k_cityscapes_20210308_102453-60b114e9.pth 
+ Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_769x769_40k_cityscapes/fcn_d6_r101-d16_769x769_40k_cityscapes-20210308_102453.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn-d6_r101-d16_4xb2-80k_cityscapes-769x769 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.06 + mIoU(ms+flip): 79.58 + Config: configs/fcn/fcn-d6_r101-d16_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D16 + - FCN + - (D6) + Training Resources: 4x TITAN Xp GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_769x769_80k_cityscapes/fcn_d6_r101-d16_769x769_80k_cityscapes_20210306_120016-e33adc4f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_769x769_80k_cityscapes/fcn_d6_r101-d16_769x769_80k_cityscapes-20210306_120016.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn-d6_r50b-d16_4xb2-80k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.99 + mIoU(ms+flip): 79.03 + Config: configs/fcn/fcn-d6_r50b-d16_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50b-D16 + - FCN + - (D6) + Training Resources: 4x TITAN Xp GPUS + Memory (GB): 3.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50b-d16_512x1024_80k_cityscapes/fcn_d6_r50b-d16_512x1024_80k_cityscapes_20210311_125550-6a0b62e9.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50b_d16_512x1024_80k_cityscapes/fcn_d6_r50b_d16_512x1024_80k_cityscapes-20210311_125550.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn-d6_r50b-d16_4xb2-80k_cityscapes-769x769 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.86 + mIoU(ms+flip): 78.52 + Config: configs/fcn/fcn-d6_r50b-d16_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50b-D16 + - FCN + - (D6) + Training Resources: 4x TITAN Xp GPUS + Memory (GB): 3.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50b-d16_769x769_80k_cityscapes/fcn_d6_r50b-d16_769x769_80k_cityscapes_20210311_131012-d665f231.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50b_d16_769x769_80k_cityscapes/fcn_d6_r50b_d16_769x769_80k_cityscapes-20210311_131012.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn-d6_r101b-d16_4xb2-80k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + 
Metrics: + mIoU: 77.72 + mIoU(ms+flip): 79.53 + Config: configs/fcn/fcn-d6_r101b-d16_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101b-D16 + - FCN + - (D6) + Training Resources: 4x TITAN Xp GPUS + Memory (GB): 4.3 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101b-d16_512x1024_80k_cityscapes/fcn_d6_r101b-d16_512x1024_80k_cityscapes_20210311_144305-3f2eb5b4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101b_d16_512x1024_80k_cityscapes/fcn_d6_r101b_d16_512x1024_80k_cityscapes-20210311_144305.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn-d6_r101b-d16_4xb2-80k_cityscapes-769x769 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.34 + mIoU(ms+flip): 78.91 + Config: configs/fcn/fcn-d6_r101b-d16_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101b-D16 + - FCN + - (D6) + Training Resources: 4x TITAN Xp GPUS + Memory (GB): 4.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101b-d16_769x769_80k_cityscapes/fcn_d6_r101b-d16_769x769_80k_cityscapes_20210311_154527-c4d8bfbc.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101b_d16_769x769_80k_cityscapes/fcn_d6_r101b_d16_769x769_80k_cityscapes-20210311_154527.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r50-d8_4xb4-80k_ade20k-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 35.94 + mIoU(ms+flip): 37.94 + Config: configs/fcn/fcn_r50-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 8.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_80k_ade20k/fcn_r50-d8_512x512_80k_ade20k_20200614_144016-f8ac5082.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_80k_ade20k/fcn_r50-d8_512x512_80k_ade20k_20200614_144016.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r101-d8_4xb4-80k_ade20k-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 39.61 + mIoU(ms+flip): 40.83 + Config: configs/fcn/fcn_r101-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 12.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_80k_ade20k/fcn_r101-d8_512x512_80k_ade20k_20200615_014143-bc1809f7.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_80k_ade20k/fcn_r101-d8_512x512_80k_ade20k_20200615_014143.log.json + Paper: + Title: Fully Convolutional Networks 
for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r50-d8_4xb4-160k_ade20k-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 36.1 + mIoU(ms+flip): 38.08 + Config: configs/fcn/fcn_r50-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_160k_ade20k/fcn_r50-d8_512x512_160k_ade20k_20200615_100713-4edbc3b4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_160k_ade20k/fcn_r50-d8_512x512_160k_ade20k_20200615_100713.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r101-d8_4xb4-160k_ade20k-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 39.91 + mIoU(ms+flip): 41.4 + Config: configs/fcn/fcn_r101-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_160k_ade20k/fcn_r101-d8_512x512_160k_ade20k_20200615_105816-fd192bd5.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_160k_ade20k/fcn_r101-d8_512x512_160k_ade20k_20200615_105816.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r50-d8_4xb4-20k_voc12aug-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 67.08 + mIoU(ms+flip): 69.94 + Config: configs/fcn/fcn_r50-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 5.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_20k_voc12aug/fcn_r50-d8_512x512_20k_voc12aug_20200617_010715-52dc5306.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_20k_voc12aug/fcn_r50-d8_512x512_20k_voc12aug_20200617_010715.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r101-d8_4xb4-20k_voc12aug-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 71.16 + mIoU(ms+flip): 73.57 + Config: configs/fcn/fcn_r101-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 9.2 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_20k_voc12aug/fcn_r101-d8_512x512_20k_voc12aug_20200617_010842-0bb4e798.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_20k_voc12aug/fcn_r101-d8_512x512_20k_voc12aug_20200617_010842.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r50-d8_4xb4-40k_voc12aug-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 66.97 + mIoU(ms+flip): 69.04 + Config: configs/fcn/fcn_r50-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_40k_voc12aug/fcn_r50-d8_512x512_40k_voc12aug_20200613_161222-5e2dbf40.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_40k_voc12aug/fcn_r50-d8_512x512_40k_voc12aug_20200613_161222.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r101-d8_4xb4-40k_voc12aug-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 69.91 + mIoU(ms+flip): 72.38 + Config: configs/fcn/fcn_r101-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_40k_voc12aug/fcn_r101-d8_512x512_40k_voc12aug_20200613_161240-4c8bcefd.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_40k_voc12aug/fcn_r101-d8_512x512_40k_voc12aug_20200613_161240.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r101-d8_4xb4-40k_pascal-context-480x480 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal Context + Metrics: + mIoU: 44.43 + mIoU(ms+flip): 45.63 + Config: configs/fcn/fcn_r101-d8_4xb4-40k_pascal-context-480x480.py + Metadata: + Training Data: Pascal Context + Batch Size: 16 + Architecture: + - R-101-D8 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context/fcn_r101-d8_480x480_40k_pascal_context_20210421_154757-b5e97937.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context/fcn_r101-d8_480x480_40k_pascal_context-20210421_154757.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r101-d8_4xb4-80k_pascal-context-480x480 + In Collection: FCN + Results: + Task: Semantic Segmentation 
+ Dataset: Pascal Context + Metrics: + mIoU: 44.13 + mIoU(ms+flip): 45.26 + Config: configs/fcn/fcn_r101-d8_4xb4-80k_pascal-context-480x480.py + Metadata: + Training Data: Pascal Context + Batch Size: 16 + Architecture: + - R-101-D8 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context/fcn_r101-d8_480x480_80k_pascal_context_20210421_163310-4711813f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context/fcn_r101-d8_480x480_80k_pascal_context-20210421_163310.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r101-d8_4xb4-40k_pascal-context-59-480x480 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal Context 59 + Metrics: + mIoU: 48.42 + mIoU(ms+flip): 50.4 + Config: configs/fcn/fcn_r101-d8_4xb4-40k_pascal-context-59-480x480.py + Metadata: + Training Data: Pascal Context 59 + Batch Size: 16 + Architecture: + - R-101-D8 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context_59/fcn_r101-d8_480x480_40k_pascal_context_59_20210415_230724-8cf83682.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context_59/fcn_r101-d8_480x480_40k_pascal_context_59-20210415_230724.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch +- Name: fcn_r101-d8_4xb4-80k_pascal-context-59-480x480 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal Context 59 + Metrics: + mIoU: 49.35 + mIoU(ms+flip): 51.38 + Config: configs/fcn/fcn_r101-d8_4xb4-80k_pascal-context-59-480x480.py + Metadata: + Training Data: Pascal Context 59 + Batch Size: 16 + Architecture: + - R-101-D8 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context_59/fcn_r101-d8_480x480_80k_pascal_context_59_20210416_110804-9a6f2c94.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context_59/fcn_r101-d8_480x480_80k_pascal_context_59-20210416_110804.log.json + Paper: + Title: Fully Convolutional Networks for Semantic Segmentation + URL: https://arxiv.org/abs/1411.4038 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Framework: PyTorch diff --git a/configs/gcnet/README.md b/configs/gcnet/README.md index fa37f76468..ba1a21e851 100644 --- a/configs/gcnet/README.md +++ b/configs/gcnet/README.md @@ -1,6 +1,6 @@ # GCNet -[GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond](https://arxiv.org/abs/1904.11492) +> [GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond](https://arxiv.org/abs/1904.11492) ## Introduction @@ -22,6 +22,39 @@ The Non-Local Network (NLNet) presents a pioneering approach for capturing long- +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| 
------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| GCNet | R-50-D8 | 512x1024 | 40000 | 5.8 | 3.93 | V100 | 77.69 | 78.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet/gcnet_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes/gcnet_r50-d8_512x1024_40k_cityscapes_20200618_074436-4b0fd17b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes/gcnet_r50-d8_512x1024_40k_cityscapes_20200618_074436.log.json) | +| GCNet | R-101-D8 | 512x1024 | 40000 | 9.2 | 2.61 | V100 | 78.28 | 79.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet/gcnet_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes/gcnet_r101-d8_512x1024_40k_cityscapes_20200618_074436-5e62567f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes/gcnet_r101-d8_512x1024_40k_cityscapes_20200618_074436.log.json) | +| GCNet | R-50-D8 | 769x769 | 40000 | 6.5 | 1.67 | V100 | 78.12 | 80.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet/gcnet_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_40k_cityscapes/gcnet_r50-d8_769x769_40k_cityscapes_20200618_182814-a26f4471.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_40k_cityscapes/gcnet_r50-d8_769x769_40k_cityscapes_20200618_182814.log.json) | +| GCNet | R-101-D8 | 769x769 | 40000 | 10.5 | 1.13 | V100 | 78.95 | 80.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet/gcnet_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_40k_cityscapes/gcnet_r101-d8_769x769_40k_cityscapes_20200619_092550-ca4f0a84.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_40k_cityscapes/gcnet_r101-d8_769x769_40k_cityscapes_20200619_092550.log.json) | +| GCNet | R-50-D8 | 512x1024 | 80000 | - | - | V100 | 78.48 | 80.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet/gcnet_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes/gcnet_r50-d8_512x1024_80k_cityscapes_20200618_074450-ef8f069b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes/gcnet_r50-d8_512x1024_80k_cityscapes_20200618_074450.log.json) | +| GCNet | R-101-D8 | 512x1024 | 80000 | - | - | V100 | 79.03 | 79.84 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet/gcnet_r101-d8_4xb2-80k_cityscapes-512x1024.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes/gcnet_r101-d8_512x1024_80k_cityscapes_20200618_074450-778ebf69.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes/gcnet_r101-d8_512x1024_80k_cityscapes_20200618_074450.log.json) | +| GCNet | R-50-D8 | 769x769 | 80000 | - | - | V100 | 78.68 | 80.66 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet/gcnet_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_80k_cityscapes/gcnet_r50-d8_769x769_80k_cityscapes_20200619_092516-4839565b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_80k_cityscapes/gcnet_r50-d8_769x769_80k_cityscapes_20200619_092516.log.json) | +| GCNet | R-101-D8 | 769x769 | 80000 | - | - | V100 | 79.18 | 80.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet/gcnet_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_80k_cityscapes/gcnet_r101-d8_769x769_80k_cityscapes_20200619_092628-8e043423.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_80k_cityscapes/gcnet_r101-d8_769x769_80k_cityscapes_20200619_092628.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| GCNet | R-50-D8 | 512x512 | 80000 | 8.5 | 23.38 | V100 | 41.47 | 42.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet/gcnet_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_80k_ade20k/gcnet_r50-d8_512x512_80k_ade20k_20200614_185146-91a6da41.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_80k_ade20k/gcnet_r50-d8_512x512_80k_ade20k_20200614_185146.log.json) | +| GCNet | R-101-D8 | 512x512 | 80000 | 12 | 15.20 | V100 | 42.82 | 44.54 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet/gcnet_r101-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_80k_ade20k/gcnet_r101-d8_512x512_80k_ade20k_20200615_020811-c3fcb6dd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_80k_ade20k/gcnet_r101-d8_512x512_80k_ade20k_20200615_020811.log.json) | +| GCNet | R-50-D8 | 512x512 | 160000 | - | - | V100 | 42.37 | 43.52 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet/gcnet_r50-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_160k_ade20k/gcnet_r50-d8_512x512_160k_ade20k_20200615_224122-d95f3e1f.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_160k_ade20k/gcnet_r50-d8_512x512_160k_ade20k_20200615_224122.log.json) | +| GCNet | R-101-D8 | 512x512 | 160000 | - | - | V100 | 43.69 | 45.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet/gcnet_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_160k_ade20k/gcnet_r101-d8_512x512_160k_ade20k_20200615_225406-615528d7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_160k_ade20k/gcnet_r101-d8_512x512_160k_ade20k_20200615_225406.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| GCNet | R-50-D8 | 512x512 | 20000 | 5.8 | 23.35 | V100 | 76.42 | 77.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet/gcnet_r50-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_20k_voc12aug/gcnet_r50-d8_512x512_20k_voc12aug_20200617_165701-3cbfdab1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_20k_voc12aug/gcnet_r50-d8_512x512_20k_voc12aug_20200617_165701.log.json) | +| GCNet | R-101-D8 | 512x512 | 20000 | 9.2 | 14.80 | V100 | 77.41 | 78.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet/gcnet_r101-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_20k_voc12aug/gcnet_r101-d8_512x512_20k_voc12aug_20200617_165713-6c720aa9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_20k_voc12aug/gcnet_r101-d8_512x512_20k_voc12aug_20200617_165713.log.json) | +| GCNet | R-50-D8 | 512x512 | 40000 | - | - | V100 | 76.24 | 77.63 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet/gcnet_r50-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_40k_voc12aug/gcnet_r50-d8_512x512_40k_voc12aug_20200613_195105-9797336d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_40k_voc12aug/gcnet_r50-d8_512x512_40k_voc12aug_20200613_195105.log.json) | +| GCNet | R-101-D8 | 512x512 | 40000 | - | - | V100 | 77.84 | 78.59 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet/gcnet_r101-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_40k_voc12aug/gcnet_r101-d8_512x512_40k_voc12aug_20200613_185806-1e38208d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_40k_voc12aug/gcnet_r101-d8_512x512_40k_voc12aug_20200613_185806.log.json) | + ## Citation ```bibtex @@ -33,36 +66,3 @@ The Non-Local Network 
(NLNet) presents a pioneering approach for capturing long- year={2019} } ``` - -## Results and models - -### Cityscapes - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| GCNet | R-50-D8 | 512x1024 | 40000 | 5.8 | 3.93 | 77.69 | 78.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/gcnet/gcnet_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes/gcnet_r50-d8_512x1024_40k_cityscapes_20200618_074436-4b0fd17b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes/gcnet_r50-d8_512x1024_40k_cityscapes_20200618_074436.log.json) | -| GCNet | R-101-D8 | 512x1024 | 40000 | 9.2 | 2.61 | 78.28 | 79.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/gcnet/gcnet_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes/gcnet_r101-d8_512x1024_40k_cityscapes_20200618_074436-5e62567f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes/gcnet_r101-d8_512x1024_40k_cityscapes_20200618_074436.log.json) | -| GCNet | R-50-D8 | 769x769 | 40000 | 6.5 | 1.67 | 78.12 | 80.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/gcnet/gcnet_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_40k_cityscapes/gcnet_r50-d8_769x769_40k_cityscapes_20200618_182814-a26f4471.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_40k_cityscapes/gcnet_r50-d8_769x769_40k_cityscapes_20200618_182814.log.json) | -| GCNet | R-101-D8 | 769x769 | 40000 | 10.5 | 1.13 | 78.95 | 80.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/gcnet/gcnet_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_40k_cityscapes/gcnet_r101-d8_769x769_40k_cityscapes_20200619_092550-ca4f0a84.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_40k_cityscapes/gcnet_r101-d8_769x769_40k_cityscapes_20200619_092550.log.json) | -| GCNet | R-50-D8 | 512x1024 | 80000 | - | - | 78.48 | 80.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/gcnet/gcnet_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes/gcnet_r50-d8_512x1024_80k_cityscapes_20200618_074450-ef8f069b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes/gcnet_r50-d8_512x1024_80k_cityscapes_20200618_074450.log.json) | -| GCNet | R-101-D8 | 512x1024 | 80000 | - | - | 79.03 | 79.84 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/gcnet/gcnet_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes/gcnet_r101-d8_512x1024_80k_cityscapes_20200618_074450-778ebf69.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes/gcnet_r101-d8_512x1024_80k_cityscapes_20200618_074450.log.json) | -| GCNet | R-50-D8 | 769x769 | 80000 | - | - | 78.68 | 80.66 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/gcnet/gcnet_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_80k_cityscapes/gcnet_r50-d8_769x769_80k_cityscapes_20200619_092516-4839565b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_80k_cityscapes/gcnet_r50-d8_769x769_80k_cityscapes_20200619_092516.log.json) | -| GCNet | R-101-D8 | 769x769 | 80000 | - | - | 79.18 | 80.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/gcnet/gcnet_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_80k_cityscapes/gcnet_r101-d8_769x769_80k_cityscapes_20200619_092628-8e043423.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_80k_cityscapes/gcnet_r101-d8_769x769_80k_cityscapes_20200619_092628.log.json) | - -### ADE20K - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| GCNet | R-50-D8 | 512x512 | 80000 | 8.5 | 23.38 | 41.47 | 42.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/gcnet/gcnet_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_80k_ade20k/gcnet_r50-d8_512x512_80k_ade20k_20200614_185146-91a6da41.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_80k_ade20k/gcnet_r50-d8_512x512_80k_ade20k_20200614_185146.log.json) | -| GCNet | R-101-D8 | 512x512 | 80000 | 12 | 15.20 | 42.82 | 44.54 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/gcnet/gcnet_r101-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_80k_ade20k/gcnet_r101-d8_512x512_80k_ade20k_20200615_020811-c3fcb6dd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_80k_ade20k/gcnet_r101-d8_512x512_80k_ade20k_20200615_020811.log.json) | -| GCNet | R-50-D8 | 512x512 | 160000 | - | - | 42.37 | 43.52 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/gcnet/gcnet_r50-d8_4xb4-160k_ade20k-512x512.py) |
[model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_160k_ade20k/gcnet_r50-d8_512x512_160k_ade20k_20200615_224122-d95f3e1f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_160k_ade20k/gcnet_r50-d8_512x512_160k_ade20k_20200615_224122.log.json) | -| GCNet | R-101-D8 | 512x512 | 160000 | - | - | 43.69 | 45.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/gcnet/gcnet_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_160k_ade20k/gcnet_r101-d8_512x512_160k_ade20k_20200615_225406-615528d7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_160k_ade20k/gcnet_r101-d8_512x512_160k_ade20k_20200615_225406.log.json) | - -### Pascal VOC 2012 + Aug - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| GCNet | R-50-D8 | 512x512 | 20000 | 5.8 | 23.35 | 76.42 | 77.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/gcnet/gcnet_r50-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_20k_voc12aug/gcnet_r50-d8_512x512_20k_voc12aug_20200617_165701-3cbfdab1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_20k_voc12aug/gcnet_r50-d8_512x512_20k_voc12aug_20200617_165701.log.json) | -| GCNet | R-101-D8 | 512x512 | 20000 | 9.2 | 14.80 | 77.41 | 78.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/gcnet/gcnet_r101-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_20k_voc12aug/gcnet_r101-d8_512x512_20k_voc12aug_20200617_165713-6c720aa9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_20k_voc12aug/gcnet_r101-d8_512x512_20k_voc12aug_20200617_165713.log.json) | -| GCNet | R-50-D8 | 512x512 | 40000 | - | - | 76.24 | 77.63 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/gcnet/gcnet_r50-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_40k_voc12aug/gcnet_r50-d8_512x512_40k_voc12aug_20200613_195105-9797336d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_40k_voc12aug/gcnet_r50-d8_512x512_40k_voc12aug_20200613_195105.log.json) | -| GCNet | R-101-D8 | 512x512 | 40000 | - | - | 77.84 | 78.59 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/gcnet/gcnet_r101-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_40k_voc12aug/gcnet_r101-d8_512x512_40k_voc12aug_20200613_185806-1e38208d.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_40k_voc12aug/gcnet_r101-d8_512x512_40k_voc12aug_20200613_185806.log.json) | diff --git a/configs/gcnet/gcnet.yml b/configs/gcnet/gcnet.yml deleted file mode 100644 index dfd8cf56c4..0000000000 --- a/configs/gcnet/gcnet.yml +++ /dev/null @@ -1,305 +0,0 @@ -Collections: -- Name: GCNet - Metadata: - Training Data: - - Cityscapes - - ADE20K - - Pascal VOC 2012 + Aug - Paper: - URL: https://arxiv.org/abs/1904.11492 - Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' - README: configs/gcnet/README.md - Code: - URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 - Version: v0.17.0 - Converted From: - Code: https://github.com/xvjiarui/GCNet -Models: -- Name: gcnet_r50-d8_4xb2-40k_cityscapes-512x1024 - In Collection: GCNet - Metadata: - backbone: R-50-D8 - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 254.45 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 5.8 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 77.69 - mIoU(ms+flip): 78.56 - Config: configs/gcnet/gcnet_r50-d8_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes/gcnet_r50-d8_512x1024_40k_cityscapes_20200618_074436-4b0fd17b.pth -- Name: gcnet_r101-d8_4xb2-40k_cityscapes-512x1024 - In Collection: GCNet - Metadata: - backbone: R-101-D8 - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 383.14 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 9.2 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.28 - mIoU(ms+flip): 79.34 - Config: configs/gcnet/gcnet_r101-d8_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes/gcnet_r101-d8_512x1024_40k_cityscapes_20200618_074436-5e62567f.pth -- Name: gcnet_r50-d8_4xb2-40k_cityscapes-769x769 - In Collection: GCNet - Metadata: - backbone: R-50-D8 - crop size: (769,769) - lr schd: 40000 - inference time (ms/im): - - value: 598.8 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 6.5 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.12 - mIoU(ms+flip): 80.09 - Config: configs/gcnet/gcnet_r50-d8_4xb2-40k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_40k_cityscapes/gcnet_r50-d8_769x769_40k_cityscapes_20200618_182814-a26f4471.pth -- Name: gcnet_r101-d8_4xb2-40k_cityscapes-769x769 - In Collection: GCNet - Metadata: - backbone: R-101-D8 - crop size: (769,769) - lr schd: 40000 - inference time (ms/im): - - value: 884.96 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 10.5 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.95 - mIoU(ms+flip): 80.71 - Config: configs/gcnet/gcnet_r101-d8_4xb2-40k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_40k_cityscapes/gcnet_r101-d8_769x769_40k_cityscapes_20200619_092550-ca4f0a84.pth -- Name: gcnet_r50-d8_4xb2-80k_cityscapes-512x1024 - In Collection: GCNet - Metadata: - 
backbone: R-50-D8 - crop size: (512,1024) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.48 - mIoU(ms+flip): 80.01 - Config: configs/gcnet/gcnet_r50-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes/gcnet_r50-d8_512x1024_80k_cityscapes_20200618_074450-ef8f069b.pth -- Name: gcnet_r101-d8_4xb2-80k_cityscapes-512x1024 - In Collection: GCNet - Metadata: - backbone: R-101-D8 - crop size: (512,1024) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.03 - mIoU(ms+flip): 79.84 - Config: configs/gcnet/gcnet_r101-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes/gcnet_r101-d8_512x1024_80k_cityscapes_20200618_074450-778ebf69.pth -- Name: gcnet_r50-d8_4xb2-80k_cityscapes-769x769 - In Collection: GCNet - Metadata: - backbone: R-50-D8 - crop size: (769,769) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.68 - mIoU(ms+flip): 80.66 - Config: configs/gcnet/gcnet_r50-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_80k_cityscapes/gcnet_r50-d8_769x769_80k_cityscapes_20200619_092516-4839565b.pth -- Name: gcnet_r101-d8_4xb2-80k_cityscapes-769x769 - In Collection: GCNet - Metadata: - backbone: R-101-D8 - crop size: (769,769) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.18 - mIoU(ms+flip): 80.71 - Config: configs/gcnet/gcnet_r101-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_80k_cityscapes/gcnet_r101-d8_769x769_80k_cityscapes_20200619_092628-8e043423.pth -- Name: gcnet_r50-d8_4xb4-80k_ade20k-512x512 - In Collection: GCNet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 42.77 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 8.5 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 41.47 - mIoU(ms+flip): 42.85 - Config: configs/gcnet/gcnet_r50-d8_4xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_80k_ade20k/gcnet_r50-d8_512x512_80k_ade20k_20200614_185146-91a6da41.pth -- Name: gcnet_r101-d8_4xb4-80k_ade20k-512x512 - In Collection: GCNet - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 65.79 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 12.0 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 42.82 - mIoU(ms+flip): 44.54 - Config: configs/gcnet/gcnet_r101-d8_4xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_80k_ade20k/gcnet_r101-d8_512x512_80k_ade20k_20200615_020811-c3fcb6dd.pth -- Name: gcnet_r50-d8_4xb4-160k_ade20k-512x512 - In Collection: GCNet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 42.37 - mIoU(ms+flip): 43.52 - Config: configs/gcnet/gcnet_r50-d8_4xb4-160k_ade20k-512x512.py - Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_160k_ade20k/gcnet_r50-d8_512x512_160k_ade20k_20200615_224122-d95f3e1f.pth -- Name: gcnet_r101-d8_4xb4-160k_ade20k-512x512 - In Collection: GCNet - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 43.69 - mIoU(ms+flip): 45.21 - Config: configs/gcnet/gcnet_r101-d8_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_160k_ade20k/gcnet_r101-d8_512x512_160k_ade20k_20200615_225406-615528d7.pth -- Name: gcnet_r50-d8_4xb4-20k_voc12aug-512x512 - In Collection: GCNet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 20000 - inference time (ms/im): - - value: 42.83 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 5.8 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 76.42 - mIoU(ms+flip): 77.51 - Config: configs/gcnet/gcnet_r50-d8_4xb4-20k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_20k_voc12aug/gcnet_r50-d8_512x512_20k_voc12aug_20200617_165701-3cbfdab1.pth -- Name: gcnet_r101-d8_4xb4-20k_voc12aug-512x512 - In Collection: GCNet - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 20000 - inference time (ms/im): - - value: 67.57 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 9.2 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 77.41 - mIoU(ms+flip): 78.56 - Config: configs/gcnet/gcnet_r101-d8_4xb4-20k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_20k_voc12aug/gcnet_r101-d8_512x512_20k_voc12aug_20200617_165713-6c720aa9.pth -- Name: gcnet_r50-d8_4xb4-40k_voc12aug-512x512 - In Collection: GCNet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 40000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 76.24 - mIoU(ms+flip): 77.63 - Config: configs/gcnet/gcnet_r50-d8_4xb4-40k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_40k_voc12aug/gcnet_r50-d8_512x512_40k_voc12aug_20200613_195105-9797336d.pth -- Name: gcnet_r101-d8_4xb4-40k_voc12aug-512x512 - In Collection: GCNet - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 40000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 77.84 - mIoU(ms+flip): 78.59 - Config: configs/gcnet/gcnet_r101-d8_4xb4-40k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_40k_voc12aug/gcnet_r101-d8_512x512_40k_voc12aug_20200613_185806-1e38208d.pth diff --git a/configs/gcnet/metafile.yaml b/configs/gcnet/metafile.yaml new file mode 100644 index 0000000000..1f3c4623a0 --- /dev/null +++ b/configs/gcnet/metafile.yaml @@ -0,0 +1,391 @@ +Collections: +- Name: GCNet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: https://arxiv.org/abs/1904.11492 + README: configs/gcnet/README.md + Frameworks: + - PyTorch +Models: +- Name: gcnet_r50-d8_4xb2-40k_cityscapes-512x1024 + In Collection: 
GCNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.69 + mIoU(ms+flip): 78.56 + Config: configs/gcnet/gcnet_r50-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - GCNet + Training Resources: 4x V100 GPUS + Memory (GB): 5.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes/gcnet_r50-d8_512x1024_40k_cityscapes_20200618_074436-4b0fd17b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes/gcnet_r50-d8_512x1024_40k_cityscapes_20200618_074436.log.json + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: https://arxiv.org/abs/1904.11492 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 + Framework: PyTorch +- Name: gcnet_r101-d8_4xb2-40k_cityscapes-512x1024 + In Collection: GCNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.28 + mIoU(ms+flip): 79.34 + Config: configs/gcnet/gcnet_r101-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - GCNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes/gcnet_r101-d8_512x1024_40k_cityscapes_20200618_074436-5e62567f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes/gcnet_r101-d8_512x1024_40k_cityscapes_20200618_074436.log.json + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: https://arxiv.org/abs/1904.11492 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 + Framework: PyTorch +- Name: gcnet_r50-d8_4xb2-40k_cityscapes-769x769 + In Collection: GCNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.12 + mIoU(ms+flip): 80.09 + Config: configs/gcnet/gcnet_r50-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - GCNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_40k_cityscapes/gcnet_r50-d8_769x769_40k_cityscapes_20200618_182814-a26f4471.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_40k_cityscapes/gcnet_r50-d8_769x769_40k_cityscapes_20200618_182814.log.json + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: https://arxiv.org/abs/1904.11492 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 + Framework: PyTorch +- Name: gcnet_r101-d8_4xb2-40k_cityscapes-769x769 + In Collection: GCNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.95 + mIoU(ms+flip): 80.71 + Config: configs/gcnet/gcnet_r101-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - GCNet + Training Resources: 4x V100 GPUS + Memory (GB): 10.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_40k_cityscapes/gcnet_r101-d8_769x769_40k_cityscapes_20200619_092550-ca4f0a84.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_40k_cityscapes/gcnet_r101-d8_769x769_40k_cityscapes_20200619_092550.log.json + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: https://arxiv.org/abs/1904.11492 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 + Framework: PyTorch +- Name: gcnet_r50-d8_4xb2-80k_cityscapes-512x1024 + In Collection: GCNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.48 + mIoU(ms+flip): 80.01 + Config: configs/gcnet/gcnet_r50-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - GCNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes/gcnet_r50-d8_512x1024_80k_cityscapes_20200618_074450-ef8f069b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes/gcnet_r50-d8_512x1024_80k_cityscapes_20200618_074450.log.json + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: https://arxiv.org/abs/1904.11492 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 + Framework: PyTorch +- Name: gcnet_r101-d8_4xb2-80k_cityscapes-512x1024 + In Collection: GCNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.03 + mIoU(ms+flip): 79.84 + Config: configs/gcnet/gcnet_r101-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - GCNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes/gcnet_r101-d8_512x1024_80k_cityscapes_20200618_074450-778ebf69.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes/gcnet_r101-d8_512x1024_80k_cityscapes_20200618_074450.log.json + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: https://arxiv.org/abs/1904.11492 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 + Framework: PyTorch +- Name: gcnet_r50-d8_4xb2-80k_cityscapes-769x769 + In Collection: GCNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.68 + mIoU(ms+flip): 80.66 + Config: configs/gcnet/gcnet_r50-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - GCNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_80k_cityscapes/gcnet_r50-d8_769x769_80k_cityscapes_20200619_092516-4839565b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_80k_cityscapes/gcnet_r50-d8_769x769_80k_cityscapes_20200619_092516.log.json + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: https://arxiv.org/abs/1904.11492 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 + Framework: PyTorch +- Name: gcnet_r101-d8_4xb2-80k_cityscapes-769x769 + In Collection: GCNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.18 + mIoU(ms+flip): 80.71 + 
Config: configs/gcnet/gcnet_r101-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - GCNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_80k_cityscapes/gcnet_r101-d8_769x769_80k_cityscapes_20200619_092628-8e043423.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_80k_cityscapes/gcnet_r101-d8_769x769_80k_cityscapes_20200619_092628.log.json + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: https://arxiv.org/abs/1904.11492 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 + Framework: PyTorch +- Name: gcnet_r50-d8_4xb4-80k_ade20k-512x512 + In Collection: GCNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.47 + mIoU(ms+flip): 42.85 + Config: configs/gcnet/gcnet_r50-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - GCNet + Training Resources: 4x V100 GPUS + Memory (GB): 8.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_80k_ade20k/gcnet_r50-d8_512x512_80k_ade20k_20200614_185146-91a6da41.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_80k_ade20k/gcnet_r50-d8_512x512_80k_ade20k_20200614_185146.log.json + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: https://arxiv.org/abs/1904.11492 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 + Framework: PyTorch +- Name: gcnet_r101-d8_4xb4-80k_ade20k-512x512 + In Collection: GCNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.82 + mIoU(ms+flip): 44.54 + Config: configs/gcnet/gcnet_r101-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - GCNet + Training Resources: 4x V100 GPUS + Memory (GB): 12.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_80k_ade20k/gcnet_r101-d8_512x512_80k_ade20k_20200615_020811-c3fcb6dd.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_80k_ade20k/gcnet_r101-d8_512x512_80k_ade20k_20200615_020811.log.json + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: https://arxiv.org/abs/1904.11492 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 + Framework: PyTorch +- Name: gcnet_r50-d8_4xb4-160k_ade20k-512x512 + In Collection: GCNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.37 + mIoU(ms+flip): 43.52 + Config: configs/gcnet/gcnet_r50-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - GCNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_160k_ade20k/gcnet_r50-d8_512x512_160k_ade20k_20200615_224122-d95f3e1f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_160k_ade20k/gcnet_r50-d8_512x512_160k_ade20k_20200615_224122.log.json + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: 
https://arxiv.org/abs/1904.11492 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 + Framework: PyTorch +- Name: gcnet_r101-d8_4xb4-160k_ade20k-512x512 + In Collection: GCNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.69 + mIoU(ms+flip): 45.21 + Config: configs/gcnet/gcnet_r101-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - GCNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_160k_ade20k/gcnet_r101-d8_512x512_160k_ade20k_20200615_225406-615528d7.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_160k_ade20k/gcnet_r101-d8_512x512_160k_ade20k_20200615_225406.log.json + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: https://arxiv.org/abs/1904.11492 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 + Framework: PyTorch +- Name: gcnet_r50-d8_4xb4-20k_voc12aug-512x512 + In Collection: GCNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.42 + mIoU(ms+flip): 77.51 + Config: configs/gcnet/gcnet_r50-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - GCNet + Training Resources: 4x V100 GPUS + Memory (GB): 5.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_20k_voc12aug/gcnet_r50-d8_512x512_20k_voc12aug_20200617_165701-3cbfdab1.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_20k_voc12aug/gcnet_r50-d8_512x512_20k_voc12aug_20200617_165701.log.json + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: https://arxiv.org/abs/1904.11492 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 + Framework: PyTorch +- Name: gcnet_r101-d8_4xb4-20k_voc12aug-512x512 + In Collection: GCNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.41 + mIoU(ms+flip): 78.56 + Config: configs/gcnet/gcnet_r101-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - GCNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_20k_voc12aug/gcnet_r101-d8_512x512_20k_voc12aug_20200617_165713-6c720aa9.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_20k_voc12aug/gcnet_r101-d8_512x512_20k_voc12aug_20200617_165713.log.json + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: https://arxiv.org/abs/1904.11492 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 + Framework: PyTorch +- Name: gcnet_r50-d8_4xb4-40k_voc12aug-512x512 + In Collection: GCNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.24 + mIoU(ms+flip): 77.63 + Config: configs/gcnet/gcnet_r50-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - GCNet + Training Resources: 4x V100 GPUS + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_40k_voc12aug/gcnet_r50-d8_512x512_40k_voc12aug_20200613_195105-9797336d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_40k_voc12aug/gcnet_r50-d8_512x512_40k_voc12aug_20200613_195105.log.json + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: https://arxiv.org/abs/1904.11492 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 + Framework: PyTorch +- Name: gcnet_r101-d8_4xb4-40k_voc12aug-512x512 + In Collection: GCNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.84 + mIoU(ms+flip): 78.59 + Config: configs/gcnet/gcnet_r101-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - GCNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_40k_voc12aug/gcnet_r101-d8_512x512_40k_voc12aug_20200613_185806-1e38208d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_40k_voc12aug/gcnet_r101-d8_512x512_40k_voc12aug_20200613_185806.log.json + Paper: + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + URL: https://arxiv.org/abs/1904.11492 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 + Framework: PyTorch diff --git a/configs/hrnet/README.md b/configs/hrnet/README.md index f85683b63f..b529fc895e 100644 --- a/configs/hrnet/README.md +++ b/configs/hrnet/README.md @@ -1,6 +1,6 @@ # HRNet -[Deep High-Resolution Representation Learning for Human Pose Estimation](https://arxiv.org/abs/1908.07919) +> [Deep High-Resolution Representation Learning for Human Pose Estimation](https://arxiv.org/abs/1908.07919) ## Introduction @@ -22,101 +22,101 @@ High-resolution representations are essential for position-sensitive vision prob -## Citation - -```bibtext -@inproceedings{SunXLW19, - title={Deep High-Resolution Representation Learning for Human Pose Estimation}, - author={Ke Sun and Bin Xiao and Dong Liu and Jingdong Wang}, - booktitle={CVPR}, - year={2019} -} -``` - ## Results and models ### Cityscapes -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | ------------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| FCN | HRNetV2p-W18-Small | 512x1024 | 40000 | 1.7 | 23.74 | 73.86 | 75.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr18s_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_40k_cityscapes/fcn_hr18s_512x1024_40k_cityscapes_20200601_014216-93db27d0.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_40k_cityscapes/fcn_hr18s_512x1024_40k_cityscapes_20200601_014216.log.json) | -| FCN | HRNetV2p-W18 | 512x1024 | 40000 | 2.9 | 12.97 | 77.19 | 78.92 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr18_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_40k_cityscapes/fcn_hr18_512x1024_40k_cityscapes_20200601_014216-f196fb4e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_40k_cityscapes/fcn_hr18_512x1024_40k_cityscapes_20200601_014216.log.json) | -| FCN | HRNetV2p-W48 | 512x1024 | 40000 | 6.2 | 6.42 | 78.48 | 79.69 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr48_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_40k_cityscapes/fcn_hr48_512x1024_40k_cityscapes_20200601_014240-a989b146.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_40k_cityscapes/fcn_hr48_512x1024_40k_cityscapes_20200601_014240.log.json) | -| FCN | HRNetV2p-W18-Small | 512x1024 | 80000 | - | - | 75.31 | 77.48 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr18s_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_80k_cityscapes/fcn_hr18s_512x1024_80k_cityscapes_20200601_202700-1462b75d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_80k_cityscapes/fcn_hr18s_512x1024_80k_cityscapes_20200601_202700.log.json) | -| FCN | HRNetV2p-W18 | 512x1024 | 80000 | - | - | 78.65 | 80.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr18_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_80k_cityscapes/fcn_hr18_512x1024_80k_cityscapes_20200601_223255-4e7b345e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_80k_cityscapes/fcn_hr18_512x1024_80k_cityscapes_20200601_223255.log.json) | -| FCN | HRNetV2p-W48 | 512x1024 | 80000 | - | - | 79.93 | 80.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr48_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_80k_cityscapes/fcn_hr48_512x1024_80k_cityscapes_20200601_202606-58ea95d6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_80k_cityscapes/fcn_hr48_512x1024_80k_cityscapes_20200601_202606.log.json) | -| FCN | HRNetV2p-W18-Small | 512x1024 | 160000 | - | - | 76.31 | 78.31 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr18s_4xb2-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_160k_cityscapes/fcn_hr18s_512x1024_160k_cityscapes_20200602_190901-4a0797ea.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_160k_cityscapes/fcn_hr18s_512x1024_160k_cityscapes_20200602_190901.log.json) | -| FCN | HRNetV2p-W18 | 512x1024 | 160000 | - | - | 78.80 | 80.74 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr18_4xb2-160k_cityscapes-512x1024.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_160k_cityscapes/fcn_hr18_512x1024_160k_cityscapes_20200602_190822-221e4a4f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_160k_cityscapes/fcn_hr18_512x1024_160k_cityscapes_20200602_190822.log.json) | -| FCN | HRNetV2p-W48 | 512x1024 | 160000 | - | - | 80.65 | 81.92 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr48_4xb2-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_160k_cityscapes/fcn_hr48_512x1024_160k_cityscapes_20200602_190946-59b7973e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_160k_cityscapes/fcn_hr48_512x1024_160k_cityscapes_20200602_190946.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | HRNetV2p-W18-Small | 512x1024 | 40000 | 1.7 | 23.74 | V100 | 73.86 | 75.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18s_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_40k_cityscapes/fcn_hr18s_512x1024_40k_cityscapes_20200601_014216-93db27d0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_40k_cityscapes/fcn_hr18s_512x1024_40k_cityscapes_20200601_014216.log.json) | +| FCN | HRNetV2p-W18 | 512x1024 | 40000 | 2.9 | 12.97 | V100 | 77.19 | 78.92 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_40k_cityscapes/fcn_hr18_512x1024_40k_cityscapes_20200601_014216-f196fb4e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_40k_cityscapes/fcn_hr18_512x1024_40k_cityscapes_20200601_014216.log.json) | +| FCN | HRNetV2p-W48 | 512x1024 | 40000 | 6.2 | 6.42 | V100 | 78.48 | 79.69 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr48_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_40k_cityscapes/fcn_hr48_512x1024_40k_cityscapes_20200601_014240-a989b146.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_40k_cityscapes/fcn_hr48_512x1024_40k_cityscapes_20200601_014240.log.json) | +| FCN | HRNetV2p-W18-Small | 512x1024 | 80000 | - | - | V100 | 75.31 | 77.48 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18s_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_80k_cityscapes/fcn_hr18s_512x1024_80k_cityscapes_20200601_202700-1462b75d.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_80k_cityscapes/fcn_hr18s_512x1024_80k_cityscapes_20200601_202700.log.json) | +| FCN | HRNetV2p-W18 | 512x1024 | 80000 | - | - | V100 | 78.65 | 80.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_80k_cityscapes/fcn_hr18_512x1024_80k_cityscapes_20200601_223255-4e7b345e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_80k_cityscapes/fcn_hr18_512x1024_80k_cityscapes_20200601_223255.log.json) | +| FCN | HRNetV2p-W48 | 512x1024 | 80000 | - | - | V100 | 79.93 | 80.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr48_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_80k_cityscapes/fcn_hr48_512x1024_80k_cityscapes_20200601_202606-58ea95d6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_80k_cityscapes/fcn_hr48_512x1024_80k_cityscapes_20200601_202606.log.json) | +| FCN | HRNetV2p-W18-Small | 512x1024 | 160000 | - | - | V100 | 76.31 | 78.31 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18s_4xb2-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_160k_cityscapes/fcn_hr18s_512x1024_160k_cityscapes_20200602_190901-4a0797ea.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_160k_cityscapes/fcn_hr18s_512x1024_160k_cityscapes_20200602_190901.log.json) | +| FCN | HRNetV2p-W18 | 512x1024 | 160000 | - | - | V100 | 78.80 | 80.74 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18_4xb2-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_160k_cityscapes/fcn_hr18_512x1024_160k_cityscapes_20200602_190822-221e4a4f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_160k_cityscapes/fcn_hr18_512x1024_160k_cityscapes_20200602_190822.log.json) | +| FCN | HRNetV2p-W48 | 512x1024 | 160000 | - | - | V100 | 80.65 | 81.92 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr48_4xb2-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_160k_cityscapes/fcn_hr48_512x1024_160k_cityscapes_20200602_190946-59b7973e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_160k_cityscapes/fcn_hr48_512x1024_160k_cityscapes_20200602_190946.log.json) | ### ADE20K -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | ------------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| FCN | HRNetV2p-W18-Small | 512x512 | 80000 | 3.8 | 38.66 | 31.38 | 32.45 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr18s_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_ade20k/fcn_hr18s_512x512_80k_ade20k_20200614_144345-77fc814a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_ade20k/fcn_hr18s_512x512_80k_ade20k_20200614_144345.log.json) | -| FCN | HRNetV2p-W18 | 512x512 | 80000 | 4.9 | 22.57 | 36.27 | 37.28 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr18_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_ade20k/fcn_hr18_512x512_80k_ade20k_20210827_114910-6c9382c0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_ade20k/fcn_hr18_512x512_80k_ade20k_20210827_114910.log.json) | -| FCN | HRNetV2p-W48 | 512x512 | 80000 | 8.2 | 21.23 | 41.90 | 43.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr48_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_ade20k/fcn_hr48_512x512_80k_ade20k_20200614_193946-7ba5258d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_ade20k/fcn_hr48_512x512_80k_ade20k_20200614_193946.log.json) | -| FCN | HRNetV2p-W18-Small | 512x512 | 160000 | - | - | 33.07 | 34.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr18s_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_160k_ade20k/fcn_hr18s_512x512_160k_ade20k_20210829_174739-f1e7c2e7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_160k_ade20k/fcn_hr18s_512x512_160k_ade20k_20210829_174739.log.json) | -| FCN | HRNetV2p-W18 | 512x512 | 160000 | - | - | 36.79 | 38.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr18_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_160k_ade20k/fcn_hr18_512x512_160k_ade20k_20200614_214426-ca961836.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_160k_ade20k/fcn_hr18_512x512_160k_ade20k_20200614_214426.log.json) | -| FCN | HRNetV2p-W48 | 512x512 | 160000 | - | - | 42.02 | 43.86 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr48_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_160k_ade20k/fcn_hr48_512x512_160k_ade20k_20200614_214407-a52fc02c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_160k_ade20k/fcn_hr48_512x512_160k_ade20k_20200614_214407.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------- | 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| FCN | HRNetV2p-W18-Small | 512x512 | 80000 | 3.8 | 38.66 | V100 | 31.38 | 32.45 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18s_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_ade20k/fcn_hr18s_512x512_80k_ade20k_20200614_144345-77fc814a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_ade20k/fcn_hr18s_512x512_80k_ade20k_20200614_144345.log.json) | +| FCN | HRNetV2p-W18 | 512x512 | 80000 | 4.9 | 22.57 | V100 | 36.27 | 37.28 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_ade20k/fcn_hr18_512x512_80k_ade20k_20210827_114910-6c9382c0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_ade20k/fcn_hr18_512x512_80k_ade20k_20210827_114910.log.json) | +| FCN | HRNetV2p-W48 | 512x512 | 80000 | 8.2 | 21.23 | V100 | 41.90 | 43.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr48_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_ade20k/fcn_hr48_512x512_80k_ade20k_20200614_193946-7ba5258d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_ade20k/fcn_hr48_512x512_80k_ade20k_20200614_193946.log.json) | +| FCN | HRNetV2p-W18-Small | 512x512 | 160000 | - | - | V100 | 33.07 | 34.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18s_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_160k_ade20k/fcn_hr18s_512x512_160k_ade20k_20210829_174739-f1e7c2e7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_160k_ade20k/fcn_hr18s_512x512_160k_ade20k_20210829_174739.log.json) | +| FCN | HRNetV2p-W18 | 512x512 | 160000 | - | - | V100 | 36.79 | 38.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_160k_ade20k/fcn_hr18_512x512_160k_ade20k_20200614_214426-ca961836.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_160k_ade20k/fcn_hr18_512x512_160k_ade20k_20200614_214426.log.json) | +| FCN | HRNetV2p-W48 | 512x512 | 160000 | - | - | V100 | 42.02 | 43.86 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr48_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_160k_ade20k/fcn_hr48_512x512_160k_ade20k_20200614_214407-a52fc02c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_160k_ade20k/fcn_hr48_512x512_160k_ade20k_20200614_214407.log.json) | ### Pascal VOC 2012 + Aug -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | ------------------ | --------- | ------: | -------- | -------------- | ----: | 
------------: | ------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| FCN | HRNetV2p-W18-Small | 512x512 | 20000 | 1.8 | 43.36 | 65.5 | 68.89 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr18s_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_20k_voc12aug/fcn_hr18s_512x512_20k_voc12aug_20210829_174910-0aceadb4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_20k_voc12aug/fcn_hr18s_512x512_20k_voc12aug_20210829_174910.log.json) | -| FCN | HRNetV2p-W18 | 512x512 | 20000 | 2.9 | 23.48 | 72.30 | 74.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr18_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_20k_voc12aug/fcn_hr18_512x512_20k_voc12aug_20200617_224503-488d45f7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_20k_voc12aug/fcn_hr18_512x512_20k_voc12aug_20200617_224503.log.json) | -| FCN | HRNetV2p-W48 | 512x512 | 20000 | 6.2 | 22.05 | 75.87 | 78.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr48_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_20k_voc12aug/fcn_hr48_512x512_20k_voc12aug_20200617_224419-89de05cd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_20k_voc12aug/fcn_hr48_512x512_20k_voc12aug_20200617_224419.log.json) | -| FCN | HRNetV2p-W18-Small | 512x512 | 40000 | - | - | 66.61 | 70.00 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr18s_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_40k_voc12aug/fcn_hr18s_512x512_40k_voc12aug_20200614_000648-4f8d6e7f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_40k_voc12aug/fcn_hr18s_512x512_40k_voc12aug_20200614_000648.log.json) | -| FCN | HRNetV2p-W18 | 512x512 | 40000 | - | - | 72.90 | 75.59 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr18_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_40k_voc12aug/fcn_hr18_512x512_40k_voc12aug_20200613_224401-1b4b76cd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_40k_voc12aug/fcn_hr18_512x512_40k_voc12aug_20200613_224401.log.json) | -| FCN | HRNetV2p-W48 | 512x512 | 40000 | - | - | 76.24 | 78.49 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr48_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_40k_voc12aug/fcn_hr48_512x512_40k_voc12aug_20200613_222111-1b0f18bc.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_40k_voc12aug/fcn_hr48_512x512_40k_voc12aug_20200613_222111.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | 
mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | HRNetV2p-W18-Small | 512x512 | 20000 | 1.8 | 43.36 | V100 | 65.5 | 68.89 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18s_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_20k_voc12aug/fcn_hr18s_512x512_20k_voc12aug_20210829_174910-0aceadb4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_20k_voc12aug/fcn_hr18s_512x512_20k_voc12aug_20210829_174910.log.json) | +| FCN | HRNetV2p-W18 | 512x512 | 20000 | 2.9 | 23.48 | V100 | 72.30 | 74.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_20k_voc12aug/fcn_hr18_512x512_20k_voc12aug_20200617_224503-488d45f7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_20k_voc12aug/fcn_hr18_512x512_20k_voc12aug_20200617_224503.log.json) | +| FCN | HRNetV2p-W48 | 512x512 | 20000 | 6.2 | 22.05 | V100 | 75.87 | 78.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr48_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_20k_voc12aug/fcn_hr48_512x512_20k_voc12aug_20200617_224419-89de05cd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_20k_voc12aug/fcn_hr48_512x512_20k_voc12aug_20200617_224419.log.json) | +| FCN | HRNetV2p-W18-Small | 512x512 | 40000 | - | - | V100 | 66.61 | 70.00 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18s_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_40k_voc12aug/fcn_hr18s_512x512_40k_voc12aug_20200614_000648-4f8d6e7f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_40k_voc12aug/fcn_hr18s_512x512_40k_voc12aug_20200614_000648.log.json) | +| FCN | HRNetV2p-W18 | 512x512 | 40000 | - | - | V100 | 72.90 | 75.59 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_40k_voc12aug/fcn_hr18_512x512_40k_voc12aug_20200613_224401-1b4b76cd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_40k_voc12aug/fcn_hr18_512x512_40k_voc12aug_20200613_224401.log.json) | +| FCN | HRNetV2p-W48 | 512x512 | 40000 | - | - | V100 | 76.24 | 78.49 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr48_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_40k_voc12aug/fcn_hr48_512x512_40k_voc12aug_20200613_222111-1b0f18bc.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_40k_voc12aug/fcn_hr48_512x512_40k_voc12aug_20200613_222111.log.json) | ### Pascal Context -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | ------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| FCN | HRNetV2p-W48 | 480x480 | 40000 | 6.1 | 8.86 | 45.14 | 47.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr48_4xb4-40k_pascal-context-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context/fcn_hr48_480x480_40k_pascal_context_20200911_164852-667d00b0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context/fcn_hr48_480x480_40k_pascal_context-20200911_164852.log.json) | -| FCN | HRNetV2p-W48 | 480x480 | 80000 | - | - | 45.84 | 47.84 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr48_4xb4-80k_pascal-context-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context/fcn_hr48_480x480_80k_pascal_context_20200911_155322-847a6711.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context/fcn_hr48_480x480_80k_pascal_context-20200911_155322.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------ | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| FCN | HRNetV2p-W48 | 480x480 | 40000 | 6.1 | 8.86 | V100 | 45.14 | 47.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr48_4xb4-40k_pascal-context-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context/fcn_hr48_480x480_40k_pascal_context_20200911_164852-667d00b0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context/fcn_hr48_480x480_40k_pascal_context-20200911_164852.log.json) | +| FCN | HRNetV2p-W48 | 480x480 | 80000 | - | - | V100 | 45.84 | 47.84 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr48_4xb4-80k_pascal-context-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context/fcn_hr48_480x480_80k_pascal_context_20200911_155322-847a6711.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context/fcn_hr48_480x480_80k_pascal_context-20200911_155322.log.json) | ### Pascal Context 59 -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | ------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| FCN | HRNetV2p-W48 | 480x480 | 40000 | - | - | 50.33 | 52.83 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr48_4xb4-40k_pascal-context-59-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context_59/fcn_hr48_480x480_40k_pascal_context_59_20210410_122738-b808b8b2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context_59/fcn_hr48_480x480_40k_pascal_context_59-20210410_122738.log.json) | -| FCN | HRNetV2p-W48 | 480x480 | 80000 | - | - | 51.12 | 53.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr48_4xb4-80k_pascal-context-59-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context_59/fcn_hr48_480x480_80k_pascal_context_59_20210411_003240-3ae7081e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context_59/fcn_hr48_480x480_80k_pascal_context_59-20210411_003240.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------ | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| FCN | HRNetV2p-W48 | 480x480 | 40000 | - | - | V100 | 50.33 | 52.83 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr48_4xb4-40k_pascal-context-59-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context_59/fcn_hr48_480x480_40k_pascal_context_59_20210410_122738-b808b8b2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context_59/fcn_hr48_480x480_40k_pascal_context_59-20210410_122738.log.json) | +| FCN | HRNetV2p-W48 | 480x480 | 80000 | - | - | V100 | 51.12 | 53.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr48_4xb4-80k_pascal-context-59-480x480.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context_59/fcn_hr48_480x480_80k_pascal_context_59_20210411_003240-3ae7081e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context_59/fcn_hr48_480x480_80k_pascal_context_59-20210411_003240.log.json) | ### LoveDA -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | ------------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| FCN | HRNetV2p-W18-Small | 512x512 | 80000 | 1.59 | 24.87 | 49.28 | 49.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr18s_4xb4-80k_loveda-512x512.pyy) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_loveda/fcn_hr18s_512x512_80k_loveda_20211210_203228-60a86a7a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_loveda/fcn_hr18s_512x512_80k_loveda_20211210_203228.log.json) | -| FCN | HRNetV2p-W18 | 512x512 | 80000 | 2.76 | 12.92 | 50.81 | 50.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr18_4xb4-80k_loveda-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_loveda/fcn_hr18_512x512_80k_loveda_20211210_203952-93d9c3b3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_loveda/fcn_hr18_512x512_80k_loveda_20211210_203952.log.json) | -| FCN | HRNetV2p-W48 | 512x512 | 80000 | 6.20 | 9.61 | 51.42 | 51.64 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr48_4xb4-80k_loveda-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_loveda/fcn_hr48_512x512_80k_loveda_20211211_044756-67072f55.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_loveda/fcn_hr48_512x512_80k_loveda_20211211_044756.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | HRNetV2p-W18-Small | 512x512 | 80000 | 1.59 | 24.87 | V100 | 49.28 | 49.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18s_4xb4-80k_loveda-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_loveda/fcn_hr18s_512x512_80k_loveda_20211210_203228-60a86a7a.pth) \|
[log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_loveda/fcn_hr18s_512x512_80k_loveda_20211210_203228.log.json) | +| FCN | HRNetV2p-W18 | 512x512 | 80000 | 2.76 | 12.92 | V100 | 50.81 | 50.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18_4xb4-80k_loveda-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_loveda/fcn_hr18_512x512_80k_loveda_20211210_203952-93d9c3b3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_loveda/fcn_hr18_512x512_80k_loveda_20211210_203952.log.json) | +| FCN | HRNetV2p-W48 | 512x512 | 80000 | 6.20 | 9.61 | V100 | 51.42 | 51.64 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr48_4xb4-80k_loveda-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_loveda/fcn_hr48_512x512_80k_loveda_20211211_044756-67072f55.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_loveda/fcn_hr48_512x512_80k_loveda_20211211_044756.log.json) | ### Potsdam -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | ------------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| FCN | HRNetV2p-W18-Small | 512x512 | 80000 | 1.58 | 36.00 | 77.64 | 78.8 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr18s_4xb4-80k_potsdam-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_potsdam/fcn_hr18s_512x512_80k_potsdam_20211218_205517-ba32af63.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_potsdam/fcn_hr18s_512x512_80k_potsdam_20211218_205517.log.json) | -| FCN | HRNetV2p-W18 | 512x512 | 80000 | 2.76 | 19.25 | 78.26 | 79.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr18_4xb4-80k_potsdam-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_potsdam/fcn_hr18_512x512_80k_potsdam_20211218_205517-5d0387ad.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_potsdam/fcn_hr18_512x512_80k_potsdam_20211218_205517.log.json) | -| FCN | HRNetV2p-W48 | 512x512 | 80000 | 6.20 | 16.42 | 78.39 | 79.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr48_4xb4-80k_potsdam-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_potsdam/fcn_hr48_512x512_80k_potsdam_20211219_020601-97434c78.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_potsdam/fcn_hr48_512x512_80k_potsdam_20211219_020601.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------- | -------------- | ------ | ----: | 
------------: | -------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| FCN | HRNetV2p-W18-Small | 512x512 | 80000 | 1.58 | 36.00 | V100 | 77.64 | 78.8 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18s_4xb4-80k_potsdam-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_potsdam/fcn_hr18s_512x512_80k_potsdam_20211218_205517-ba32af63.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_potsdam/fcn_hr18s_512x512_80k_potsdam_20211218_205517.log.json) | +| FCN | HRNetV2p-W18 | 512x512 | 80000 | 2.76 | 19.25 | V100 | 78.26 | 79.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18_4xb4-80k_potsdam-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_potsdam/fcn_hr18_512x512_80k_potsdam_20211218_205517-5d0387ad.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_potsdam/fcn_hr18_512x512_80k_potsdam_20211218_205517.log.json) | +| FCN | HRNetV2p-W48 | 512x512 | 80000 | 6.20 | 16.42 | V100 | 78.39 | 79.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr48_4xb4-80k_potsdam-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_potsdam/fcn_hr48_512x512_80k_potsdam_20211219_020601-97434c78.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_potsdam/fcn_hr48_512x512_80k_potsdam_20211219_020601.log.json) | ### Vaihingen -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | ------------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| FCN | HRNetV2p-W18-Small | 512x512 | 80000 | 1.58 | 38.11 | 71.81 | 73.1 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr18s_4xb4-80k_vaihingen-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_4x4_512x512_80k_vaihingen/fcn_hr18s_4x4_512x512_80k_vaihingen_20211231_230909-b23aae02.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_4x4_512x512_80k_vaihingen/fcn_hr18s_4x4_512x512_80k_vaihingen_20211231_230909.log.json) | -| FCN | HRNetV2p-W18 | 512x512 | 80000 | 2.76 | 19.55 | 72.57 | 74.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr18_4xb4-80k_vaihingen-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_4x4_512x512_80k_vaihingen/fcn_hr18_4x4_512x512_80k_vaihingen_20211231_231216-2ec3ae8a.pth) 
\| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_4x4_512x512_80k_vaihingen/fcn_hr18_4x4_512x512_80k_vaihingen_20211231_231216.log.json) | -| FCN | HRNetV2p-W48 | 512x512 | 80000 | 6.20 | 17.25 | 72.50 | 73.52 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr48_4xb4-80k_vaihingen-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_4x4_512x512_80k_vaihingen/fcn_hr48_4x4_512x512_80k_vaihingen_20211231_231244-7133cb22.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_4x4_512x512_80k_vaihingen/fcn_hr48_4x4_512x512_80k_vaihingen_20211231_231244.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| FCN | HRNetV2p-W18-Small | 512x512 | 80000 | 1.58 | 38.11 | V100 | 71.81 | 73.1 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18s_4xb4-80k_vaihingen-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_4x4_512x512_80k_vaihingen/fcn_hr18s_4x4_512x512_80k_vaihingen_20211231_230909-b23aae02.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_4x4_512x512_80k_vaihingen/fcn_hr18s_4x4_512x512_80k_vaihingen_20211231_230909.log.json) | +| FCN | HRNetV2p-W18 | 512x512 | 80000 | 2.76 | 19.55 | V100 | 72.57 | 74.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18_4xb4-80k_vaihingen-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_4x4_512x512_80k_vaihingen/fcn_hr18_4x4_512x512_80k_vaihingen_20211231_231216-2ec3ae8a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_4x4_512x512_80k_vaihingen/fcn_hr18_4x4_512x512_80k_vaihingen_20211231_231216.log.json) | +| FCN | HRNetV2p-W48 | 512x512 | 80000 | 6.20 | 17.25 | V100 | 72.50 | 73.52 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr48_4xb4-80k_vaihingen-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_4x4_512x512_80k_vaihingen/fcn_hr48_4x4_512x512_80k_vaihingen_20211231_231244-7133cb22.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_4x4_512x512_80k_vaihingen/fcn_hr48_4x4_512x512_80k_vaihingen_20211231_231244.log.json) | ### iSAID -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | ------------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------- | 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| FCN | HRNetV2p-W18-Small | 896x896 | 80000 | 4.95 | 13.84 | 62.30 | 62.97 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr18s_4xb4-80k_isaid-896x896.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_4x4_896x896_80k_isaid/fcn_hr18s_4x4_896x896_80k_isaid_20220118_001603-3cc0769b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_4x4_896x896_80k_isaid/fcn_hr18s_4x4_896x896_80k_isaid_20220118_001603.log.json) | -| FCN | HRNetV2p-W18 | 896x896 | 80000 | 8.30 | 7.71 | 65.06 | 65.60 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr18_4xb4-80k_isaid-896x896.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_4x4_896x896_80k_isaid/fcn_hr18_4x4_896x896_80k_isaid_20220110_182230-49bf752e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_4x4_896x896_80k_isaid/fcn_hr18_4x4_896x896_80k_isaid_20220110_182230.log.json) | -| FCN | HRNetV2p-W48 | 896x896 | 80000 | 16.89 | 7.34 | 67.80 | 68.53 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/hrnet/fcn_hr48_4xb4-80k_isaid-896x896.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_4x4_896x896_80k_isaid/fcn_hr48_4x4_896x896_80k_isaid_20220114_174643-547fc420.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_4x4_896x896_80k_isaid/fcn_hr48_4x4_896x896_80k_isaid_20220114_174643.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | HRNetV2p-W18-Small | 896x896 | 80000 | 4.95 | 13.84 | V100 | 62.30 | 62.97 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18s_4xb4-80k_isaid-896x896.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_4x4_896x896_80k_isaid/fcn_hr18s_4x4_896x896_80k_isaid_20220118_001603-3cc0769b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_4x4_896x896_80k_isaid/fcn_hr18s_4x4_896x896_80k_isaid_20220118_001603.log.json) | +| FCN | HRNetV2p-W18 | 896x896 | 80000 | 8.30 | 7.71 | V100 | 65.06 | 65.60 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr18_4xb4-80k_isaid-896x896.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_4x4_896x896_80k_isaid/fcn_hr18_4x4_896x896_80k_isaid_20220110_182230-49bf752e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_4x4_896x896_80k_isaid/fcn_hr18_4x4_896x896_80k_isaid_20220110_182230.log.json) | +| FCN | 
HRNetV2p-W48 | 896x896 | 80000 | 16.89 | 7.34 | V100 | 67.80 | 68.53 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet/fcn_hr48_4xb4-80k_isaid-896x896.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_4x4_896x896_80k_isaid/fcn_hr48_4x4_896x896_80k_isaid_20220114_174643-547fc420.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_4x4_896x896_80k_isaid/fcn_hr48_4x4_896x896_80k_isaid_20220114_174643.log.json) | Note: - `896x896` is the Crop Size of the iSAID dataset, following the implementation of [PointFlow: Flowing Semantics Through Points for Aerial Image Segmentation](https://arxiv.org/pdf/2103.06564.pdf) + +## Citation + +```bibtex +@inproceedings{SunXLW19, + title={Deep High-Resolution Representation Learning for Human Pose Estimation}, + author={Ke Sun and Bin Xiao and Dong Liu and Jingdong Wang}, + booktitle={CVPR}, + year={2019} +} +``` diff --git a/configs/hrnet/hrnet.yml b/configs/hrnet/hrnet.yml deleted file mode 100644 index 77f556e17a..0000000000 --- a/configs/hrnet/hrnet.yml +++ /dev/null @@ -1,695 +0,0 @@ -Models: -- Name: fcn_hr18s_4xb2-40k_cityscapes-512x1024 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W18-Small - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 42.12 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 1.7 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 73.86 - mIoU(ms+flip): 75.91 - Config: configs/hrnet/fcn_hr18s_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_40k_cityscapes/fcn_hr18s_512x1024_40k_cityscapes_20200601_014216-93db27d0.pth -- Name: fcn_hr18_4xb2-40k_cityscapes-512x1024 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W18 - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 77.1 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 2.9 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 77.19 - mIoU(ms+flip): 78.92 - Config: configs/hrnet/fcn_hr18_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_40k_cityscapes/fcn_hr18_512x1024_40k_cityscapes_20200601_014216-f196fb4e.pth -- Name: fcn_hr48_4xb2-40k_cityscapes-512x1024 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W48 - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 155.76 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 6.2 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.48 - mIoU(ms+flip): 79.69 - Config: configs/hrnet/fcn_hr48_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_40k_cityscapes/fcn_hr48_512x1024_40k_cityscapes_20200601_014240-a989b146.pth -- Name: fcn_hr18s_4xb2-80k_cityscapes-512x1024 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W18-Small - crop size: (512,1024) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 75.31 - mIoU(ms+flip): 77.48 - Config: configs/hrnet/fcn_hr18s_4xb2-80k_cityscapes-512x1024.py - Weights:
https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_80k_cityscapes/fcn_hr18s_512x1024_80k_cityscapes_20200601_202700-1462b75d.pth -- Name: fcn_hr18_4xb2-80k_cityscapes-512x1024 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W18 - crop size: (512,1024) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.65 - mIoU(ms+flip): 80.35 - Config: configs/hrnet/fcn_hr18_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_80k_cityscapes/fcn_hr18_512x1024_80k_cityscapes_20200601_223255-4e7b345e.pth -- Name: fcn_hr48_4xb2-80k_cityscapes-512x1024 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W48 - crop size: (512,1024) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.93 - mIoU(ms+flip): 80.72 - Config: configs/hrnet/fcn_hr48_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_80k_cityscapes/fcn_hr48_512x1024_80k_cityscapes_20200601_202606-58ea95d6.pth -- Name: fcn_hr18s_4xb2-160k_cityscapes-512x1024 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W18-Small - crop size: (512,1024) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 76.31 - mIoU(ms+flip): 78.31 - Config: configs/hrnet/fcn_hr18s_4xb2-160k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_160k_cityscapes/fcn_hr18s_512x1024_160k_cityscapes_20200602_190901-4a0797ea.pth -- Name: fcn_hr18_4xb2-160k_cityscapes-512x1024 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W18 - crop size: (512,1024) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.8 - mIoU(ms+flip): 80.74 - Config: configs/hrnet/fcn_hr18_4xb2-160k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_160k_cityscapes/fcn_hr18_512x1024_160k_cityscapes_20200602_190822-221e4a4f.pth -- Name: fcn_hr48_4xb2-160k_cityscapes-512x1024 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W48 - crop size: (512,1024) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 80.65 - mIoU(ms+flip): 81.92 - Config: configs/hrnet/fcn_hr48_4xb2-160k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_160k_cityscapes/fcn_hr48_512x1024_160k_cityscapes_20200602_190946-59b7973e.pth -- Name: fcn_hr18s_4xb4-80k_ade20k-512x512 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W18-Small - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 25.87 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 3.8 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 31.38 - mIoU(ms+flip): 32.45 - Config: configs/hrnet/fcn_hr18s_4xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_ade20k/fcn_hr18s_512x512_80k_ade20k_20200614_144345-77fc814a.pth -- Name: fcn_hr18_4xb4-80k_ade20k-512x512 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W18 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 44.31 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 4.9 - 
Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 36.27 - mIoU(ms+flip): 37.28 - Config: configs/hrnet/fcn_hr18_4xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_ade20k/fcn_hr18_512x512_80k_ade20k_20210827_114910-6c9382c0.pth -- Name: fcn_hr48_4xb4-80k_ade20k-512x512 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W48 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 47.1 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 8.2 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 41.9 - mIoU(ms+flip): 43.27 - Config: configs/hrnet/fcn_hr48_4xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_ade20k/fcn_hr48_512x512_80k_ade20k_20200614_193946-7ba5258d.pth -- Name: fcn_hr18s_4xb4-160k_ade20k-512x512 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W18-Small - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 33.07 - mIoU(ms+flip): 34.56 - Config: configs/hrnet/fcn_hr18s_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_160k_ade20k/fcn_hr18s_512x512_160k_ade20k_20210829_174739-f1e7c2e7.pth -- Name: fcn_hr18_4xb4-160k_ade20k-512x512 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W18 - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 36.79 - mIoU(ms+flip): 38.58 - Config: configs/hrnet/fcn_hr18_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_160k_ade20k/fcn_hr18_512x512_160k_ade20k_20200614_214426-ca961836.pth -- Name: fcn_hr48_4xb4-160k_ade20k-512x512 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W48 - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 42.02 - mIoU(ms+flip): 43.86 - Config: configs/hrnet/fcn_hr48_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_160k_ade20k/fcn_hr48_512x512_160k_ade20k_20200614_214407-a52fc02c.pth -- Name: fcn_hr18s_4xb4-20k_voc12aug-512x512 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W18-Small - crop size: (512,512) - lr schd: 20000 - inference time (ms/im): - - value: 23.06 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 1.8 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 65.5 - mIoU(ms+flip): 68.89 - Config: configs/hrnet/fcn_hr18s_4xb4-20k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_20k_voc12aug/fcn_hr18s_512x512_20k_voc12aug_20210829_174910-0aceadb4.pth -- Name: fcn_hr18_4xb4-20k_voc12aug-512x512 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W18 - crop size: (512,512) - lr schd: 20000 - inference time (ms/im): - - value: 42.59 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 2.9 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 72.3 - mIoU(ms+flip): 74.71 - Config: configs/hrnet/fcn_hr18_4xb4-20k_voc12aug-512x512.py - Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_20k_voc12aug/fcn_hr18_512x512_20k_voc12aug_20200617_224503-488d45f7.pth -- Name: fcn_hr48_4xb4-20k_voc12aug-512x512 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W48 - crop size: (512,512) - lr schd: 20000 - inference time (ms/im): - - value: 45.35 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 6.2 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 75.87 - mIoU(ms+flip): 78.58 - Config: configs/hrnet/fcn_hr48_4xb4-20k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_20k_voc12aug/fcn_hr48_512x512_20k_voc12aug_20200617_224419-89de05cd.pth -- Name: fcn_hr18s_4xb4-40k_voc12aug-512x512 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W18-Small - crop size: (512,512) - lr schd: 40000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 66.61 - mIoU(ms+flip): 70.0 - Config: configs/hrnet/fcn_hr18s_4xb4-40k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_40k_voc12aug/fcn_hr18s_512x512_40k_voc12aug_20200614_000648-4f8d6e7f.pth -- Name: fcn_hr18_4xb4-40k_voc12aug-512x512 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W18 - crop size: (512,512) - lr schd: 40000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 72.9 - mIoU(ms+flip): 75.59 - Config: configs/hrnet/fcn_hr18_4xb4-40k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_40k_voc12aug/fcn_hr18_512x512_40k_voc12aug_20200613_224401-1b4b76cd.pth -- Name: fcn_hr48_4xb4-40k_voc12aug-512x512 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W48 - crop size: (512,512) - lr schd: 40000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 76.24 - mIoU(ms+flip): 78.49 - Config: configs/hrnet/fcn_hr48_4xb4-40k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_40k_voc12aug/fcn_hr48_512x512_40k_voc12aug_20200613_222111-1b0f18bc.pth -- Name: fcn_hr48_4xb4-40k_pascal-context-480x480 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W48 - crop size: (480,480) - lr schd: 40000 - inference time (ms/im): - - value: 112.87 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (480,480) - Training Memory (GB): 6.1 - Results: - - Task: Semantic Segmentation - Dataset: Pascal Context - Metrics: - mIoU: 45.14 - mIoU(ms+flip): 47.42 - Config: configs/hrnet/fcn_hr48_4xb4-40k_pascal-context-480x480.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context/fcn_hr48_480x480_40k_pascal_context_20200911_164852-667d00b0.pth -- Name: fcn_hr48_4xb4-80k_pascal-context-480x480 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W48 - crop size: (480,480) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal Context - Metrics: - mIoU: 45.84 - mIoU(ms+flip): 47.84 - Config: configs/hrnet/fcn_hr48_4xb4-80k_pascal-context-480x480.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context/fcn_hr48_480x480_80k_pascal_context_20200911_155322-847a6711.pth -- Name: fcn_hr48_4xb4-40k_pascal-context-59-480x480 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W48 - crop size: (480,480) - 
lr schd: 40000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal Context 59 - Metrics: - mIoU: 50.33 - mIoU(ms+flip): 52.83 - Config: configs/hrnet/fcn_hr48_4xb4-40k_pascal-context-59-480x480.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context_59/fcn_hr48_480x480_40k_pascal_context_59_20210410_122738-b808b8b2.pth -- Name: fcn_hr48_4xb4-80k_pascal-context-59-480x480 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W48 - crop size: (480,480) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal Context 59 - Metrics: - mIoU: 51.12 - mIoU(ms+flip): 53.56 - Config: configs/hrnet/fcn_hr48_4xb4-80k_pascal-context-59-480x480.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context_59/fcn_hr48_480x480_80k_pascal_context_59_20210411_003240-3ae7081e.pth -- Name: fcn_hr18s_4xb4-80k_loveda-512x512 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W18-Small - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 40.21 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 1.59 - Results: - - Task: Semantic Segmentation - Dataset: LoveDA - Metrics: - mIoU: 49.28 - mIoU(ms+flip): 49.42 - Config: configs/hrnet/fcn_hr18s_4xb4-80k_loveda-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_loveda/fcn_hr18s_512x512_80k_loveda_20211210_203228-60a86a7a.pth -- Name: fcn_hr18_4xb4-80k_loveda-512x512 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W18 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 77.4 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 2.76 - Results: - - Task: Semantic Segmentation - Dataset: LoveDA - Metrics: - mIoU: 50.81 - mIoU(ms+flip): 50.95 - Config: configs/hrnet/fcn_hr18_4xb4-80k_loveda-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_loveda/fcn_hr18_512x512_80k_loveda_20211210_203952-93d9c3b3.pth -- Name: fcn_hr48_4xb4-80k_loveda-512x512 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W48 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 104.06 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 6.2 - Results: - - Task: Semantic Segmentation - Dataset: LoveDA - Metrics: - mIoU: 51.42 - mIoU(ms+flip): 51.64 - Config: configs/hrnet/fcn_hr48_4xb4-80k_loveda-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_loveda/fcn_hr48_512x512_80k_loveda_20211211_044756-67072f55.pth -- Name: fcn_hr18s_4xb4-80k_potsdam-512x512 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W18-Small - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 27.78 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 1.58 - Results: - - Task: Semantic Segmentation - Dataset: Potsdam - Metrics: - mIoU: 77.64 - mIoU(ms+flip): 78.8 - Config: configs/hrnet/fcn_hr18s_4xb4-80k_potsdam-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_potsdam/fcn_hr18s_512x512_80k_potsdam_20211218_205517-ba32af63.pth -- Name: fcn_hr18_4xb4-80k_potsdam-512x512 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W18 - crop size: (512,512) - lr schd: 80000 
- inference time (ms/im): - - value: 51.95 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 2.76 - Results: - - Task: Semantic Segmentation - Dataset: Potsdam - Metrics: - mIoU: 78.26 - mIoU(ms+flip): 79.24 - Config: configs/hrnet/fcn_hr18_4xb4-80k_potsdam-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_potsdam/fcn_hr18_512x512_80k_potsdam_20211218_205517-5d0387ad.pth -- Name: fcn_hr48_4xb4-80k_potsdam-512x512 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W48 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 60.9 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 6.2 - Results: - - Task: Semantic Segmentation - Dataset: Potsdam - Metrics: - mIoU: 78.39 - mIoU(ms+flip): 79.34 - Config: configs/hrnet/fcn_hr48_4xb4-80k_potsdam-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_potsdam/fcn_hr48_512x512_80k_potsdam_20211219_020601-97434c78.pth -- Name: fcn_hr18s_4xb4-80k_vaihingen-512x512 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W18-Small - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 26.24 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 1.58 - Results: - - Task: Semantic Segmentation - Dataset: Vaihingen - Metrics: - mIoU: 71.81 - mIoU(ms+flip): 73.1 - Config: configs/hrnet/fcn_hr18s_4xb4-80k_vaihingen-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_4x4_512x512_80k_vaihingen/fcn_hr18s_4x4_512x512_80k_vaihingen_20211231_230909-b23aae02.pth -- Name: fcn_hr18_4xb4-80k_vaihingen-512x512 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W18 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 51.15 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 2.76 - Results: - - Task: Semantic Segmentation - Dataset: Vaihingen - Metrics: - mIoU: 72.57 - mIoU(ms+flip): 74.09 - Config: configs/hrnet/fcn_hr18_4xb4-80k_vaihingen-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_4x4_512x512_80k_vaihingen/fcn_hr18_4x4_512x512_80k_vaihingen_20211231_231216-2ec3ae8a.pth -- Name: fcn_hr48_4xb4-80k_vaihingen-512x512 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W48 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 57.97 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 6.2 - Results: - - Task: Semantic Segmentation - Dataset: Vaihingen - Metrics: - mIoU: 72.5 - mIoU(ms+flip): 73.52 - Config: configs/hrnet/fcn_hr48_4xb4-80k_vaihingen-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_4x4_512x512_80k_vaihingen/fcn_hr48_4x4_512x512_80k_vaihingen_20211231_231244-7133cb22.pth -- Name: fcn_hr18s_4xb4-80k_isaid-896x896 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W18-Small - crop size: (896,896) - lr schd: 80000 - inference time (ms/im): - - value: 72.25 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (896,896) - Training Memory (GB): 4.95 - Results: - - Task: Semantic Segmentation - Dataset: iSAID - Metrics: - mIoU: 62.3 - mIoU(ms+flip): 62.97 - Config: configs/hrnet/fcn_hr18s_4xb4-80k_isaid-896x896.py - Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_4x4_896x896_80k_isaid/fcn_hr18s_4x4_896x896_80k_isaid_20220118_001603-3cc0769b.pth -- Name: fcn_hr18_4xb4-80k_isaid-896x896 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W18 - crop size: (896,896) - lr schd: 80000 - inference time (ms/im): - - value: 129.7 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (896,896) - Training Memory (GB): 8.3 - Results: - - Task: Semantic Segmentation - Dataset: iSAID - Metrics: - mIoU: 65.06 - mIoU(ms+flip): 65.6 - Config: configs/hrnet/fcn_hr18_4xb4-80k_isaid-896x896.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_4x4_896x896_80k_isaid/fcn_hr18_4x4_896x896_80k_isaid_20220110_182230-49bf752e.pth -- Name: fcn_hr48_4xb4-80k_isaid-896x896 - In Collection: FCN - Metadata: - backbone: HRNetV2p-W48 - crop size: (896,896) - lr schd: 80000 - inference time (ms/im): - - value: 136.24 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (896,896) - Training Memory (GB): 16.89 - Results: - - Task: Semantic Segmentation - Dataset: iSAID - Metrics: - mIoU: 67.8 - mIoU(ms+flip): 68.53 - Config: configs/hrnet/fcn_hr48_4xb4-80k_isaid-896x896.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_4x4_896x896_80k_isaid/fcn_hr48_4x4_896x896_80k_isaid_20220114_174643-547fc420.pth diff --git a/configs/hrnet/metafile.yaml b/configs/hrnet/metafile.yaml new file mode 100644 index 0000000000..11c30165a5 --- /dev/null +++ b/configs/hrnet/metafile.yaml @@ -0,0 +1,874 @@ +Models: +- Name: fcn_hr18s_4xb2-40k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.86 + mIoU(ms+flip): 75.91 + Config: configs/hrnet/fcn_hr18s_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W18-Small + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 1.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_40k_cityscapes/fcn_hr18s_512x1024_40k_cityscapes_20200601_014216-93db27d0.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_40k_cityscapes/fcn_hr18s_512x1024_40k_cityscapes_20200601_014216.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18_4xb2-40k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.19 + mIoU(ms+flip): 78.92 + Config: configs/hrnet/fcn_hr18_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W18 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 2.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_40k_cityscapes/fcn_hr18_512x1024_40k_cityscapes_20200601_014216-f196fb4e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_40k_cityscapes/fcn_hr18_512x1024_40k_cityscapes_20200601_014216.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: 
PyTorch +- Name: fcn_hr48_4xb2-40k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.48 + mIoU(ms+flip): 79.69 + Config: configs/hrnet/fcn_hr48_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W48 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 6.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_40k_cityscapes/fcn_hr48_512x1024_40k_cityscapes_20200601_014240-a989b146.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_40k_cityscapes/fcn_hr48_512x1024_40k_cityscapes_20200601_014240.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18s_4xb2-80k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.31 + mIoU(ms+flip): 77.48 + Config: configs/hrnet/fcn_hr18s_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W18-Small + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_80k_cityscapes/fcn_hr18s_512x1024_80k_cityscapes_20200601_202700-1462b75d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_80k_cityscapes/fcn_hr18s_512x1024_80k_cityscapes_20200601_202700.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18_4xb2-80k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.65 + mIoU(ms+flip): 80.35 + Config: configs/hrnet/fcn_hr18_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W18 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_80k_cityscapes/fcn_hr18_512x1024_80k_cityscapes_20200601_223255-4e7b345e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_80k_cityscapes/fcn_hr18_512x1024_80k_cityscapes_20200601_223255.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr48_4xb2-80k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.93 + mIoU(ms+flip): 80.72 + Config: configs/hrnet/fcn_hr48_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W48 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_80k_cityscapes/fcn_hr48_512x1024_80k_cityscapes_20200601_202606-58ea95d6.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_80k_cityscapes/fcn_hr48_512x1024_80k_cityscapes_20200601_202606.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18s_4xb2-160k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.31 + mIoU(ms+flip): 78.31 + Config: configs/hrnet/fcn_hr18s_4xb2-160k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W18-Small + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_160k_cityscapes/fcn_hr18s_512x1024_160k_cityscapes_20200602_190901-4a0797ea.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_160k_cityscapes/fcn_hr18s_512x1024_160k_cityscapes_20200602_190901.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18_4xb2-160k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.8 + mIoU(ms+flip): 80.74 + Config: configs/hrnet/fcn_hr18_4xb2-160k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W18 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_160k_cityscapes/fcn_hr18_512x1024_160k_cityscapes_20200602_190822-221e4a4f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_160k_cityscapes/fcn_hr18_512x1024_160k_cityscapes_20200602_190822.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr48_4xb2-160k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.65 + mIoU(ms+flip): 81.92 + Config: configs/hrnet/fcn_hr48_4xb2-160k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W48 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_160k_cityscapes/fcn_hr48_512x1024_160k_cityscapes_20200602_190946-59b7973e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_160k_cityscapes/fcn_hr48_512x1024_160k_cityscapes_20200602_190946.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18s_4xb4-80k_ade20k-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 31.38 + mIoU(ms+flip): 32.45 + Config: configs/hrnet/fcn_hr18s_4xb4-80k_ade20k-512x512.py + Metadata: + Training 
Data: ADE20K + Batch Size: 16 + Architecture: + - HRNetV2p-W18-Small + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 3.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_ade20k/fcn_hr18s_512x512_80k_ade20k_20200614_144345-77fc814a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_ade20k/fcn_hr18s_512x512_80k_ade20k_20200614_144345.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18_4xb4-80k_ade20k-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 36.27 + mIoU(ms+flip): 37.28 + Config: configs/hrnet/fcn_hr18_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - HRNetV2p-W18 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 4.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_ade20k/fcn_hr18_512x512_80k_ade20k_20210827_114910-6c9382c0.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_ade20k/fcn_hr18_512x512_80k_ade20k_20210827_114910.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr48_4xb4-80k_ade20k-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.9 + mIoU(ms+flip): 43.27 + Config: configs/hrnet/fcn_hr48_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - HRNetV2p-W48 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 8.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_ade20k/fcn_hr48_512x512_80k_ade20k_20200614_193946-7ba5258d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_ade20k/fcn_hr48_512x512_80k_ade20k_20200614_193946.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18s_4xb4-160k_ade20k-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 33.07 + mIoU(ms+flip): 34.56 + Config: configs/hrnet/fcn_hr18s_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - HRNetV2p-W18-Small + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_160k_ade20k/fcn_hr18s_512x512_160k_ade20k_20210829_174739-f1e7c2e7.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_160k_ade20k/fcn_hr18s_512x512_160k_ade20k_20210829_174739.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18_4xb4-160k_ade20k-512x512 + In 
Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 36.79 + mIoU(ms+flip): 38.58 + Config: configs/hrnet/fcn_hr18_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - HRNetV2p-W18 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_160k_ade20k/fcn_hr18_512x512_160k_ade20k_20200614_214426-ca961836.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_160k_ade20k/fcn_hr18_512x512_160k_ade20k_20200614_214426.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr48_4xb4-160k_ade20k-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.02 + mIoU(ms+flip): 43.86 + Config: configs/hrnet/fcn_hr48_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - HRNetV2p-W48 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_160k_ade20k/fcn_hr48_512x512_160k_ade20k_20200614_214407-a52fc02c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_160k_ade20k/fcn_hr48_512x512_160k_ade20k_20200614_214407.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18s_4xb4-20k_voc12aug-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 65.5 + mIoU(ms+flip): 68.89 + Config: configs/hrnet/fcn_hr18s_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - HRNetV2p-W18-Small + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 1.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_20k_voc12aug/fcn_hr18s_512x512_20k_voc12aug_20210829_174910-0aceadb4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_20k_voc12aug/fcn_hr18s_512x512_20k_voc12aug_20210829_174910.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18_4xb4-20k_voc12aug-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 72.3 + mIoU(ms+flip): 74.71 + Config: configs/hrnet/fcn_hr18_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - HRNetV2p-W18 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 2.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_20k_voc12aug/fcn_hr18_512x512_20k_voc12aug_20200617_224503-488d45f7.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_20k_voc12aug/fcn_hr18_512x512_20k_voc12aug_20200617_224503.log.json + Paper: + Title: Deep 
High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr48_4xb4-20k_voc12aug-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 75.87 + mIoU(ms+flip): 78.58 + Config: configs/hrnet/fcn_hr48_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - HRNetV2p-W48 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 6.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_20k_voc12aug/fcn_hr48_512x512_20k_voc12aug_20200617_224419-89de05cd.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_20k_voc12aug/fcn_hr48_512x512_20k_voc12aug_20200617_224419.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18s_4xb4-40k_voc12aug-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 66.61 + mIoU(ms+flip): 70.0 + Config: configs/hrnet/fcn_hr18s_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - HRNetV2p-W18-Small + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_40k_voc12aug/fcn_hr18s_512x512_40k_voc12aug_20200614_000648-4f8d6e7f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_40k_voc12aug/fcn_hr18s_512x512_40k_voc12aug_20200614_000648.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18_4xb4-40k_voc12aug-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 72.9 + mIoU(ms+flip): 75.59 + Config: configs/hrnet/fcn_hr18_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - HRNetV2p-W18 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_40k_voc12aug/fcn_hr18_512x512_40k_voc12aug_20200613_224401-1b4b76cd.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_40k_voc12aug/fcn_hr18_512x512_40k_voc12aug_20200613_224401.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr48_4xb4-40k_voc12aug-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.24 + mIoU(ms+flip): 78.49 + Config: configs/hrnet/fcn_hr48_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - HRNetV2p-W48 + - FCN + Training Resources: 4x V100 GPUS + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_40k_voc12aug/fcn_hr48_512x512_40k_voc12aug_20200613_222111-1b0f18bc.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_40k_voc12aug/fcn_hr48_512x512_40k_voc12aug_20200613_222111.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr48_4xb4-40k_pascal-context-480x480 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal Context + Metrics: + mIoU: 45.14 + mIoU(ms+flip): 47.42 + Config: configs/hrnet/fcn_hr48_4xb4-40k_pascal-context-480x480.py + Metadata: + Training Data: Pascal Context + Batch Size: 16 + Architecture: + - HRNetV2p-W48 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 6.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context/fcn_hr48_480x480_40k_pascal_context_20200911_164852-667d00b0.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context/fcn_hr48_480x480_40k_pascal_context-20200911_164852.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr48_4xb4-80k_pascal-context-480x480 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal Context + Metrics: + mIoU: 45.84 + mIoU(ms+flip): 47.84 + Config: configs/hrnet/fcn_hr48_4xb4-80k_pascal-context-480x480.py + Metadata: + Training Data: Pascal Context + Batch Size: 16 + Architecture: + - HRNetV2p-W48 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context/fcn_hr48_480x480_80k_pascal_context_20200911_155322-847a6711.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context/fcn_hr48_480x480_80k_pascal_context-20200911_155322.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr48_4xb4-40k_pascal-context-59-480x480 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal Context 59 + Metrics: + mIoU: 50.33 + mIoU(ms+flip): 52.83 + Config: configs/hrnet/fcn_hr48_4xb4-40k_pascal-context-59-480x480.py + Metadata: + Training Data: Pascal Context 59 + Batch Size: 16 + Architecture: + - HRNetV2p-W48 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context_59/fcn_hr48_480x480_40k_pascal_context_59_20210410_122738-b808b8b2.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context_59/fcn_hr48_480x480_40k_pascal_context_59-20210410_122738.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- 
Name: fcn_hr48_4xb4-80k_pascal-context-59-480x480 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Pascal Context 59 + Metrics: + mIoU: 51.12 + mIoU(ms+flip): 53.56 + Config: configs/hrnet/fcn_hr48_4xb4-80k_pascal-context-59-480x480.py + Metadata: + Training Data: Pascal Context 59 + Batch Size: 16 + Architecture: + - HRNetV2p-W48 + - FCN + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context_59/fcn_hr48_480x480_80k_pascal_context_59_20210411_003240-3ae7081e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context_59/fcn_hr48_480x480_80k_pascal_context_59-20210411_003240.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18s_4xb4-80k_loveda-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: LoveDA + Metrics: + mIoU: 49.28 + mIoU(ms+flip): 49.42 + Config: configs/hrnet/fcn_hr18s_4xb4-80k_loveda-512x512.py + Metadata: + Training Data: LoveDA + Batch Size: 16 + Architecture: + - HRNetV2p-W18-Small + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 1.59 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_loveda/fcn_hr18s_512x512_80k_loveda_20211210_203228-60a86a7a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_loveda/fcn_hr18s_512x512_80k_loveda_20211210_203228.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18_4xb4-80k_loveda-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: LoveDA + Metrics: + mIoU: 50.81 + mIoU(ms+flip): 50.95 + Config: configs/hrnet/fcn_hr18_4xb4-80k_loveda-512x512.py + Metadata: + Training Data: LoveDA + Batch Size: 16 + Architecture: + - HRNetV2p-W18 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 2.76 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_loveda/fcn_hr18_512x512_80k_loveda_20211210_203952-93d9c3b3.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_loveda/fcn_hr18_512x512_80k_loveda_20211210_203952.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr48_4xb4-80k_loveda-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: LoveDA + Metrics: + mIoU: 51.42 + mIoU(ms+flip): 51.64 + Config: configs/hrnet/fcn_hr48_4xb4-80k_loveda-512x512.py + Metadata: + Training Data: LoveDA + Batch Size: 16 + Architecture: + - HRNetV2p-W48 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 6.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_loveda/fcn_hr48_512x512_80k_loveda_20211211_044756-67072f55.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_loveda/fcn_hr48_512x512_80k_loveda_20211211_044756.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18s_4xb4-80k_potsdam-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Potsdam + Metrics: + mIoU: 77.64 + mIoU(ms+flip): 78.8 + Config: configs/hrnet/fcn_hr18s_4xb4-80k_potsdam-512x512.py + Metadata: + Training Data: Potsdam + Batch Size: 16 + Architecture: + - HRNetV2p-W18-Small + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 1.58 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_potsdam/fcn_hr18s_512x512_80k_potsdam_20211218_205517-ba32af63.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_potsdam/fcn_hr18s_512x512_80k_potsdam_20211218_205517.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18_4xb4-80k_potsdam-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Potsdam + Metrics: + mIoU: 78.26 + mIoU(ms+flip): 79.24 + Config: configs/hrnet/fcn_hr18_4xb4-80k_potsdam-512x512.py + Metadata: + Training Data: Potsdam + Batch Size: 16 + Architecture: + - HRNetV2p-W18 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 2.76 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_potsdam/fcn_hr18_512x512_80k_potsdam_20211218_205517-5d0387ad.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_potsdam/fcn_hr18_512x512_80k_potsdam_20211218_205517.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr48_4xb4-80k_potsdam-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Potsdam + Metrics: + mIoU: 78.39 + mIoU(ms+flip): 79.34 + Config: configs/hrnet/fcn_hr48_4xb4-80k_potsdam-512x512.py + Metadata: + Training Data: Potsdam + Batch Size: 16 + Architecture: + - HRNetV2p-W48 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 6.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_potsdam/fcn_hr48_512x512_80k_potsdam_20211219_020601-97434c78.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_potsdam/fcn_hr48_512x512_80k_potsdam_20211219_020601.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18s_4xb4-80k_vaihingen-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Vaihingen + Metrics: + mIoU: 71.81 + mIoU(ms+flip): 73.1 + Config: configs/hrnet/fcn_hr18s_4xb4-80k_vaihingen-512x512.py + Metadata: + Training Data: Vaihingen + Batch Size: 16 + Architecture: 
+ - HRNetV2p-W18-Small + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 1.58 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_4x4_512x512_80k_vaihingen/fcn_hr18s_4x4_512x512_80k_vaihingen_20211231_230909-b23aae02.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_4x4_512x512_80k_vaihingen/fcn_hr18s_4x4_512x512_80k_vaihingen_20211231_230909.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18_4xb4-80k_vaihingen-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Vaihingen + Metrics: + mIoU: 72.57 + mIoU(ms+flip): 74.09 + Config: configs/hrnet/fcn_hr18_4xb4-80k_vaihingen-512x512.py + Metadata: + Training Data: Vaihingen + Batch Size: 16 + Architecture: + - HRNetV2p-W18 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 2.76 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_4x4_512x512_80k_vaihingen/fcn_hr18_4x4_512x512_80k_vaihingen_20211231_231216-2ec3ae8a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_4x4_512x512_80k_vaihingen/fcn_hr18_4x4_512x512_80k_vaihingen_20211231_231216.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr48_4xb4-80k_vaihingen-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Vaihingen + Metrics: + mIoU: 72.5 + mIoU(ms+flip): 73.52 + Config: configs/hrnet/fcn_hr48_4xb4-80k_vaihingen-512x512.py + Metadata: + Training Data: Vaihingen + Batch Size: 16 + Architecture: + - HRNetV2p-W48 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 6.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_4x4_512x512_80k_vaihingen/fcn_hr48_4x4_512x512_80k_vaihingen_20211231_231244-7133cb22.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_4x4_512x512_80k_vaihingen/fcn_hr48_4x4_512x512_80k_vaihingen_20211231_231244.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18s_4xb4-80k_isaid-896x896 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: iSAID + Metrics: + mIoU: 62.3 + mIoU(ms+flip): 62.97 + Config: configs/hrnet/fcn_hr18s_4xb4-80k_isaid-896x896.py + Metadata: + Training Data: iSAID + Batch Size: 16 + Architecture: + - HRNetV2p-W18-Small + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 4.95 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_4x4_896x896_80k_isaid/fcn_hr18s_4x4_896x896_80k_isaid_20220118_001603-3cc0769b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_4x4_896x896_80k_isaid/fcn_hr18s_4x4_896x896_80k_isaid_20220118_001603.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: 
https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr18_4xb4-80k_isaid-896x896 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: iSAID + Metrics: + mIoU: 65.06 + mIoU(ms+flip): 65.6 + Config: configs/hrnet/fcn_hr18_4xb4-80k_isaid-896x896.py + Metadata: + Training Data: iSAID + Batch Size: 16 + Architecture: + - HRNetV2p-W18 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 8.3 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_4x4_896x896_80k_isaid/fcn_hr18_4x4_896x896_80k_isaid_20220110_182230-49bf752e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_4x4_896x896_80k_isaid/fcn_hr18_4x4_896x896_80k_isaid_20220110_182230.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch +- Name: fcn_hr48_4xb4-80k_isaid-896x896 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: iSAID + Metrics: + mIoU: 67.8 + mIoU(ms+flip): 68.53 + Config: configs/hrnet/fcn_hr48_4xb4-80k_isaid-896x896.py + Metadata: + Training Data: iSAID + Batch Size: 16 + Architecture: + - HRNetV2p-W48 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 16.89 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_4x4_896x896_80k_isaid/fcn_hr48_4x4_896x896_80k_isaid_20220114_174643-547fc420.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_4x4_896x896_80k_isaid/fcn_hr48_4x4_896x896_80k_isaid_20220114_174643.log.json + Paper: + Title: Deep High-Resolution Representation Learning for Human Pose Estimation + URL: https://arxiv.org/abs/1908.07919 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/hrnet.py#L218 + Framework: PyTorch diff --git a/configs/icnet/README.md b/configs/icnet/README.md index 134f53b29f..fa2327fc39 100644 --- a/configs/icnet/README.md +++ b/configs/icnet/README.md @@ -1,6 +1,6 @@ # ICNet -[ICNet for Real-time Semantic Segmentation on High-resolution Images](https://arxiv.org/abs/1704.08545) +> [ICNet for Real-time Semantic Segmentation on High-resolution Images](https://arxiv.org/abs/1704.08545) ## Introduction @@ -22,6 +22,27 @@ We focus on the challenging task of real-time semantic segmentation in this pape +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| ICNet | R-18-D8 | 832x832 | 80000 | 1.70 | 27.12 | V100 | 68.14 | 70.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/icnet/icnet_r18-d8_4xb2-80k_cityscapes-832x832.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_80k_cityscapes/icnet_r18-d8_832x832_80k_cityscapes_20210925_225521-2e36638d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_80k_cityscapes/icnet_r18-d8_832x832_80k_cityscapes_20210925_225521.log.json) | +| ICNet | R-18-D8 | 832x832 | 160000 | - | - | V100 | 71.64 | 74.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/icnet/icnet_r18-d8_4xb2-160k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_160k_cityscapes/icnet_r18-d8_832x832_160k_cityscapes_20210925_230153-2c6eb6e0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_160k_cityscapes/icnet_r18-d8_832x832_160k_cityscapes_20210925_230153.log.json) | +| ICNet (in1k-pre) | R-18-D8 | 832x832 | 80000 | - | - | V100 | 72.51 | 74.78 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/icnet/icnet_r18-d8-in1k-pre_4xb2-80k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes_20210925_230354-1cbe3022.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes_20210925_230354.log.json) | +| ICNet (in1k-pre) | R-18-D8 | 832x832 | 160000 | - | - | V100 | 74.43 | 76.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/icnet/icnet_r18-d8-in1k-pre_4xb2-160k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes_20210926_052702-619c8ae1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes_20210926_052702.log.json) | +| ICNet | R-50-D8 | 832x832 | 80000 | 2.53 | 20.08 | V100 | 68.91 | 69.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/icnet/icnet_r50-d8_4xb2-80k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_80k_cityscapes/icnet_r50-d8_832x832_80k_cityscapes_20210926_044625-c6407341.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_80k_cityscapes/icnet_r50-d8_832x832_80k_cityscapes_20210926_044625.log.json) | +| ICNet | R-50-D8 | 832x832 | 160000 | - | - | V100 | 73.82 | 75.67 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/icnet/icnet_r50-d8_4xb2-160k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_160k_cityscapes/icnet_r50-d8_832x832_160k_cityscapes_20210925_232612-a95f0d4e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_160k_cityscapes/icnet_r50-d8_832x832_160k_cityscapes_20210925_232612.log.json) | +| ICNet (in1k-pre) | R-50-D8 | 832x832 | 80000 | - | - | V100 | 74.58 | 76.41 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/icnet/icnet_r50-d8-in1k-pre_4xb2-80k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes_20210926_032943-1743dc7b.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes_20210926_032943.log.json) | +| ICNet (in1k-pre) | R-50-D8 | 832x832 | 160000 | - | - | V100 | 76.29 | 78.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/icnet/icnet_r50-d8-in1k-pre_4xb2-160k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes_20210926_042715-ce310aea.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes_20210926_042715.log.json) | +| ICNet | R-101-D8 | 832x832 | 80000 | 3.08 | 16.95 | V100 | 70.28 | 71.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/icnet/icnet_r101-d8_4xb2-80k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_80k_cityscapes/icnet_r101-d8_832x832_80k_cityscapes_20210926_072447-b52f936e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_80k_cityscapes/icnet_r101-d8_832x832_80k_cityscapes_20210926_072447.log.json) | +| ICNet | R-101-D8 | 832x832 | 160000 | - | - | V100 | 73.80 | 76.10 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/icnet/icnet_r101-d8_4xb2-160k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_160k_cityscapes/icnet_r101-d8_832x832_160k_cityscapes_20210926_092350-3a1ebf1a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_160k_cityscapes/icnet_r101-d8_832x832_160k_cityscapes_20210926_092350.log.json) | +| ICNet (in1k-pre) | R-101-D8 | 832x832 | 80000 | - | - | V100 | 75.57 | 77.86 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/icnet/icnet_r101-d8-in1k-pre_4xb2-80k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes_20210926_020414-7ceb12c5.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes_20210926_020414.log.json) | +| ICNet (in1k-pre) | R-101-D8 | 832x832 | 160000 | - | - | V100 | 76.15 | 77.98 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/icnet/icnet_r101-d8-in1k-pre_4xb2-160k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes_20210925_232612-9484ae8a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes_20210925_232612.log.json) | + +Note: `in1k-pre` means an ImageNet-1k pre-trained model is used.
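The `metafile.yaml` files added in this change (e.g. `configs/hrnet/metafile.yaml` above) use a flat schema in which each entry's `Results` is a single mapping rather than a list, so they can be consumed directly with any YAML parser. A minimal sketch of reading one of them, assuming PyYAML is installed and the script runs from the repository root; the file path and the selection logic are illustrative only and not part of this change:

```python
# Sketch: read a new-format metafile and report the checkpoint with the
# best single-scale mIoU. Field names ('Models', 'Results', 'Metrics',
# 'Weights') mirror the metafile content shown above.
import yaml

with open('configs/hrnet/metafile.yaml', encoding='utf-8') as f:
    meta = yaml.safe_load(f)

for model in meta['Models']:
    # 'Results' is a mapping in the new schema, not a list as in the old .yml
    metrics = model['Results']['Metrics']
    print(f"{model['Name']}: mIoU={metrics['mIoU']}")

best = max(meta['Models'], key=lambda m: m['Results']['Metrics']['mIoU'])
print('best checkpoint:', best['Weights'])
```

The same shape is shared by every metafile in this change, which is what lets downstream tooling such as MIM index models across configs without per-collection special cases.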
+ ## Citation ```bibtext @@ -33,24 +54,3 @@ We focus on the challenging task of real-time semantic segmentation in this pape year={2018} } ``` - -## Results and models - -### Cityscapes - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ---------------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| ICNet | R-18-D8 | 832x832 | 80000 | 1.70 | 27.12 | 68.14 | 70.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/icnet/icnet_r18-d8_4xb2-80k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_80k_cityscapes/icnet_r18-d8_832x832_80k_cityscapes_20210925_225521-2e36638d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_80k_cityscapes/icnet_r18-d8_832x832_80k_cityscapes_20210925_225521.log.json) | -| ICNet | R-18-D8 | 832x832 | 160000 | - | - | 71.64 | 74.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/icnet/icnet_r18-d8_4xb2-160k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_160k_cityscapes/icnet_r18-d8_832x832_160k_cityscapes_20210925_230153-2c6eb6e0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_160k_cityscapes/icnet_r18-d8_832x832_160k_cityscapes_20210925_230153.log.json) | -| ICNet (in1k-pre) | R-18-D8 | 832x832 | 80000 | - | - | 72.51 | 74.78 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/icnet/icnet_r18-d8-in1k-pre_4xb2-80k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes_20210925_230354-1cbe3022.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes_20210925_230354.log.json) | -| ICNet (in1k-pre) | R-18-D8 | 832x832 | 160000 | - | - | 74.43 | 76.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/icnet/icnet_r18-d8-in1k-pre_4xb2-160k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes_20210926_052702-619c8ae1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes_20210926_052702.log.json) | -| ICNet | R-50-D8 | 832x832 | 80000 | 2.53 | 20.08 | 68.91 | 69.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/icnet/icnet_r50-d8_4xb2-80k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_80k_cityscapes/icnet_r50-d8_832x832_80k_cityscapes_20210926_044625-c6407341.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_80k_cityscapes/icnet_r50-d8_832x832_80k_cityscapes_20210926_044625.log.json) | -| ICNet | R-50-D8 | 832x832 | 160000 | - | - | 73.82 | 75.67 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/icnet/icnet_r50-d8_4xb2-160k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_160k_cityscapes/icnet_r50-d8_832x832_160k_cityscapes_20210925_232612-a95f0d4e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_160k_cityscapes/icnet_r50-d8_832x832_160k_cityscapes_20210925_232612.log.json) | -| ICNet (in1k-pre) | R-50-D8 | 832x832 | 80000 | - | - | 74.58 | 76.41 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/icnet/icnet_r50-d8-in1k-pre_4xb2-80k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes_20210926_032943-1743dc7b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes_20210926_032943.log.json) | -| ICNet (in1k-pre) | R-50-D8 | 832x832 | 160000 | - | - | 76.29 | 78.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/icnet/icnet_r50-d8-in1k-pre_4xb2-160k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes_20210926_042715-ce310aea.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes_20210926_042715.log.json) | -| ICNet | R-101-D8 | 832x832 | 80000 | 3.08 | 16.95 | 70.28 | 71.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/icnet/icnet_r101-d8_4xb2-80k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_80k_cityscapes/icnet_r101-d8_832x832_80k_cityscapes_20210926_072447-b52f936e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_80k_cityscapes/icnet_r101-d8_832x832_80k_cityscapes_20210926_072447.log.json) | -| ICNet | R-101-D8 | 832x832 | 160000 | - | - | 73.80 | 76.10 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/icnet/icnet_r101-d8_4xb2-160k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_160k_cityscapes/icnet_r101-d8_832x832_160k_cityscapes_20210926_092350-3a1ebf1a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_160k_cityscapes/icnet_r101-d8_832x832_160k_cityscapes_20210926_092350.log.json) | -| ICNet (in1k-pre) | R-101-D8 | 832x832 | 80000 | - | - | 75.57 | 77.86 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/icnet/icnet_r101-d8-in1k-pre_4xb2-80k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes_20210926_020414-7ceb12c5.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes_20210926_020414.log.json) | -| ICNet (in1k-pre) | R-101-D8 | 
832x832 | 160000 | - | - | 76.15 | 77.98 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/icnet/icnet_r101-d8-in1k-pre_4xb2-160k_cityscapes-832x832.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes_20210925_232612-9484ae8a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes_20210925_232612.log.json) | - -Note: `in1k-pre` means pretrained model is used. diff --git a/configs/icnet/icnet.yml b/configs/icnet/icnet.yml deleted file mode 100644 index 5ded544726..0000000000 --- a/configs/icnet/icnet.yml +++ /dev/null @@ -1,207 +0,0 @@ -Collections: -- Name: ICNet - Metadata: - Training Data: - - Cityscapes - Paper: - URL: https://arxiv.org/abs/1704.08545 - Title: ICNet for Real-time Semantic Segmentation on High-resolution Images - README: configs/icnet/README.md - Code: - URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77 - Version: v0.18.0 - Converted From: - Code: https://github.com/hszhao/ICNet -Models: -- Name: icnet_r18-d8_4xb2-80k_cityscapes-832x832 - In Collection: ICNet - Metadata: - backbone: R-18-D8 - crop size: (832,832) - lr schd: 80000 - inference time (ms/im): - - value: 36.87 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (832,832) - Training Memory (GB): 1.7 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 68.14 - mIoU(ms+flip): 70.16 - Config: configs/icnet/icnet_r18-d8_4xb2-80k_cityscapes-832x832.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_80k_cityscapes/icnet_r18-d8_832x832_80k_cityscapes_20210925_225521-2e36638d.pth -- Name: icnet_r18-d8_4xb2-160k_cityscapes-832x832 - In Collection: ICNet - Metadata: - backbone: R-18-D8 - crop size: (832,832) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 71.64 - mIoU(ms+flip): 74.18 - Config: configs/icnet/icnet_r18-d8_4xb2-160k_cityscapes-832x832.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_160k_cityscapes/icnet_r18-d8_832x832_160k_cityscapes_20210925_230153-2c6eb6e0.pth -- Name: icnet_r18-d8-in1k-pre_4xb2-80k_cityscapes-832x832 - In Collection: ICNet - Metadata: - backbone: R-18-D8 - crop size: (832,832) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 72.51 - mIoU(ms+flip): 74.78 - Config: configs/icnet/icnet_r18-d8-in1k-pre_4xb2-80k_cityscapes-832x832.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes_20210925_230354-1cbe3022.pth -- Name: icnet_r18-d8-in1k-pre_4xb2-160k_cityscapes-832x832 - In Collection: ICNet - Metadata: - backbone: R-18-D8 - crop size: (832,832) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 74.43 - mIoU(ms+flip): 76.72 - Config: configs/icnet/icnet_r18-d8-in1k-pre_4xb2-160k_cityscapes-832x832.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes_20210926_052702-619c8ae1.pth -- Name: icnet_r50-d8_4xb2-80k_cityscapes-832x832 - In Collection: ICNet - Metadata: - backbone: R-50-D8 - crop 
size: (832,832) - lr schd: 80000 - inference time (ms/im): - - value: 49.8 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (832,832) - Training Memory (GB): 2.53 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 68.91 - mIoU(ms+flip): 69.72 - Config: configs/icnet/icnet_r50-d8_4xb2-80k_cityscapes-832x832.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_80k_cityscapes/icnet_r50-d8_832x832_80k_cityscapes_20210926_044625-c6407341.pth -- Name: icnet_r50-d8_4xb2-160k_cityscapes-832x832 - In Collection: ICNet - Metadata: - backbone: R-50-D8 - crop size: (832,832) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 73.82 - mIoU(ms+flip): 75.67 - Config: configs/icnet/icnet_r50-d8_4xb2-160k_cityscapes-832x832.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_160k_cityscapes/icnet_r50-d8_832x832_160k_cityscapes_20210925_232612-a95f0d4e.pth -- Name: icnet_r50-d8-in1k-pre_4xb2-80k_cityscapes-832x832 - In Collection: ICNet - Metadata: - backbone: R-50-D8 - crop size: (832,832) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 74.58 - mIoU(ms+flip): 76.41 - Config: configs/icnet/icnet_r50-d8-in1k-pre_4xb2-80k_cityscapes-832x832.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes_20210926_032943-1743dc7b.pth -- Name: icnet_r50-d8-in1k-pre_4xb2-160k_cityscapes-832x832 - In Collection: ICNet - Metadata: - backbone: R-50-D8 - crop size: (832,832) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 76.29 - mIoU(ms+flip): 78.09 - Config: configs/icnet/icnet_r50-d8-in1k-pre_4xb2-160k_cityscapes-832x832.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes_20210926_042715-ce310aea.pth -- Name: icnet_r101-d8_4xb2-80k_cityscapes-832x832 - In Collection: ICNet - Metadata: - backbone: R-101-D8 - crop size: (832,832) - lr schd: 80000 - inference time (ms/im): - - value: 59.0 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (832,832) - Training Memory (GB): 3.08 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 70.28 - mIoU(ms+flip): 71.95 - Config: configs/icnet/icnet_r101-d8_4xb2-80k_cityscapes-832x832.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_80k_cityscapes/icnet_r101-d8_832x832_80k_cityscapes_20210926_072447-b52f936e.pth -- Name: icnet_r101-d8_4xb2-160k_cityscapes-832x832 - In Collection: ICNet - Metadata: - backbone: R-101-D8 - crop size: (832,832) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 73.8 - mIoU(ms+flip): 76.1 - Config: configs/icnet/icnet_r101-d8_4xb2-160k_cityscapes-832x832.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_160k_cityscapes/icnet_r101-d8_832x832_160k_cityscapes_20210926_092350-3a1ebf1a.pth -- Name: icnet_r101-d8-in1k-pre_4xb2-80k_cityscapes-832x832 - In Collection: ICNet - Metadata: - backbone: R-101-D8 - crop size: (832,832) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 75.57 - mIoU(ms+flip): 77.86 - Config: 
configs/icnet/icnet_r101-d8-in1k-pre_4xb2-80k_cityscapes-832x832.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes_20210926_020414-7ceb12c5.pth -- Name: icnet_r101-d8-in1k-pre_4xb2-160k_cityscapes-832x832 - In Collection: ICNet - Metadata: - backbone: R-101-D8 - crop size: (832,832) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 76.15 - mIoU(ms+flip): 77.98 - Config: configs/icnet/icnet_r101-d8-in1k-pre_4xb2-160k_cityscapes-832x832.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes_20210925_232612-9484ae8a.pth diff --git a/configs/icnet/metafile.yaml b/configs/icnet/metafile.yaml new file mode 100644 index 0000000000..1d843ee4b6 --- /dev/null +++ b/configs/icnet/metafile.yaml @@ -0,0 +1,298 @@ +Collections: +- Name: ICNet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + Paper: + Title: ICNet for Real-time Semantic Segmentation on High-resolution Images + URL: https://arxiv.org/abs/1704.08545 + README: configs/icnet/README.md + Frameworks: + - PyTorch +Models: +- Name: icnet_r18-d8_4xb2-80k_cityscapes-832x832 + In Collection: ICNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 68.14 + mIoU(ms+flip): 70.16 + Config: configs/icnet/icnet_r18-d8_4xb2-80k_cityscapes-832x832.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18-D8 + - ICNet + Training Resources: 4x V100 GPUS + Memory (GB): 1.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_80k_cityscapes/icnet_r18-d8_832x832_80k_cityscapes_20210925_225521-2e36638d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_80k_cityscapes/icnet_r18-d8_832x832_80k_cityscapes_20210925_225521.log.json + Paper: + Title: ICNet for Real-time Semantic Segmentation on High-resolution Images + URL: https://arxiv.org/abs/1704.08545 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77 + Framework: PyTorch +- Name: icnet_r18-d8_4xb2-160k_cityscapes-832x832 + In Collection: ICNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 71.64 + mIoU(ms+flip): 74.18 + Config: configs/icnet/icnet_r18-d8_4xb2-160k_cityscapes-832x832.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18-D8 + - ICNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_160k_cityscapes/icnet_r18-d8_832x832_160k_cityscapes_20210925_230153-2c6eb6e0.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_160k_cityscapes/icnet_r18-d8_832x832_160k_cityscapes_20210925_230153.log.json + Paper: + Title: ICNet for Real-time Semantic Segmentation on High-resolution Images + URL: https://arxiv.org/abs/1704.08545 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77 + Framework: PyTorch +- Name: icnet_r18-d8-in1k-pre_4xb2-80k_cityscapes-832x832 + In Collection: ICNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 72.51 + mIoU(ms+flip): 74.78 + Config: configs/icnet/icnet_r18-d8-in1k-pre_4xb2-80k_cityscapes-832x832.py + Metadata: + Training Data: Cityscapes + 
Batch Size: 8 + Architecture: + - R-18-D8 + - ICNet + - (in1k-pre) + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes_20210925_230354-1cbe3022.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes_20210925_230354.log.json + Paper: + Title: ICNet for Real-time Semantic Segmentation on High-resolution Images + URL: https://arxiv.org/abs/1704.08545 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77 + Framework: PyTorch +- Name: icnet_r18-d8-in1k-pre_4xb2-160k_cityscapes-832x832 + In Collection: ICNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.43 + mIoU(ms+flip): 76.72 + Config: configs/icnet/icnet_r18-d8-in1k-pre_4xb2-160k_cityscapes-832x832.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18-D8 + - ICNet + - (in1k-pre) + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes_20210926_052702-619c8ae1.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes_20210926_052702.log.json + Paper: + Title: ICNet for Real-time Semantic Segmentation on High-resolution Images + URL: https://arxiv.org/abs/1704.08545 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77 + Framework: PyTorch +- Name: icnet_r50-d8_4xb2-80k_cityscapes-832x832 + In Collection: ICNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 68.91 + mIoU(ms+flip): 69.72 + Config: configs/icnet/icnet_r50-d8_4xb2-80k_cityscapes-832x832.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - ICNet + Training Resources: 4x V100 GPUS + Memory (GB): 2.53 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_80k_cityscapes/icnet_r50-d8_832x832_80k_cityscapes_20210926_044625-c6407341.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_80k_cityscapes/icnet_r50-d8_832x832_80k_cityscapes_20210926_044625.log.json + Paper: + Title: ICNet for Real-time Semantic Segmentation on High-resolution Images + URL: https://arxiv.org/abs/1704.08545 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77 + Framework: PyTorch +- Name: icnet_r50-d8_4xb2-160k_cityscapes-832x832 + In Collection: ICNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.82 + mIoU(ms+flip): 75.67 + Config: configs/icnet/icnet_r50-d8_4xb2-160k_cityscapes-832x832.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - ICNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_160k_cityscapes/icnet_r50-d8_832x832_160k_cityscapes_20210925_232612-a95f0d4e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_160k_cityscapes/icnet_r50-d8_832x832_160k_cityscapes_20210925_232612.log.json + Paper: + Title: ICNet for Real-time Semantic Segmentation on 
High-resolution Images + URL: https://arxiv.org/abs/1704.08545 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77 + Framework: PyTorch +- Name: icnet_r50-d8-in1k-pre_4xb2-80k_cityscapes-832x832 + In Collection: ICNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.58 + mIoU(ms+flip): 76.41 + Config: configs/icnet/icnet_r50-d8-in1k-pre_4xb2-80k_cityscapes-832x832.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - ICNet + - (in1k-pre) + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes_20210926_032943-1743dc7b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes_20210926_032943.log.json + Paper: + Title: ICNet for Real-time Semantic Segmentation on High-resolution Images + URL: https://arxiv.org/abs/1704.08545 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77 + Framework: PyTorch +- Name: icnet_r50-d8-in1k-pre_4xb2-160k_cityscapes-832x832 + In Collection: ICNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.29 + mIoU(ms+flip): 78.09 + Config: configs/icnet/icnet_r50-d8-in1k-pre_4xb2-160k_cityscapes-832x832.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - ICNet + - (in1k-pre) + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes_20210926_042715-ce310aea.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes_20210926_042715.log.json + Paper: + Title: ICNet for Real-time Semantic Segmentation on High-resolution Images + URL: https://arxiv.org/abs/1704.08545 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77 + Framework: PyTorch +- Name: icnet_r101-d8_4xb2-80k_cityscapes-832x832 + In Collection: ICNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 70.28 + mIoU(ms+flip): 71.95 + Config: configs/icnet/icnet_r101-d8_4xb2-80k_cityscapes-832x832.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - ICNet + Training Resources: 4x V100 GPUS + Memory (GB): 3.08 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_80k_cityscapes/icnet_r101-d8_832x832_80k_cityscapes_20210926_072447-b52f936e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_80k_cityscapes/icnet_r101-d8_832x832_80k_cityscapes_20210926_072447.log.json + Paper: + Title: ICNet for Real-time Semantic Segmentation on High-resolution Images + URL: https://arxiv.org/abs/1704.08545 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77 + Framework: PyTorch +- Name: icnet_r101-d8_4xb2-160k_cityscapes-832x832 + In Collection: ICNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.8 + mIoU(ms+flip): 76.1 + Config: configs/icnet/icnet_r101-d8_4xb2-160k_cityscapes-832x832.py + Metadata: + Training Data: Cityscapes + 
Batch Size: 8 + Architecture: + - R-101-D8 + - ICNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_160k_cityscapes/icnet_r101-d8_832x832_160k_cityscapes_20210926_092350-3a1ebf1a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_160k_cityscapes/icnet_r101-d8_832x832_160k_cityscapes_20210926_092350.log.json + Paper: + Title: ICNet for Real-time Semantic Segmentation on High-resolution Images + URL: https://arxiv.org/abs/1704.08545 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77 + Framework: PyTorch +- Name: icnet_r101-d8-in1k-pre_4xb2-80k_cityscapes-832x832 + In Collection: ICNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.57 + mIoU(ms+flip): 77.86 + Config: configs/icnet/icnet_r101-d8-in1k-pre_4xb2-80k_cityscapes-832x832.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - ICNet + - (in1k-pre) + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes_20210926_020414-7ceb12c5.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes_20210926_020414.log.json + Paper: + Title: ICNet for Real-time Semantic Segmentation on High-resolution Images + URL: https://arxiv.org/abs/1704.08545 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77 + Framework: PyTorch +- Name: icnet_r101-d8-in1k-pre_4xb2-160k_cityscapes-832x832 + In Collection: ICNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.15 + mIoU(ms+flip): 77.98 + Config: configs/icnet/icnet_r101-d8-in1k-pre_4xb2-160k_cityscapes-832x832.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - ICNet + - (in1k-pre) + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes_20210925_232612-9484ae8a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes_20210925_232612.log.json + Paper: + Title: ICNet for Real-time Semantic Segmentation on High-resolution Images + URL: https://arxiv.org/abs/1704.08545 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77 + Framework: PyTorch diff --git a/configs/isanet/README.md b/configs/isanet/README.md index db93dae234..c11744ffef 100644 --- a/configs/isanet/README.md +++ b/configs/isanet/README.md @@ -1,6 +1,6 @@ # ISANet -[Interlaced Sparse Self-Attention for Semantic Segmentation](https://arxiv.org/abs/1907.12273) +> [Interlaced Sparse Self-Attention for Semantic Segmentation](https://arxiv.org/abs/1907.12273) ## Introduction @@ -22,6 +22,39 @@ In this paper, we present a so-called interlaced sparse self-attention approach +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------- | -------: | -------------- | ------ | ----- | ------------: | 
-----------------------------------------------------------------------------------------------------------------------------: | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| ISANet | R-50-D8 | 512x1024 | 40000 | 5.869 | 2.91 | V100 | 78.49 | 79.44 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/isanet/isanet_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_40k_cityscapes/isanet_r50-d8_512x1024_40k_cityscapes_20210901_054739-981bd763.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_40k_cityscapes/isanet_r50-d8_512x1024_40k_cityscapes_20210901_054739.log.json) | +| ISANet | R-50-D8 | 512x1024 | 80000 | 5.869 | 2.91 | V100 | 78.68 | 80.25 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/isanet/isanet_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_80k_cityscapes/isanet_r50-d8_512x1024_80k_cityscapes_20210901_074202-89384497.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_80k_cityscapes/isanet_r50-d8_512x1024_80k_cityscapes_20210901_074202.log.json) | +| ISANet | R-50-D8 | 769x769 | 40000 | 6.759 | 1.54 | V100 | 78.70 | 80.28 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/isanet/isanet_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_40k_cityscapes/isanet_r50-d8_769x769_40k_cityscapes_20210903_050200-4ae7e65b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_40k_cityscapes/isanet_r50-d8_769x769_40k_cityscapes_20210903_050200.log.json) | +| ISANet | R-50-D8 | 769x769 | 80000 | 6.759 | 1.54 | V100 | 79.29 | 80.53 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/isanet/isanet_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_80k_cityscapes/isanet_r50-d8_769x769_80k_cityscapes_20210903_101126-99b54519.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_80k_cityscapes/isanet_r50-d8_769x769_80k_cityscapes_20210903_101126.log.json) | +| ISANet | R-101-D8 | 512x1024 | 40000 | 9.425 | 2.35 | V100 | 79.58 | 81.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/isanet/isanet_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_40k_cityscapes/isanet_r101-d8_512x1024_40k_cityscapes_20210901_145553-293e6bd6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_40k_cityscapes/isanet_r101-d8_512x1024_40k_cityscapes_20210901_145553.log.json) | +| ISANet | R-101-D8 | 512x1024 | 80000 | 9.425 | 2.35 | V100 | 80.32 | 81.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/isanet/isanet_r101-d8_4xb2-80k_cityscapes-512x1024.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_80k_cityscapes/isanet_r101-d8_512x1024_80k_cityscapes_20210901_145243-5b99c9b2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_80k_cityscapes/isanet_r101-d8_512x1024_80k_cityscapes_20210901_145243.log.json) | +| ISANet | R-101-D8 | 769x769 | 40000 | 10.815 | 0.92 | V100 | 79.68 | 80.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/isanet/isanet_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_40k_cityscapes/isanet_r101-d8_769x769_40k_cityscapes_20210903_111320-509e7224.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_40k_cityscapes/isanet_r101-d8_769x769_40k_cityscapes_20210903_111320.log.json) | +| ISANet | R-101-D8 | 769x769 | 80000 | 10.815 | 0.92 | V100 | 80.61 | 81.59 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/isanet/isanet_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_80k_cityscapes/isanet_r101-d8_769x769_80k_cityscapes_20210903_111319-24f71dfa.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_80k_cityscapes/isanet_r101-d8_769x769_80k_cityscapes_20210903_111319.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------- | -------: | -------------- | ------ | ----- | ------------: | -------------------------------------------------------------------------------------------------------------------------: | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| ISANet | R-50-D8 | 512x512 | 80000 | 9.0 | 22.55 | V100 | 41.12 | 42.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/isanet/isanet_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_80k_ade20k/isanet_r50-d8_512x512_80k_ade20k_20210903_124557-6ed83a0c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_80k_ade20k/isanet_r50-d8_512x512_80k_ade20k_20210903_124557.log.json) | +| ISANet | R-50-D8 | 512x512 | 160000 | 9.0 | 22.55 | V100 | 42.59 | 43.07 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/isanet/isanet_r50-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_160k_ade20k/isanet_r50-d8_512x512_160k_ade20k_20210903_104850-f752d0a3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_160k_ade20k/isanet_r50-d8_512x512_160k_ade20k_20210903_104850.log.json) | +| ISANet | R-101-D8 | 512x512 | 80000 | 12.562 | 10.56 | V100 | 43.51 | 44.38 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/isanet/isanet_r101-d8_4xb4-80k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_80k_ade20k/isanet_r101-d8_512x512_80k_ade20k_20210903_162056-68b235c2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_80k_ade20k/isanet_r101-d8_512x512_80k_ade20k_20210903_162056.log.json) | +| ISANet | R-101-D8 | 512x512 | 160000 | 12.562 | 10.56 | V100 | 43.80 | 45.4 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/isanet/isanet_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_160k_ade20k/isanet_r101-d8_512x512_160k_ade20k_20210903_211431-a7879dcd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_160k_ade20k/isanet_r101-d8_512x512_160k_ade20k_20210903_211431.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------- | -------: | -------------- | ------ | ----- | ------------: | --------------------------------------------------------------------------------------------------------------------------: | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| ISANet | R-50-D8 | 512x512 | 20000 | 5.9 | 23.08 | V100 | 76.78 | 77.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/isanet/isanet_r50-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_20k_voc12aug/isanet_r50-d8_512x512_20k_voc12aug_20210901_164838-79d59b80.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_20k_voc12aug/isanet_r50-d8_512x512_20k_voc12aug_20210901_164838.log.json) | +| ISANet | R-50-D8 | 512x512 | 40000 | 5.9 | 23.08 | V100 | 76.20 | 77.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/isanet/isanet_r50-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_40k_voc12aug/isanet_r50-d8_512x512_40k_voc12aug_20210901_151349-7d08a54e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_40k_voc12aug/isanet_r50-d8_512x512_40k_voc12aug_20210901_151349.log.json) | +| ISANet | R-101-D8 | 512x512 | 20000 | 9.465 | 7.42 | V100 | 78.46 | 79.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/isanet/isanet_r101-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_20k_voc12aug/isanet_r101-d8_512x512_20k_voc12aug_20210901_115805-3ccbf355.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_20k_voc12aug/isanet_r101-d8_512x512_20k_voc12aug_20210901_115805.log.json) | +| ISANet | R-101-D8 | 512x512 | 40000 | 9.465 | 7.42 | V100 | 78.12 | 79.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/isanet/isanet_r101-d8_4xb4-40k_voc12aug-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_40k_voc12aug/isanet_r101-d8_512x512_40k_voc12aug_20210901_145814-bc71233b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_40k_voc12aug/isanet_r101-d8_512x512_40k_voc12aug_20210901_145814.log.json) | + ## Citation ```bibtex @@ -45,36 +78,3 @@ The technical report above is also presented at: publisher={Springer} } ``` - -## Results and models - -### Cityscapes - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------- | -------: | -------------- | ----- | ------------: | --------------------------------------------------------------------------------------------------------------------------------: | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| ISANet | R-50-D8 | 512x1024 | 40000 | 5.869 | 2.91 | 78.49 | 79.44 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/isanet/isanet_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_40k_cityscapes/isanet_r50-d8_512x1024_40k_cityscapes_20210901_054739-981bd763.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_40k_cityscapes/isanet_r50-d8_512x1024_40k_cityscapes_20210901_054739.log.json) | -| ISANet | R-50-D8 | 512x1024 | 80000 | 5.869 | 2.91 | 78.68 | 80.25 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/isanet/isanet_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_80k_cityscapes/isanet_r50-d8_512x1024_80k_cityscapes_20210901_074202-89384497.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_80k_cityscapes/isanet_r50-d8_512x1024_80k_cityscapes_20210901_074202.log.json) | -| ISANet | R-50-D8 | 769x769 | 40000 | 6.759 | 1.54 | 78.70 | 80.28 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/isanet/isanet_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_40k_cityscapes/isanet_r50-d8_769x769_40k_cityscapes_20210903_050200-4ae7e65b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_40k_cityscapes/isanet_r50-d8_769x769_40k_cityscapes_20210903_050200.log.json) | -| ISANet | R-50-D8 | 769x769 | 80000 | 6.759 | 1.54 | 79.29 | 80.53 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/isanet/isanet_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_80k_cityscapes/isanet_r50-d8_769x769_80k_cityscapes_20210903_101126-99b54519.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_80k_cityscapes/isanet_r50-d8_769x769_80k_cityscapes_20210903_101126.log.json) | -| ISANet | R-101-D8 | 512x1024 | 40000 | 9.425 | 2.35 | 79.58 | 81.05 |
[config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/isanet/isanet_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_40k_cityscapes/isanet_r101-d8_512x1024_40k_cityscapes_20210901_145553-293e6bd6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_40k_cityscapes/isanet_r101-d8_512x1024_40k_cityscapes_20210901_145553.log.json) | -| ISANet | R-101-D8 | 512x1024 | 80000 | 9.425 | 2.35 | 80.32 | 81.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/isanet/isanet_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_80k_cityscapes/isanet_r101-d8_512x1024_80k_cityscapes_20210901_145243-5b99c9b2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_80k_cityscapes/isanet_r101-d8_512x1024_80k_cityscapes_20210901_145243.log.json) | -| ISANet | R-101-D8 | 769x769 | 40000 | 10.815 | 0.92 | 79.68 | 80.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/isanet/isanet_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_40k_cityscapes/isanet_r101-d8_769x769_40k_cityscapes_20210903_111320-509e7224.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_40k_cityscapes/isanet_r101-d8_769x769_40k_cityscapes_20210903_111320.log.json) | -| ISANet | R-101-D8 | 769x769 | 80000 | 10.815 | 0.92 | 80.61 | 81.59 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/isanet/isanet_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_80k_cityscapes/isanet_r101-d8_769x769_80k_cityscapes_20210903_111319-24f71dfa.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_80k_cityscapes/isanet_r101-d8_769x769_80k_cityscapes_20210903_111319.log.json) | - -### ADE20K - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------- | -------: | -------------- | ----- | ------------: | ----------------------------------------------------------------------------------------------------------------------------: | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| ISANet | R-50-D8 | 512x512 | 80000 | 9.0 | 22.55 | 41.12 | 42.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/isanet/isanet_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_80k_ade20k/isanet_r50-d8_512x512_80k_ade20k_20210903_124557-6ed83a0c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_80k_ade20k/isanet_r50-d8_512x512_80k_ade20k_20210903_124557.log.json) | -| ISANet | R-50-D8 | 512x512 | 160000 | 9.0 | 22.55 | 42.59 | 43.07 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/isanet/isanet_r50-d8_4xb4-160k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_160k_ade20k/isanet_r50-d8_512x512_160k_ade20k_20210903_104850-f752d0a3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_160k_ade20k/isanet_r50-d8_512x512_160k_ade20k_20210903_104850.log.json) | -| ISANet | R-101-D8 | 512x512 | 80000 | 12.562 | 10.56 | 43.51 | 44.38 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/isanet/isanet_r101-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_80k_ade20k/isanet_r101-d8_512x512_80k_ade20k_20210903_162056-68b235c2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_80k_ade20k/isanet_r101-d8_512x512_80k_ade20k_20210903_162056.log.json) | -| ISANet | R-101-D8 | 512x512 | 160000 | 12.562 | 10.56 | 43.80 | 45.4 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/isanet/isanet_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_160k_ade20k/isanet_r101-d8_512x512_160k_ade20k_20210903_211431-a7879dcd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_160k_ade20k/isanet_r101-d8_512x512_160k_ade20k_20210903_211431.log.json) | - -### Pascal VOC 2012 + Aug - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------- | -------: | -------------- | ----- | ------------: | -----------------------------------------------------------------------------------------------------------------------------: | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| ISANet | R-50-D8 | 512x512 | 20000 | 5.9 | 23.08 | 76.78 | 77.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/isanet/isanet_r50-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_20k_voc12aug/isanet_r50-d8_512x512_20k_voc12aug_20210901_164838-79d59b80.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_20k_voc12aug/isanet_r50-d8_512x512_20k_voc12aug_20210901_164838.log.json) | -| ISANet | R-50-D8 | 512x512 | 40000 | 5.9 | 23.08 | 76.20 | 77.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/isanet/isanet_r50-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_40k_voc12aug/isanet_r50-d8_512x512_40k_voc12aug_20210901_151349-7d08a54e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_40k_voc12aug/isanet_r50-d8_512x512_40k_voc12aug_20210901_151349.log.json) | -| ISANet | R-101-D8 | 512x512 | 20000 | 9.465 | 7.42 | 78.46 | 79.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/isanet/isanet_r101-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_20k_voc12aug/isanet_r101-d8_512x512_20k_voc12aug_20210901_115805-3ccbf355.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_20k_voc12aug/isanet_r101-d8_512x512_20k_voc12aug_20210901_115805.log.json) | -| ISANet | R-101-D8 | 512x512 | 40000 | 9.465 | 7.42 | 78.12 | 79.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/isanet/isanet_r101-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_40k_voc12aug/isanet_r101-d8_512x512_40k_voc12aug_20210901_145814-bc71233b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_40k_voc12aug/isanet_r101-d8_512x512_40k_voc12aug_20210901_145814.log.json) | diff --git a/configs/isanet/isanet.yml b/configs/isanet/isanet.yml deleted file mode 100644 index 405b3c1231..0000000000 --- a/configs/isanet/isanet.yml +++ /dev/null @@ -1,369 +0,0 @@ -Collections: -- Name: ISANet - Metadata: - Training Data: - - Cityscapes - - ADE20K - - Pascal VOC 2012 + Aug - Paper: - URL: https://arxiv.org/abs/1907.12273 - Title: Interlaced Sparse Self-Attention for Semantic Segmentation - README: configs/isanet/README.md - Code: - URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 - Version: v0.18.0 - Converted From: - Code: https://github.com/openseg-group/openseg.pytorch -Models: -- Name: isanet_r50-d8_4xb2-40k_cityscapes-512x1024 - In Collection: ISANet - Metadata: - backbone: R-50-D8 - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 343.64 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 5.869 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.49 - mIoU(ms+flip): 79.44 - Config: configs/isanet/isanet_r50-d8_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_40k_cityscapes/isanet_r50-d8_512x1024_40k_cityscapes_20210901_054739-981bd763.pth -- Name: isanet_r50-d8_4xb2-80k_cityscapes-512x1024 - In Collection: ISANet - Metadata: - backbone: R-50-D8 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 343.64 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 5.869 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.68 - mIoU(ms+flip): 80.25 - Config: configs/isanet/isanet_r50-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_80k_cityscapes/isanet_r50-d8_512x1024_80k_cityscapes_20210901_074202-89384497.pth -- Name: isanet_r50-d8_4xb2-40k_cityscapes-769x769 - In Collection: ISANet - Metadata: - backbone: R-50-D8 - crop size: (769,769) - lr schd: 40000 - inference time (ms/im): - - value: 649.35 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 6.759 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.7 - mIoU(ms+flip): 80.28 - Config: configs/isanet/isanet_r50-d8_4xb2-40k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_40k_cityscapes/isanet_r50-d8_769x769_40k_cityscapes_20210903_050200-4ae7e65b.pth -- Name: isanet_r50-d8_4xb2-80k_cityscapes-769x769 - In Collection: ISANet - Metadata: - backbone: R-50-D8 - crop size: (769,769) - lr schd: 80000 - inference time (ms/im): - - 
value: 649.35 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 6.759 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.29 - mIoU(ms+flip): 80.53 - Config: configs/isanet/isanet_r50-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_80k_cityscapes/isanet_r50-d8_769x769_80k_cityscapes_20210903_101126-99b54519.pth -- Name: isanet_r101-d8_4xb2-40k_cityscapes-512x1024 - In Collection: ISANet - Metadata: - backbone: R-101-D8 - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 425.53 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 9.425 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.58 - mIoU(ms+flip): 81.05 - Config: configs/isanet/isanet_r101-d8_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_40k_cityscapes/isanet_r101-d8_512x1024_40k_cityscapes_20210901_145553-293e6bd6.pth -- Name: isanet_r101-d8_4xb2-80k_cityscapes-512x1024 - In Collection: ISANet - Metadata: - backbone: R-101-D8 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 425.53 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 9.425 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 80.32 - mIoU(ms+flip): 81.58 - Config: configs/isanet/isanet_r101-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_80k_cityscapes/isanet_r101-d8_512x1024_80k_cityscapes_20210901_145243-5b99c9b2.pth -- Name: isanet_r101-d8_4xb2-40k_cityscapes-769x769 - In Collection: ISANet - Metadata: - backbone: R-101-D8 - crop size: (769,769) - lr schd: 40000 - inference time (ms/im): - - value: 1086.96 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 10.815 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.68 - mIoU(ms+flip): 80.95 - Config: configs/isanet/isanet_r101-d8_4xb2-40k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_40k_cityscapes/isanet_r101-d8_769x769_40k_cityscapes_20210903_111320-509e7224.pth -- Name: isanet_r101-d8_4xb2-80k_cityscapes-769x769 - In Collection: ISANet - Metadata: - backbone: R-101-D8 - crop size: (769,769) - lr schd: 80000 - inference time (ms/im): - - value: 1086.96 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 10.815 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 80.61 - mIoU(ms+flip): 81.59 - Config: configs/isanet/isanet_r101-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_80k_cityscapes/isanet_r101-d8_769x769_80k_cityscapes_20210903_111319-24f71dfa.pth -- Name: isanet_r50-d8_4xb4-80k_ade20k-512x512 - In Collection: ISANet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 44.35 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 9.0 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - 
Metrics: - mIoU: 41.12 - mIoU(ms+flip): 42.35 - Config: configs/isanet/isanet_r50-d8_4xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_80k_ade20k/isanet_r50-d8_512x512_80k_ade20k_20210903_124557-6ed83a0c.pth -- Name: isanet_r50-d8_4xb4-160k_ade20k-512x512 - In Collection: ISANet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 44.35 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 9.0 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 42.59 - mIoU(ms+flip): 43.07 - Config: configs/isanet/isanet_r50-d8_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_160k_ade20k/isanet_r50-d8_512x512_160k_ade20k_20210903_104850-f752d0a3.pth -- Name: isanet_r101-d8_4xb4-80k_ade20k-512x512 - In Collection: ISANet - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 94.7 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 12.562 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 43.51 - mIoU(ms+flip): 44.38 - Config: configs/isanet/isanet_r101-d8_4xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_80k_ade20k/isanet_r101-d8_512x512_80k_ade20k_20210903_162056-68b235c2.pth -- Name: isanet_r101-d8_4xb4-160k_ade20k-512x512 - In Collection: ISANet - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 94.7 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 12.562 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 43.8 - mIoU(ms+flip): 45.4 - Config: configs/isanet/isanet_r101-d8_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_160k_ade20k/isanet_r101-d8_512x512_160k_ade20k_20210903_211431-a7879dcd.pth -- Name: isanet_r50-d8_4xb4-20k_voc12aug-512x512 - In Collection: ISANet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 20000 - inference time (ms/im): - - value: 43.33 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 5.9 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 76.78 - mIoU(ms+flip): 77.79 - Config: configs/isanet/isanet_r50-d8_4xb4-20k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_20k_voc12aug/isanet_r50-d8_512x512_20k_voc12aug_20210901_164838-79d59b80.pth -- Name: isanet_r50-d8_4xb4-40k_voc12aug-512x512 - In Collection: ISANet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 40000 - inference time (ms/im): - - value: 43.33 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 5.9 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 76.2 - mIoU(ms+flip): 77.22 - Config: configs/isanet/isanet_r50-d8_4xb4-40k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_40k_voc12aug/isanet_r50-d8_512x512_40k_voc12aug_20210901_151349-7d08a54e.pth 
-- Name: isanet_r101-d8_4xb4-20k_voc12aug-512x512 - In Collection: ISANet - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 20000 - inference time (ms/im): - - value: 134.77 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 9.465 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 78.46 - mIoU(ms+flip): 79.16 - Config: configs/isanet/isanet_r101-d8_4xb4-20k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_20k_voc12aug/isanet_r101-d8_512x512_20k_voc12aug_20210901_115805-3ccbf355.pth -- Name: isanet_r101-d8_4xb4-40k_voc12aug-512x512 - In Collection: ISANet - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 40000 - inference time (ms/im): - - value: 134.77 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 9.465 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 78.12 - mIoU(ms+flip): 79.04 - Config: configs/isanet/isanet_r101-d8_4xb4-40k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_40k_voc12aug/isanet_r101-d8_512x512_40k_voc12aug_20210901_145814-bc71233b.pth diff --git a/configs/isanet/metafile.yaml b/configs/isanet/metafile.yaml new file mode 100644 index 0000000000..ad394eabb2 --- /dev/null +++ b/configs/isanet/metafile.yaml @@ -0,0 +1,399 @@ +Collections: +- Name: ISANet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + Paper: + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + README: configs/isanet/README.md + Frameworks: + - PyTorch +Models: +- Name: isanet_r50-d8_4xb2-40k_cityscapes-512x1024 + In Collection: ISANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.49 + mIoU(ms+flip): 79.44 + Config: configs/isanet/isanet_r50-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - ISANet + Training Resources: 4x V100 GPUS + Memory (GB): 5.869 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_40k_cityscapes/isanet_r50-d8_512x1024_40k_cityscapes_20210901_054739-981bd763.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_40k_cityscapes/isanet_r50-d8_512x1024_40k_cityscapes_20210901_054739.log.json + Paper: + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 + Framework: PyTorch +- Name: isanet_r50-d8_4xb2-80k_cityscapes-512x1024 + In Collection: ISANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.68 + mIoU(ms+flip): 80.25 + Config: configs/isanet/isanet_r50-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - ISANet + Training Resources: 4x V100 GPUS + Memory (GB): 5.869 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_80k_cityscapes/isanet_r50-d8_512x1024_80k_cityscapes_20210901_074202-89384497.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_80k_cityscapes/isanet_r50-d8_512x1024_80k_cityscapes_20210901_074202.log.json + Paper: + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 + Framework: PyTorch +- Name: isanet_r50-d8_4xb2-40k_cityscapes-769x769 + In Collection: ISANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.7 + mIoU(ms+flip): 80.28 + Config: configs/isanet/isanet_r50-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - ISANet + Training Resources: 4x V100 GPUS + Memory (GB): 6.759 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_40k_cityscapes/isanet_r50-d8_769x769_40k_cityscapes_20210903_050200-4ae7e65b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_40k_cityscapes/isanet_r50-d8_769x769_40k_cityscapes_20210903_050200.log.json + Paper: + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 + Framework: PyTorch +- Name: isanet_r50-d8_4xb2-80k_cityscapes-769x769 + In Collection: ISANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.29 + mIoU(ms+flip): 80.53 + Config: configs/isanet/isanet_r50-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - ISANet + Training Resources: 4x V100 GPUS + Memory (GB): 6.759 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_80k_cityscapes/isanet_r50-d8_769x769_80k_cityscapes_20210903_101126-99b54519.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_80k_cityscapes/isanet_r50-d8_769x769_80k_cityscapes_20210903_101126.log.json + Paper: + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 + Framework: PyTorch +- Name: isanet_r101-d8_4xb2-40k_cityscapes-512x1024 + In Collection: ISANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.58 + mIoU(ms+flip): 81.05 + Config: configs/isanet/isanet_r101-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - ISANet + Training Resources: 4x V100 GPUS + Memory (GB): 9.425 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_40k_cityscapes/isanet_r101-d8_512x1024_40k_cityscapes_20210901_145553-293e6bd6.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_40k_cityscapes/isanet_r101-d8_512x1024_40k_cityscapes_20210901_145553.log.json + Paper: + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 + Framework: PyTorch +- Name: isanet_r101-d8_4xb2-80k_cityscapes-512x1024 + In Collection: ISANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + 
Metrics: + mIoU: 80.32 + mIoU(ms+flip): 81.58 + Config: configs/isanet/isanet_r101-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - ISANet + Training Resources: 4x V100 GPUS + Memory (GB): 9.425 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_80k_cityscapes/isanet_r101-d8_512x1024_80k_cityscapes_20210901_145243-5b99c9b2.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_80k_cityscapes/isanet_r101-d8_512x1024_80k_cityscapes_20210901_145243.log.json + Paper: + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 + Framework: PyTorch +- Name: isanet_r101-d8_4xb2-40k_cityscapes-769x769 + In Collection: ISANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.68 + mIoU(ms+flip): 80.95 + Config: configs/isanet/isanet_r101-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - ISANet + Training Resources: 4x V100 GPUS + Memory (GB): 10.815 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_40k_cityscapes/isanet_r101-d8_769x769_40k_cityscapes_20210903_111320-509e7224.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_40k_cityscapes/isanet_r101-d8_769x769_40k_cityscapes_20210903_111320.log.json + Paper: + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 + Framework: PyTorch +- Name: isanet_r101-d8_4xb2-80k_cityscapes-769x769 + In Collection: ISANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.61 + mIoU(ms+flip): 81.59 + Config: configs/isanet/isanet_r101-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - ISANet + Training Resources: 4x V100 GPUS + Memory (GB): 10.815 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_80k_cityscapes/isanet_r101-d8_769x769_80k_cityscapes_20210903_111319-24f71dfa.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_80k_cityscapes/isanet_r101-d8_769x769_80k_cityscapes_20210903_111319.log.json + Paper: + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 + Framework: PyTorch +- Name: isanet_r50-d8_4xb4-80k_ade20k-512x512 + In Collection: ISANet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.12 + mIoU(ms+flip): 42.35 + Config: configs/isanet/isanet_r50-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - ISANet + Training Resources: 4x V100 GPUS + Memory (GB): 9.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_80k_ade20k/isanet_r50-d8_512x512_80k_ade20k_20210903_124557-6ed83a0c.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_80k_ade20k/isanet_r50-d8_512x512_80k_ade20k_20210903_124557.log.json + Paper: + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 + Framework: PyTorch +- Name: isanet_r50-d8_4xb4-160k_ade20k-512x512 + In Collection: ISANet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.59 + mIoU(ms+flip): 43.07 + Config: configs/isanet/isanet_r50-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - ISANet + Training Resources: 4x V100 GPUS + Memory (GB): 9.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_160k_ade20k/isanet_r50-d8_512x512_160k_ade20k_20210903_104850-f752d0a3.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_160k_ade20k/isanet_r50-d8_512x512_160k_ade20k_20210903_104850.log.json + Paper: + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 + Framework: PyTorch +- Name: isanet_r101-d8_4xb4-80k_ade20k-512x512 + In Collection: ISANet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.51 + mIoU(ms+flip): 44.38 + Config: configs/isanet/isanet_r101-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - ISANet + Training Resources: 4x V100 GPUS + Memory (GB): 12.562 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_80k_ade20k/isanet_r101-d8_512x512_80k_ade20k_20210903_162056-68b235c2.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_80k_ade20k/isanet_r101-d8_512x512_80k_ade20k_20210903_162056.log.json + Paper: + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 + Framework: PyTorch +- Name: isanet_r101-d8_4xb4-160k_ade20k-512x512 + In Collection: ISANet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.8 + mIoU(ms+flip): 45.4 + Config: configs/isanet/isanet_r101-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - ISANet + Training Resources: 4x V100 GPUS + Memory (GB): 12.562 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_160k_ade20k/isanet_r101-d8_512x512_160k_ade20k_20210903_211431-a7879dcd.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_160k_ade20k/isanet_r101-d8_512x512_160k_ade20k_20210903_211431.log.json + Paper: + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 + Framework: PyTorch +- Name: isanet_r50-d8_4xb4-20k_voc12aug-512x512 + In Collection: ISANet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.78 + mIoU(ms+flip): 77.79 + Config: 
configs/isanet/isanet_r50-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - ISANet + Training Resources: 4x V100 GPUS + Memory (GB): 5.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_20k_voc12aug/isanet_r50-d8_512x512_20k_voc12aug_20210901_164838-79d59b80.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_20k_voc12aug/isanet_r50-d8_512x512_20k_voc12aug_20210901_164838.log.json + Paper: + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 + Framework: PyTorch +- Name: isanet_r50-d8_4xb4-40k_voc12aug-512x512 + In Collection: ISANet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.2 + mIoU(ms+flip): 77.22 + Config: configs/isanet/isanet_r50-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - ISANet + Training Resources: 4x V100 GPUS + Memory (GB): 5.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_40k_voc12aug/isanet_r50-d8_512x512_40k_voc12aug_20210901_151349-7d08a54e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_40k_voc12aug/isanet_r50-d8_512x512_40k_voc12aug_20210901_151349.log.json + Paper: + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 + Framework: PyTorch +- Name: isanet_r101-d8_4xb4-20k_voc12aug-512x512 + In Collection: ISANet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 78.46 + mIoU(ms+flip): 79.16 + Config: configs/isanet/isanet_r101-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - ISANet + Training Resources: 4x V100 GPUS + Memory (GB): 9.465 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_20k_voc12aug/isanet_r101-d8_512x512_20k_voc12aug_20210901_115805-3ccbf355.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_20k_voc12aug/isanet_r101-d8_512x512_20k_voc12aug_20210901_115805.log.json + Paper: + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 + Framework: PyTorch +- Name: isanet_r101-d8_4xb4-40k_voc12aug-512x512 + In Collection: ISANet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 78.12 + mIoU(ms+flip): 79.04 + Config: configs/isanet/isanet_r101-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - ISANet + Training Resources: 4x V100 GPUS + Memory (GB): 9.465 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_40k_voc12aug/isanet_r101-d8_512x512_40k_voc12aug_20210901_145814-bc71233b.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_40k_voc12aug/isanet_r101-d8_512x512_40k_voc12aug_20210901_145814.log.json + Paper: + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + URL: https://arxiv.org/abs/1907.12273 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 + Framework: PyTorch diff --git a/configs/knet/README.md b/configs/knet/README.md index ed5bc06257..1f3f2ae268 100644 --- a/configs/knet/README.md +++ b/configs/knet/README.md @@ -1,6 +1,6 @@ # K-Net -[K-Net: Towards Unified Image Segmentation](https://arxiv.org/abs/2106.14855) +> [K-Net: Towards Unified Image Segmentation](https://arxiv.org/abs/2106.14855) ## Introduction @@ -22,29 +22,31 @@ Semantic, instance, and panoptic segmentations have been addressed using differe -```bibtex -@inproceedings{zhang2021knet, - title={{K-Net: Towards} Unified Image Segmentation}, - author={Wenwei Zhang and Jiangmiao Pang and Kai Chen and Chen Change Loy}, - year={2021}, - booktitle={NeurIPS}, -} -``` - ## Results and models ### ADE20K -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ---------------- | -------- | --------- | ------- | -------- | -------------- | ----- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| KNet + FCN | R-50-D8 | 512x512 | 80000 | 7.01 | 19.24 | 43.60 | 45.12 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/knet/knet-s3_r50-d8_fcn_8xb2-adamw-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_043751-abcab920.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_043751.log.json) | -| KNet + PSPNet | R-50-D8 | 512x512 | 80000 | 6.98 | 20.04 | 44.18 | 45.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/knet/knet-s3_r50-d8_pspnet_8xb2-adamw-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_054634-d2c72240.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_054634.log.json) | -| KNet + DeepLabV3 | R-50-D8 | 512x512 | 80000 | 7.42 | 12.10 | 45.06 | 46.11 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/knet/knet-s3_r50-d8_deeplabv3_8xb2-adamw-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_041642-00c8fbeb.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_041642.log.json) | -| KNet + UperNet | R-50-D8 | 512x512 | 80000 | 7.34 | 17.11 | 43.45 | 44.07 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/knet/knet-s3_r50-d8_upernet_8xb2-adamw-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k_20220304_125657-215753b0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k_20220304_125657.log.json) | -| KNet + UperNet | Swin-T | 512x512 | 80000 | 7.57 | 15.56 | 45.84 | 46.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/knet/knet-s3_swin-t_upernet_8xb2-adamw-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k_20220303_133059-7545e1dc.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k_20220303_133059.log.json) | -| KNet + UperNet | Swin-L | 512x512 | 80000 | 13.5 | 8.29 | 52.05 | 53.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/knet/knet-s3_swin-l_upernet_8xb2-adamw-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k_20220303_154559-d8da9a90.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k_20220303_154559.log.json) | -| KNet + UperNet | Swin-L | 640x640 | 80000 | 13.54 | 8.29 | 52.21 | 53.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/knet/knet-s3_swin-l_upernet_8xb2-adamw-80k_ade20k-640x640.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k_20220301_220747-8787fc71.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k_20220301_220747.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------------- | -------- | --------- | ------- | -------- | -------------- | ------ | ----- | ------------- | --------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| KNet + FCN | R-50-D8 | 512x512 | 80000 | 7.01 | 19.24 | V100 | 43.60 | 45.12 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/knet/knet-s3_r50-d8_fcn_8xb2-adamw-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_043751-abcab920.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_043751.log.json) | +| KNet + PSPNet | R-50-D8 | 512x512 | 80000 | 6.98 | 20.04 | V100 | 44.18 | 45.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/knet/knet-s3_r50-d8_pspnet_8xb2-adamw-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_054634-d2c72240.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_054634.log.json) | +| KNet + DeepLabV3 | R-50-D8 | 512x512 | 80000 | 7.42 | 12.10 | V100 | 45.06 | 46.11 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/knet/knet-s3_r50-d8_deeplabv3_8xb2-adamw-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_041642-00c8fbeb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_041642.log.json) | +| KNet + UperNet | R-50-D8 | 512x512 | 80000 | 7.34 | 17.11 | V100 | 43.45 | 44.07 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/knet/knet-s3_r50-d8_upernet_8xb2-adamw-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k_20220304_125657-215753b0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k_20220304_125657.log.json) | +| KNet + UperNet | Swin-T | 512x512 | 80000 | 7.57 | 15.56 | V100 | 45.84 | 46.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/knet/knet-s3_swin-t_upernet_8xb2-adamw-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k_20220303_133059-7545e1dc.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k_20220303_133059.log.json) | +| KNet + UperNet | Swin-L | 512x512 | 80000 | 13.5 | 8.29 | V100 | 52.05 | 53.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/knet/knet-s3_swin-l_upernet_8xb2-adamw-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k_20220303_154559-d8da9a90.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k_20220303_154559.log.json) | +| KNet + UperNet | Swin-L | 640x640 | 80000 | 13.54 | 8.29 | V100 | 52.21 | 53.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/knet/knet-s3_swin-l_upernet_8xb2-adamw-80k_ade20k-640x640.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k_20220301_220747-8787fc71.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k_20220301_220747.log.json) | Note: - All experiments of K-Net are implemented with 8 V100 (32G) GPUs with 2 samples per GPU. + +## Citation + +```bibtex +@inproceedings{zhang2021knet, + title={{K-Net: Towards} Unified Image Segmentation}, + author={Wenwei Zhang and Jiangmiao Pang and Kai Chen and Chen Change Loy}, + year={2021}, + booktitle={NeurIPS}, +} +``` diff --git a/configs/knet/knet.yml b/configs/knet/knet.yml deleted file mode 100644 index 1c98e4703c..0000000000 --- a/configs/knet/knet.yml +++ /dev/null @@ -1,169 +0,0 @@ -Collections: -- Name: KNet - Metadata: - Training Data: - - ADE20K - Paper: - URL: https://arxiv.org/abs/2106.14855 - Title: 'K-Net: Towards Unified Image Segmentation' - README: configs/knet/README.md - Code: - URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.23.0/mmseg/models/decode_heads/knet_head.py#L392 - Version: v0.23.0 - Converted From: - Code: https://github.com/ZwwWayne/K-Net/ -Models: -- Name: knet-s3_r50-d8_fcn_8xb2-adamw-80k_ade20k-512x512 - In Collection: KNet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 51.98 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 7.01 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 43.6 - mIoU(ms+flip): 45.12 - Config: configs/knet/knet-s3_r50-d8_fcn_8xb2-adamw-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_043751-abcab920.pth -- Name: knet-s3_r50-d8_pspnet_8xb2-adamw-80k_ade20k-512x512 - In Collection: KNet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 49.9 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 6.98 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 44.18 - mIoU(ms+flip): 45.58 - Config: configs/knet/knet-s3_r50-d8_pspnet_8xb2-adamw-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_054634-d2c72240.pth -- Name: knet-s3_r50-d8_deeplabv3_8xb2-adamw-80k_ade20k-512x512 - In Collection: KNet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 82.64 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 7.42 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 45.06 - mIoU(ms+flip): 46.11 - Config:
configs/knet/knet-s3_r50-d8_deeplabv3_8xb2-adamw-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_041642-00c8fbeb.pth -- Name: knet-s3_r50-d8_upernet_8xb2-adamw-80k_ade20k-512x512 - In Collection: KNet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 58.45 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 7.34 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 43.45 - mIoU(ms+flip): 44.07 - Config: configs/knet/knet-s3_r50-d8_upernet_8xb2-adamw-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k_20220304_125657-215753b0.pth -- Name: knet-s3_swin-t_upernet_8xb2-adamw-80k_ade20k-512x512 - In Collection: KNet - Metadata: - backbone: Swin-T - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 64.27 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 7.57 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 45.84 - mIoU(ms+flip): 46.27 - Config: configs/knet/knet-s3_swin-t_upernet_8xb2-adamw-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k_20220303_133059-7545e1dc.pth -- Name: knet-s3_swin-l_upernet_8xb2-adamw-80k_ade20k-512x512 - In Collection: KNet - Metadata: - backbone: Swin-L - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 120.63 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 13.5 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 52.05 - mIoU(ms+flip): 53.24 - Config: configs/knet/knet-s3_swin-l_upernet_8xb2-adamw-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k_20220303_154559-d8da9a90.pth -- Name: knet-s3_swin-l_upernet_8xb2-adamw-80k_ade20k-640x640 - In Collection: KNet - Metadata: - backbone: Swin-L - crop size: (640,640) - lr schd: 80000 - inference time (ms/im): - - value: 120.63 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (640,640) - Training Memory (GB): 13.54 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 52.21 - mIoU(ms+flip): 53.34 - Config: configs/knet/knet-s3_swin-l_upernet_8xb2-adamw-80k_ade20k-640x640.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k_20220301_220747-8787fc71.pth diff --git a/configs/knet/metafile.yaml b/configs/knet/metafile.yaml new file mode 100644 index 0000000000..0f4ab79609 --- /dev/null +++ b/configs/knet/metafile.yaml @@ -0,0 +1,188 @@ +Collections: +- Name: KNet + License: Apache License 2.0 + Metadata: + Training Data: + - ADE20K + Paper: + Title: 'K-Net: Towards Unified Image Segmentation' + URL: https://arxiv.org/abs/2106.14855 + README: configs/knet/README.md + Frameworks: + - PyTorch +Models: +- 
Name: knet-s3_r50-d8_fcn_8xb2-adamw-80k_ade20k-512x512 + In Collection: KNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.6 + mIoU(ms+flip): 45.12 + Config: configs/knet/knet-s3_r50-d8_fcn_8xb2-adamw-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - KNet + - FCN + Training Resources: 8x V100 GPUS + Memory (GB): 7.01 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_043751-abcab920.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_043751.log.json + Paper: + Title: 'K-Net: Towards Unified Image Segmentation' + URL: https://arxiv.org/abs/2106.14855 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.23.0/mmseg/models/decode_heads/knet_head.py#L392 + Framework: PyTorch +- Name: knet-s3_r50-d8_pspnet_8xb2-adamw-80k_ade20k-512x512 + In Collection: KNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 44.18 + mIoU(ms+flip): 45.58 + Config: configs/knet/knet-s3_r50-d8_pspnet_8xb2-adamw-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - KNet + - PSPNet + Training Resources: 8x V100 GPUS + Memory (GB): 6.98 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_054634-d2c72240.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_054634.log.json + Paper: + Title: 'K-Net: Towards Unified Image Segmentation' + URL: https://arxiv.org/abs/2106.14855 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.23.0/mmseg/models/decode_heads/knet_head.py#L392 + Framework: PyTorch +- Name: knet-s3_r50-d8_deeplabv3_8xb2-adamw-80k_ade20k-512x512 + In Collection: KNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.06 + mIoU(ms+flip): 46.11 + Config: configs/knet/knet-s3_r50-d8_deeplabv3_8xb2-adamw-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - KNet + - DeepLabV3 + Training Resources: 8x V100 GPUS + Memory (GB): 7.42 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_041642-00c8fbeb.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_041642.log.json + Paper: + Title: 'K-Net: Towards Unified Image Segmentation' + URL: https://arxiv.org/abs/2106.14855 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.23.0/mmseg/models/decode_heads/knet_head.py#L392 + Framework: PyTorch +- Name: knet-s3_r50-d8_upernet_8xb2-adamw-80k_ade20k-512x512 + In Collection: KNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.45 + mIoU(ms+flip): 44.07 + Config: configs/knet/knet-s3_r50-d8_upernet_8xb2-adamw-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - KNet + - UperNet + Training Resources: 8x 
V100 GPUS + Memory (GB): 7.34 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k_20220304_125657-215753b0.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k_20220304_125657.log.json + Paper: + Title: 'K-Net: Towards Unified Image Segmentation' + URL: https://arxiv.org/abs/2106.14855 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.23.0/mmseg/models/decode_heads/knet_head.py#L392 + Framework: PyTorch +- Name: knet-s3_swin-t_upernet_8xb2-adamw-80k_ade20k-512x512 + In Collection: KNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.84 + mIoU(ms+flip): 46.27 + Config: configs/knet/knet-s3_swin-t_upernet_8xb2-adamw-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Swin-T + - KNet + - UperNet + Training Resources: 8x V100 GPUS + Memory (GB): 7.57 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k_20220303_133059-7545e1dc.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k_20220303_133059.log.json + Paper: + Title: 'K-Net: Towards Unified Image Segmentation' + URL: https://arxiv.org/abs/2106.14855 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.23.0/mmseg/models/decode_heads/knet_head.py#L392 + Framework: PyTorch +- Name: knet-s3_swin-l_upernet_8xb2-adamw-80k_ade20k-512x512 + In Collection: KNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 52.05 + mIoU(ms+flip): 53.24 + Config: configs/knet/knet-s3_swin-l_upernet_8xb2-adamw-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Swin-L + - KNet + - UperNet + Training Resources: 8x V100 GPUS + Memory (GB): 13.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k_20220303_154559-d8da9a90.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k_20220303_154559.log.json + Paper: + Title: 'K-Net: Towards Unified Image Segmentation' + URL: https://arxiv.org/abs/2106.14855 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.23.0/mmseg/models/decode_heads/knet_head.py#L392 + Framework: PyTorch +- Name: knet-s3_swin-l_upernet_8xb2-adamw-80k_ade20k-640x640 + In Collection: KNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 52.21 + mIoU(ms+flip): 53.34 + Config: configs/knet/knet-s3_swin-l_upernet_8xb2-adamw-80k_ade20k-640x640.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Swin-L + - KNet + - UperNet + Training Resources: 8x V100 GPUS + Memory (GB): 13.54 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k_20220301_220747-8787fc71.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k_20220301_220747.log.json + Paper: + Title: 'K-Net: Towards Unified Image Segmentation' + URL: https://arxiv.org/abs/2106.14855 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.23.0/mmseg/models/decode_heads/knet_head.py#L392 + Framework: PyTorch diff --git a/configs/mae/README.md b/configs/mae/README.md index 330749732e..d14e3830be 100644 --- a/configs/mae/README.md +++ b/configs/mae/README.md @@ -1,6 +1,6 @@ # MAE -[Masked Autoencoders Are Scalable Vision Learners](https://arxiv.org/abs/2111.06377) +> [Masked Autoencoders Are Scalable Vision Learners](https://arxiv.org/abs/2111.06377) ## Introduction @@ -22,17 +22,6 @@ This paper shows that masked autoencoders (MAE) are scalable self-supervised lea -## Citation - -```bibtex -@article{he2021masked, - title={Masked autoencoders are scalable vision learners}, - author={He, Kaiming and Chen, Xinlei and Xie, Saining and Li, Yanghao and Doll{\'a}r, Piotr and Girshick, Ross}, - journal={arXiv preprint arXiv:2111.06377}, - year={2021} -} -``` - ## Usage To use other repositories' pre-trained models, it is necessary to convert keys. @@ -77,6 +66,17 @@ upernet_mae-base_fp16_8x2_512x512_160k_ade20k_20220426_174752-f92a2975.pth $GPUS ### ADE20K -| Method | Backbone | Crop Size | pretrain | pretrain img size | Batch Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------- | -------- | --------- | ----------- | ----------------- | ---------- | ------- | -------- | -------------- | ----- | ------------: | -------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| UPerNet | ViT-B | 512x512 | ImageNet-1K | 224x224 | 16 | 160000 | 9.96 | 7.14 | 48.13 | 48.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mae/mae-base_upernet_8xb2-amp-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k/upernet_mae-base_fp16_8x2_512x512_160k_ade20k_20220426_174752-f92a2975.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k/upernet_mae-base_fp16_8x2_512x512_160k_ade20k_20220426_174752.log.json) | +| Method | Backbone | Crop Size | pretrain | pretrain img size | Batch Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------- | -------- | --------- | ----------- | ----------------- | ---------- | ------- | -------- | -------------- | ------ | ----- | ------------: | ----------------------------------------------------------------------------------------------------------------------------- | 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| UPerNet | ViT-B | 512x512 | ImageNet-1K | 224x224 | 16 | 160000 | 9.96 | 7.14 | V100 | 48.13 | 48.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mae/mae-base_upernet_8xb2-amp-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k/upernet_mae-base_fp16_8x2_512x512_160k_ade20k_20220426_174752-f92a2975.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k/upernet_mae-base_fp16_8x2_512x512_160k_ade20k_20220426_174752.log.json) | + +## Citation + +```bibtex +@article{he2021masked, + title={Masked autoencoders are scalable vision learners}, + author={He, Kaiming and Chen, Xinlei and Xie, Saining and Li, Yanghao and Doll{\'a}r, Piotr and Girshick, Ross}, + journal={arXiv preprint arXiv:2111.06377}, + year={2021} +} +``` diff --git a/configs/mae/mae.yml b/configs/mae/mae.yml deleted file mode 100644 index 72b2cc7f12..0000000000 --- a/configs/mae/mae.yml +++ /dev/null @@ -1,23 +0,0 @@ -Models: -- Name: mae-base_upernet_8xb2-amp-160k_ade20k-512x512 - In Collection: UPerNet - Metadata: - backbone: ViT-B - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 140.06 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: AMP - resolution: (512,512) - Training Memory (GB): 9.96 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 48.13 - mIoU(ms+flip): 48.7 - Config: configs/mae/mae-base_upernet_8xb2-amp-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k/upernet_mae-base_fp16_8x2_512x512_160k_ade20k_20220426_174752-f92a2975.pth diff --git a/configs/mae/metafile.yaml b/configs/mae/metafile.yaml new file mode 100644 index 0000000000..567eafe131 --- /dev/null +++ b/configs/mae/metafile.yaml @@ -0,0 +1,25 @@ +Models: +- Name: mae-base_upernet_8xb2-amp-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 48.13 + mIoU(ms+flip): 48.7 + Config: configs/mae/mae-base_upernet_8xb2-amp-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - ViT-B + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 9.96 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k/upernet_mae-base_fp16_8x2_512x512_160k_ade20k_20220426_174752-f92a2975.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k/upernet_mae-base_fp16_8x2_512x512_160k_ade20k_20220426_174752.log.json + Paper: + Title: Masked Autoencoders Are Scalable Vision Learners + URL: https://arxiv.org/abs/2111.06377 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.24.0/mmseg/models/backbones/mae.py#L46 + Framework: PyTorch diff --git a/configs/mask2former/README.md b/configs/mask2former/README.md index 1861fec3b1..c21ab0d0c6 100644 --- a/configs/mask2former/README.md +++ b/configs/mask2former/README.md @@ -1,6 +1,6 @@ # Mask2Former -[Masked-attention Mask 
Transformer for Universal Image Segmentation](https://arxiv.org/abs/2112.01527) +> [Masked-attention Mask Transformer for Universal Image Segmentation](https://arxiv.org/abs/2112.01527) ## Introduction @@ -16,21 +16,6 @@ Image segmentation is about grouping pixels with different semantics, e.g., category or instance membership, where each choice of semantics defines a task. While only the semantics of each task differ, current research focuses on designing specialized architectures for each task. We present Masked-attention Mask Transformer (Mask2Former), a new architecture capable of addressing any image segmentation task (panoptic, instance or semantic). Its key components include masked attention, which extracts localized features by constraining cross-attention within predicted mask regions. In addition to reducing the research effort by at least three times, it outperforms the best specialized architectures by a significant margin on four popular datasets. Most notably, Mask2Former sets a new state-of-the-art for panoptic segmentation (57.8 PQ on COCO), instance segmentation (50.1 AP on COCO) and semantic segmentation (57.7 mIoU on ADE20K). -```bibtex -@inproceedings{cheng2021mask2former, - title={Masked-attention Mask Transformer for Universal Image Segmentation}, - author={Bowen Cheng and Ishan Misra and Alexander G. Schwing and Alexander Kirillov and Rohit Girdhar}, - journal={CVPR}, - year={2022} -} -@inproceedings{cheng2021maskformer, - title={Per-Pixel Classification is Not All You Need for Semantic Segmentation}, - author={Bowen Cheng and Alexander G. Schwing and Alexander Kirillov}, - journal={NeurIPS}, - year={2021} -} -``` - ### Usage - Mask2Former requires installing [MMDetection](https://github.com/open-mmlab/mmdetection) first; a quick sanity check is sketched below.
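A minimal sketch (not part of the diff) for checking that the MMDetection dependency noted above is satisfied and that a Mask2Former config loads through the MMSegmentation 1.x inference API. The config path and checkpoint URL are taken from the Cityscapes table below; `demo.png` is a placeholder image path.

```python
# Hedged sketch: assumes MMSegmentation 1.x and "mmdet>=3.0.0rc4" are installed.
import mmdet  # noqa: F401  # Mask2Former's head lives in MMDetection; this import must succeed

from mmseg.apis import inference_model, init_model

# Config and checkpoint as listed in the Cityscapes results table below.
config = 'configs/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024.py'
checkpoint = (
    'https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/'
    'mask2former_r50_8xb2-90k_cityscapes-512x1024/'
    'mask2former_r50_8xb2-90k_cityscapes-512x1024_20221202_140802-ffd9d750.pth')

model = init_model(config, checkpoint, device='cuda:0')
result = inference_model(model, 'demo.png')  # 'demo.png' is a placeholder
print(result.pred_sem_seg.data.shape)  # (1, H, W) semantic prediction
```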
@@ -43,26 +28,26 @@ pip install "mmdet>=3.0.0rc4" ### Cityscapes -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ----------- | -------------- | --------- | ------- | -------: | -------------- | ----- | ------------: | -----------------------------------------------------------------------------------------------------------------------------------------------------------: | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| Mask2Former | R-50-D32 | 512x1024 | 90000 | 5806 | 9.17 | 80.44 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024/mask2former_r50_8xb2-90k_cityscapes-512x1024_20221202_140802-ffd9d750.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024/mask2former_r50_8xb2-90k_cityscapes-512x1024_20221202_140802.json) | -| Mask2Former | R-101-D32 | 512x1024 | 90000 | 6971 | 7.11 | 80.80 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024/mask2former_r101_8xb2-90k_cityscapes-512x1024_20221130_031628-43e68666.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024/mask2former_r101_8xb2-90k_cityscapes-512x1024_20221130_031628.json)) | -| Mask2Former | Swin-T | 512x1024 | 90000 | 6511 | 7.18 | 81.71 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024/mask2former_swin-t_8xb2-90k_cityscapes-512x1024_20221127_144501-36c59341.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024/mask2former_swin-t_8xb2-90k_cityscapes-512x1024_20221127_144501.json)) | -| Mask2Former | Swin-S | 512x1024 | 90000 | 8282 | 5.57 | 82.57 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024/mask2former_swin-s_8xb2-90k_cityscapes-512x1024_20221127_143802-9ab177f6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024/mask2former_swin-s_8xb2-90k_cityscapes-512x1024_20221127_143802.json)) | -| Mask2Former | Swin-B (in22k) | 512x1024 | 90000 | 11152 | 4.32 | 83.52 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221203_045030-9a86a225.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221203_045030.json)) | -| Mask2Former | Swin-L (in22k) | 512x1024 | 90000 | 16207 | 2.86 | 83.65 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221202_141901-28ad20f1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221202_141901.json)) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ----------- | -------------- | --------- | ------- | -------: | -------------- | ------ | ----- | ------------: | --------------------------------------------------------------------------------------------------------------------------------------------------------: | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Mask2Former | R-50-D32 | 512x1024 | 90000 | 5.67 | 9.17 | A100 | 80.44 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024/mask2former_r50_8xb2-90k_cityscapes-512x1024_20221202_140802-ffd9d750.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024/mask2former_r50_8xb2-90k_cityscapes-512x1024_20221202_140802.json) | +| Mask2Former | R-101-D32 | 512x1024 | 90000 | 6.81 | 7.11 | A100 | 80.80 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024/mask2former_r101_8xb2-90k_cityscapes-512x1024_20221130_031628-43e68666.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024/mask2former_r101_8xb2-90k_cityscapes-512x1024_20221130_031628.json)) | +| Mask2Former | Swin-T | 512x1024 | 90000 | 6.36 | 7.18 | A100 | 81.71 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024/mask2former_swin-t_8xb2-90k_cityscapes-512x1024_20221127_144501-36c59341.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024/mask2former_swin-t_8xb2-90k_cityscapes-512x1024_20221127_144501.json)) | +| Mask2Former | Swin-S | 512x1024 | 90000 | 8.09 | 5.57 | A100 | 82.57 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024/mask2former_swin-s_8xb2-90k_cityscapes-512x1024_20221127_143802-9ab177f6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024/mask2former_swin-s_8xb2-90k_cityscapes-512x1024_20221127_143802.json)) | +| Mask2Former | Swin-B (in22k) | 512x1024 | 90000 | 10.89 | 4.32 | A100 | 83.52 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221203_045030-9a86a225.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221203_045030.json)) | +| Mask2Former | Swin-L (in22k) | 512x1024 | 90000 | 15.83 | 2.86 | A100 | 83.65 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221202_141901-28ad20f1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221202_141901.json)) | ### ADE20K -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ----------- | -------------- | --------- | ------- | -------: | -------------- | ----- | ------------: | -------------------------------------------------------------------------------------------------------------------------------------------------------: | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| Mask2Former | R-50-D32 | 512x512 | 160000 | 3385 | 26.59 | 47.87 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512/mask2former_r50_8xb2-160k_ade20k-512x512_20221204_000055-2d1f55f1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512/mask2former_r50_8xb2-160k_ade20k-512x512_20221204_000055.json)) | -| Mask2Former | R-101-D32 | 512x512 | 160000 | 4190 | 22.97 | 48.60 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512/mask2former_r101_8xb2-160k_ade20k-512x512_20221203_233905-b7135890.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512/mask2former_r101_8xb2-160k_ade20k-512x512_20221203_233905.json)) | -| Mask2Former | Swin-T | 512x512 | 160000 | 3826 | 23.82 | 48.66 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512/mask2former_swin-t_8xb2-160k_ade20k-512x512_20221203_234230-7d64e5dd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512/mask2former_swin-t_8xb2-160k_ade20k-512x512_20221203_234230.json)) | -| Mask2Former | Swin-S | 512x512 | 160000 | 5034 | 19.69 | 51.24 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512/mask2former_swin-s_8xb2-160k_ade20k-512x512_20221204_143905-e715144e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512/mask2former_swin-s_8xb2-160k_ade20k-512x512_20221204_143905.json)) | -| Mask2Former | Swin-B | 640x640 | 160000 | 5795 | 12.48 | 52.44 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640_20221129_125118-a4a086d2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640_20221129_125118.json)) | -| Mask2Former | Swin-B (in22k) | 640x640 | 160000 | 5795 | 12.43 | 53.90 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235230-7ec0f569.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235230.json)) | -| Mask2Former | Swin-L (in22k) | 640x640 | 160000 | 9077 | 8.81 | 56.01 | - | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235933-7120c214.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235933.json)) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ----------- | -------------- | --------- | ------- | -------: | -------------- | ------ | ----- | ------------: | ----------------------------------------------------------------------------------------------------------------------------------------------------: | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Mask2Former | R-50-D32 | 512x512 | 160000 | 3.31 | 26.59 | A100 | 47.87 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512/mask2former_r50_8xb2-160k_ade20k-512x512_20221204_000055-2d1f55f1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512/mask2former_r50_8xb2-160k_ade20k-512x512_20221204_000055.json)) | +| Mask2Former | R-101-D32 | 512x512 | 160000 | 4.09 | 22.97 | A100 | 48.60 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512/mask2former_r101_8xb2-160k_ade20k-512x512_20221203_233905-b7135890.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512/mask2former_r101_8xb2-160k_ade20k-512x512_20221203_233905.json)) | +| Mask2Former | Swin-T | 512x512 | 160000 | 3.74 | 23.82 | A100 | 48.66 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512/mask2former_swin-t_8xb2-160k_ade20k-512x512_20221203_234230-7d64e5dd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512/mask2former_swin-t_8xb2-160k_ade20k-512x512_20221203_234230.json)) | +| Mask2Former | Swin-S | 512x512 | 160000 | 4.92 | 19.69 | A100 | 51.24 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512.py) |
[model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512/mask2former_swin-s_8xb2-160k_ade20k-512x512_20221204_143905-e715144e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512/mask2former_swin-s_8xb2-160k_ade20k-512x512_20221204_143905.json)) | +| Mask2Former | Swin-B | 640x640 | 160000 | 5.66 | 12.48 | A100 | 52.44 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640_20221129_125118-a4a086d2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640_20221129_125118.json)) | +| Mask2Former | Swin-B (in22k) | 640x640 | 160000 | 5.66 | 12.43 | A100 | 53.90 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235230-7ec0f569.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235230.json)) | +| Mask2Former | Swin-L (in22k) | 640x640 | 160000 | 8.86 | 8.81 | A100 | 56.01 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235933-7120c214.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235933.json)) | Note: @@ -70,3 +55,20 @@ Note: - As mentioned at [the official repo](https://github.com/facebookresearch/Mask2Former/issues/5), the results of Mask2Former are relatively unstable; the Mask2Former (Swin-S) result on the ADE20K dataset in the table is the median obtained from 5 training runs, following the author's suggestion. - The ResNet backbones utilized in MaskFormer models are standard `ResNet` rather than `ResNetV1c`. - Test time augmentation is not supported in MMSegmentation 1.x yet; we will add "ms+flip" results as soon as possible. + +## Citation + +```bibtex +@inproceedings{cheng2021mask2former, + title={Masked-attention Mask Transformer for Universal Image Segmentation}, + author={Bowen Cheng and Ishan Misra and Alexander G. Schwing and Alexander Kirillov and Rohit Girdhar}, + journal={CVPR}, + year={2022} +} +@inproceedings{cheng2021maskformer, + title={Per-Pixel Classification is Not All You Need for Semantic Segmentation}, + author={Bowen Cheng and Alexander G.
Schwing and Alexander Kirillov}, + journal={NeurIPS}, + year={2021} +} +``` diff --git a/configs/mask2former/mask2former.yml b/configs/mask2former/mask2former.yml deleted file mode 100644 index 4e33766c70..0000000000 --- a/configs/mask2former/mask2former.yml +++ /dev/null @@ -1,290 +0,0 @@ -Collections: -- Name: Mask2Former - Metadata: - Training Data: - - Usage - - Cityscapes - - ADE20K - Paper: - URL: https://arxiv.org/abs/2112.01527 - Title: Masked-attention Mask Transformer for Universal Image Segmentation - README: configs/mask2former/README.md - Code: - URL: https://github.com/open-mmlab/mmdetection/blob/3.x/mmdet/models/dense_heads/mask2former_head.py - Version: 3.x - Converted From: - Code: https://github.com/facebookresearch/Mask2Former -Models: -- Name: mask2former_r50_8xb2-90k_cityscapes-512x1024 - In Collection: Mask2Former - Metadata: - backbone: R-50-D32 - crop size: (512,1024) - lr schd: 90000 - inference time (ms/im): - - value: 109.05 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 5806.0 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 80.44 - Config: configs/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024/mask2former_r50_8xb2-90k_cityscapes-512x1024_20221202_140802-ffd9d750.pth -- Name: mask2former_r101_8xb2-90k_cityscapes-512x1024 - In Collection: Mask2Former - Metadata: - backbone: R-101-D32 - crop size: (512,1024) - lr schd: 90000 - inference time (ms/im): - - value: 140.65 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 6971.0 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 80.8 - Config: configs/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024/mask2former_r101_8xb2-90k_cityscapes-512x1024_20221130_031628-43e68666.pth -- Name: mask2former_swin-t_8xb2-90k_cityscapes-512x1024 - In Collection: Mask2Former - Metadata: - backbone: Swin-T - crop size: (512,1024) - lr schd: 90000 - inference time (ms/im): - - value: 139.28 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 6511.0 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 81.71 - Config: configs/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024/mask2former_swin-t_8xb2-90k_cityscapes-512x1024_20221127_144501-36c59341.pth -- Name: mask2former_swin-s_8xb2-90k_cityscapes-512x1024 - In Collection: Mask2Former - Metadata: - backbone: Swin-S - crop size: (512,1024) - lr schd: 90000 - inference time (ms/im): - - value: 179.53 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 8282.0 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 82.57 - Config: configs/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024/mask2former_swin-s_8xb2-90k_cityscapes-512x1024_20221127_143802-9ab177f6.pth -- Name: 
mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024 - In Collection: Mask2Former - Metadata: - backbone: Swin-B (in22k) - crop size: (512,1024) - lr schd: 90000 - inference time (ms/im): - - value: 231.48 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 11152.0 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 83.52 - Config: configs/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221203_045030-9a86a225.pth -- Name: mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024 - In Collection: Mask2Former - Metadata: - backbone: Swin-L (in22k) - crop size: (512,1024) - lr schd: 90000 - inference time (ms/im): - - value: 349.65 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 16207.0 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 83.65 - Config: configs/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221202_141901-28ad20f1.pth -- Name: mask2former_r50_8xb2-160k_ade20k-512x512 - In Collection: Mask2Former - Metadata: - backbone: R-50-D32 - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 37.61 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 3385.0 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 47.87 - Config: configs/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512/mask2former_r50_8xb2-160k_ade20k-512x512_20221204_000055-2d1f55f1.pth -- Name: mask2former_r101_8xb2-160k_ade20k-512x512 - In Collection: Mask2Former - Metadata: - backbone: R-101-D32 - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 43.54 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 4190.0 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 48.6 - Config: configs/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512/mask2former_r101_8xb2-160k_ade20k-512x512_20221203_233905-b7135890.pth -- Name: mask2former_swin-t_8xb2-160k_ade20k-512x512 - In Collection: Mask2Former - Metadata: - backbone: Swin-T - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 41.98 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 3826.0 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 48.66 - Config: configs/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512/mask2former_swin-t_8xb2-160k_ade20k-512x512_20221203_234230-7d64e5dd.pth -- Name: 
mask2former_swin-s_8xb2-160k_ade20k-512x512 - In Collection: Mask2Former - Metadata: - backbone: Swin-S - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 50.79 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 5034.0 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 51.24 - Config: configs/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512/mask2former_swin-s_8xb2-160k_ade20k-512x512_20221204_143905-e715144e.pth -- Name: mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640 - In Collection: Mask2Former - Metadata: - backbone: Swin-B - crop size: (640,640) - lr schd: 160000 - inference time (ms/im): - - value: 80.13 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (640,640) - Training Memory (GB): 5795.0 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 52.44 - Config: configs/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640_20221129_125118-a4a086d2.pth -- Name: mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640 - In Collection: Mask2Former - Metadata: - backbone: Swin-B (in22k) - crop size: (640,640) - lr schd: 160000 - inference time (ms/im): - - value: 80.45 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (640,640) - Training Memory (GB): 5795.0 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 53.9 - Config: configs/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235230-7ec0f569.pth -- Name: mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640 - In Collection: Mask2Former - Metadata: - backbone: Swin-L (in22k) - crop size: (640,640) - lr schd: 160000 - inference time (ms/im): - - value: 113.51 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (640,640) - Training Memory (GB): 9077.0 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 56.01 - Config: configs/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235933-7120c214.pth diff --git a/configs/mask2former/metafile.yaml b/configs/mask2former/metafile.yaml new file mode 100644 index 0000000000..090c95e7cf --- /dev/null +++ b/configs/mask2former/metafile.yaml @@ -0,0 +1,314 @@ +Collections: +- Name: Mask2Former + License: Apache License 2.0 + Metadata: + Training Data: + - Usage + - Cityscapes + - ADE20K + Paper: + Title: Masked-attention Mask Transformer for Universal Image Segmentation + URL: https://arxiv.org/abs/2112.01527 + README: configs/mask2former/README.md + Frameworks: + - PyTorch +Models: +- Name: mask2former_r50_8xb2-90k_cityscapes-512x1024 + In Collection: Mask2Former + Results: + Task: Semantic 
Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.44 + Config: configs/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - R-50-D32 + - Mask2Former + Training Resources: 8x A100 GPUS + Memory (GB): 5.67 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024/mask2former_r50_8xb2-90k_cityscapes-512x1024_20221202_140802-ffd9d750.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024/mask2former_r50_8xb2-90k_cityscapes-512x1024_20221202_140802.json + Paper: + Title: Masked-attention Mask Transformer for Universal Image Segmentation + URL: https://arxiv.org/abs/2112.01527 + Code: https://github.com/open-mmlab/mmdetection/blob/3.x/mmdet/models/dense_heads/mask2former_head.py + Framework: PyTorch +- Name: mask2former_r101_8xb2-90k_cityscapes-512x1024 + In Collection: Mask2Former + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.8 + Config: configs/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - R-101-D32 + - Mask2Former + Training Resources: 8x A100 GPUS + Memory (GB): 6.81 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024/mask2former_r101_8xb2-90k_cityscapes-512x1024_20221130_031628-43e68666.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-90k_cityscapes-512x1024/mask2former_r101_8xb2-90k_cityscapes-512x1024_20221130_031628.json + Paper: + Title: Masked-attention Mask Transformer for Universal Image Segmentation + URL: https://arxiv.org/abs/2112.01527 + Code: https://github.com/open-mmlab/mmdetection/blob/3.x/mmdet/models/dense_heads/mask2former_head.py + Framework: PyTorch +- Name: mask2former_swin-t_8xb2-90k_cityscapes-512x1024 + In Collection: Mask2Former + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 81.71 + Config: configs/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - Swin-T + - Mask2Former + Training Resources: 8x A100 GPUS + Memory (GB): 6.36 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024/mask2former_swin-t_8xb2-90k_cityscapes-512x1024_20221127_144501-36c59341.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-90k_cityscapes-512x1024/mask2former_swin-t_8xb2-90k_cityscapes-512x1024_20221127_144501.json + Paper: + Title: Masked-attention Mask Transformer for Universal Image Segmentation + URL: https://arxiv.org/abs/2112.01527 + Code: https://github.com/open-mmlab/mmdetection/blob/3.x/mmdet/models/dense_heads/mask2former_head.py + Framework: PyTorch +- Name: mask2former_swin-s_8xb2-90k_cityscapes-512x1024 + In Collection: Mask2Former + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 82.57 + Config: configs/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - Swin-S + - Mask2Former + Training Resources: 8x A100 GPUS + Memory (GB): 8.09 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024/mask2former_swin-s_8xb2-90k_cityscapes-512x1024_20221127_143802-9ab177f6.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-90k_cityscapes-512x1024/mask2former_swin-s_8xb2-90k_cityscapes-512x1024_20221127_143802.json + Paper: + Title: Masked-attention Mask Transformer for Universal Image Segmentation + URL: https://arxiv.org/abs/2112.01527 + Code: https://github.com/open-mmlab/mmdetection/blob/3.x/mmdet/models/dense_heads/mask2former_head.py + Framework: PyTorch +- Name: mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024 + In Collection: Mask2Former + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 83.52 + Config: configs/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - Swin-B + - Mask2Former + Training Resources: 8x A100 GPUS + Memory (GB): 10.89 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221203_045030-9a86a225.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-b-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221203_045030.json + Paper: + Title: Masked-attention Mask Transformer for Universal Image Segmentation + URL: https://arxiv.org/abs/2112.01527 + Code: https://github.com/open-mmlab/mmdetection/blob/3.x/mmdet/models/dense_heads/mask2former_head.py + Framework: PyTorch +- Name: mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024 + In Collection: Mask2Former + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 83.65 + Config: configs/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - Swin-L + - Mask2Former + Training Resources: 8x A100 GPUS + Memory (GB): 15.83 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221202_141901-28ad20f1.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024/mask2former_swin-l-in22k-384x384-pre_8xb2-90k_cityscapes-512x1024_20221202_141901.json + Paper: + Title: Masked-attention Mask Transformer for Universal Image Segmentation + URL: https://arxiv.org/abs/2112.01527 + Code: https://github.com/open-mmlab/mmdetection/blob/3.x/mmdet/models/dense_heads/mask2former_head.py + Framework: PyTorch +- Name: mask2former_r50_8xb2-160k_ade20k-512x512 + In Collection: Mask2Former + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 47.87 + Config: configs/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D32 + - Mask2Former + Training Resources: 8x A100 GPUS + Memory (GB): 3.31 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512/mask2former_r50_8xb2-160k_ade20k-512x512_20221204_000055-2d1f55f1.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r50_8xb2-160k_ade20k-512x512/mask2former_r50_8xb2-160k_ade20k-512x512_20221204_000055.json + Paper: + Title: Masked-attention Mask Transformer for Universal Image Segmentation + URL: https://arxiv.org/abs/2112.01527 + Code: https://github.com/open-mmlab/mmdetection/blob/3.x/mmdet/models/dense_heads/mask2former_head.py + Framework: PyTorch +- Name: mask2former_r101_8xb2-160k_ade20k-512x512 + In Collection: Mask2Former + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 48.6 + Config: configs/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D32 + - Mask2Former + Training Resources: 8x A100 GPUS + Memory (GB): 4.09 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512/mask2former_r101_8xb2-160k_ade20k-512x512_20221203_233905-b7135890.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_r101_8xb2-160k_ade20k-512x512/mask2former_r101_8xb2-160k_ade20k-512x512_20221203_233905.json + Paper: + Title: Masked-attention Mask Transformer for Universal Image Segmentation + URL: https://arxiv.org/abs/2112.01527 + Code: https://github.com/open-mmlab/mmdetection/blob/3.x/mmdet/models/dense_heads/mask2former_head.py + Framework: PyTorch +- Name: mask2former_swin-t_8xb2-160k_ade20k-512x512 + In Collection: Mask2Former + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 48.66 + Config: configs/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Swin-T + - Mask2Former + Training Resources: 8x A100 GPUS + Memory (GB): 3.74 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512/mask2former_swin-t_8xb2-160k_ade20k-512x512_20221203_234230-7d64e5dd.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-t_8xb2-160k_ade20k-512x512/mask2former_swin-t_8xb2-160k_ade20k-512x512_20221203_234230.json + Paper: + Title: Masked-attention Mask Transformer for Universal Image Segmentation + URL: https://arxiv.org/abs/2112.01527 + Code: https://github.com/open-mmlab/mmdetection/blob/3.x/mmdet/models/dense_heads/mask2former_head.py + Framework: PyTorch +- Name: mask2former_swin-s_8xb2-160k_ade20k-512x512 + In Collection: Mask2Former + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 51.24 + Config: configs/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Swin-S + - Mask2Former + Training Resources: 8x A100 GPUS + Memory (GB): 4.92 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512/mask2former_swin-s_8xb2-160k_ade20k-512x512_20221204_143905-e715144e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-s_8xb2-160k_ade20k-512x512/mask2former_swin-s_8xb2-160k_ade20k-512x512_20221204_143905.json + Paper: + Title: Masked-attention Mask Transformer for Universal Image Segmentation + URL: https://arxiv.org/abs/2112.01527 + Code: https://github.com/open-mmlab/mmdetection/blob/3.x/mmdet/models/dense_heads/mask2former_head.py + Framework: PyTorch
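The `Memory (GB)` figures in the entries above were migrated from the `Training Memory (GB)` fields of the deleted mask2former.yml, which actually held MiB. A minimal sketch of that conversion, assuming the 1024 divisor implied by the migrated value pairs in this diff (`mib_to_gib` is an illustrative name, not shipped code):

```python
# Sketch of the MiB -> GiB rounding used when migrating memory figures from
# the old model-index .yml files. The 1024 divisor is inferred from the value
# pairs visible in this diff, not from any documented rule.
def mib_to_gib(mib: float) -> float:
    return round(mib / 1024, 2)


assert mib_to_gib(5806.0) == 5.67  # mask2former_r50, Cityscapes
assert mib_to_gib(3826.0) == 3.74  # mask2former_swin-t, ADE20K
assert mib_to_gib(5034.0) == 4.92  # mask2former_swin-s, ADE20K
```

The same factor reproduces every other migrated memory value in this file.

+- Name: mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640 + In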
Collection: Mask2Former + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 52.44 + Config: configs/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Swin-B + - Mask2Former + Training Resources: 8x A100 GPUS + Memory (GB): 5.66 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640_20221129_125118-a4a086d2.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640_20221129_125118.json + Paper: + Title: Masked-attention Mask Transformer for Universal Image Segmentation + URL: https://arxiv.org/abs/2112.01527 + Code: https://github.com/open-mmlab/mmdetection/blob/3.x/mmdet/models/dense_heads/mask2former_head.py + Framework: PyTorch +- Name: mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640 + In Collection: Mask2Former + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 53.9 + Config: configs/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Swin-B + - Mask2Former + Training Resources: 8x A100 GPUS + Memory (GB): 5.66 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235230-7ec0f569.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-b-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235230.json + Paper: + Title: Masked-attention Mask Transformer for Universal Image Segmentation + URL: https://arxiv.org/abs/2112.01527 + Code: https://github.com/open-mmlab/mmdetection/blob/3.x/mmdet/models/dense_heads/mask2former_head.py + Framework: PyTorch +- Name: mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640 + In Collection: Mask2Former + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 56.01 + Config: configs/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Swin-L + - Mask2Former + Training Resources: 8x A100 GPUS + Memory (GB): 8.86 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235933-7120c214.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mask2former/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640/mask2former_swin-l-in22k-384x384-pre_8xb2-160k_ade20k-640x640_20221203_235933.json + Paper: + Title: Masked-attention Mask Transformer for Universal Image Segmentation + URL: https://arxiv.org/abs/2112.01527 + Code: https://github.com/open-mmlab/mmdetection/blob/3.x/mmdet/models/dense_heads/mask2former_head.py + Framework: PyTorch diff --git a/configs/maskformer/README.md b/configs/maskformer/README.md index 0248dbb63c..a899bac090 100644 --- a/configs/maskformer/README.md +++ b/configs/maskformer/README.md @@ -1,6 +1,6 @@ # MaskFormer -[MaskFormer: Per-Pixel 
Classification is Not All You Need for Semantic Segmentation](https://arxiv.org/abs/2107.06278) +> [MaskFormer: Per-Pixel Classification is Not All You Need for Semantic Segmentation](https://arxiv.org/abs/2107.06278) ## Introduction @@ -22,17 +22,6 @@ Modern approaches typically formulate semantic segmentation as a per-pixel class -```bibtex -@article{cheng2021per, - title={Per-pixel classification is not all you need for semantic segmentation}, - author={Cheng, Bowen and Schwing, Alex and Kirillov, Alexander}, - journal={Advances in Neural Information Processing Systems}, - volume={34}, - pages={17864--17875}, - year={2021} -} -``` - ### Usage - MaskFormer model needs to install [MMDetection](https://github.com/open-mmlab/mmdetection) first. @@ -45,12 +34,12 @@ pip install "mmdet>=3.0.0rc4" ### ADE20K -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ---------- | --------- | --------- | ------- | -------- | -------------- | ----- | ------------- | -------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| MaskFormer | R-50-D32 | 512x512 | 160000 | 3.29 | 42.20 | 44.29 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512/maskformer_r50-d32_8xb2-160k_ade20k-512x512_20221030_182724-3a9cfe45.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512/maskformer_r50-d32_8xb2-160k_ade20k-512x512_20221030_182724.json) | -| MaskFormer | R-101-D32 | 512x512 | 160000 | 4.12 | 34.90 | 45.11 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512/maskformer_r101-d32_8xb2-160k_ade20k-512x512_20221031_223053-84adbfcb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512/maskformer_r101-d32_8xb2-160k_ade20k-512x512_20221031_223053.json) | -| MaskFormer | Swin-T | 512x512 | 160000 | 3.73 | 40.53 | 46.69 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512_20221114_232813-f14e7ce0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512_20221114_232813.json) | -| MaskFormer | Swin-S | 512x512 | 160000 | 5.33 | 26.98 | 49.36 | - | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512_20221115_114710-723512c7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512_20221115_114710.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------- | --------- | --------- | ------- | -------- | -------------- | ------ | ----- | ------------- | ----------------------------------------- | ----------------------------------------- | +| MaskFormer | R-50-D32 | 512x512 | 160000 | 3.29 | 42.20 | A100 | 44.29 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512/maskformer_r50-d32_8xb2-160k_ade20k-512x512_20221030_182724-3a9cfe45.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512/maskformer_r50-d32_8xb2-160k_ade20k-512x512_20221030_182724.json) | +| MaskFormer | R-101-D32 | 512x512 | 160000 | 4.12 | 34.90 | A100 | 45.11 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512/maskformer_r101-d32_8xb2-160k_ade20k-512x512_20221031_223053-84adbfcb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512/maskformer_r101-d32_8xb2-160k_ade20k-512x512_20221031_223053.json) | +| MaskFormer | Swin-T | 512x512 | 160000 | 3.73 | 40.53 | A100 | 46.69 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512_20221114_232813-f14e7ce0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512_20221114_232813.json) | +| MaskFormer | Swin-S | 512x512 | 160000 | 5.33 | 26.98 | A100 | 49.36 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512_20221115_114710-723512c7.pth) \|
[log](https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512_20221115_114710.json) | Note: @@ -58,3 +47,16 @@ Note: - The results of MaskFormer are relatively not stable. The accuracy (mIoU) of model with `R-101-D32` is from 44.7 to 46.0, and with `Swin-S` is from 49.0 to 49.8. - The ResNet backbones utilized in MaskFormer models are standard `ResNet` rather than `ResNetV1c`. - Test time augmentation is not supported in MMSegmentation 1.x version yet, we would add "ms+flip" results as soon as possible. + +## Citation + +```bibtex +@article{cheng2021per, + title={Per-pixel classification is not all you need for semantic segmentation}, + author={Cheng, Bowen and Schwing, Alex and Kirillov, Alexander}, + journal={Advances in Neural Information Processing Systems}, + volume={34}, + pages={17864--17875}, + year={2021} +} +``` diff --git a/configs/maskformer/maskformer.yml b/configs/maskformer/maskformer.yml deleted file mode 100644 index b499476a50..0000000000 --- a/configs/maskformer/maskformer.yml +++ /dev/null @@ -1,101 +0,0 @@ -Collections: -- Name: MaskFormer - Metadata: - Training Data: - - Usage - - ADE20K - Paper: - URL: https://arxiv.org/abs/2107.06278 - Title: 'MaskFormer: Per-Pixel Classification is Not All You Need for Semantic - Segmentation' - README: configs/maskformer/README.md - Code: - URL: https://github.com/open-mmlab/mmdetection/blob/dev-3.x/mmdet/models/dense_heads/maskformer_head.py#L21 - Version: dev-3.x - Converted From: - Code: https://github.com/facebookresearch/MaskFormer/ -Models: -- Name: maskformer_r50-d32_8xb2-160k_ade20k-512x512 - In Collection: MaskFormer - Metadata: - backbone: R-50-D32 - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 23.7 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 3.29 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 44.29 - Config: configs/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512/maskformer_r50-d32_8xb2-160k_ade20k-512x512_20221030_182724-3a9cfe45.pth -- Name: maskformer_r101-d32_8xb2-160k_ade20k-512x512 - In Collection: MaskFormer - Metadata: - backbone: R-101-D32 - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 28.65 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 4.12 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 45.11 - Config: configs/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512/maskformer_r101-d32_8xb2-160k_ade20k-512x512_20221031_223053-84adbfcb.pth -- Name: maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512 - In Collection: MaskFormer - Metadata: - backbone: Swin-T - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 24.67 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 3.73 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 46.69 - Config: configs/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512.py - Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512_20221114_232813-f14e7ce0.pth -- Name: maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512 - In Collection: MaskFormer - Metadata: - backbone: Swin-S - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 37.06 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 5.33 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 49.36 - Config: configs/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512_20221115_114710-723512c7.pth diff --git a/configs/maskformer/metafile.yaml new file mode 100644 index 0000000000..c9853e131f --- /dev/null +++ b/configs/maskformer/metafile.yaml @@ -0,0 +1,111 @@ +Collections: +- Name: MaskFormer + License: Apache License 2.0 + Metadata: + Training Data: + - Usage + - ADE20K + Paper: + Title: 'MaskFormer: Per-Pixel Classification is Not All You Need for Semantic + Segmentation' + URL: https://arxiv.org/abs/2107.06278 + README: configs/maskformer/README.md + Frameworks: + - PyTorch +Models: +- Name: maskformer_r50-d32_8xb2-160k_ade20k-512x512 + In Collection: MaskFormer + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 44.29 + Config: configs/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D32 + - MaskFormer + Training Resources: 8x A100 GPUS + Memory (GB): 3.29 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512/maskformer_r50-d32_8xb2-160k_ade20k-512x512_20221030_182724-3a9cfe45.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r50-d32_8xb2-160k_ade20k-512x512/maskformer_r50-d32_8xb2-160k_ade20k-512x512_20221030_182724.json + Paper: + Title: 'MaskFormer: Per-Pixel Classification is Not All You Need for Semantic + Segmentation' + URL: https://arxiv.org/abs/2107.06278 + Code: https://github.com/open-mmlab/mmdetection/blob/dev-3.x/mmdet/models/dense_heads/maskformer_head.py#L21 + Framework: PyTorch +- Name: maskformer_r101-d32_8xb2-160k_ade20k-512x512 + In Collection: MaskFormer + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.11 + Config: configs/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D32 + - MaskFormer + Training Resources: 8x A100 GPUS + Memory (GB): 4.12 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512/maskformer_r101-d32_8xb2-160k_ade20k-512x512_20221031_223053-84adbfcb.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_r101-d32_8xb2-160k_ade20k-512x512/maskformer_r101-d32_8xb2-160k_ade20k-512x512_20221031_223053.json + Paper: + Title: 'MaskFormer: Per-Pixel Classification is Not All You Need for Semantic + Segmentation' + URL: https://arxiv.org/abs/2107.06278 + Code: https://github.com/open-mmlab/mmdetection/blob/dev-3.x/mmdet/models/dense_heads/maskformer_head.py#L21 + Framework: PyTorch +- Name:
maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512 + In Collection: MaskFormer + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 46.69 + Config: configs/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Swin-T + - MaskFormer + Training Resources: 8x A100 GPUS + Memory (GB): 3.73 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512_20221114_232813-f14e7ce0.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-t_upernet_8xb2-160k_ade20k-512x512_20221114_232813.json + Paper: + Title: 'MaskFormer: Per-Pixel Classification is Not All You Need for Semantic + Segmentation' + URL: https://arxiv.org/abs/2107.06278 + Code: https://github.com/open-mmlab/mmdetection/blob/dev-3.x/mmdet/models/dense_heads/maskformer_head.py#L21 + Framework: PyTorch +- Name: maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512 + In Collection: MaskFormer + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 49.36 + Config: configs/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Swin-S + - MaskFormer + Training Resources: 8x A100 GPUS + Memory (GB): 5.33 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512_20221115_114710-723512c7.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/maskformer/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512/maskformer_swin-s_upernet_8xb2-160k_ade20k-512x512_20221115_114710.json + Paper: + Title: 'MaskFormer: Per-Pixel Classification is Not All You Need for Semantic + Segmentation' + URL: https://arxiv.org/abs/2107.06278 + Code: https://github.com/open-mmlab/mmdetection/blob/dev-3.x/mmdet/models/dense_heads/maskformer_head.py#L21 + Framework: PyTorch
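Because these metafiles are hand-migrated YAML, field-level slips are easy to make: an fps figure can land in `Training Resources` where a device name belongs, or a MiB value in `Memory (GB)`. A rough lint pass over the schema above can flag both; this is a sketch only, assuming `mmengine.fileio.load` and the field names used in this file (`lint_metafile` is a hypothetical helper, not part of the repo):

```python
# Rough consistency check for the metafile.yaml layout above (a sketch, not
# shipped code). It flags a `Training Resources` string that does not look
# like "<N>x <device> GPUS" and a `Memory (GB)` value so large it was
# probably recorded in MiB.
import re

from mmengine.fileio import load  # parses YAML/JSON by file extension


def lint_metafile(path: str) -> None:
    meta = load(path)
    for model in meta.get('Models', []):
        metadata = model.get('Metadata', {})
        resources = metadata.get('Training Resources', '')
        if not re.fullmatch(r'\d+x [A-Z]\w* GPUS', resources):
            print(f"{model['Name']}: odd Training Resources: {resources!r}")
        memory = metadata.get('Memory (GB)')
        if memory is not None and memory > 80:  # no listed model needs >80 GB
            print(f"{model['Name']}: Memory (GB) looks like MiB: {memory}")


lint_metafile('configs/maskformer/metafile.yaml')
```

Run over the metafiles added in this diff, it should stay silent once the fields are consistent.

diff --git a/configs/mobilenet_v2/README.md index 30f1fe3ce2..bff5259129 100644 --- a/configs/mobilenet_v2/README.md +++ b/configs/mobilenet_v2/README.md @@ -1,6 +1,6 @@ # MobileNetV2 -[MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381) +> [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381) ## Introduction @@ -23,6 +23,26 @@ The MobileNetV2 architecture is based on an inverted residual structure where th +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------: | -------------- | ------ | ----: | ------------- | ----------------------------------------- | ----------------------------------------- | +| FCN | M-V2-D8 | 512x1024 | 80000 | 3.4 |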
14.2 | A100 | 71.19 | 73.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024-20230224_185436-13fef4ea.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024_20230224_185436.json) | +| PSPNet | M-V2-D8 | 512x1024 | 80000 | 3.6 | 11.2 | V100 | 70.23 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mobilenet_v2/mobilenet-v2-d8_pspnet_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes/pspnet_m-v2-d8_512x1024_80k_cityscapes_20200825_124817-19e81d51.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes/pspnet_m-v2-d8_512x1024_80k_cityscapes-20200825_124817.log.json) | +| DeepLabV3 | M-V2-D8 | 512x1024 | 80000 | 3.9 | 8.4 | V100 | 73.84 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes/deeplabv3_m-v2-d8_512x1024_80k_cityscapes_20200825_124836-bef03590.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes/deeplabv3_m-v2-d8_512x1024_80k_cityscapes-20200825_124836.log.json) | +| DeepLabV3+ | M-V2-D8 | 512x1024 | 80000 | 5.1 | 8.4 | V100 | 75.20 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3plus_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes_20200825_124836-d256dd4b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes-20200825_124836.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------: | -------------- | ------ | ----: | ------------- | ----------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | M-V2-D8 | 512x512 | 160000 | 6.5 | 64.4 | V100 | 19.71 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mobilenet_v2/mobilenet-v2-d8_fcn_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/fcn_m-v2-d8_512x512_160k_ade20k/fcn_m-v2-d8_512x512_160k_ade20k_20200825_214953-c40e1095.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/fcn_m-v2-d8_512x512_160k_ade20k/fcn_m-v2-d8_512x512_160k_ade20k-20200825_214953.log.json) | +| PSPNet | M-V2-D8 | 512x512 | 160000 | 6.5 | 57.7 | V100 | 29.68 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mobilenet_v2/mobilenet-v2-d8_pspnet_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x512_160k_ade20k/pspnet_m-v2-d8_512x512_160k_ade20k_20200825_214953-f5942f7a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x512_160k_ade20k/pspnet_m-v2-d8_512x512_160k_ade20k-20200825_214953.log.json) | +| DeepLabV3 | M-V2-D8 | 512x512 | 160000 | 6.8 | 39.9 | V100 | 34.08 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x512_160k_ade20k/deeplabv3_m-v2-d8_512x512_160k_ade20k_20200825_223255-63986343.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x512_160k_ade20k/deeplabv3_m-v2-d8_512x512_160k_ade20k-20200825_223255.log.json) | +| DeepLabV3+ | M-V2-D8 | 512x512 | 160000 | 8.2 | 43.1 | V100 | 34.02 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3plus_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x512_160k_ade20k/deeplabv3plus_m-v2-d8_512x512_160k_ade20k_20200825_223255-465a01d4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x512_160k_ade20k/deeplabv3plus_m-v2-d8_512x512_160k_ade20k-20200825_223255.log.json) | + ## Citation ```bibtex @@ -34,23 +54,3 @@ The MobileNetV2 architecture is based on an inverted residual structure where th year={2018} } ``` - -## Results and models - -### Cityscapes - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ---------- | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| FCN | M-V2-D8 | 512x1024 | 80000 | 3.4 | 14.2 | 71.19 | 73.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024-20230224_185436-13fef4ea.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024_20230224_185436.json) | -| PSPNet | M-V2-D8 | 512x1024 | 80000 | 3.6 | 11.2 | 70.23 | - | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mobilenet_v2/mobilenet-v2-d8_pspnet_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes/pspnet_m-v2-d8_512x1024_80k_cityscapes_20200825_124817-19e81d51.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes/pspnet_m-v2-d8_512x1024_80k_cityscapes-20200825_124817.log.json) | -| DeepLabV3 | M-V2-D8 | 512x1024 | 80000 | 3.9 | 8.4 | 73.84 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes/deeplabv3_m-v2-d8_512x1024_80k_cityscapes_20200825_124836-bef03590.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes/deeplabv3_m-v2-d8_512x1024_80k_cityscapes-20200825_124836.log.json) | -| DeepLabV3+ | M-V2-D8 | 512x1024 | 80000 | 5.1 | 8.4 | 75.20 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3plus_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes_20200825_124836-d256dd4b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes-20200825_124836.log.json) | - -### ADE20K - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ---------- | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| FCN | M-V2-D8 | 512x512 | 160000 | 6.5 | 64.4 | 19.71 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mobilenet_v2/mobilenet-v2-d8_fcn_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/fcn_m-v2-d8_512x512_160k_ade20k/fcn_m-v2-d8_512x512_160k_ade20k_20200825_214953-c40e1095.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/fcn_m-v2-d8_512x512_160k_ade20k/fcn_m-v2-d8_512x512_160k_ade20k-20200825_214953.log.json) | -| PSPNet | M-V2-D8 | 512x512 | 160000 | 6.5 | 57.7 | 29.68 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mobilenet_v2/mobilenet-v2-d8_pspnet_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x512_160k_ade20k/pspnet_m-v2-d8_512x512_160k_ade20k_20200825_214953-f5942f7a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x512_160k_ade20k/pspnet_m-v2-d8_512x512_160k_ade20k-20200825_214953.log.json) | -| DeepLabV3 | 
M-V2-D8 | 512x512 | 160000 | 6.8 | 39.9 | 34.08 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x512_160k_ade20k/deeplabv3_m-v2-d8_512x512_160k_ade20k_20200825_223255-63986343.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x512_160k_ade20k/deeplabv3_m-v2-d8_512x512_160k_ade20k-20200825_223255.log.json) | -| DeepLabV3+ | M-V2-D8 | 512x512 | 160000 | 8.2 | 43.1 | 34.02 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3plus_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x512_160k_ade20k/deeplabv3plus_m-v2-d8_512x512_160k_ade20k_20200825_223255-465a01d4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x512_160k_ade20k/deeplabv3plus_m-v2-d8_512x512_160k_ade20k-20200825_223255.log.json) | diff --git a/configs/mobilenet_v2/metafile.yaml b/configs/mobilenet_v2/metafile.yaml new file mode 100644 index 0000000000..119c9ae7d6 --- /dev/null +++ b/configs/mobilenet_v2/metafile.yaml @@ -0,0 +1,186 @@ +Models: +- Name: mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 71.19 + mIoU(ms+flip): 73.34 + Config: configs/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - M-V2-D8 + - FCN + Training Resources: 4x A100 GPUS + Memory (GB): 3.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024-20230224_185436-13fef4ea.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024_20230224_185436.json + Paper: + Title: 'MobileNetV2: Inverted Residuals and Linear Bottlenecks' + URL: https://arxiv.org/abs/1801.04381 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mobilenet_v2.py#L14 + Framework: PyTorch +- Name: mobilenet-v2-d8_pspnet_4xb2-80k_cityscapes-512x1024 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 70.23 + Config: configs/mobilenet_v2/mobilenet-v2-d8_pspnet_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - M-V2-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 3.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes/pspnet_m-v2-d8_512x1024_80k_cityscapes_20200825_124817-19e81d51.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes/pspnet_m-v2-d8_512x1024_80k_cityscapes-20200825_124817.log.json + Paper: + Title: 'MobileNetV2: Inverted Residuals and Linear Bottlenecks' + URL: https://arxiv.org/abs/1801.04381 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mobilenet_v2.py#L14 + Framework: PyTorch +- Name: mobilenet-v2-d8_deeplabv3_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3 + Results: + Task: 
Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.84 + Config: configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - M-V2-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 3.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes/deeplabv3_m-v2-d8_512x1024_80k_cityscapes_20200825_124836-bef03590.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes/deeplabv3_m-v2-d8_512x1024_80k_cityscapes-20200825_124836.log.json + Paper: + Title: 'MobileNetV2: Inverted Residuals and Linear Bottlenecks' + URL: https://arxiv.org/abs/1801.04381 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mobilenet_v2.py#L14 + Framework: PyTorch +- Name: mobilenet-v2-d8_deeplabv3plus_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.2 + Config: configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3plus_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - M-V2-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 5.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes_20200825_124836-d256dd4b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes-20200825_124836.log.json + Paper: + Title: 'MobileNetV2: Inverted Residuals and Linear Bottlenecks' + URL: https://arxiv.org/abs/1801.04381 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mobilenet_v2.py#L14 + Framework: PyTorch +- Name: mobilenet-v2-d8_fcn_4xb4-160k_ade20k-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 19.71 + Config: configs/mobilenet_v2/mobilenet-v2-d8_fcn_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - M-V2-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 6.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/fcn_m-v2-d8_512x512_160k_ade20k/fcn_m-v2-d8_512x512_160k_ade20k_20200825_214953-c40e1095.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/fcn_m-v2-d8_512x512_160k_ade20k/fcn_m-v2-d8_512x512_160k_ade20k-20200825_214953.log.json + Paper: + Title: 'MobileNetV2: Inverted Residuals and Linear Bottlenecks' + URL: https://arxiv.org/abs/1801.04381 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mobilenet_v2.py#L14 + Framework: PyTorch +- Name: mobilenet-v2-d8_pspnet_4xb4-160k_ade20k-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 29.68 + Config: configs/mobilenet_v2/mobilenet-v2-d8_pspnet_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - M-V2-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x512_160k_ade20k/pspnet_m-v2-d8_512x512_160k_ade20k_20200825_214953-f5942f7a.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x512_160k_ade20k/pspnet_m-v2-d8_512x512_160k_ade20k-20200825_214953.log.json + Paper: + Title: 'MobileNetV2: Inverted Residuals and Linear Bottlenecks' + URL: https://arxiv.org/abs/1801.04381 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mobilenet_v2.py#L14 + Framework: PyTorch +- Name: mobilenet-v2-d8_deeplabv3_4xb4-160k_ade20k-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 34.08 + Config: configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - M-V2-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 6.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x512_160k_ade20k/deeplabv3_m-v2-d8_512x512_160k_ade20k_20200825_223255-63986343.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x512_160k_ade20k/deeplabv3_m-v2-d8_512x512_160k_ade20k-20200825_223255.log.json + Paper: + Title: 'MobileNetV2: Inverted Residuals and Linear Bottlenecks' + URL: https://arxiv.org/abs/1801.04381 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mobilenet_v2.py#L14 + Framework: PyTorch +- Name: mobilenet-v2-d8_deeplabv3plus_4xb4-160k_ade20k-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 34.02 + Config: configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3plus_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - M-V2-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 8.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x512_160k_ade20k/deeplabv3plus_m-v2-d8_512x512_160k_ade20k_20200825_223255-465a01d4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x512_160k_ade20k/deeplabv3plus_m-v2-d8_512x512_160k_ade20k-20200825_223255.log.json + Paper: + Title: 'MobileNetV2: Inverted Residuals and Linear Bottlenecks' + URL: https://arxiv.org/abs/1801.04381 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mobilenet_v2.py#L14 + Framework: PyTorch
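Note that the metafile above declares no `Collections:` block: the MobileNetV2 models join collections (FCN, PSPNet, DeepLabV3, DeepLabV3+) declared in those methods' own metafiles, so a global index has to be assembled across files. A minimal sketch of that resolution, assuming the `configs/*/metafile.yaml` layout used here (`index_metafiles` is an illustrative name, not an existing helper):

```python
# Sketch: group every model listed in configs/*/metafile.yaml by the
# collection its `In Collection` field names. Collections themselves may be
# declared in a different metafile than the models that reference them.
import glob
import os.path as osp
from collections import defaultdict

from mmengine.fileio import load


def index_metafiles(repo_root: str) -> dict:
    by_collection = defaultdict(list)
    for path in glob.glob(osp.join(repo_root, 'configs', '*', 'metafile.yaml')):
        for model in load(path).get('Models', []):
            by_collection[model['In Collection']].append(model['Name'])
    return by_collection


# e.g. index_metafiles('.')['PSPNet'] would list
# 'mobilenet-v2-d8_pspnet_4xb2-80k_cityscapes-512x1024' among others.
```

Declaring each collection once and referencing it by name keeps the per-backbone metafiles small and avoids duplicating paper and license metadata.

diff --git a/configs/mobilenet_v2/mobilenet_v2.yml deleted file mode 100644 index 6d87401ce8..0000000000 --- a/configs/mobilenet_v2/mobilenet_v2.yml +++ /dev/null @@ -1,170 +0,0 @@ -Models: -- Name: mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024 - In Collection: FCN - Metadata: - backbone: M-V2-D8 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 70.42 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 3.4 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 71.19 - mIoU(ms+flip): 73.34 - Config: configs/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024-20230224_185436-13fef4ea.pth -- Name: mobilenet-v2-d8_pspnet_4xb2-80k_cityscapes-512x1024 - In Collection: PSPNet - Metadata: - backbone: M-V2-D8 - crop size: (512,1024) - lr schd: 80000 -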
inference time (ms/im): - - value: 89.29 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 3.6 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 70.23 - Config: configs/mobilenet_v2/mobilenet-v2-d8_pspnet_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes/pspnet_m-v2-d8_512x1024_80k_cityscapes_20200825_124817-19e81d51.pth -- Name: mobilenet-v2-d8_deeplabv3_4xb2-80k_cityscapes-512x1024 - In Collection: DeepLabV3 - Metadata: - backbone: M-V2-D8 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 119.05 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 3.9 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 73.84 - Config: configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes/deeplabv3_m-v2-d8_512x1024_80k_cityscapes_20200825_124836-bef03590.pth -- Name: mobilenet-v2-d8_deeplabv3plus_4xb2-80k_cityscapes-512x1024 - In Collection: DeepLabV3+ - Metadata: - backbone: M-V2-D8 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 119.05 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 5.1 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 75.2 - Config: configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3plus_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes_20200825_124836-d256dd4b.pth -- Name: mobilenet-v2-d8_fcn_4xb4-160k_ade20k-512x512 - In Collection: FCN - Metadata: - backbone: M-V2-D8 - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 15.53 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 6.5 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 19.71 - Config: configs/mobilenet_v2/mobilenet-v2-d8_fcn_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/fcn_m-v2-d8_512x512_160k_ade20k/fcn_m-v2-d8_512x512_160k_ade20k_20200825_214953-c40e1095.pth -- Name: mobilenet-v2-d8_pspnet_4xb4-160k_ade20k-512x512 - In Collection: PSPNet - Metadata: - backbone: M-V2-D8 - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 17.33 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 6.5 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 29.68 - Config: configs/mobilenet_v2/mobilenet-v2-d8_pspnet_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x512_160k_ade20k/pspnet_m-v2-d8_512x512_160k_ade20k_20200825_214953-f5942f7a.pth -- Name: mobilenet-v2-d8_deeplabv3_4xb4-160k_ade20k-512x512 - In Collection: DeepLabV3 - Metadata: - backbone: M-V2-D8 - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 25.06 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 6.8 - Results: - - 
Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 34.08 - Config: configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x512_160k_ade20k/deeplabv3_m-v2-d8_512x512_160k_ade20k_20200825_223255-63986343.pth -- Name: mobilenet-v2-d8_deeplabv3plus_4xb4-160k_ade20k-512x512 - In Collection: DeepLabV3+ - Metadata: - backbone: M-V2-D8 - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 23.2 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 8.2 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 34.02 - Config: configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3plus_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x512_160k_ade20k/deeplabv3plus_m-v2-d8_512x512_160k_ade20k_20200825_223255-465a01d4.pth diff --git a/configs/mobilenet_v3/README.md b/configs/mobilenet_v3/README.md index c2fed06ccd..8ed0a5692a 100644 --- a/configs/mobilenet_v3/README.md +++ b/configs/mobilenet_v3/README.md @@ -1,6 +1,6 @@ # MobileNetV3 -[Searching for MobileNetV3](https://arxiv.org/abs/1905.02244) +> [Searching for MobileNetV3](https://arxiv.org/abs/1905.02244) ## Introduction @@ -24,6 +24,17 @@ We present the next generation of MobileNets based on a combination of complemen +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------: | -------------- | ------ | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| LRASPP | M-V3-D8 | 512x1024 | 320000 | 8.9 | 15.22 | V100 | 69.54 | 70.89 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mobilenet_v3/mobilenet-v3-d8_lraspp_4xb4-320k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes/lraspp_m-v3-d8_512x1024_320k_cityscapes_20201224_220337-cfe8fb07.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes/lraspp_m-v3-d8_512x1024_320k_cityscapes-20201224_220337.log.json) | +| LRASPP | M-V3-D8 (scratch) | 512x1024 | 320000 | 8.9 | 14.77 | V100 | 67.87 | 69.78 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mobilenet_v3/mobilenet-v3-d8-scratch_lraspp_4xb4-320k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes_20201224_220337-9f29cd72.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes-20201224_220337.log.json) | +| LRASPP | 
M-V3s-D8 | 512x1024 | 320000 | 5.3 | 23.64 | V100 | 64.11 | 66.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mobilenet_v3/mobilenet-v3-d8-s_lraspp_4xb4-320k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_512x1024_320k_cityscapes/lraspp_m-v3s-d8_512x1024_320k_cityscapes_20201224_223935-61565b34.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_512x1024_320k_cityscapes/lraspp_m-v3s-d8_512x1024_320k_cityscapes-20201224_223935.log.json) | +| LRASPP | M-V3s-D8 (scratch) | 512x1024 | 320000 | 5.3 | 24.50 | V100 | 62.74 | 65.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mobilenet_v3/mobilenet-v3-d8-scratch-s_lraspp_4xb4-320k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes_20201224_223935-03daeabb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes-20201224_223935.log.json) | + ## Citation ```bibtex @@ -37,14 +48,3 @@ We present the next generation of MobileNets based on a combination of complemen doi={10.1109/ICCV.2019.00140}} } ``` - -## Results and models - -### Cityscapes - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | ------------------ | --------- | ------: | -------: | -------------- | ----: | ------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| LRASPP | M-V3-D8 | 512x1024 | 320000 | 8.9 | 15.22 | 69.54 | 70.89 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mobilenet_v3/mobilenet-v3-d8_lraspp_4xb4-320k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes/lraspp_m-v3-d8_512x1024_320k_cityscapes_20201224_220337-cfe8fb07.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes/lraspp_m-v3-d8_512x1024_320k_cityscapes-20201224_220337.log.json) | -| LRASPP | M-V3-D8 (scratch) | 512x1024 | 320000 | 8.9 | 14.77 | 67.87 | 69.78 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mobilenet_v3/mobilenet-v3-d8-scratch_lraspp_4xb4-320k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes_20201224_220337-9f29cd72.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes-20201224_220337.log.json) | -| LRASPP | M-V3s-D8 | 512x1024 | 320000 | 5.3 | 23.64 | 64.11 | 66.42 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mobilenet_v3/mobilenet-v3-d8-s_lraspp_4xb4-320k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_512x1024_320k_cityscapes/lraspp_m-v3s-d8_512x1024_320k_cityscapes_20201224_223935-61565b34.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_512x1024_320k_cityscapes/lraspp_m-v3s-d8_512x1024_320k_cityscapes-20201224_223935.log.json) | -| LRASPP | M-V3s-D8 (scratch) | 512x1024 | 320000 | 5.3 | 24.50 | 62.74 | 65.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mobilenet_v3/mobilenet-v3-d8-scratch-s_lraspp_4xb4-320k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes_20201224_223935-03daeabb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes-20201224_223935.log.json) | diff --git a/configs/mobilenet_v3/metafile.yaml b/configs/mobilenet_v3/metafile.yaml new file mode 100644 index 0000000000..0351d3b8e4 --- /dev/null +++ b/configs/mobilenet_v3/metafile.yaml @@ -0,0 +1,109 @@ +Collections: +- Name: LRASPP + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + Paper: + Title: Searching for MobileNetV3 + URL: https://arxiv.org/abs/1905.02244 + README: configs/mobilenet_v3/README.md + Frameworks: + - PyTorch +Models: +- Name: mobilenet-v3-d8_lraspp_4xb4-320k_cityscapes-512x1024 + In Collection: LRASPP + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 69.54 + mIoU(ms+flip): 70.89 + Config: configs/mobilenet_v3/mobilenet-v3-d8_lraspp_4xb4-320k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - M-V3-D8 + - LRASPP + Training Resources: 4x V100 GPUS + Memory (GB): 8.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes/lraspp_m-v3-d8_512x1024_320k_cityscapes_20201224_220337-cfe8fb07.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes/lraspp_m-v3-d8_512x1024_320k_cityscapes-20201224_220337.log.json + Paper: + Title: Searching for MobileNetV3 + URL: https://arxiv.org/abs/1905.02244 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mobilenet_v3.py#L15 + Framework: PyTorch +- Name: mobilenet-v3-d8-scratch_lraspp_4xb4-320k_cityscapes-512x1024 + In Collection: LRASPP + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 67.87 + mIoU(ms+flip): 69.78 + Config: configs/mobilenet_v3/mobilenet-v3-d8-scratch_lraspp_4xb4-320k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - M-V3-D8 + - LRASPP + Training Resources: 4x V100 GPUS + Memory (GB): 8.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes_20201224_220337-9f29cd72.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes-20201224_220337.log.json + Paper: + Title: Searching for MobileNetV3 + URL: 
https://arxiv.org/abs/1905.02244 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mobilenet_v3.py#L15 + Framework: PyTorch +- Name: mobilenet-v3-d8-s_lraspp_4xb4-320k_cityscapes-512x1024 + In Collection: LRASPP + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 64.11 + mIoU(ms+flip): 66.42 + Config: configs/mobilenet_v3/mobilenet-v3-d8-s_lraspp_4xb4-320k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - M-V3s-D8 + - LRASPP + Training Resources: 4x V100 GPUS + Memory (GB): 5.3 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_512x1024_320k_cityscapes/lraspp_m-v3s-d8_512x1024_320k_cityscapes_20201224_223935-61565b34.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_512x1024_320k_cityscapes/lraspp_m-v3s-d8_512x1024_320k_cityscapes-20201224_223935.log.json + Paper: + Title: Searching for MobileNetV3 + URL: https://arxiv.org/abs/1905.02244 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mobilenet_v3.py#L15 + Framework: PyTorch +- Name: mobilenet-v3-d8-scratch-s_lraspp_4xb4-320k_cityscapes-512x1024 + In Collection: LRASPP + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 62.74 + mIoU(ms+flip): 65.01 + Config: configs/mobilenet_v3/mobilenet-v3-d8-scratch-s_lraspp_4xb4-320k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - M-V3s-D8 + - LRASPP + Training Resources: 4x V100 GPUS + Memory (GB): 5.3 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes_20201224_223935-03daeabb.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes-20201224_223935.log.json + Paper: + Title: Searching for MobileNetV3 + URL: https://arxiv.org/abs/1905.02244 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mobilenet_v3.py#L15 + Framework: PyTorch diff --git a/configs/mobilenet_v3/mobilenet_v3.yml b/configs/mobilenet_v3/mobilenet_v3.yml deleted file mode 100644 index 067a150cea..0000000000 --- a/configs/mobilenet_v3/mobilenet_v3.yml +++ /dev/null @@ -1,103 +0,0 @@ -Collections: -- Name: LRASPP - Metadata: - Training Data: - - Cityscapes - Paper: - URL: https://arxiv.org/abs/1905.02244 - Title: Searching for MobileNetV3 - README: configs/mobilenet_v3/README.md - Code: - URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mobilenet_v3.py#L15 - Version: v0.17.0 - Converted From: - Code: https://github.com/tensorflow/models/tree/master/research/deeplab -Models: -- Name: mobilenet-v3-d8_lraspp_4xb4-320k_cityscapes-512x1024 - In Collection: LRASPP - Metadata: - backbone: M-V3-D8 - crop size: (512,1024) - lr schd: 320000 - inference time (ms/im): - - value: 65.7 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 8.9 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 69.54 - mIoU(ms+flip): 70.89 - Config: configs/mobilenet_v3/mobilenet-v3-d8_lraspp_4xb4-320k_cityscapes-512x1024.py - Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes/lraspp_m-v3-d8_512x1024_320k_cityscapes_20201224_220337-cfe8fb07.pth -- Name: mobilenet-v3-d8-scratch_lraspp_4xb4-320k_cityscapes-512x1024 - In Collection: LRASPP - Metadata: - backbone: M-V3-D8 (scratch) - crop size: (512,1024) - lr schd: 320000 - inference time (ms/im): - - value: 67.7 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 8.9 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 67.87 - mIoU(ms+flip): 69.78 - Config: configs/mobilenet_v3/mobilenet-v3-d8-scratch_lraspp_4xb4-320k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes_20201224_220337-9f29cd72.pth -- Name: mobilenet-v3-d8-s_lraspp_4xb4-320k_cityscapes-512x1024 - In Collection: LRASPP - Metadata: - backbone: M-V3s-D8 - crop size: (512,1024) - lr schd: 320000 - inference time (ms/im): - - value: 42.3 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 5.3 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 64.11 - mIoU(ms+flip): 66.42 - Config: configs/mobilenet_v3/mobilenet-v3-d8-s_lraspp_4xb4-320k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_512x1024_320k_cityscapes/lraspp_m-v3s-d8_512x1024_320k_cityscapes_20201224_223935-61565b34.pth -- Name: mobilenet-v3-d8-scratch-s_lraspp_4xb4-320k_cityscapes-512x1024 - In Collection: LRASPP - Metadata: - backbone: M-V3s-D8 (scratch) - crop size: (512,1024) - lr schd: 320000 - inference time (ms/im): - - value: 40.82 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 5.3 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 62.74 - mIoU(ms+flip): 65.01 - Config: configs/mobilenet_v3/mobilenet-v3-d8-scratch-s_lraspp_4xb4-320k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes_20201224_223935-03daeabb.pth diff --git a/configs/nonlocal_net/README.md b/configs/nonlocal_net/README.md index 80d45ab589..4c3f49f981 100644 --- a/configs/nonlocal_net/README.md +++ b/configs/nonlocal_net/README.md @@ -1,6 +1,6 @@ # NonLocal Net -[Non-local Neural Networks](https://arxiv.org/abs/1711.07971) +> [Non-local Neural Networks](https://arxiv.org/abs/1711.07971) ## Introduction @@ -22,6 +22,39 @@ Both convolutional and recurrent operations are building blocks that process one +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ----------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------- | -------------------------------------------------------------------------------------------------------------------------------------- | 
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| NonLocalNet | R-50-D8 | 512x1024 | 40000 | 7.4 | 2.72 | V100 | 78.24 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net/nonlocal_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes/nonlocal_r50-d8_512x1024_40k_cityscapes_20200605_210748-c75e81e3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes/nonlocal_r50-d8_512x1024_40k_cityscapes_20200605_210748.log.json) | +| NonLocalNet | R-101-D8 | 512x1024 | 40000 | 10.9 | 1.95 | V100 | 78.66 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net/nonlocal_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes/nonlocal_r101-d8_512x1024_40k_cityscapes_20200605_210748-d63729fa.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes/nonlocal_r101-d8_512x1024_40k_cityscapes_20200605_210748.log.json) | +| NonLocalNet | R-50-D8 | 769x769 | 40000 | 8.9 | 1.52 | V100 | 78.33 | 79.92 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net/nonlocal_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes/nonlocal_r50-d8_769x769_40k_cityscapes_20200530_045243-82ef6749.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes/nonlocal_r50-d8_769x769_40k_cityscapes_20200530_045243.log.json) | +| NonLocalNet | R-101-D8 | 769x769 | 40000 | 12.8 | 1.05 | V100 | 78.57 | 80.29 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net/nonlocal_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes/nonlocal_r101-d8_769x769_40k_cityscapes_20200530_045348-8fe9a9dc.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes/nonlocal_r101-d8_769x769_40k_cityscapes_20200530_045348.log.json) | +| NonLocalNet | R-50-D8 | 512x1024 | 80000 | - | - | V100 | 78.01 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net/nonlocal_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes/nonlocal_r50-d8_512x1024_80k_cityscapes_20200607_193518-d6839fae.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes/nonlocal_r50-d8_512x1024_80k_cityscapes_20200607_193518.log.json) | +| NonLocalNet | R-101-D8 | 512x1024 | 80000 | - | - | V100 | 78.93 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net/nonlocal_r101-d8_4xb2-80k_cityscapes-512x1024.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes/nonlocal_r101-d8_512x1024_80k_cityscapes_20200607_183411-32700183.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes/nonlocal_r101-d8_512x1024_80k_cityscapes_20200607_183411.log.json) | +| NonLocalNet | R-50-D8 | 769x769 | 80000 | - | - | V100 | 79.05 | 80.68 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net/nonlocal_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes/nonlocal_r50-d8_769x769_80k_cityscapes_20200607_193506-1f9792f6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes/nonlocal_r50-d8_769x769_80k_cityscapes_20200607_193506.log.json) | +| NonLocalNet | R-101-D8 | 769x769 | 80000 | - | - | V100 | 79.40 | 80.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net/nonlocal_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes/nonlocal_r101-d8_769x769_80k_cityscapes_20200607_183428-0e1fa4f9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes/nonlocal_r101-d8_769x769_80k_cityscapes_20200607_183428.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ----------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| NonLocalNet | R-50-D8 | 512x512 | 80000 | 9.1 | 21.37 | V100 | 40.75 | 42.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net/nonlocal_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k/nonlocal_r50-d8_512x512_80k_ade20k_20200615_015801-5ae0aa33.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k/nonlocal_r50-d8_512x512_80k_ade20k_20200615_015801.log.json) | +| NonLocalNet | R-101-D8 | 512x512 | 80000 | 12.6 | 13.97 | V100 | 42.90 | 44.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net/nonlocal_r101-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k/nonlocal_r101-d8_512x512_80k_ade20k_20200615_015758-24105919.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k/nonlocal_r101-d8_512x512_80k_ade20k_20200615_015758.log.json) | +| NonLocalNet | R-50-D8 | 512x512 | 160000 | - | - | V100 | 42.03 | 43.04 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net/nonlocal_r50-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k/nonlocal_r50-d8_512x512_160k_ade20k_20200616_005410-baef45e3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k/nonlocal_r50-d8_512x512_160k_ade20k_20200616_005410.log.json) | +| NonLocalNet | R-101-D8 | 512x512 | 160000 | - | - | V100 | 44.63 | 45.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net/nonlocal_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k/nonlocal_r101-d8_512x512_160k_ade20k_20210827_221502-7881aa1a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k/nonlocal_r101-d8_512x512_160k_ade20k_20210827_221502.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ----------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| NonLocalNet | R-50-D8 | 512x512 | 20000 | 6.4 | 21.21 | V100 | 76.20 | 77.12 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net/nonlocal_r50-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug/nonlocal_r50-d8_512x512_20k_voc12aug_20200617_222613-07f2a57c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug/nonlocal_r50-d8_512x512_20k_voc12aug_20200617_222613.log.json) | +| NonLocalNet | R-101-D8 | 512x512 | 20000 | 9.8 | 14.01 | V100 | 78.15 | 78.86 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net/nonlocal_r101-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug/nonlocal_r101-d8_512x512_20k_voc12aug_20200617_222615-948c68ab.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug/nonlocal_r101-d8_512x512_20k_voc12aug_20200617_222615.log.json) | +| NonLocalNet | R-50-D8 | 512x512 | 40000 | - | - | V100 | 76.65 | 77.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net/nonlocal_r50-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug/nonlocal_r50-d8_512x512_40k_voc12aug_20200614_000028-0139d4a9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug/nonlocal_r50-d8_512x512_40k_voc12aug_20200614_000028.log.json) | +| NonLocalNet | R-101-D8 | 512x512 | 40000 | - | - | V100 
| 78.27 | 79.12 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net/nonlocal_r101-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug/nonlocal_r101-d8_512x512_40k_voc12aug_20200614_000028-7e5ff470.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug/nonlocal_r101-d8_512x512_40k_voc12aug_20200614_000028.log.json) | + ## Citation ```bibtex @@ -33,36 +66,3 @@ Both convolutional and recurrent operations are building blocks that process one year={2018} } ``` - -## Results and models - -### Cityscapes - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ----------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------- | ----------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| NonLocalNet | R-50-D8 | 512x1024 | 40000 | 7.4 | 2.72 | 78.24 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/nonlocal_net/nonlocal_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes/nonlocal_r50-d8_512x1024_40k_cityscapes_20200605_210748-c75e81e3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes/nonlocal_r50-d8_512x1024_40k_cityscapes_20200605_210748.log.json) | -| NonLocalNet | R-101-D8 | 512x1024 | 40000 | 10.9 | 1.95 | 78.66 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/nonlocal_net/nonlocal_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes/nonlocal_r101-d8_512x1024_40k_cityscapes_20200605_210748-d63729fa.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes/nonlocal_r101-d8_512x1024_40k_cityscapes_20200605_210748.log.json) | -| NonLocalNet | R-50-D8 | 769x769 | 40000 | 8.9 | 1.52 | 78.33 | 79.92 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/nonlocal_net/nonlocal_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes/nonlocal_r50-d8_769x769_40k_cityscapes_20200530_045243-82ef6749.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes/nonlocal_r50-d8_769x769_40k_cityscapes_20200530_045243.log.json) | -| NonLocalNet | R-101-D8 | 769x769 | 40000 | 12.8 | 1.05 | 78.57 | 80.29 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/nonlocal_net/nonlocal_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes/nonlocal_r101-d8_769x769_40k_cityscapes_20200530_045348-8fe9a9dc.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes/nonlocal_r101-d8_769x769_40k_cityscapes_20200530_045348.log.json) | -| NonLocalNet | R-50-D8 | 512x1024 | 80000 | - | - | 78.01 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/nonlocal_net/nonlocal_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes/nonlocal_r50-d8_512x1024_80k_cityscapes_20200607_193518-d6839fae.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes/nonlocal_r50-d8_512x1024_80k_cityscapes_20200607_193518.log.json) | -| NonLocalNet | R-101-D8 | 512x1024 | 80000 | - | - | 78.93 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/nonlocal_net/nonlocal_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes/nonlocal_r101-d8_512x1024_80k_cityscapes_20200607_183411-32700183.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes/nonlocal_r101-d8_512x1024_80k_cityscapes_20200607_183411.log.json) | -| NonLocalNet | R-50-D8 | 769x769 | 80000 | - | - | 79.05 | 80.68 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/nonlocal_net/nonlocal_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes/nonlocal_r50-d8_769x769_80k_cityscapes_20200607_193506-1f9792f6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes/nonlocal_r50-d8_769x769_80k_cityscapes_20200607_193506.log.json) | -| NonLocalNet | R-101-D8 | 769x769 | 80000 | - | - | 79.40 | 80.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/nonlocal_net/nonlocal_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes/nonlocal_r101-d8_769x769_80k_cityscapes_20200607_183428-0e1fa4f9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes/nonlocal_r101-d8_769x769_80k_cityscapes_20200607_183428.log.json) | - -### ADE20K - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ----------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| NonLocalNet | R-50-D8 | 512x512 | 80000 | 9.1 | 21.37 | 40.75 | 42.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/nonlocal_net/nonlocal_r50-d8_4xb4-80k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k/nonlocal_r50-d8_512x512_80k_ade20k_20200615_015801-5ae0aa33.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k/nonlocal_r50-d8_512x512_80k_ade20k_20200615_015801.log.json) | -| NonLocalNet | R-101-D8 | 512x512 | 80000 | 12.6 | 13.97 | 42.90 | 44.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/nonlocal_net/nonlocal_r101-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k/nonlocal_r101-d8_512x512_80k_ade20k_20200615_015758-24105919.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k/nonlocal_r101-d8_512x512_80k_ade20k_20200615_015758.log.json) | -| NonLocalNet | R-50-D8 | 512x512 | 160000 | - | - | 42.03 | 43.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/nonlocal_net/nonlocal_r50-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k/nonlocal_r50-d8_512x512_160k_ade20k_20200616_005410-baef45e3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k/nonlocal_r50-d8_512x512_160k_ade20k_20200616_005410.log.json) | -| NonLocalNet | R-101-D8 | 512x512 | 160000 | - | - | 44.63 | 45.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/nonlocal_net/nonlocal_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k/nonlocal_r101-d8_512x512_160k_ade20k_20210827_221502-7881aa1a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k/nonlocal_r101-d8_512x512_160k_ade20k_20210827_221502.log.json) | - -### Pascal VOC 2012 + Aug - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ----------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| NonLocalNet | R-50-D8 | 512x512 | 20000 | 6.4 | 21.21 | 76.20 | 77.12 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/nonlocal_net/nonlocal_r50-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug/nonlocal_r50-d8_512x512_20k_voc12aug_20200617_222613-07f2a57c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug/nonlocal_r50-d8_512x512_20k_voc12aug_20200617_222613.log.json) | -| NonLocalNet | R-101-D8 | 512x512 | 20000 | 9.8 | 14.01 | 78.15 | 78.86 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/nonlocal_net/nonlocal_r101-d8_4xb4-20k_voc12aug-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug/nonlocal_r101-d8_512x512_20k_voc12aug_20200617_222615-948c68ab.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug/nonlocal_r101-d8_512x512_20k_voc12aug_20200617_222615.log.json) | -| NonLocalNet | R-50-D8 | 512x512 | 40000 | - | - | 76.65 | 77.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/nonlocal_net/nonlocal_r50-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug/nonlocal_r50-d8_512x512_40k_voc12aug_20200614_000028-0139d4a9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug/nonlocal_r50-d8_512x512_40k_voc12aug_20200614_000028.log.json) | -| NonLocalNet | R-101-D8 | 512x512 | 40000 | - | - | 78.27 | 79.12 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/nonlocal_net/nonlocal_r101-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug/nonlocal_r101-d8_512x512_40k_voc12aug_20200614_000028-7e5ff470.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug/nonlocal_r101-d8_512x512_40k_voc12aug_20200614_000028.log.json) | diff --git a/configs/nonlocal_net/metafile.yaml b/configs/nonlocal_net/metafile.yaml new file mode 100644 index 0000000000..69bd72570b --- /dev/null +++ b/configs/nonlocal_net/metafile.yaml @@ -0,0 +1,387 @@ +Collections: +- Name: NonLocalNet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + Paper: + Title: Non-local Neural Networks + URL: https://arxiv.org/abs/1711.07971 + README: configs/nonlocal_net/README.md + Frameworks: + - PyTorch +Models: +- Name: nonlocal_r50-d8_4xb2-40k_cityscapes-512x1024 + In Collection: NonLocalNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.24 + Config: configs/nonlocal_net/nonlocal_r50-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - NonLocalNet + Training Resources: 4x V100 GPUS + Memory (GB): 7.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes/nonlocal_r50-d8_512x1024_40k_cityscapes_20200605_210748-c75e81e3.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes/nonlocal_r50-d8_512x1024_40k_cityscapes_20200605_210748.log.json + Paper: + Title: Non-local Neural Networks + URL: https://arxiv.org/abs/1711.07971 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 + Framework: PyTorch +- Name: nonlocal_r101-d8_4xb2-40k_cityscapes-512x1024 + In Collection: NonLocalNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.66 + Config: configs/nonlocal_net/nonlocal_r101-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - NonLocalNet + Training Resources: 4x V100 GPUS + Memory (GB): 10.9 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes/nonlocal_r101-d8_512x1024_40k_cityscapes_20200605_210748-d63729fa.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes/nonlocal_r101-d8_512x1024_40k_cityscapes_20200605_210748.log.json + Paper: + Title: Non-local Neural Networks + URL: https://arxiv.org/abs/1711.07971 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 + Framework: PyTorch +- Name: nonlocal_r50-d8_4xb2-40k_cityscapes-769x769 + In Collection: NonLocalNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.33 + mIoU(ms+flip): 79.92 + Config: configs/nonlocal_net/nonlocal_r50-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - NonLocalNet + Training Resources: 4x V100 GPUS + Memory (GB): 8.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes/nonlocal_r50-d8_769x769_40k_cityscapes_20200530_045243-82ef6749.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes/nonlocal_r50-d8_769x769_40k_cityscapes_20200530_045243.log.json + Paper: + Title: Non-local Neural Networks + URL: https://arxiv.org/abs/1711.07971 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 + Framework: PyTorch +- Name: nonlocal_r101-d8_4xb2-40k_cityscapes-769x769 + In Collection: NonLocalNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.57 + mIoU(ms+flip): 80.29 + Config: configs/nonlocal_net/nonlocal_r101-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - NonLocalNet + Training Resources: 4x V100 GPUS + Memory (GB): 12.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes/nonlocal_r101-d8_769x769_40k_cityscapes_20200530_045348-8fe9a9dc.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes/nonlocal_r101-d8_769x769_40k_cityscapes_20200530_045348.log.json + Paper: + Title: Non-local Neural Networks + URL: https://arxiv.org/abs/1711.07971 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 + Framework: PyTorch +- Name: nonlocal_r50-d8_4xb2-80k_cityscapes-512x1024 + In Collection: NonLocalNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.01 + Config: configs/nonlocal_net/nonlocal_r50-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - NonLocalNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes/nonlocal_r50-d8_512x1024_80k_cityscapes_20200607_193518-d6839fae.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes/nonlocal_r50-d8_512x1024_80k_cityscapes_20200607_193518.log.json + Paper: + Title: Non-local Neural Networks + URL: https://arxiv.org/abs/1711.07971 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 + Framework: PyTorch +- 
Name: nonlocal_r101-d8_4xb2-80k_cityscapes-512x1024 + In Collection: NonLocalNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.93 + Config: configs/nonlocal_net/nonlocal_r101-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - NonLocalNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes/nonlocal_r101-d8_512x1024_80k_cityscapes_20200607_183411-32700183.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes/nonlocal_r101-d8_512x1024_80k_cityscapes_20200607_183411.log.json + Paper: + Title: Non-local Neural Networks + URL: https://arxiv.org/abs/1711.07971 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 + Framework: PyTorch +- Name: nonlocal_r50-d8_4xb2-80k_cityscapes-769x769 + In Collection: NonLocalNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.05 + mIoU(ms+flip): 80.68 + Config: configs/nonlocal_net/nonlocal_r50-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - NonLocalNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes/nonlocal_r50-d8_769x769_80k_cityscapes_20200607_193506-1f9792f6.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes/nonlocal_r50-d8_769x769_80k_cityscapes_20200607_193506.log.json + Paper: + Title: Non-local Neural Networks + URL: https://arxiv.org/abs/1711.07971 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 + Framework: PyTorch +- Name: nonlocal_r101-d8_4xb2-80k_cityscapes-769x769 + In Collection: NonLocalNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.4 + mIoU(ms+flip): 80.85 + Config: configs/nonlocal_net/nonlocal_r101-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - NonLocalNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes/nonlocal_r101-d8_769x769_80k_cityscapes_20200607_183428-0e1fa4f9.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes/nonlocal_r101-d8_769x769_80k_cityscapes_20200607_183428.log.json + Paper: + Title: Non-local Neural Networks + URL: https://arxiv.org/abs/1711.07971 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 + Framework: PyTorch +- Name: nonlocal_r50-d8_4xb4-80k_ade20k-512x512 + In Collection: NonLocalNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 40.75 + mIoU(ms+flip): 42.05 + Config: configs/nonlocal_net/nonlocal_r50-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - NonLocalNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k/nonlocal_r50-d8_512x512_80k_ade20k_20200615_015801-5ae0aa33.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k/nonlocal_r50-d8_512x512_80k_ade20k_20200615_015801.log.json + Paper: + Title: Non-local Neural Networks + URL: https://arxiv.org/abs/1711.07971 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 + Framework: PyTorch +- Name: nonlocal_r101-d8_4xb4-80k_ade20k-512x512 + In Collection: NonLocalNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.9 + mIoU(ms+flip): 44.27 + Config: configs/nonlocal_net/nonlocal_r101-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - NonLocalNet + Training Resources: 4x V100 GPUS + Memory (GB): 12.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k/nonlocal_r101-d8_512x512_80k_ade20k_20200615_015758-24105919.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k/nonlocal_r101-d8_512x512_80k_ade20k_20200615_015758.log.json + Paper: + Title: Non-local Neural Networks + URL: https://arxiv.org/abs/1711.07971 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 + Framework: PyTorch +- Name: nonlocal_r50-d8_4xb4-160k_ade20k-512x512 + In Collection: NonLocalNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.03 + mIoU(ms+flip): 43.04 + Config: configs/nonlocal_net/nonlocal_r50-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - NonLocalNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k/nonlocal_r50-d8_512x512_160k_ade20k_20200616_005410-baef45e3.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k/nonlocal_r50-d8_512x512_160k_ade20k_20200616_005410.log.json + Paper: + Title: Non-local Neural Networks + URL: https://arxiv.org/abs/1711.07971 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 + Framework: PyTorch +- Name: nonlocal_r101-d8_4xb4-160k_ade20k-512x512 + In Collection: NonLocalNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 44.63 + mIoU(ms+flip): 45.79 + Config: configs/nonlocal_net/nonlocal_r101-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - NonLocalNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k/nonlocal_r101-d8_512x512_160k_ade20k_20210827_221502-7881aa1a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k/nonlocal_r101-d8_512x512_160k_ade20k_20210827_221502.log.json + Paper: + Title: Non-local Neural Networks + URL: https://arxiv.org/abs/1711.07971 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 + Framework: PyTorch +- Name: nonlocal_r50-d8_4xb4-20k_voc12aug-512x512 + In Collection: NonLocalNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.2 + mIoU(ms+flip): 77.12 + Config: configs/nonlocal_net/nonlocal_r50-d8_4xb4-20k_voc12aug-512x512.py + 
Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - NonLocalNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug/nonlocal_r50-d8_512x512_20k_voc12aug_20200617_222613-07f2a57c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug/nonlocal_r50-d8_512x512_20k_voc12aug_20200617_222613.log.json + Paper: + Title: Non-local Neural Networks + URL: https://arxiv.org/abs/1711.07971 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 + Framework: PyTorch +- Name: nonlocal_r101-d8_4xb4-20k_voc12aug-512x512 + In Collection: NonLocalNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 78.15 + mIoU(ms+flip): 78.86 + Config: configs/nonlocal_net/nonlocal_r101-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - NonLocalNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug/nonlocal_r101-d8_512x512_20k_voc12aug_20200617_222615-948c68ab.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug/nonlocal_r101-d8_512x512_20k_voc12aug_20200617_222615.log.json + Paper: + Title: Non-local Neural Networks + URL: https://arxiv.org/abs/1711.07971 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 + Framework: PyTorch +- Name: nonlocal_r50-d8_4xb4-40k_voc12aug-512x512 + In Collection: NonLocalNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.65 + mIoU(ms+flip): 77.47 + Config: configs/nonlocal_net/nonlocal_r50-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - NonLocalNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug/nonlocal_r50-d8_512x512_40k_voc12aug_20200614_000028-0139d4a9.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug/nonlocal_r50-d8_512x512_40k_voc12aug_20200614_000028.log.json + Paper: + Title: Non-local Neural Networks + URL: https://arxiv.org/abs/1711.07971 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 + Framework: PyTorch +- Name: nonlocal_r101-d8_4xb4-40k_voc12aug-512x512 + In Collection: NonLocalNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 78.27 + mIoU(ms+flip): 79.12 + Config: configs/nonlocal_net/nonlocal_r101-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - NonLocalNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug/nonlocal_r101-d8_512x512_40k_voc12aug_20200614_000028-7e5ff470.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug/nonlocal_r101-d8_512x512_40k_voc12aug_20200614_000028.log.json + Paper: + Title: 
Non-local Neural Networks + URL: https://arxiv.org/abs/1711.07971 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 + Framework: PyTorch diff --git a/configs/nonlocal_net/nonlocal_net.yml b/configs/nonlocal_net/nonlocal_net.yml deleted file mode 100644 index 22f32c5abb..0000000000 --- a/configs/nonlocal_net/nonlocal_net.yml +++ /dev/null @@ -1,301 +0,0 @@ -Collections: -- Name: NonLocalNet - Metadata: - Training Data: - - Cityscapes - - ADE20K - - Pascal VOC 2012 + Aug - Paper: - URL: https://arxiv.org/abs/1711.07971 - Title: Non-local Neural Networks - README: configs/nonlocal_net/README.md - Code: - URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 - Version: v0.17.0 - Converted From: - Code: https://github.com/facebookresearch/video-nonlocal-net -Models: -- Name: nonlocal_r50-d8_4xb2-40k_cityscapes-512x1024 - In Collection: NonLocalNet - Metadata: - backbone: R-50-D8 - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 367.65 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 7.4 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.24 - Config: configs/nonlocal_net/nonlocal_r50-d8_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes/nonlocal_r50-d8_512x1024_40k_cityscapes_20200605_210748-c75e81e3.pth -- Name: nonlocal_r101-d8_4xb2-40k_cityscapes-512x1024 - In Collection: NonLocalNet - Metadata: - backbone: R-101-D8 - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 512.82 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 10.9 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.66 - Config: configs/nonlocal_net/nonlocal_r101-d8_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes/nonlocal_r101-d8_512x1024_40k_cityscapes_20200605_210748-d63729fa.pth -- Name: nonlocal_r50-d8_4xb2-40k_cityscapes-769x769 - In Collection: NonLocalNet - Metadata: - backbone: R-50-D8 - crop size: (769,769) - lr schd: 40000 - inference time (ms/im): - - value: 657.89 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 8.9 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.33 - mIoU(ms+flip): 79.92 - Config: configs/nonlocal_net/nonlocal_r50-d8_4xb2-40k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes/nonlocal_r50-d8_769x769_40k_cityscapes_20200530_045243-82ef6749.pth -- Name: nonlocal_r101-d8_4xb2-40k_cityscapes-769x769 - In Collection: NonLocalNet - Metadata: - backbone: R-101-D8 - crop size: (769,769) - lr schd: 40000 - inference time (ms/im): - - value: 952.38 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 12.8 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.57 - mIoU(ms+flip): 80.29 - Config: configs/nonlocal_net/nonlocal_r101-d8_4xb2-40k_cityscapes-769x769.py - Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes/nonlocal_r101-d8_769x769_40k_cityscapes_20200530_045348-8fe9a9dc.pth -- Name: nonlocal_r50-d8_4xb2-80k_cityscapes-512x1024 - In Collection: NonLocalNet - Metadata: - backbone: R-50-D8 - crop size: (512,1024) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.01 - Config: configs/nonlocal_net/nonlocal_r50-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes/nonlocal_r50-d8_512x1024_80k_cityscapes_20200607_193518-d6839fae.pth -- Name: nonlocal_r101-d8_4xb2-80k_cityscapes-512x1024 - In Collection: NonLocalNet - Metadata: - backbone: R-101-D8 - crop size: (512,1024) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.93 - Config: configs/nonlocal_net/nonlocal_r101-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes/nonlocal_r101-d8_512x1024_80k_cityscapes_20200607_183411-32700183.pth -- Name: nonlocal_r50-d8_4xb2-80k_cityscapes-769x769 - In Collection: NonLocalNet - Metadata: - backbone: R-50-D8 - crop size: (769,769) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.05 - mIoU(ms+flip): 80.68 - Config: configs/nonlocal_net/nonlocal_r50-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes/nonlocal_r50-d8_769x769_80k_cityscapes_20200607_193506-1f9792f6.pth -- Name: nonlocal_r101-d8_4xb2-80k_cityscapes-769x769 - In Collection: NonLocalNet - Metadata: - backbone: R-101-D8 - crop size: (769,769) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.4 - mIoU(ms+flip): 80.85 - Config: configs/nonlocal_net/nonlocal_r101-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes/nonlocal_r101-d8_769x769_80k_cityscapes_20200607_183428-0e1fa4f9.pth -- Name: nonlocal_r50-d8_4xb4-80k_ade20k-512x512 - In Collection: NonLocalNet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 46.79 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 9.1 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 40.75 - mIoU(ms+flip): 42.05 - Config: configs/nonlocal_net/nonlocal_r50-d8_4xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k/nonlocal_r50-d8_512x512_80k_ade20k_20200615_015801-5ae0aa33.pth -- Name: nonlocal_r101-d8_4xb4-80k_ade20k-512x512 - In Collection: NonLocalNet - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 71.58 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 12.6 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 42.9 - mIoU(ms+flip): 44.27 - Config: configs/nonlocal_net/nonlocal_r101-d8_4xb4-80k_ade20k-512x512.py - Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k/nonlocal_r101-d8_512x512_80k_ade20k_20200615_015758-24105919.pth -- Name: nonlocal_r50-d8_4xb4-160k_ade20k-512x512 - In Collection: NonLocalNet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 42.03 - mIoU(ms+flip): 43.04 - Config: configs/nonlocal_net/nonlocal_r50-d8_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k/nonlocal_r50-d8_512x512_160k_ade20k_20200616_005410-baef45e3.pth -- Name: nonlocal_r101-d8_4xb4-160k_ade20k-512x512 - In Collection: NonLocalNet - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 44.63 - mIoU(ms+flip): 45.79 - Config: configs/nonlocal_net/nonlocal_r101-d8_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k/nonlocal_r101-d8_512x512_160k_ade20k_20210827_221502-7881aa1a.pth -- Name: nonlocal_r50-d8_4xb4-20k_voc12aug-512x512 - In Collection: NonLocalNet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 20000 - inference time (ms/im): - - value: 47.15 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 6.4 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 76.2 - mIoU(ms+flip): 77.12 - Config: configs/nonlocal_net/nonlocal_r50-d8_4xb4-20k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug/nonlocal_r50-d8_512x512_20k_voc12aug_20200617_222613-07f2a57c.pth -- Name: nonlocal_r101-d8_4xb4-20k_voc12aug-512x512 - In Collection: NonLocalNet - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 20000 - inference time (ms/im): - - value: 71.38 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 9.8 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 78.15 - mIoU(ms+flip): 78.86 - Config: configs/nonlocal_net/nonlocal_r101-d8_4xb4-20k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug/nonlocal_r101-d8_512x512_20k_voc12aug_20200617_222615-948c68ab.pth -- Name: nonlocal_r50-d8_4xb4-40k_voc12aug-512x512 - In Collection: NonLocalNet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 40000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 76.65 - mIoU(ms+flip): 77.47 - Config: configs/nonlocal_net/nonlocal_r50-d8_4xb4-40k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug/nonlocal_r50-d8_512x512_40k_voc12aug_20200614_000028-0139d4a9.pth -- Name: nonlocal_r101-d8_4xb4-40k_voc12aug-512x512 - In Collection: NonLocalNet - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 40000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 78.27 - mIoU(ms+flip): 79.12 - Config: configs/nonlocal_net/nonlocal_r101-d8_4xb4-40k_voc12aug-512x512.py - Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug/nonlocal_r101-d8_512x512_40k_voc12aug_20200614_000028-7e5ff470.pth diff --git a/configs/ocrnet/README.md b/configs/ocrnet/README.md index 4bd9c7d0b0..628a3b1597 100644 --- a/configs/ocrnet/README.md +++ b/configs/ocrnet/README.md @@ -1,6 +1,6 @@ # OCRNet -[Object-Contextual Representations for Semantic Segmentation](https://arxiv.org/abs/1909.11065) +> [Object-Contextual Representations for Semantic Segmentation](https://arxiv.org/abs/1909.11065) ## Introduction @@ -22,6 +22,54 @@ In this paper, we address the problem of semantic segmentation and focus on the +## Results and models + +### Cityscapes + +#### HRNet backbone +
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download |
+| ------ | ------------------ | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| OCRNet | HRNetV2p-W18-Small | 512x1024 | 40000 | 3.5 | 10.45 | A100 | 76.61 | 78.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024_20230227_145026-6c052a14.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024_20230227_145026.json) |
+| OCRNet | HRNetV2p-W18 | 512x1024 | 40000 | 4.7 | 7.50 | V100 | 77.72 | 79.49 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr18_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes/ocrnet_hr18_512x1024_40k_cityscapes_20200601_033320-401c5bdd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes/ocrnet_hr18_512x1024_40k_cityscapes_20200601_033320.log.json) |
+| OCRNet | HRNetV2p-W48 | 512x1024 | 40000 | 8 | 4.22 | V100 | 80.58 | 81.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr48_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes/ocrnet_hr48_512x1024_40k_cityscapes_20200601_033336-55b32491.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes/ocrnet_hr48_512x1024_40k_cityscapes_20200601_033336.log.json) |
+| OCRNet | HRNetV2p-W18-Small | 512x1024 | 80000 | - | - | V100 | 77.16 | 78.66 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr18s_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes/ocrnet_hr18s_512x1024_80k_cityscapes_20200601_222735-55979e63.pth) \|
[log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes/ocrnet_hr18s_512x1024_80k_cityscapes_20200601_222735.log.json) | +| OCRNet | HRNetV2p-W18 | 512x1024 | 80000 | - | - | V100 | 78.57 | 80.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr18_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes/ocrnet_hr18_512x1024_80k_cityscapes_20200614_230521-c2e1dd4a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes/ocrnet_hr18_512x1024_80k_cityscapes_20200614_230521.log.json) | +| OCRNet | HRNetV2p-W48 | 512x1024 | 80000 | - | - | V100 | 80.70 | 81.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr48_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes/ocrnet_hr48_512x1024_80k_cityscapes_20200601_222752-9076bcdf.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes/ocrnet_hr48_512x1024_80k_cityscapes_20200601_222752.log.json) | +| OCRNet | HRNetV2p-W18-Small | 512x1024 | 160000 | - | - | V100 | 78.45 | 79.97 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr18s_4xb2-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes/ocrnet_hr18s_512x1024_160k_cityscapes_20200602_191005-f4a7af28.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes/ocrnet_hr18s_512x1024_160k_cityscapes_20200602_191005.log.json) | +| OCRNet | HRNetV2p-W18 | 512x1024 | 160000 | - | - | V100 | 79.47 | 80.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr18_4xb2-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes/ocrnet_hr18_512x1024_160k_cityscapes_20200602_191001-b9172d0c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes/ocrnet_hr18_512x1024_160k_cityscapes_20200602_191001.log.json) | +| OCRNet | HRNetV2p-W48 | 512x1024 | 160000 | - | - | V100 | 81.35 | 82.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr48_4xb2-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes/ocrnet_hr48_512x1024_160k_cityscapes_20200602_191037-dfbf1b0c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes/ocrnet_hr48_512x1024_160k_cityscapes_20200602_191037.log.json) | + +#### ResNet backbone + +| Method | Backbone | Crop Size | Batch Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ---------- | ------- | -------- | -------------- | ------ | ----- | ------------: | ------------------------------------------------------------------------------------------------------------------------------ | 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| OCRNet | R-101-D8 | 512x1024 | 8 | 40000 | - | - | V100 | 80.09 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes/ocrnet_r101-d8_512x1024_40k_b8_cityscapes_20200717_110721-02ac0f13.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes/ocrnet_r101-d8_512x1024_40k_b8_cityscapes_20200717_110721.log.json) | +| OCRNet | R-101-D8 | 512x1024 | 16 | 40000 | 8.8 | 3.02 | V100 | 80.30 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_r101-d8_8xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes/ocrnet_r101-d8_512x1024_40k_b16_cityscapes_20200723_193726-db500f80.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes/ocrnet_r101-d8_512x1024_40k_b16_cityscapes_20200723_193726.log.json) | +| OCRNet | R-101-D8 | 512x1024 | 16 | 80000 | 8.8 | 3.02 | V100 | 80.81 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_r101-d8_8xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes/ocrnet_r101-d8_512x1024_80k_b16_cityscapes_20200723_192421-78688424.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes/ocrnet_r101-d8_512x1024_80k_b16_cityscapes_20200723_192421.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| OCRNet | HRNetV2p-W18-Small | 512x512 | 80000 | 6.7 | 28.98 | V100 | 35.06 | 35.80 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr18s_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_80k_ade20k/ocrnet_hr18s_512x512_80k_ade20k_20200615_055600-e80b62af.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_80k_ade20k/ocrnet_hr18s_512x512_80k_ade20k_20200615_055600.log.json) | +| OCRNet | HRNetV2p-W18 | 512x512 | 80000 | 7.9 | 18.93 | V100 | 37.79 | 39.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr18_4xb4-80k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_80k_ade20k/ocrnet_hr18_512x512_80k_ade20k_20200615_053157-d173d83b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_80k_ade20k/ocrnet_hr18_512x512_80k_ade20k_20200615_053157.log.json) |
+| OCRNet | HRNetV2p-W48 | 512x512 | 80000 | 11.2 | 16.99 | V100 | 43.00 | 44.30 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr48_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_80k_ade20k/ocrnet_hr48_512x512_80k_ade20k_20200615_021518-d168c2d1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_80k_ade20k/ocrnet_hr48_512x512_80k_ade20k_20200615_021518.log.json) |
+| OCRNet | HRNetV2p-W18-Small | 512x512 | 160000 | - | - | V100 | 37.19 | 38.40 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr18s_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_160k_ade20k/ocrnet_hr18s_512x512_160k_ade20k_20200615_184505-8e913058.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_160k_ade20k/ocrnet_hr18s_512x512_160k_ade20k_20200615_184505.log.json) |
+| OCRNet | HRNetV2p-W18 | 512x512 | 160000 | - | - | V100 | 39.32 | 40.80 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr18_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_160k_ade20k/ocrnet_hr18_512x512_160k_ade20k_20200615_200940-d8fcd9d1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_160k_ade20k/ocrnet_hr18_512x512_160k_ade20k_20200615_200940.log.json) |
+| OCRNet | HRNetV2p-W48 | 512x512 | 160000 | - | - | V100 | 43.25 | 44.88 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr48_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_160k_ade20k/ocrnet_hr48_512x512_160k_ade20k_20200615_184705-a073726d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_160k_ade20k/ocrnet_hr48_512x512_160k_ade20k_20200615_184705.log.json) |
+
+### Pascal VOC 2012 + Aug
+
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download |
+| ------ | ------------------ | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| OCRNet | HRNetV2p-W18-Small | 512x512 | 20000 | 3.5 | 31.55 | V100 | 71.70 | 73.84 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr18s_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug/ocrnet_hr18s_512x512_20k_voc12aug_20200617_233913-02b04fcb.pth) \|
[log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug/ocrnet_hr18s_512x512_20k_voc12aug_20200617_233913.log.json) | +| OCRNet | HRNetV2p-W18 | 512x512 | 20000 | 4.7 | 19.91 | V100 | 74.75 | 77.11 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr18_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_20k_voc12aug/ocrnet_hr18_512x512_20k_voc12aug_20200617_233932-8954cbb7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_20k_voc12aug/ocrnet_hr18_512x512_20k_voc12aug_20200617_233932.log.json) | +| OCRNet | HRNetV2p-W48 | 512x512 | 20000 | 8.1 | 17.83 | V100 | 77.72 | 79.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr48_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_20k_voc12aug/ocrnet_hr48_512x512_20k_voc12aug_20200617_233932-9e82080a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_20k_voc12aug/ocrnet_hr48_512x512_20k_voc12aug_20200617_233932.log.json) | +| OCRNet | HRNetV2p-W18-Small | 512x512 | 40000 | - | - | V100 | 72.76 | 74.60 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr18s_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug/ocrnet_hr18s_512x512_40k_voc12aug_20200614_002025-42b587ac.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug/ocrnet_hr18s_512x512_40k_voc12aug_20200614_002025.log.json) | +| OCRNet | HRNetV2p-W18 | 512x512 | 40000 | - | - | V100 | 74.98 | 77.40 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr18_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_40k_voc12aug/ocrnet_hr18_512x512_40k_voc12aug_20200614_015958-714302be.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_40k_voc12aug/ocrnet_hr18_512x512_40k_voc12aug_20200614_015958.log.json) | +| OCRNet | HRNetV2p-W48 | 512x512 | 40000 | - | - | V100 | 77.14 | 79.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet/ocrnet_hr48_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_40k_voc12aug/ocrnet_hr48_512x512_40k_voc12aug_20200614_015958-255bc5ce.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_40k_voc12aug/ocrnet_hr48_512x512_40k_voc12aug_20200614_015958.log.json) | + ## Citation ```bibtex @@ -39,51 +87,3 @@ In this paper, we address the problem of semantic segmentation and focus on the year={2020} } ``` - -## Results and models - -### Cityscapes - -#### HRNet backbone - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | ------------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------------- | 
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| OCRNet | HRNetV2p-W18-Small | 512x1024 | 40000 | 3.5 | 10.45 | 76.61 | 78.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024_20230227_145026-6c052a14.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024_20230227_145026.json) | -| OCRNet | HRNetV2p-W18 | 512x1024 | 40000 | 4.7 | 7.50 | 77.72 | 79.49 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes/ocrnet_hr18_512x1024_40k_cityscapes_20200601_033320-401c5bdd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes/ocrnet_hr18_512x1024_40k_cityscapes_20200601_033320.log.json) | -| OCRNet | HRNetV2p-W48 | 512x1024 | 40000 | 8 | 4.22 | 80.58 | 81.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr48_4xb2-40k_cityscapes-512x1024.pyy) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes/ocrnet_hr48_512x1024_40k_cityscapes_20200601_033336-55b32491.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes/ocrnet_hr48_512x1024_40k_cityscapes_20200601_033336.log.json) | -| OCRNet | HRNetV2p-W18-Small | 512x1024 | 80000 | - | - | 77.16 | 78.66 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18s_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes/ocrnet_hr18s_512x1024_80k_cityscapes_20200601_222735-55979e63.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes/ocrnet_hr18s_512x1024_80k_cityscapes_20200601_222735.log.json) | -| OCRNet | HRNetV2p-W18 | 512x1024 | 80000 | - | - | 78.57 | 80.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes/ocrnet_hr18_512x1024_80k_cityscapes_20200614_230521-c2e1dd4a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes/ocrnet_hr18_512x1024_80k_cityscapes_20200614_230521.log.json) | -| OCRNet | HRNetV2p-W48 | 512x1024 | 80000 | - | - | 80.70 | 81.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr48_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes/ocrnet_hr48_512x1024_80k_cityscapes_20200601_222752-9076bcdf.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes/ocrnet_hr48_512x1024_80k_cityscapes_20200601_222752.log.json) | -| OCRNet | HRNetV2p-W18-Small | 512x1024 | 160000 | - | - | 78.45 | 79.97 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18s_4xb2-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes/ocrnet_hr18s_512x1024_160k_cityscapes_20200602_191005-f4a7af28.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes/ocrnet_hr18s_512x1024_160k_cityscapes_20200602_191005.log.json) | -| OCRNet | HRNetV2p-W18 | 512x1024 | 160000 | - | - | 79.47 | 80.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18_4xb2-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes/ocrnet_hr18_512x1024_160k_cityscapes_20200602_191001-b9172d0c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes/ocrnet_hr18_512x1024_160k_cityscapes_20200602_191001.log.json) | -| OCRNet | HRNetV2p-W48 | 512x1024 | 160000 | - | - | 81.35 | 82.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr48_4xb2-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes/ocrnet_hr48_512x1024_160k_cityscapes_20200602_191037-dfbf1b0c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes/ocrnet_hr48_512x1024_160k_cityscapes_20200602_191037.log.json) | - -#### ResNet backbone - -| Method | Backbone | Crop Size | Batch Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ---------- | ------- | -------- | -------------- | ----- | ------------: | --------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| OCRNet | R-101-D8 | 512x1024 | 8 | 40000 | - | - | 80.09 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes/ocrnet_r101-d8_512x1024_40k_b8_cityscapes_20200717_110721-02ac0f13.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes/ocrnet_r101-d8_512x1024_40k_b8_cityscapes_20200717_110721.log.json) | -| OCRNet | R-101-D8 | 512x1024 | 16 | 40000 | 8.8 | 3.02 | 80.30 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_r101-d8_8xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes/ocrnet_r101-d8_512x1024_40k_b16_cityscapes_20200723_193726-db500f80.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes/ocrnet_r101-d8_512x1024_40k_b16_cityscapes_20200723_193726.log.json) | -| OCRNet | R-101-D8 | 512x1024 | 16 | 80000 | 8.8 | 3.02 | 80.81 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_r101-d8_8xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes/ocrnet_r101-d8_512x1024_80k_b16_cityscapes_20200723_192421-78688424.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes/ocrnet_r101-d8_512x1024_80k_b16_cityscapes_20200723_192421.log.json) | - -### ADE20K - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | ------------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| OCRNet | HRNetV2p-W18-Small | 512x512 | 80000 | 6.7 | 28.98 | 35.06 | 35.80 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18s_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_80k_ade20k/ocrnet_hr18s_512x512_80k_ade20k_20200615_055600-e80b62af.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_80k_ade20k/ocrnet_hr18s_512x512_80k_ade20k_20200615_055600.log.json) | -| OCRNet | HRNetV2p-W18 | 512x512 | 80000 | 7.9 | 18.93 | 37.79 | 39.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_80k_ade20k/ocrnet_hr18_512x512_80k_ade20k_20200615_053157-d173d83b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_80k_ade20k/ocrnet_hr18_512x512_80k_ade20k_20200615_053157.log.json) | -| OCRNet | HRNetV2p-W48 | 512x512 | 80000 | 11.2 | 16.99 | 43.00 | 44.30 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr48_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_80k_ade20k/ocrnet_hr48_512x512_80k_ade20k_20200615_021518-d168c2d1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_80k_ade20k/ocrnet_hr48_512x512_80k_ade20k_20200615_021518.log.json) | -| OCRNet | HRNetV2p-W18-Small | 512x512 | 160000 | - | - | 37.19 | 38.40 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18s_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_160k_ade20k/ocrnet_hr18s_512x512_160k_ade20k_20200615_184505-8e913058.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_160k_ade20k/ocrnet_hr18s_512x512_160k_ade20k_20200615_184505.log.json) | -| OCRNet | HRNetV2p-W18 | 512x512 | 160000 
| - | - | 39.32 | 40.80 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_160k_ade20k/ocrnet_hr18_512x512_160k_ade20k_20200615_200940-d8fcd9d1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_160k_ade20k/ocrnet_hr18_512x512_160k_ade20k_20200615_200940.log.json) | -| OCRNet | HRNetV2p-W48 | 512x512 | 160000 | - | - | 43.25 | 44.88 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr48_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_160k_ade20k/ocrnet_hr48_512x512_160k_ade20k_20200615_184705-a073726d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_160k_ade20k/ocrnet_hr48_512x512_160k_ade20k_20200615_184705.log.json) | - -### Pascal VOC 2012 + Aug - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | ------------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| OCRNet | HRNetV2p-W18-Small | 512x512 | 20000 | 3.5 | 31.55 | 71.70 | 73.84 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18s_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug/ocrnet_hr18s_512x512_20k_voc12aug_20200617_233913-02b04fcb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug/ocrnet_hr18s_512x512_20k_voc12aug_20200617_233913.log.json) | -| OCRNet | HRNetV2p-W18 | 512x512 | 20000 | 4.7 | 19.91 | 74.75 | 77.11 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_20k_voc12aug/ocrnet_hr18_512x512_20k_voc12aug_20200617_233932-8954cbb7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_20k_voc12aug/ocrnet_hr18_512x512_20k_voc12aug_20200617_233932.log.json) | -| OCRNet | HRNetV2p-W48 | 512x512 | 20000 | 8.1 | 17.83 | 77.72 | 79.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr48_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_20k_voc12aug/ocrnet_hr48_512x512_20k_voc12aug_20200617_233932-9e82080a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_20k_voc12aug/ocrnet_hr48_512x512_20k_voc12aug_20200617_233932.log.json) | -| OCRNet | HRNetV2p-W18-Small | 512x512 | 40000 | - | - | 72.76 | 74.60 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18s_4xb4-40k_voc12aug-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug/ocrnet_hr18s_512x512_40k_voc12aug_20200614_002025-42b587ac.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug/ocrnet_hr18s_512x512_40k_voc12aug_20200614_002025.log.json) |
-| OCRNet | HRNetV2p-W18 | 512x512 | 40000 | - | - | 74.98 | 77.40 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_40k_voc12aug/ocrnet_hr18_512x512_40k_voc12aug_20200614_015958-714302be.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_40k_voc12aug/ocrnet_hr18_512x512_40k_voc12aug_20200614_015958.log.json) |
-| OCRNet | HRNetV2p-W48 | 512x512 | 40000 | - | - | 77.14 | 79.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr48_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_40k_voc12aug/ocrnet_hr48_512x512_40k_voc12aug_20200614_015958-255bc5ce.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_40k_voc12aug/ocrnet_hr48_512x512_40k_voc12aug_20200614_015958.log.json) |
diff --git a/configs/ocrnet/metafile.yaml b/configs/ocrnet/metafile.yaml new file mode 100644 index 0000000000..5467feb975 --- /dev/null +++ b/configs/ocrnet/metafile.yaml @@ -0,0 +1,577 @@ +Collections: +- Name: OCRNet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + README: configs/ocrnet/README.md + Frameworks: + - PyTorch
+Models: +- Name: ocrnet_hr18s_4xb2-40k_cityscapes-512x1024 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.61 + mIoU(ms+flip): 78.01 + Config: configs/ocrnet/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W18-Small + - OCRNet + Training Resources: 4x A100 GPUS + Memory (GB): 3.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024_20230227_145026-6c052a14.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024_20230227_145026.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch
+- Name: ocrnet_hr18_4xb2-40k_cityscapes-512x1024 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.72 + mIoU(ms+flip): 79.49 + Config: configs/ocrnet/ocrnet_hr18_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W18 + - OCRNet + Training Resources: 4x V100 GPUS + Memory (GB): 4.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes/ocrnet_hr18_512x1024_40k_cityscapes_20200601_033320-401c5bdd.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes/ocrnet_hr18_512x1024_40k_cityscapes_20200601_033320.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch
+- Name: ocrnet_hr48_4xb2-40k_cityscapes-512x1024 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.58 + mIoU(ms+flip): 81.79 + Config: configs/ocrnet/ocrnet_hr48_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W48 + - OCRNet + Training Resources: 4x V100 GPUS + Memory (GB): 8.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes/ocrnet_hr48_512x1024_40k_cityscapes_20200601_033336-55b32491.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes/ocrnet_hr48_512x1024_40k_cityscapes_20200601_033336.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch
+- Name: ocrnet_hr18s_4xb2-80k_cityscapes-512x1024 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.16 + mIoU(ms+flip): 78.66 + Config: configs/ocrnet/ocrnet_hr18s_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W18-Small + - OCRNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes/ocrnet_hr18s_512x1024_80k_cityscapes_20200601_222735-55979e63.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes/ocrnet_hr18s_512x1024_80k_cityscapes_20200601_222735.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch
+- Name: ocrnet_hr18_4xb2-80k_cityscapes-512x1024 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.57 + mIoU(ms+flip): 80.46 + Config: configs/ocrnet/ocrnet_hr18_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W18 + - OCRNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes/ocrnet_hr18_512x1024_80k_cityscapes_20200614_230521-c2e1dd4a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes/ocrnet_hr18_512x1024_80k_cityscapes_20200614_230521.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch
+- Name: ocrnet_hr48_4xb2-80k_cityscapes-512x1024 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.7 + mIoU(ms+flip): 81.87 + Config: configs/ocrnet/ocrnet_hr48_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W48 + - OCRNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes/ocrnet_hr48_512x1024_80k_cityscapes_20200601_222752-9076bcdf.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes/ocrnet_hr48_512x1024_80k_cityscapes_20200601_222752.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch
+- Name: ocrnet_hr18s_4xb2-160k_cityscapes-512x1024 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.45 + mIoU(ms+flip): 79.97 + Config: configs/ocrnet/ocrnet_hr18s_4xb2-160k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W18-Small + - OCRNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes/ocrnet_hr18s_512x1024_160k_cityscapes_20200602_191005-f4a7af28.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes/ocrnet_hr18s_512x1024_160k_cityscapes_20200602_191005.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch
+- Name: ocrnet_hr18_4xb2-160k_cityscapes-512x1024 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.47 + mIoU(ms+flip): 80.91 + Config: configs/ocrnet/ocrnet_hr18_4xb2-160k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W18 + - OCRNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes/ocrnet_hr18_512x1024_160k_cityscapes_20200602_191001-b9172d0c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes/ocrnet_hr18_512x1024_160k_cityscapes_20200602_191001.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch
+- Name: ocrnet_hr48_4xb2-160k_cityscapes-512x1024 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 81.35 + mIoU(ms+flip): 82.7 + Config: configs/ocrnet/ocrnet_hr48_4xb2-160k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - HRNetV2p-W48 + - OCRNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes/ocrnet_hr48_512x1024_160k_cityscapes_20200602_191037-dfbf1b0c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes/ocrnet_hr48_512x1024_160k_cityscapes_20200602_191037.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch
+- Name: ocrnet_r101-d8_4xb2-40k_cityscapes-512x1024 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.09 + Config: configs/ocrnet/ocrnet_r101-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - OCRNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes/ocrnet_r101-d8_512x1024_40k_b8_cityscapes_20200717_110721-02ac0f13.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes/ocrnet_r101-d8_512x1024_40k_b8_cityscapes_20200717_110721.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch
+- Name: ocrnet_r101-d8_8xb2-40k_cityscapes-512x1024 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.3 + Config: configs/ocrnet/ocrnet_r101-d8_8xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - R-101-D8 + - OCRNet + Training Resources: 8x V100 GPUS + Memory (GB): 8.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes/ocrnet_r101-d8_512x1024_40k_b16_cityscapes_20200723_193726-db500f80.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes/ocrnet_r101-d8_512x1024_40k_b16_cityscapes_20200723_193726.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch
+- Name: ocrnet_r101-d8_8xb2-80k_cityscapes-512x1024 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.81 + Config: configs/ocrnet/ocrnet_r101-d8_8xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - R-101-D8 + - OCRNet + Training Resources: 8x V100 GPUS + Memory (GB): 8.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes/ocrnet_r101-d8_512x1024_80k_b16_cityscapes_20200723_192421-78688424.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes/ocrnet_r101-d8_512x1024_80k_b16_cityscapes_20200723_192421.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code:
https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch
+- Name: ocrnet_hr18s_4xb4-80k_ade20k-512x512 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 35.06 + mIoU(ms+flip): 35.8 + Config: configs/ocrnet/ocrnet_hr18s_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - HRNetV2p-W18-Small + - OCRNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_80k_ade20k/ocrnet_hr18s_512x512_80k_ade20k_20200615_055600-e80b62af.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_80k_ade20k/ocrnet_hr18s_512x512_80k_ade20k_20200615_055600.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch
+- Name: ocrnet_hr18_4xb4-80k_ade20k-512x512 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 37.79 + mIoU(ms+flip): 39.16 + Config: configs/ocrnet/ocrnet_hr18_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - HRNetV2p-W18 + - OCRNet + Training Resources: 4x V100 GPUS + Memory (GB): 7.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_80k_ade20k/ocrnet_hr18_512x512_80k_ade20k_20200615_053157-d173d83b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_80k_ade20k/ocrnet_hr18_512x512_80k_ade20k_20200615_053157.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch
+- Name: ocrnet_hr48_4xb4-80k_ade20k-512x512 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.0 + mIoU(ms+flip): 44.3 + Config: configs/ocrnet/ocrnet_hr48_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - HRNetV2p-W48 + - OCRNet + Training Resources: 4x V100 GPUS + Memory (GB): 11.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_80k_ade20k/ocrnet_hr48_512x512_80k_ade20k_20200615_021518-d168c2d1.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_80k_ade20k/ocrnet_hr48_512x512_80k_ade20k_20200615_021518.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch
+- Name: ocrnet_hr18s_4xb4-160k_ade20k-512x512 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 37.19 + mIoU(ms+flip): 38.4 + Config: configs/ocrnet/ocrnet_hr18s_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - HRNetV2p-W18-Small + - OCRNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_160k_ade20k/ocrnet_hr18s_512x512_160k_ade20k_20200615_184505-8e913058.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_160k_ade20k/ocrnet_hr18s_512x512_160k_ade20k_20200615_184505.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch
+- Name: ocrnet_hr18_4xb4-160k_ade20k-512x512 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 39.32 + mIoU(ms+flip): 40.8 + Config: configs/ocrnet/ocrnet_hr18_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - HRNetV2p-W18 + - OCRNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_160k_ade20k/ocrnet_hr18_512x512_160k_ade20k_20200615_200940-d8fcd9d1.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_160k_ade20k/ocrnet_hr18_512x512_160k_ade20k_20200615_200940.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch
+- Name: ocrnet_hr48_4xb4-160k_ade20k-512x512 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.25 + mIoU(ms+flip): 44.88 + Config: configs/ocrnet/ocrnet_hr48_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - HRNetV2p-W48 + - OCRNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_160k_ade20k/ocrnet_hr48_512x512_160k_ade20k_20200615_184705-a073726d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_160k_ade20k/ocrnet_hr48_512x512_160k_ade20k_20200615_184705.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch
+- Name: ocrnet_hr18s_4xb4-20k_voc12aug-512x512 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 71.7 + mIoU(ms+flip): 73.84 + Config: configs/ocrnet/ocrnet_hr18s_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - HRNetV2p-W18-Small + - OCRNet + Training Resources: 4x V100 GPUS + Memory (GB): 3.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug/ocrnet_hr18s_512x512_20k_voc12aug_20200617_233913-02b04fcb.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug/ocrnet_hr18s_512x512_20k_voc12aug_20200617_233913.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch
+- Name: ocrnet_hr18_4xb4-20k_voc12aug-512x512 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 74.75 + mIoU(ms+flip): 77.11 + Config:
configs/ocrnet/ocrnet_hr18_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - HRNetV2p-W18 + - OCRNet + Training Resources: 4x V100 GPUS + Memory (GB): 4.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_20k_voc12aug/ocrnet_hr18_512x512_20k_voc12aug_20200617_233932-8954cbb7.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_20k_voc12aug/ocrnet_hr18_512x512_20k_voc12aug_20200617_233932.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch +- Name: ocrnet_hr48_4xb4-20k_voc12aug-512x512 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.72 + mIoU(ms+flip): 79.87 + Config: configs/ocrnet/ocrnet_hr48_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - HRNetV2p-W48 + - OCRNet + Training Resources: 4x V100 GPUS + Memory (GB): 8.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_20k_voc12aug/ocrnet_hr48_512x512_20k_voc12aug_20200617_233932-9e82080a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_20k_voc12aug/ocrnet_hr48_512x512_20k_voc12aug_20200617_233932.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch +- Name: ocrnet_hr18s_4xb4-40k_voc12aug-512x512 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 72.76 + mIoU(ms+flip): 74.6 + Config: configs/ocrnet/ocrnet_hr18s_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - HRNetV2p-W18-Small + - OCRNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug/ocrnet_hr18s_512x512_40k_voc12aug_20200614_002025-42b587ac.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug/ocrnet_hr18s_512x512_40k_voc12aug_20200614_002025.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch +- Name: ocrnet_hr18_4xb4-40k_voc12aug-512x512 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 74.98 + mIoU(ms+flip): 77.4 + Config: configs/ocrnet/ocrnet_hr18_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - HRNetV2p-W18 + - OCRNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_40k_voc12aug/ocrnet_hr18_512x512_40k_voc12aug_20200614_015958-714302be.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_40k_voc12aug/ocrnet_hr18_512x512_40k_voc12aug_20200614_015958.log.json + Paper: + Title: 
Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch +- Name: ocrnet_hr48_4xb4-40k_voc12aug-512x512 + In Collection: OCRNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.14 + mIoU(ms+flip): 79.71 + Config: configs/ocrnet/ocrnet_hr48_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - HRNetV2p-W48 + - OCRNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_40k_voc12aug/ocrnet_hr48_512x512_40k_voc12aug_20200614_015958-255bc5ce.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_40k_voc12aug/ocrnet_hr48_512x512_40k_voc12aug_20200614_015958.log.json + Paper: + Title: Object-Contextual Representations for Semantic Segmentation + URL: https://arxiv.org/abs/1909.11065 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Framework: PyTorch diff --git a/configs/ocrnet/ocrnet.yml b/configs/ocrnet/ocrnet.yml deleted file mode 100644 index 20002e8864..0000000000 --- a/configs/ocrnet/ocrnet.yml +++ /dev/null @@ -1,438 +0,0 @@ -Collections: -- Name: OCRNet - Metadata: - Training Data: - - Cityscapes - - ADE20K - - Pascal VOC 2012 + Aug - Paper: - URL: https://arxiv.org/abs/1909.11065 - Title: Object-Contextual Representations for Semantic Segmentation - README: configs/ocrnet/README.md - Code: - URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 - Version: v0.17.0 - Converted From: - Code: https://github.com/openseg-group/OCNet.pytorch -Models: -- Name: ocrnet_hr18s_4xb2-40k_cityscapes-512x1024 - In Collection: OCRNet - Metadata: - backbone: HRNetV2p-W18-Small - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 95.69 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 3.5 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 76.61 - mIoU(ms+flip): 78.01 - Config: configs/ocrnet/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024_20230227_145026-6c052a14.pth -- Name: ocrnet_hr18_4xb2-40k_cityscapes-512x1024 - In Collection: OCRNet - Metadata: - backbone: HRNetV2p-W18 - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 133.33 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 4.7 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 77.72 - mIoU(ms+flip): 79.49 - Config: configs/ocrnet/ocrnet_hr18_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes/ocrnet_hr18_512x1024_40k_cityscapes_20200601_033320-401c5bdd.pth -- Name: ocrnet_hr48_4xb2-40k_cityscapes-512x1024 - In Collection: OCRNet - Metadata: - backbone: HRNetV2p-W48 - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 236.97 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 8.0 - Results: - 
- Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 80.58 - mIoU(ms+flip): 81.79 - Config: configs/ocrnet/ocrnet_hr48_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes/ocrnet_hr48_512x1024_40k_cityscapes_20200601_033336-55b32491.pth -- Name: ocrnet_hr18s_4xb2-80k_cityscapes-512x1024 - In Collection: OCRNet - Metadata: - backbone: HRNetV2p-W18-Small - crop size: (512,1024) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 77.16 - mIoU(ms+flip): 78.66 - Config: configs/ocrnet/ocrnet_hr18s_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes/ocrnet_hr18s_512x1024_80k_cityscapes_20200601_222735-55979e63.pth -- Name: ocrnet_hr18_4xb2-80k_cityscapes-512x1024 - In Collection: OCRNet - Metadata: - backbone: HRNetV2p-W18 - crop size: (512,1024) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.57 - mIoU(ms+flip): 80.46 - Config: configs/ocrnet/ocrnet_hr18_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes/ocrnet_hr18_512x1024_80k_cityscapes_20200614_230521-c2e1dd4a.pth -- Name: ocrnet_hr48_4xb2-80k_cityscapes-512x1024 - In Collection: OCRNet - Metadata: - backbone: HRNetV2p-W48 - crop size: (512,1024) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 80.7 - mIoU(ms+flip): 81.87 - Config: configs/ocrnet/ocrnet_hr48_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes/ocrnet_hr48_512x1024_80k_cityscapes_20200601_222752-9076bcdf.pth -- Name: ocrnet_hr18s_4xb2-160k_cityscapes-512x1024 - In Collection: OCRNet - Metadata: - backbone: HRNetV2p-W18-Small - crop size: (512,1024) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.45 - mIoU(ms+flip): 79.97 - Config: configs/ocrnet/ocrnet_hr18s_4xb2-160k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes/ocrnet_hr18s_512x1024_160k_cityscapes_20200602_191005-f4a7af28.pth -- Name: ocrnet_hr18_4xb2-160k_cityscapes-512x1024 - In Collection: OCRNet - Metadata: - backbone: HRNetV2p-W18 - crop size: (512,1024) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.47 - mIoU(ms+flip): 80.91 - Config: configs/ocrnet/ocrnet_hr18_4xb2-160k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes/ocrnet_hr18_512x1024_160k_cityscapes_20200602_191001-b9172d0c.pth -- Name: ocrnet_hr48_4xb2-160k_cityscapes-512x1024 - In Collection: OCRNet - Metadata: - backbone: HRNetV2p-W48 - crop size: (512,1024) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 81.35 - mIoU(ms+flip): 82.7 - Config: configs/ocrnet/ocrnet_hr48_4xb2-160k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes/ocrnet_hr48_512x1024_160k_cityscapes_20200602_191037-dfbf1b0c.pth -- Name: ocrnet_r101-d8_4xb2-40k_cityscapes-512x1024 - In Collection: OCRNet - Metadata: - backbone: R-101-D8 - crop size: (512,1024) - lr schd: 40000 - 
Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 80.09 - Config: configs/ocrnet/ocrnet_r101-d8_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes/ocrnet_r101-d8_512x1024_40k_b8_cityscapes_20200717_110721-02ac0f13.pth -- Name: ocrnet_r101-d8_8xb2-40k_cityscapes-512x1024 - In Collection: OCRNet - Metadata: - backbone: R-101-D8 - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 331.13 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 8.8 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 80.3 - Config: configs/ocrnet/ocrnet_r101-d8_8xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes/ocrnet_r101-d8_512x1024_40k_b16_cityscapes_20200723_193726-db500f80.pth -- Name: ocrnet_r101-d8_8xb2-80k_cityscapes-512x1024 - In Collection: OCRNet - Metadata: - backbone: R-101-D8 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 331.13 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 8.8 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 80.81 - Config: configs/ocrnet/ocrnet_r101-d8_8xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes/ocrnet_r101-d8_512x1024_80k_b16_cityscapes_20200723_192421-78688424.pth -- Name: ocrnet_hr18s_4xb4-80k_ade20k-512x512 - In Collection: OCRNet - Metadata: - backbone: HRNetV2p-W18-Small - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 34.51 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 6.7 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 35.06 - mIoU(ms+flip): 35.8 - Config: configs/ocrnet/ocrnet_hr18s_4xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_80k_ade20k/ocrnet_hr18s_512x512_80k_ade20k_20200615_055600-e80b62af.pth -- Name: ocrnet_hr18_4xb4-80k_ade20k-512x512 - In Collection: OCRNet - Metadata: - backbone: HRNetV2p-W18 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 52.83 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 7.9 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 37.79 - mIoU(ms+flip): 39.16 - Config: configs/ocrnet/ocrnet_hr18_4xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_80k_ade20k/ocrnet_hr18_512x512_80k_ade20k_20200615_053157-d173d83b.pth -- Name: ocrnet_hr48_4xb4-80k_ade20k-512x512 - In Collection: OCRNet - Metadata: - backbone: HRNetV2p-W48 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 58.86 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 11.2 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 43.0 - mIoU(ms+flip): 44.3 - Config: configs/ocrnet/ocrnet_hr48_4xb4-80k_ade20k-512x512.py - Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_80k_ade20k/ocrnet_hr48_512x512_80k_ade20k_20200615_021518-d168c2d1.pth -- Name: ocrnet_hr18s_4xb4-80k_ade20k-512x512 - In Collection: OCRNet - Metadata: - backbone: HRNetV2p-W18-Small - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 37.19 - mIoU(ms+flip): 38.4 - Config: configs/ocrnet/ocrnet_hr18s_4xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_160k_ade20k/ocrnet_hr18s_512x512_160k_ade20k_20200615_184505-8e913058.pth -- Name: ocrnet_hr18_4xb4-80k_ade20k-512x512 - In Collection: OCRNet - Metadata: - backbone: HRNetV2p-W18 - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 39.32 - mIoU(ms+flip): 40.8 - Config: configs/ocrnet/ocrnet_hr18_4xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_160k_ade20k/ocrnet_hr18_512x512_160k_ade20k_20200615_200940-d8fcd9d1.pth -- Name: ocrnet_hr48_4xb4-160k_ade20k-512x512 - In Collection: OCRNet - Metadata: - backbone: HRNetV2p-W48 - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 43.25 - mIoU(ms+flip): 44.88 - Config: configs/ocrnet/ocrnet_hr48_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_160k_ade20k/ocrnet_hr48_512x512_160k_ade20k_20200615_184705-a073726d.pth -- Name: ocrnet_hr18s_4xb4-20k_voc12aug-512x512 - In Collection: OCRNet - Metadata: - backbone: HRNetV2p-W18-Small - crop size: (512,512) - lr schd: 20000 - inference time (ms/im): - - value: 31.7 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 3.5 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 71.7 - mIoU(ms+flip): 73.84 - Config: configs/ocrnet/ocrnet_hr18s_4xb4-20k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug/ocrnet_hr18s_512x512_20k_voc12aug_20200617_233913-02b04fcb.pth -- Name: ocrnet_hr18_4xb4-20k_voc12aug-512x512 - In Collection: OCRNet - Metadata: - backbone: HRNetV2p-W18 - crop size: (512,512) - lr schd: 20000 - inference time (ms/im): - - value: 50.23 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 4.7 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 74.75 - mIoU(ms+flip): 77.11 - Config: configs/ocrnet/ocrnet_hr18_4xb4-20k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_20k_voc12aug/ocrnet_hr18_512x512_20k_voc12aug_20200617_233932-8954cbb7.pth -- Name: ocrnet_hr48_4xb4-20k_voc12aug-512x512 - In Collection: OCRNet - Metadata: - backbone: HRNetV2p-W48 - crop size: (512,512) - lr schd: 20000 - inference time (ms/im): - - value: 56.09 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 8.1 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 77.72 - mIoU(ms+flip): 79.87 - Config: configs/ocrnet/ocrnet_hr48_4xb4-20k_voc12aug-512x512.py - Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_20k_voc12aug/ocrnet_hr48_512x512_20k_voc12aug_20200617_233932-9e82080a.pth -- Name: ocrnet_hr18s_4xb4-40k_voc12aug-512x512 - In Collection: OCRNet - Metadata: - backbone: HRNetV2p-W18-Small - crop size: (512,512) - lr schd: 40000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 72.76 - mIoU(ms+flip): 74.6 - Config: configs/ocrnet/ocrnet_hr18s_4xb4-40k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug/ocrnet_hr18s_512x512_40k_voc12aug_20200614_002025-42b587ac.pth -- Name: ocrnet_hr18_4xb4-40k_voc12aug-512x512 - In Collection: OCRNet - Metadata: - backbone: HRNetV2p-W18 - crop size: (512,512) - lr schd: 40000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 74.98 - mIoU(ms+flip): 77.4 - Config: configs/ocrnet/ocrnet_hr18_4xb4-40k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_40k_voc12aug/ocrnet_hr18_512x512_40k_voc12aug_20200614_015958-714302be.pth -- Name: ocrnet_hr48_4xb4-40k_voc12aug-512x512 - In Collection: OCRNet - Metadata: - backbone: HRNetV2p-W48 - crop size: (512,512) - lr schd: 40000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 77.14 - mIoU(ms+flip): 79.71 - Config: configs/ocrnet/ocrnet_hr48_4xb4-40k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_40k_voc12aug/ocrnet_hr48_512x512_40k_voc12aug_20200614_015958-255bc5ce.pth diff --git a/configs/pidnet/README.md b/configs/pidnet/README.md new file mode 100644 index 0000000000..e23efbd3f3 --- /dev/null +++ b/configs/pidnet/README.md @@ -0,0 +1,50 @@ +# PIDNet + +> [PIDNet: A Real-time Semantic Segmentation Network Inspired from PID Controller](https://arxiv.org/pdf/2206.02066.pdf) + +## Introduction + + + +
Official Repo + +Code Snippet + +## Abstract + + + +Two-branch network architecture has shown its efficiency and effectiveness for real-time semantic segmentation tasks. However, direct fusion of low-level details and high-level semantics leads to a phenomenon in which the detailed features are easily overwhelmed by surrounding contextual information (termed overshoot in this paper), which limits the accuracy improvement of existing two-branch models. In this paper, we bridge a connection between Convolutional Neural Network (CNN) and Proportional-Integral-Derivative (PID) controller and reveal that the two-branch network is nothing but a Proportional-Integral (PI) controller, which inherently suffers from a similar overshoot issue. To alleviate this issue, we propose a novel three-branch network architecture: PIDNet, which possesses three branches to parse the detailed, context and boundary information (derivative of semantics), respectively, and employs boundary attention to guide the fusion of the detailed and context branches in the final stage. The family of PIDNets achieves the best trade-off between inference speed and accuracy, and their test accuracy surpasses all existing models with similar inference speed on the Cityscapes, CamVid and COCO-Stuff datasets. In particular, PIDNet-S achieves 78.6% mIOU with an inference speed of 93.2 FPS on the Cityscapes test set and 80.1% mIOU with a speed of 153.7 FPS on the CamVid test set. + + + +
+ +
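For reference, the PIDNet configs introduced below can be exercised with the standard mmseg 1.x inference helpers. A minimal sketch, assuming a working MMSegmentation 1.x installation, a CUDA device, and the PIDNet-S checkpoint URL from the results table below (any street-scene image can stand in for `demo/demo.png`):

```python
# Minimal inference sketch (assumes mmseg 1.x; the checkpoint URL is the
# PIDNet-S entry from the results table below).
from mmseg.apis import inference_model, init_model

cfg = 'configs/pidnet/pidnet-s_2xb6-120k_1024x1024-cityscapes.py'
ckpt = ('https://download.openmmlab.com/mmsegmentation/v0.5/pidnet/'
        'pidnet-s_2xb6-120k_1024x1024-cityscapes/'
        'pidnet-s_2xb6-120k_1024x1024-cityscapes_20230302_191700-bb8e3bcc.pth')

model = init_model(cfg, ckpt, device='cuda:0')    # build the model and load weights
result = inference_model(model, 'demo/demo.png')  # returns a SegDataSample
print(result.pred_sem_seg.data.shape)             # (1, H, W) tensor of class indices
```

The same pattern applies to the PIDNet-M and PIDNet-L configs; only the config path and checkpoint URL change.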
+ +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------- | -------- | -------------- | ------ | ----- | ------------- | -------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PIDNet | PIDNet-S | 1024x1024 | 120000 | 3.38 | 80.82 | A100 | 78.74 | 80.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pidnet/pidnet-s_2xb6-120k_1024x1024-cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pidnet/pidnet-s_2xb6-120k_1024x1024-cityscapes/pidnet-s_2xb6-120k_1024x1024-cityscapes_20230302_191700-bb8e3bcc.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pidnet/pidnet-s_2xb6-120k_1024x1024-cityscapes/pidnet-s_2xb6-120k_1024x1024-cityscapes_20230302_191700.json) | +| PIDNet | PIDNet-M | 1024x1024 | 120000 | 5.14 | 71.98 | A100 | 80.22 | 82.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pidnet/pidnet-m_2xb6-120k_1024x1024-cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pidnet/pidnet-m_2xb6-120k_1024x1024-cityscapes/pidnet-m_2xb6-120k_1024x1024-cityscapes_20230301_143452-f9bcdbf3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pidnet/pidnet-m_2xb6-120k_1024x1024-cityscapes/pidnet-m_2xb6-120k_1024x1024-cityscapes_20230301_143452.json) | +| PIDNet | PIDNet-L | 1024x1024 | 120000 | 5.83 | 60.06 | A100 | 80.89 | 82.37 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pidnet/pidnet-l_2xb6-120k_1024x1024-cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pidnet/pidnet-l_2xb6-120k_1024x1024-cityscapes/pidnet-l_2xb6-120k_1024x1024-cityscapes_20230303_114514-0783ca6b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pidnet/pidnet-l_2xb6-120k_1024x1024-cityscapes/pidnet-l_2xb6-120k_1024x1024-cityscapes_20230303_114514.json) | + +## Notes + +The pretrained weights in config files are converted from [the official repo](https://github.com/XuJiacong/PIDNet#models). + +## Citation + +```bibtex +@misc{xu2022pidnet, + title={PIDNet: A Real-time Semantic Segmentation Network Inspired from PID Controller}, + author={Jiacong Xu and Zixiang Xiong and Shankar P. 
Bhattacharyya}, + year={2022}, + eprint={2206.02066}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` diff --git a/configs/pidnet/metafile.yaml b/configs/pidnet/metafile.yaml new file mode 100644 index 0000000000..51b514a487 --- /dev/null +++ b/configs/pidnet/metafile.yaml @@ -0,0 +1,85 @@ +Collections: +- Name: PIDNet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + Paper: + Title: 'PIDNet: A Real-time Semantic Segmentation Network Inspired from PID Controller' + URL: https://arxiv.org/pdf/2206.02066.pdf + README: configs/pidnet/README.md + Frameworks: + - PyTorch +Models: +- Name: pidnet-s_2xb6-120k_1024x1024-cityscapes + In Collection: PIDNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.74 + mIoU(ms+flip): 80.87 + Config: configs/pidnet/pidnet-s_2xb6-120k_1024x1024-cityscapes.py + Metadata: + Training Data: Cityscapes + Batch Size: 12 + Architecture: + - PIDNet-S + - PIDNet + Training Resources: 2x A100 GPUS + Memory (GB): 3.38 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pidnet/pidnet-s_2xb6-120k_1024x1024-cityscapes/pidnet-s_2xb6-120k_1024x1024-cityscapes_20230302_191700-bb8e3bcc.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pidnet/pidnet-s_2xb6-120k_1024x1024-cityscapes/pidnet-s_2xb6-120k_1024x1024-cityscapes_20230302_191700.json + Paper: + Title: 'PIDNet: A Real-time Semantic Segmentation Network Inspired from PID Controller' + URL: https://arxiv.org/pdf/2206.02066.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/models/backbones/pidnet.py + Framework: PyTorch +- Name: pidnet-m_2xb6-120k_1024x1024-cityscapes + In Collection: PIDNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.22 + mIoU(ms+flip): 82.05 + Config: configs/pidnet/pidnet-m_2xb6-120k_1024x1024-cityscapes.py + Metadata: + Training Data: Cityscapes + Batch Size: 12 + Architecture: + - PIDNet-M + - PIDNet + Training Resources: 2x A100 GPUS + Memory (GB): 5.14 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pidnet/pidnet-m_2xb6-120k_1024x1024-cityscapes/pidnet-m_2xb6-120k_1024x1024-cityscapes_20230301_143452-f9bcdbf3.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pidnet/pidnet-m_2xb6-120k_1024x1024-cityscapes/pidnet-m_2xb6-120k_1024x1024-cityscapes_20230301_143452.json + Paper: + Title: 'PIDNet: A Real-time Semantic Segmentation Network Inspired from PID Controller' + URL: https://arxiv.org/pdf/2206.02066.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/models/backbones/pidnet.py + Framework: PyTorch +- Name: pidnet-l_2xb6-120k_1024x1024-cityscapes + In Collection: PIDNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.89 + mIoU(ms+flip): 82.37 + Config: configs/pidnet/pidnet-l_2xb6-120k_1024x1024-cityscapes.py + Metadata: + Training Data: Cityscapes + Batch Size: 12 + Architecture: + - PIDNet-L + - PIDNet + Training Resources: 2x A100 GPUS + Memory (GB): 5.83 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pidnet/pidnet-l_2xb6-120k_1024x1024-cityscapes/pidnet-l_2xb6-120k_1024x1024-cityscapes_20230303_114514-0783ca6b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pidnet/pidnet-l_2xb6-120k_1024x1024-cityscapes/pidnet-l_2xb6-120k_1024x1024-cityscapes_20230303_114514.json + Paper: + Title: 'PIDNet: A Real-time Semantic Segmentation Network Inspired from PID Controller' + URL: https://arxiv.org/pdf/2206.02066.pdf + 
Code: https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/models/backbones/pidnet.py + Framework: PyTorch diff --git a/configs/pidnet/pidnet-l_2xb6-120k_1024x1024-cityscapes.py b/configs/pidnet/pidnet-l_2xb6-120k_1024x1024-cityscapes.py new file mode 100644 index 0000000000..1955c91e05 --- /dev/null +++ b/configs/pidnet/pidnet-l_2xb6-120k_1024x1024-cityscapes.py @@ -0,0 +1,10 @@ +_base_ = './pidnet-s_2xb6-120k_1024x1024-cityscapes.py' +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/pidnet/pidnet-l_imagenet1k_20230306-67889109.pth' # noqa +model = dict( + backbone=dict( + channels=64, + ppm_channels=112, + num_stem_blocks=3, + num_branch_blocks=4, + init_cfg=dict(checkpoint=checkpoint_file)), + decode_head=dict(in_channels=256, channels=256)) diff --git a/configs/pidnet/pidnet-m_2xb6-120k_1024x1024-cityscapes.py b/configs/pidnet/pidnet-m_2xb6-120k_1024x1024-cityscapes.py new file mode 100644 index 0000000000..38a69c1c45 --- /dev/null +++ b/configs/pidnet/pidnet-m_2xb6-120k_1024x1024-cityscapes.py @@ -0,0 +1,5 @@ +_base_ = './pidnet-s_2xb6-120k_1024x1024-cityscapes.py' +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/pidnet/pidnet-m_imagenet1k_20230306-39893c52.pth' # noqa +model = dict( + backbone=dict(channels=64, init_cfg=dict(checkpoint=checkpoint_file)), + decode_head=dict(in_channels=256)) diff --git a/configs/pidnet/pidnet-s_2xb6-120k_1024x1024-cityscapes.py b/configs/pidnet/pidnet-s_2xb6-120k_1024x1024-cityscapes.py new file mode 100644 index 0000000000..f70ca4287a --- /dev/null +++ b/configs/pidnet/pidnet-s_2xb6-120k_1024x1024-cityscapes.py @@ -0,0 +1,113 @@ +_base_ = [ + '../_base_/datasets/cityscapes_1024x1024.py', + '../_base_/default_runtime.py' +] + +# The class_weight is borrowed from https://github.com/openseg-group/OCNet.pytorch/issues/14 # noqa +# Licensed under the MIT License +class_weight = [ + 0.8373, 0.918, 0.866, 1.0345, 1.0166, 0.9969, 0.9754, 1.0489, 0.8786, + 1.0023, 0.9539, 0.9843, 1.1116, 0.9037, 1.0865, 1.0955, 1.0865, 1.1529, + 1.0507 +] +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/pidnet/pidnet-s_imagenet1k_20230306-715e6273.pth' # noqa +crop_size = (1024, 1024) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255, + size=crop_size) +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + backbone=dict( + type='PIDNet', + in_channels=3, + channels=32, + ppm_channels=96, + num_stem_blocks=2, + num_branch_blocks=3, + align_corners=False, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU', inplace=True), + init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file)), + decode_head=dict( + type='PIDHead', + in_channels=128, + channels=128, + num_classes=19, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU', inplace=True), + align_corners=True, + loss_decode=[ + dict( + type='CrossEntropyLoss', + use_sigmoid=False, + class_weight=class_weight, + loss_weight=0.4), + dict( + type='OhemCrossEntropy', + thres=0.9, + min_kept=131072, + class_weight=class_weight, + loss_weight=1.0), + dict(type='BoundaryLoss', loss_weight=20.0), + dict( + type='OhemCrossEntropy', + thres=0.9, + min_kept=131072, + class_weight=class_weight, + loss_weight=1.0) + ]), + train_cfg=dict(), + test_cfg=dict(mode='whole')) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + 
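+ # The remaining steps load the label map and apply joint image/label
+ # augmentations; GenerateEdge renders the boundary target (edge_width=4)
+ # that is supervised by the BoundaryLoss term in loss_decode above.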
dict(type='LoadAnnotations'), + dict( + type='RandomResize', + scale=(2048, 1024), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='GenerateEdge', edge_width=4), + dict(type='PackSegInputs') +] +train_dataloader = dict(batch_size=6, dataset=dict(pipeline=train_pipeline)) + +iters = 120000 +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) +# learning policy +param_scheduler = [ + dict( + type='PolyLR', + eta_min=0, + power=0.9, + begin=0, + end=iters, + by_epoch=False) +] +# training schedule for 120k +train_cfg = dict( + type='IterBasedTrainLoop', max_iters=iters, val_interval=iters // 10) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict( + type='CheckpointHook', by_epoch=False, interval=iters // 10), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) + +randomness = dict(seed=304) diff --git a/configs/point_rend/README.md b/configs/point_rend/README.md index 2690e7b9e6..487d3bcc7f 100644 --- a/configs/point_rend/README.md +++ b/configs/point_rend/README.md @@ -1,6 +1,6 @@ # PointRend -[PointRend: Image Segmentation as Rendering](https://arxiv.org/abs/1912.08193) +> [PointRend: Image Segmentation as Rendering](https://arxiv.org/abs/1912.08193) ## Introduction @@ -22,6 +22,22 @@ We present a new method for efficient high-quality image segmentation of objects +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------: | -------------- | ------ | ----: | ------------- | ---------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PointRend | R-50 | 512x1024 | 80000 | 3.1 | 8.48 | V100 | 76.47 | 78.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/point_rend/pointrend_r50_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x1024_80k_cityscapes/pointrend_r50_512x1024_80k_cityscapes_20200711_015821-bb1ff523.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x1024_80k_cityscapes/pointrend_r50_512x1024_80k_cityscapes-20200715_214714.log.json) | +| PointRend | R-101 | 512x1024 | 80000 | 4.2 | 7.00 | V100 | 78.30 | 79.97 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/point_rend/pointrend_r101_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x1024_80k_cityscapes/pointrend_r101_512x1024_80k_cityscapes_20200711_170850-d0ca84be.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x1024_80k_cityscapes/pointrend_r101_512x1024_80k_cityscapes-20200715_214824.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------: | -------------- | ------ | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| PointRend | R-50 | 512x512 | 160000 | 5.1 | 17.31 | V100 | 37.64 | 39.17 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/point_rend/pointrend_r50_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x512_160k_ade20k/pointrend_r50_512x512_160k_ade20k_20200807_232644-ac3febf2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x512_160k_ade20k/pointrend_r50_512x512_160k_ade20k-20200807_232644.log.json) | +| PointRend | R-101 | 512x512 | 160000 | 6.1 | 15.50 | V100 | 40.02 | 41.60 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/point_rend/pointrend_r101_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x512_160k_ade20k/pointrend_r101_512x512_160k_ade20k_20200808_030852-8834902a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x512_160k_ade20k/pointrend_r101_512x512_160k_ade20k-20200808_030852.log.json) | + ## Citation ```bibtex @@ -33,19 +49,3 @@ We present a new method for efficient high-quality image segmentation of objects year={2020} } ``` - -## Results and models - -### Cityscapes - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| --------- | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| PointRend | R-50 | 512x1024 | 80000 | 3.1 | 8.48 | 76.47 | 78.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/point_rend/pointrend_r50_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x1024_80k_cityscapes/pointrend_r50_512x1024_80k_cityscapes_20200711_015821-bb1ff523.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x1024_80k_cityscapes/pointrend_r50_512x1024_80k_cityscapes-20200715_214714.log.json) | -| PointRend | R-101 | 512x1024 | 80000 | 4.2 | 7.00 | 78.30 | 79.97 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/point_rend/pointrend_r101_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x1024_80k_cityscapes/pointrend_r101_512x1024_80k_cityscapes_20200711_170850-d0ca84be.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x1024_80k_cityscapes/pointrend_r101_512x1024_80k_cityscapes-20200715_214824.log.json) | - -### ADE20K - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| --------- | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | --------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| PointRend | R-50 | 512x512 | 160000 | 5.1 | 17.31 | 37.64 | 39.17 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/point_rend/pointrend_r50_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x512_160k_ade20k/pointrend_r50_512x512_160k_ade20k_20200807_232644-ac3febf2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x512_160k_ade20k/pointrend_r50_512x512_160k_ade20k-20200807_232644.log.json) | -| PointRend | R-101 | 512x512 | 160000 | 6.1 | 15.50 | 40.02 | 41.60 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/point_rend/pointrend_r101_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x512_160k_ade20k/pointrend_r101_512x512_160k_ade20k_20200808_030852-8834902a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x512_160k_ade20k/pointrend_r101_512x512_160k_ade20k-20200808_030852.log.json) | diff --git a/configs/point_rend/metafile.yaml b/configs/point_rend/metafile.yaml new file mode 100644 index 0000000000..064717c9df --- /dev/null +++ b/configs/point_rend/metafile.yaml @@ -0,0 +1,110 @@ +Collections: +- Name: PointRend + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + Paper: + Title: 'PointRend: Image Segmentation as Rendering' + URL: https://arxiv.org/abs/1912.08193 + README: configs/point_rend/README.md + Frameworks: + - PyTorch +Models: +- Name: pointrend_r50_4xb2-80k_cityscapes-512x1024 + In Collection: PointRend + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.47 + mIoU(ms+flip): 78.13 + Config: configs/point_rend/pointrend_r50_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50 + - PointRend + Training Resources: 4x V100 GPUS + Memory (GB): 3.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x1024_80k_cityscapes/pointrend_r50_512x1024_80k_cityscapes_20200711_015821-bb1ff523.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x1024_80k_cityscapes/pointrend_r50_512x1024_80k_cityscapes-20200715_214714.log.json + Paper: + 
Title: 'PointRend: Image Segmentation as Rendering' + URL: https://arxiv.org/abs/1912.08193 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/point_head.py#L36 + Framework: PyTorch +- Name: pointrend_r101_4xb2-80k_cityscapes-512x1024 + In Collection: PointRend + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.3 + mIoU(ms+flip): 79.97 + Config: configs/point_rend/pointrend_r101_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101 + - PointRend + Training Resources: 4x V100 GPUS + Memory (GB): 4.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x1024_80k_cityscapes/pointrend_r101_512x1024_80k_cityscapes_20200711_170850-d0ca84be.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x1024_80k_cityscapes/pointrend_r101_512x1024_80k_cityscapes-20200715_214824.log.json + Paper: + Title: 'PointRend: Image Segmentation as Rendering' + URL: https://arxiv.org/abs/1912.08193 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/point_head.py#L36 + Framework: PyTorch +- Name: pointrend_r50_4xb4-160k_ade20k-512x512 + In Collection: PointRend + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 37.64 + mIoU(ms+flip): 39.17 + Config: configs/point_rend/pointrend_r50_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50 + - PointRend + Training Resources: 4x V100 GPUS + Memory (GB): 5.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x512_160k_ade20k/pointrend_r50_512x512_160k_ade20k_20200807_232644-ac3febf2.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x512_160k_ade20k/pointrend_r50_512x512_160k_ade20k-20200807_232644.log.json + Paper: + Title: 'PointRend: Image Segmentation as Rendering' + URL: https://arxiv.org/abs/1912.08193 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/point_head.py#L36 + Framework: PyTorch +- Name: pointrend_r101_4xb4-160k_ade20k-512x512 + In Collection: PointRend + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 40.02 + mIoU(ms+flip): 41.6 + Config: configs/point_rend/pointrend_r101_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101 + - PointRend + Training Resources: 4x V100 GPUS + Memory (GB): 6.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x512_160k_ade20k/pointrend_r101_512x512_160k_ade20k_20200808_030852-8834902a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x512_160k_ade20k/pointrend_r101_512x512_160k_ade20k-20200808_030852.log.json + Paper: + Title: 'PointRend: Image Segmentation as Rendering' + URL: https://arxiv.org/abs/1912.08193 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/point_head.py#L36 + Framework: PyTorch diff --git a/configs/point_rend/point_rend.yml b/configs/point_rend/point_rend.yml deleted file mode 100644 index a4539081f3..0000000000 --- a/configs/point_rend/point_rend.yml +++ /dev/null @@ -1,104 +0,0 @@ -Collections: -- Name: PointRend - Metadata: - Training Data: - - Cityscapes - - ADE20K - Paper: - URL: https://arxiv.org/abs/1912.08193 - Title: 'PointRend: 
Image Segmentation as Rendering' - README: configs/point_rend/README.md - Code: - URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/point_head.py#L36 - Version: v0.17.0 - Converted From: - Code: https://github.com/facebookresearch/detectron2/tree/master/projects/PointRend -Models: -- Name: pointrend_r50_4xb2-80k_cityscapes-512x1024 - In Collection: PointRend - Metadata: - backbone: R-50 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 117.92 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 3.1 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 76.47 - mIoU(ms+flip): 78.13 - Config: configs/point_rend/pointrend_r50_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x1024_80k_cityscapes/pointrend_r50_512x1024_80k_cityscapes_20200711_015821-bb1ff523.pth -- Name: pointrend_r101_4xb2-80k_cityscapes-512x1024 - In Collection: PointRend - Metadata: - backbone: R-101 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 142.86 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 4.2 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.3 - mIoU(ms+flip): 79.97 - Config: configs/point_rend/pointrend_r101_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x1024_80k_cityscapes/pointrend_r101_512x1024_80k_cityscapes_20200711_170850-d0ca84be.pth -- Name: pointrend_r50_4xb4-160k_ade20k-512x512 - In Collection: PointRend - Metadata: - backbone: R-50 - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 57.77 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 5.1 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 37.64 - mIoU(ms+flip): 39.17 - Config: configs/point_rend/pointrend_r50_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x512_160k_ade20k/pointrend_r50_512x512_160k_ade20k_20200807_232644-ac3febf2.pth -- Name: pointrend_r101_4xb4-160k_ade20k-512x512 - In Collection: PointRend - Metadata: - backbone: R-101 - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 64.52 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 6.1 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 40.02 - mIoU(ms+flip): 41.6 - Config: configs/point_rend/pointrend_r101_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x512_160k_ade20k/pointrend_r101_512x512_160k_ade20k_20200808_030852-8834902a.pth diff --git a/configs/poolformer/README.md b/configs/poolformer/README.md index 3bdd2ba3f1..987db33d98 100644 --- a/configs/poolformer/README.md +++ b/configs/poolformer/README.md @@ -1,6 +1,6 @@ # PoolFormer -[MetaFormer is Actually What You Need for Vision](https://arxiv.org/abs/2111.11418) +> [MetaFormer is Actually What You Need for Vision](https://arxiv.org/abs/2111.11418) ## Introduction @@ -48,13 +48,13 @@ pip install "mmcls>=1.0.0rc0" ### ADE20K -| Method | Backbone | Crop Size | pretrain | Batch Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | 
mIoU(ms+flip) | mIoU\* | mIoU\*(ms+flip) | config | download | -| ------ | -------------- | --------- | ----------- | ---------- | ------- | -------- | -------------- | ----- | ------------: | ------ | --------------: | ------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| FPN | PoolFormer-S12 | 512x512 | ImageNet-1K | 32 | 40000 | 4.17 | 23.48 | 36.68 | - | 37.07 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/poolformer/fpn_poolformer_s12_8xb4-40k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s12_8x4_512x512_40k_ade20k/fpn_poolformer_s12_8x4_512x512_40k_ade20k_20220501_115154-b5aa2f49.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s12_8x4_512x512_40k_ade20k/fpn_poolformer_s12_8x4_512x512_40k_ade20k_20220501_115154.log.json) | -| FPN | PoolFormer-S24 | 512x512 | ImageNet-1K | 32 | 40000 | 5.47 | 15.74 | 40.12 | - | 40.36 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/poolformer/fpn_poolformer_s24_8xb4-40k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s24_8x4_512x512_40k_ade20k/fpn_poolformer_s24_8x4_512x512_40k_ade20k_20220503_222049-394a7cf7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s24_8x4_512x512_40k_ade20k/fpn_poolformer_s24_8x4_512x512_40k_ade20k_20220503_222049.log.json) | -| FPN | PoolFormer-S36 | 512x512 | ImageNet-1K | 32 | 40000 | 6.77 | 11.34 | 41.61 | - | 41.81 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/poolformer/fpn_poolformer_s36_8xb4-40k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s36_8x4_512x512_40k_ade20k/fpn_poolformer_s36_8x4_512x512_40k_ade20k_20220501_151122-b47e607d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s36_8x4_512x512_40k_ade20k/fpn_poolformer_s36_8x4_512x512_40k_ade20k_20220501_151122.log.json) | -| FPN | PoolFormer-M36 | 512x512 | ImageNet-1K | 32 | 40000 | 8.59 | 8.97 | 41.95 | - | 42.35 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/poolformer/fpn_poolformer_m36_8xb4-40k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m36_8x4_512x512_40k_ade20k/fpn_poolformer_m36_8x4_512x512_40k_ade20k_20220501_164230-3dc83921.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m36_8x4_512x512_40k_ade20k/fpn_poolformer_m36_8x4_512x512_40k_ade20k_20220501_164230.log.json) | -| FPN | PoolFormer-M48 | 512x512 | ImageNet-1K | 32 | 40000 | 10.48 | 6.69 | 42.43 | - | 42.76 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/poolformer/fpn_poolformer_m48_8xb4-40k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m48_8x4_512x512_40k_ade20k/fpn_poolformer_m48_8x4_512x512_40k_ade20k_20220504_003923-64168d3b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m48_8x4_512x512_40k_ade20k/fpn_poolformer_m48_8x4_512x512_40k_ade20k_20220504_003923.log.json) | +| Method | Backbone | Crop Size | pretrain | Batch Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | mIoU\* | mIoU\*(ms+flip) | config | download | +| ------ | -------------- | --------- | ----------- | ---------- | ------- | -------- | -------------- | ------ | ----- | ------------: | ------ | --------------: | --------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FPN | PoolFormer-S12 | 512x512 | ImageNet-1K | 32 | 40000 | 4.17 | 23.48 | V100 | 36.68 | - | 37.07 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/poolformer/fpn_poolformer_s12_8xb4-40k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s12_8x4_512x512_40k_ade20k/fpn_poolformer_s12_8x4_512x512_40k_ade20k_20220501_115154-b5aa2f49.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s12_8x4_512x512_40k_ade20k/fpn_poolformer_s12_8x4_512x512_40k_ade20k_20220501_115154.log.json) | +| FPN | PoolFormer-S24 | 512x512 | ImageNet-1K | 32 | 40000 | 5.47 | 15.74 | V100 | 40.12 | - | 40.36 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/poolformer/fpn_poolformer_s24_8xb4-40k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s24_8x4_512x512_40k_ade20k/fpn_poolformer_s24_8x4_512x512_40k_ade20k_20220503_222049-394a7cf7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s24_8x4_512x512_40k_ade20k/fpn_poolformer_s24_8x4_512x512_40k_ade20k_20220503_222049.log.json) | +| FPN | PoolFormer-S36 | 512x512 | ImageNet-1K | 32 | 40000 | 6.77 | 11.34 | V100 | 41.61 | - | 41.81 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/poolformer/fpn_poolformer_s36_8xb4-40k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s36_8x4_512x512_40k_ade20k/fpn_poolformer_s36_8x4_512x512_40k_ade20k_20220501_151122-b47e607d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s36_8x4_512x512_40k_ade20k/fpn_poolformer_s36_8x4_512x512_40k_ade20k_20220501_151122.log.json) | +| FPN | PoolFormer-M36 | 512x512 | ImageNet-1K | 32 | 40000 | 8.59 | 8.97 | V100 | 41.95 | - | 42.35 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/poolformer/fpn_poolformer_m36_8xb4-40k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m36_8x4_512x512_40k_ade20k/fpn_poolformer_m36_8x4_512x512_40k_ade20k_20220501_164230-3dc83921.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m36_8x4_512x512_40k_ade20k/fpn_poolformer_m36_8x4_512x512_40k_ade20k_20220501_164230.log.json) | +| FPN | PoolFormer-M48 | 512x512 | ImageNet-1K | 32 | 40000 | 10.48 | 6.69 | V100 | 42.43 | - | 42.76 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/poolformer/fpn_poolformer_m48_8xb4-40k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m48_8x4_512x512_40k_ade20k/fpn_poolformer_m48_8x4_512x512_40k_ade20k_20220504_003923-64168d3b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m48_8x4_512x512_40k_ade20k/fpn_poolformer_m48_8x4_512x512_40k_ade20k_20220504_003923.log.json) | Note: diff --git a/configs/poolformer/metafile.yaml b/configs/poolformer/metafile.yaml new file mode 100644 index 0000000000..12f402be65 --- /dev/null +++ b/configs/poolformer/metafile.yaml @@ -0,0 +1,116 @@ +Models: +- Name: fpn_poolformer_s12_8xb4-40k_ade20k-512x512 + In Collection: FPN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 36.68 + Config: configs/poolformer/fpn_poolformer_s12_8xb4-40k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 32 + Architecture: + - PoolFormer-S12 + - FPN + Training Resources: 8x V100 GPUS + Memory (GB): 4.17 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s12_8x4_512x512_40k_ade20k/fpn_poolformer_s12_8x4_512x512_40k_ade20k_20220501_115154-b5aa2f49.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s12_8x4_512x512_40k_ade20k/fpn_poolformer_s12_8x4_512x512_40k_ade20k_20220501_115154.log.json + Paper: + Title: MetaFormer is Actually What You Need for Vision + URL: https://arxiv.org/abs/2111.11418 + Code: https://github.com/open-mmlab/mmclassification/blob/v0.23.0/mmcls/models/backbones/poolformer.py#L198 + Framework: PyTorch +- Name: fpn_poolformer_s24_8xb4-40k_ade20k-512x512 + In Collection: FPN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 40.12 + Config: configs/poolformer/fpn_poolformer_s24_8xb4-40k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 32 + Architecture: + - PoolFormer-S24 + - FPN + Training Resources: 8x V100 GPUS + Memory (GB): 5.47 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s24_8x4_512x512_40k_ade20k/fpn_poolformer_s24_8x4_512x512_40k_ade20k_20220503_222049-394a7cf7.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s24_8x4_512x512_40k_ade20k/fpn_poolformer_s24_8x4_512x512_40k_ade20k_20220503_222049.log.json + Paper: + Title: MetaFormer is Actually What You Need for Vision + URL: https://arxiv.org/abs/2111.11418 + Code: https://github.com/open-mmlab/mmclassification/blob/v0.23.0/mmcls/models/backbones/poolformer.py#L198 + Framework: PyTorch +- Name: fpn_poolformer_s36_8xb4-40k_ade20k-512x512 + In Collection: FPN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.61 + Config: configs/poolformer/fpn_poolformer_s36_8xb4-40k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 32 + Architecture: + - PoolFormer-S36 + - FPN + Training Resources: 8x V100 GPUS + Memory (GB): 6.77 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s36_8x4_512x512_40k_ade20k/fpn_poolformer_s36_8x4_512x512_40k_ade20k_20220501_151122-b47e607d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s36_8x4_512x512_40k_ade20k/fpn_poolformer_s36_8x4_512x512_40k_ade20k_20220501_151122.log.json + Paper: + Title: MetaFormer is Actually What You Need for Vision + URL: https://arxiv.org/abs/2111.11418 + Code: https://github.com/open-mmlab/mmclassification/blob/v0.23.0/mmcls/models/backbones/poolformer.py#L198 + Framework: PyTorch +- Name: fpn_poolformer_m36_8xb4-40k_ade20k-512x512 + In Collection: FPN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.95 + Config: configs/poolformer/fpn_poolformer_m36_8xb4-40k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 32 + Architecture: + - PoolFormer-M36 + - FPN + Training Resources: 8x V100 GPUS + Memory (GB): 8.59 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m36_8x4_512x512_40k_ade20k/fpn_poolformer_m36_8x4_512x512_40k_ade20k_20220501_164230-3dc83921.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m36_8x4_512x512_40k_ade20k/fpn_poolformer_m36_8x4_512x512_40k_ade20k_20220501_164230.log.json + Paper: + Title: MetaFormer is Actually What You Need for Vision + URL: https://arxiv.org/abs/2111.11418 + Code: https://github.com/open-mmlab/mmclassification/blob/v0.23.0/mmcls/models/backbones/poolformer.py#L198 + Framework: PyTorch +- Name: fpn_poolformer_m48_8xb4-40k_ade20k-512x512 + In Collection: FPN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.43 + Config: configs/poolformer/fpn_poolformer_m48_8xb4-40k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 32 + Architecture: + - PoolFormer-M48 + - FPN + Training Resources: 8x V100 GPUS + Memory (GB): 10.48 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m48_8x4_512x512_40k_ade20k/fpn_poolformer_m48_8x4_512x512_40k_ade20k_20220504_003923-64168d3b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m48_8x4_512x512_40k_ade20k/fpn_poolformer_m48_8x4_512x512_40k_ade20k_20220504_003923.log.json + Paper: + Title: MetaFormer is Actually What You Need for Vision + URL: https://arxiv.org/abs/2111.11418 + Code: https://github.com/open-mmlab/mmclassification/blob/v0.23.0/mmcls/models/backbones/poolformer.py#L198 + Framework: PyTorch diff --git a/configs/poolformer/poolformer.yml b/configs/poolformer/poolformer.yml deleted file mode 100644 index fa5fc30125..0000000000 --- a/configs/poolformer/poolformer.yml +++ /dev/null @@ -1,106 +0,0 @@ -Models: -- Name: fpn_poolformer_s12_8xb4-40k_ade20k-512x512 - In Collection: FPN - Metadata: - backbone: PoolFormer-S12 - crop size: (512,512) - lr schd: 40000 - inference time (ms/im): - - value: 42.59 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 4.17 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 36.68 - Config: configs/poolformer/fpn_poolformer_s12_8xb4-40k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s12_8x4_512x512_40k_ade20k/fpn_poolformer_s12_8x4_512x512_40k_ade20k_20220501_115154-b5aa2f49.pth -- Name: fpn_poolformer_s24_8xb4-40k_ade20k-512x512 - In Collection: FPN - Metadata: - 
backbone: PoolFormer-S24 - crop size: (512,512) - lr schd: 40000 - inference time (ms/im): - - value: 63.53 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 5.47 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 40.12 - Config: configs/poolformer/fpn_poolformer_s24_8xb4-40k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_s24_8x4_512x512_40k_ade20k/fpn_poolformer_s24_8x4_512x512_40k_ade20k_20220503_222049-394a7cf7.pth -- Name: '' - In Collection: FPN - Metadata: - backbone: PoolFormer-S36 - crop size: (512,512) - lr schd: 40000 - inference time (ms/im): - - value: 88.18 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 6.77 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 41.61 - Config: '' - Weights: '' -- Name: fpn_poolformer_m36_8xb4-40k_ade20k-512x512 - In Collection: FPN - Metadata: - backbone: PoolFormer-M36 - crop size: (512,512) - lr schd: 40000 - inference time (ms/im): - - value: 111.48 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 8.59 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 41.95 - Config: configs/poolformer/fpn_poolformer_m36_8xb4-40k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m36_8x4_512x512_40k_ade20k/fpn_poolformer_m36_8x4_512x512_40k_ade20k_20220501_164230-3dc83921.pth -- Name: fpn_poolformer_m48_8xb4-40k_ade20k-512x512 - In Collection: FPN - Metadata: - backbone: PoolFormer-M48 - crop size: (512,512) - lr schd: 40000 - inference time (ms/im): - - value: 149.48 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 10.48 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 42.43 - Config: configs/poolformer/fpn_poolformer_m48_8xb4-40k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/poolformer/fpn_poolformer_m48_8x4_512x512_40k_ade20k/fpn_poolformer_m48_8x4_512x512_40k_ade20k_20220504_003923-64168d3b.pth diff --git a/configs/psanet/README.md b/configs/psanet/README.md index 7182e500a1..1f5680fbab 100644 --- a/configs/psanet/README.md +++ b/configs/psanet/README.md @@ -1,6 +1,6 @@ # PSANet -[PSANet: Point-wise Spatial Attention Network for Scene Parsing](https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf) +> [PSANet: Point-wise Spatial Attention Network for Scene Parsing](https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf) ## Introduction @@ -22,6 +22,39 @@ We notice information flow in convolutional neural networksis restricted insid +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------ | 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSANet | R-50-D8 | 512x1024 | 40000 | 7 | 3.17 | V100 | 77.63 | 79.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet/psanet_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_40k_cityscapes/psanet_r50-d8_512x1024_40k_cityscapes_20200606_103117-99fac37c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_40k_cityscapes/psanet_r50-d8_512x1024_40k_cityscapes_20200606_103117.log.json) | +| PSANet | R-101-D8 | 512x1024 | 40000 | 10.5 | 2.20 | V100 | 79.14 | 80.19 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet/psanet_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_40k_cityscapes/psanet_r101-d8_512x1024_40k_cityscapes_20200606_001418-27b9cfa7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_40k_cityscapes/psanet_r101-d8_512x1024_40k_cityscapes_20200606_001418.log.json) | +| PSANet | R-50-D8 | 769x769 | 40000 | 7.9 | 1.40 | V100 | 77.99 | 79.64 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet/psanet_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_40k_cityscapes/psanet_r50-d8_769x769_40k_cityscapes_20200530_033717-d5365506.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_40k_cityscapes/psanet_r50-d8_769x769_40k_cityscapes_20200530_033717.log.json) | +| PSANet | R-101-D8 | 769x769 | 40000 | 11.9 | 0.98 | V100 | 78.43 | 80.26 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet/psanet_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_40k_cityscapes/psanet_r101-d8_769x769_40k_cityscapes_20200530_035107-997da1e6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_40k_cityscapes/psanet_r101-d8_769x769_40k_cityscapes_20200530_035107.log.json) | +| PSANet | R-50-D8 | 512x1024 | 80000 | - | - | V100 | 77.24 | 78.69 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet/psanet_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_80k_cityscapes/psanet_r50-d8_512x1024_80k_cityscapes_20200606_161842-ab60a24f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_80k_cityscapes/psanet_r50-d8_512x1024_80k_cityscapes_20200606_161842.log.json) | +| PSANet | R-101-D8 | 512x1024 | 80000 | - | - | V100 | 79.31 | 80.53 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet/psanet_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_80k_cityscapes/psanet_r101-d8_512x1024_80k_cityscapes_20200606_161823-0f73a169.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_80k_cityscapes/psanet_r101-d8_512x1024_80k_cityscapes_20200606_161823.log.json) | +| PSANet | R-50-D8 | 769x769 | 80000 | - | - | V100 | 79.31 | 80.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet/psanet_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_80k_cityscapes/psanet_r50-d8_769x769_80k_cityscapes_20200606_225134-fe42f49e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_80k_cityscapes/psanet_r50-d8_769x769_80k_cityscapes_20200606_225134.log.json) | +| PSANet | R-101-D8 | 769x769 | 80000 | - | - | V100 | 79.69 | 80.89 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet/psanet_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_80k_cityscapes/psanet_r101-d8_769x769_80k_cityscapes_20200606_214550-7665827b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_80k_cityscapes/psanet_r101-d8_769x769_80k_cityscapes_20200606_214550.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSANet | R-50-D8 | 512x512 | 80000 | 9 | 18.91 | V100 | 41.14 | 41.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet/psanet_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_80k_ade20k/psanet_r50-d8_512x512_80k_ade20k_20200614_144141-835e4b97.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_80k_ade20k/psanet_r50-d8_512x512_80k_ade20k_20200614_144141.log.json) | +| PSANet | R-101-D8 | 512x512 | 80000 | 12.5 | 13.13 | V100 | 43.80 | 44.75 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet/psanet_r101-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_80k_ade20k/psanet_r101-d8_512x512_80k_ade20k_20200614_185117-1fab60d4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_80k_ade20k/psanet_r101-d8_512x512_80k_ade20k_20200614_185117.log.json) | +| PSANet | R-50-D8 | 512x512 | 160000 | - | - | V100 | 41.67 | 42.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet/psanet_r50-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_160k_ade20k/psanet_r50-d8_512x512_160k_ade20k_20200615_161258-148077dd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_160k_ade20k/psanet_r50-d8_512x512_160k_ade20k_20200615_161258.log.json) | +| PSANet | R-101-D8 | 
512x512 | 160000 | - | - | V100 | 43.74 | 45.38 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet/psanet_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_160k_ade20k/psanet_r101-d8_512x512_160k_ade20k_20200615_161537-dbfa564c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_160k_ade20k/psanet_r101-d8_512x512_160k_ade20k_20200615_161537.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSANet | R-50-D8 | 512x512 | 20000 | 6.9 | 18.24 | V100 | 76.39 | 77.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet/psanet_r50-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_20k_voc12aug/psanet_r50-d8_512x512_20k_voc12aug_20200617_102413-2f1bbaa1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_20k_voc12aug/psanet_r50-d8_512x512_20k_voc12aug_20200617_102413.log.json) | +| PSANet | R-101-D8 | 512x512 | 20000 | 10.4 | 12.63 | V100 | 77.91 | 79.30 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet/psanet_r101-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_20k_voc12aug/psanet_r101-d8_512x512_20k_voc12aug_20200617_110624-946fef11.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_20k_voc12aug/psanet_r101-d8_512x512_20k_voc12aug_20200617_110624.log.json) | +| PSANet | R-50-D8 | 512x512 | 40000 | - | - | V100 | 76.30 | 77.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet/psanet_r50-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_40k_voc12aug/psanet_r50-d8_512x512_40k_voc12aug_20200613_161946-f596afb5.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_40k_voc12aug/psanet_r50-d8_512x512_40k_voc12aug_20200613_161946.log.json) | +| PSANet | R-101-D8 | 512x512 | 40000 | - | - | V100 | 77.73 | 79.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet/psanet_r101-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_40k_voc12aug/psanet_r101-d8_512x512_40k_voc12aug_20200613_161946-1f560f9e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_40k_voc12aug/psanet_r101-d8_512x512_40k_voc12aug_20200613_161946.log.json) | + ## Citation ```bibtex @@ -33,36 +66,3 @@ We notice information flow in convolutional neural networksis restricted insid year={2018} } ``` - -## Results and models - -### Cityscapes - -| 
Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| PSANet | R-50-D8 | 512x1024 | 40000 | 7 | 3.17 | 77.63 | 79.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/psanet/psanet_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_40k_cityscapes/psanet_r50-d8_512x1024_40k_cityscapes_20200606_103117-99fac37c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_40k_cityscapes/psanet_r50-d8_512x1024_40k_cityscapes_20200606_103117.log.json) | -| PSANet | R-101-D8 | 512x1024 | 40000 | 10.5 | 2.20 | 79.14 | 80.19 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/psanet/psanet_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_40k_cityscapes/psanet_r101-d8_512x1024_40k_cityscapes_20200606_001418-27b9cfa7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_40k_cityscapes/psanet_r101-d8_512x1024_40k_cityscapes_20200606_001418.log.json) | -| PSANet | R-50-D8 | 769x769 | 40000 | 7.9 | 1.40 | 77.99 | 79.64 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/psanet/psanet_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_40k_cityscapes/psanet_r50-d8_769x769_40k_cityscapes_20200530_033717-d5365506.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_40k_cityscapes/psanet_r50-d8_769x769_40k_cityscapes_20200530_033717.log.json) | -| PSANet | R-101-D8 | 769x769 | 40000 | 11.9 | 0.98 | 78.43 | 80.26 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/psanet/psanet_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_40k_cityscapes/psanet_r101-d8_769x769_40k_cityscapes_20200530_035107-997da1e6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_40k_cityscapes/psanet_r101-d8_769x769_40k_cityscapes_20200530_035107.log.json) | -| PSANet | R-50-D8 | 512x1024 | 80000 | - | - | 77.24 | 78.69 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/psanet/psanet_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_80k_cityscapes/psanet_r50-d8_512x1024_80k_cityscapes_20200606_161842-ab60a24f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_80k_cityscapes/psanet_r50-d8_512x1024_80k_cityscapes_20200606_161842.log.json) | -| PSANet | R-101-D8 | 512x1024 | 80000 | - | - | 79.31 | 80.53 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/psanet/psanet_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_80k_cityscapes/psanet_r101-d8_512x1024_80k_cityscapes_20200606_161823-0f73a169.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_80k_cityscapes/psanet_r101-d8_512x1024_80k_cityscapes_20200606_161823.log.json) | -| PSANet | R-50-D8 | 769x769 | 80000 | - | - | 79.31 | 80.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/psanet/psanet_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_80k_cityscapes/psanet_r50-d8_769x769_80k_cityscapes_20200606_225134-fe42f49e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_80k_cityscapes/psanet_r50-d8_769x769_80k_cityscapes_20200606_225134.log.json) | -| PSANet | R-101-D8 | 769x769 | 80000 | - | - | 79.69 | 80.89 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/psanet/psanet_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_80k_cityscapes/psanet_r101-d8_769x769_80k_cityscapes_20200606_214550-7665827b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_80k_cityscapes/psanet_r101-d8_769x769_80k_cityscapes_20200606_214550.log.json) | - -### ADE20K - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| PSANet | R-50-D8 | 512x512 | 80000 | 9 | 18.91 | 41.14 | 41.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/psanet/psanet_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_80k_ade20k/psanet_r50-d8_512x512_80k_ade20k_20200614_144141-835e4b97.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_80k_ade20k/psanet_r50-d8_512x512_80k_ade20k_20200614_144141.log.json) | -| PSANet | R-101-D8 | 512x512 | 80000 | 12.5 | 13.13 | 43.80 | 44.75 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/psanet/psanet_r101-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_80k_ade20k/psanet_r101-d8_512x512_80k_ade20k_20200614_185117-1fab60d4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_80k_ade20k/psanet_r101-d8_512x512_80k_ade20k_20200614_185117.log.json) | -| PSANet | R-50-D8 | 512x512 | 160000 | - | - | 41.67 | 42.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/psanet/psanet_r50-d8_4xb4-160k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_160k_ade20k/psanet_r50-d8_512x512_160k_ade20k_20200615_161258-148077dd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_160k_ade20k/psanet_r50-d8_512x512_160k_ade20k_20200615_161258.log.json) | -| PSANet | R-101-D8 | 512x512 | 160000 | - | - | 43.74 | 45.38 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/psanet/psanet_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_160k_ade20k/psanet_r101-d8_512x512_160k_ade20k_20200615_161537-dbfa564c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_160k_ade20k/psanet_r101-d8_512x512_160k_ade20k_20200615_161537.log.json) | - -### Pascal VOC 2012 + Aug - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| PSANet | R-50-D8 | 512x512 | 20000 | 6.9 | 18.24 | 76.39 | 77.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/psanet/psanet_r50-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_20k_voc12aug/psanet_r50-d8_512x512_20k_voc12aug_20200617_102413-2f1bbaa1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_20k_voc12aug/psanet_r50-d8_512x512_20k_voc12aug_20200617_102413.log.json) | -| PSANet | R-101-D8 | 512x512 | 20000 | 10.4 | 12.63 | 77.91 | 79.30 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/psanet/psanet_r101-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_20k_voc12aug/psanet_r101-d8_512x512_20k_voc12aug_20200617_110624-946fef11.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_20k_voc12aug/psanet_r101-d8_512x512_20k_voc12aug_20200617_110624.log.json) | -| PSANet | R-50-D8 | 512x512 | 40000 | - | - | 76.30 | 77.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/psanet/psanet_r50-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_40k_voc12aug/psanet_r50-d8_512x512_40k_voc12aug_20200613_161946-f596afb5.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_40k_voc12aug/psanet_r50-d8_512x512_40k_voc12aug_20200613_161946.log.json) | -| PSANet | R-101-D8 | 512x512 | 40000 | - | - | 77.73 | 79.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/psanet/psanet_r101-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_40k_voc12aug/psanet_r101-d8_512x512_40k_voc12aug_20200613_161946-1f560f9e.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_40k_voc12aug/psanet_r101-d8_512x512_40k_voc12aug_20200613_161946.log.json) | diff --git a/configs/psanet/metafile.yaml b/configs/psanet/metafile.yaml new file mode 100644 index 0000000000..3fbe6f6d3e --- /dev/null +++ b/configs/psanet/metafile.yaml @@ -0,0 +1,391 @@ +Collections: +- Name: PSANet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + Paper: + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + README: configs/psanet/README.md + Frameworks: + - PyTorch +Models: +- Name: psanet_r50-d8_4xb2-40k_cityscapes-512x1024 + In Collection: PSANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.63 + mIoU(ms+flip): 79.04 + Config: configs/psanet/psanet_r50-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - PSANet + Training Resources: 4x V100 GPUS + Memory (GB): 7.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_40k_cityscapes/psanet_r50-d8_512x1024_40k_cityscapes_20200606_103117-99fac37c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_40k_cityscapes/psanet_r50-d8_512x1024_40k_cityscapes_20200606_103117.log.json + Paper: + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 + Framework: PyTorch +- Name: psanet_r101-d8_4xb2-40k_cityscapes-512x1024 + In Collection: PSANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.14 + mIoU(ms+flip): 80.19 + Config: configs/psanet/psanet_r101-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - PSANet + Training Resources: 4x V100 GPUS + Memory (GB): 10.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_40k_cityscapes/psanet_r101-d8_512x1024_40k_cityscapes_20200606_001418-27b9cfa7.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_40k_cityscapes/psanet_r101-d8_512x1024_40k_cityscapes_20200606_001418.log.json + Paper: + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 + Framework: PyTorch +- Name: psanet_r50-d8_4xb2-40k_cityscapes-769x769 + In Collection: PSANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.99 + mIoU(ms+flip): 79.64 + Config: configs/psanet/psanet_r50-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - PSANet + Training Resources: 4x V100 GPUS + Memory (GB): 7.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_40k_cityscapes/psanet_r50-d8_769x769_40k_cityscapes_20200530_033717-d5365506.pth + Training 
log: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_40k_cityscapes/psanet_r50-d8_769x769_40k_cityscapes_20200530_033717.log.json + Paper: + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 + Framework: PyTorch +- Name: psanet_r101-d8_4xb2-40k_cityscapes-769x769 + In Collection: PSANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.43 + mIoU(ms+flip): 80.26 + Config: configs/psanet/psanet_r101-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - PSANet + Training Resources: 4x V100 GPUS + Memory (GB): 11.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_40k_cityscapes/psanet_r101-d8_769x769_40k_cityscapes_20200530_035107-997da1e6.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_40k_cityscapes/psanet_r101-d8_769x769_40k_cityscapes_20200530_035107.log.json + Paper: + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 + Framework: PyTorch +- Name: psanet_r50-d8_4xb2-80k_cityscapes-512x1024 + In Collection: PSANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.24 + mIoU(ms+flip): 78.69 + Config: configs/psanet/psanet_r50-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - PSANet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_80k_cityscapes/psanet_r50-d8_512x1024_80k_cityscapes_20200606_161842-ab60a24f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_80k_cityscapes/psanet_r50-d8_512x1024_80k_cityscapes_20200606_161842.log.json + Paper: + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 + Framework: PyTorch +- Name: psanet_r101-d8_4xb2-80k_cityscapes-512x1024 + In Collection: PSANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.31 + mIoU(ms+flip): 80.53 + Config: configs/psanet/psanet_r101-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - PSANet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_80k_cityscapes/psanet_r101-d8_512x1024_80k_cityscapes_20200606_161823-0f73a169.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_80k_cityscapes/psanet_r101-d8_512x1024_80k_cityscapes_20200606_161823.log.json + Paper: + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: 
https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 + Framework: PyTorch +- Name: psanet_r50-d8_4xb2-80k_cityscapes-769x769 + In Collection: PSANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.31 + mIoU(ms+flip): 80.91 + Config: configs/psanet/psanet_r50-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - PSANet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_80k_cityscapes/psanet_r50-d8_769x769_80k_cityscapes_20200606_225134-fe42f49e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_80k_cityscapes/psanet_r50-d8_769x769_80k_cityscapes_20200606_225134.log.json + Paper: + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 + Framework: PyTorch +- Name: psanet_r101-d8_4xb2-80k_cityscapes-769x769 + In Collection: PSANet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.69 + mIoU(ms+flip): 80.89 + Config: configs/psanet/psanet_r101-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - PSANet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_80k_cityscapes/psanet_r101-d8_769x769_80k_cityscapes_20200606_214550-7665827b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_80k_cityscapes/psanet_r101-d8_769x769_80k_cityscapes_20200606_214550.log.json + Paper: + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 + Framework: PyTorch +- Name: psanet_r50-d8_4xb4-80k_ade20k-512x512 + In Collection: PSANet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.14 + mIoU(ms+flip): 41.91 + Config: configs/psanet/psanet_r50-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - PSANet + Training Resources: 4x V100 GPUS + Memory (GB): 9.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_80k_ade20k/psanet_r50-d8_512x512_80k_ade20k_20200614_144141-835e4b97.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_80k_ade20k/psanet_r50-d8_512x512_80k_ade20k_20200614_144141.log.json + Paper: + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 + Framework: PyTorch +- Name: psanet_r101-d8_4xb4-80k_ade20k-512x512 + In Collection: PSANet + Results: + Task: Semantic Segmentation 
+ Dataset: ADE20K + Metrics: + mIoU: 43.8 + mIoU(ms+flip): 44.75 + Config: configs/psanet/psanet_r101-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - PSANet + Training Resources: 4x V100 GPUS + Memory (GB): 12.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_80k_ade20k/psanet_r101-d8_512x512_80k_ade20k_20200614_185117-1fab60d4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_80k_ade20k/psanet_r101-d8_512x512_80k_ade20k_20200614_185117.log.json + Paper: + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 + Framework: PyTorch +- Name: psanet_r50-d8_4xb4-160k_ade20k-512x512 + In Collection: PSANet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.67 + mIoU(ms+flip): 42.95 + Config: configs/psanet/psanet_r50-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - PSANet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_160k_ade20k/psanet_r50-d8_512x512_160k_ade20k_20200615_161258-148077dd.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_160k_ade20k/psanet_r50-d8_512x512_160k_ade20k_20200615_161258.log.json + Paper: + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 + Framework: PyTorch +- Name: psanet_r101-d8_4xb4-160k_ade20k-512x512 + In Collection: PSANet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.74 + mIoU(ms+flip): 45.38 + Config: configs/psanet/psanet_r101-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - PSANet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_160k_ade20k/psanet_r101-d8_512x512_160k_ade20k_20200615_161537-dbfa564c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_160k_ade20k/psanet_r101-d8_512x512_160k_ade20k_20200615_161537.log.json + Paper: + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 + Framework: PyTorch +- Name: psanet_r50-d8_4xb4-20k_voc12aug-512x512 + In Collection: PSANet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.39 + mIoU(ms+flip): 77.34 + Config: configs/psanet/psanet_r50-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - PSANet + Training Resources: 4x V100 GPUS + Memory (GB): 6.9 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_20k_voc12aug/psanet_r50-d8_512x512_20k_voc12aug_20200617_102413-2f1bbaa1.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_20k_voc12aug/psanet_r50-d8_512x512_20k_voc12aug_20200617_102413.log.json + Paper: + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 + Framework: PyTorch +- Name: psanet_r101-d8_4xb4-20k_voc12aug-512x512 + In Collection: PSANet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.91 + mIoU(ms+flip): 79.3 + Config: configs/psanet/psanet_r101-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - PSANet + Training Resources: 4x V100 GPUS + Memory (GB): 10.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_20k_voc12aug/psanet_r101-d8_512x512_20k_voc12aug_20200617_110624-946fef11.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_20k_voc12aug/psanet_r101-d8_512x512_20k_voc12aug_20200617_110624.log.json + Paper: + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 + Framework: PyTorch +- Name: psanet_r50-d8_4xb4-40k_voc12aug-512x512 + In Collection: PSANet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.3 + mIoU(ms+flip): 77.35 + Config: configs/psanet/psanet_r50-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - PSANet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_40k_voc12aug/psanet_r50-d8_512x512_40k_voc12aug_20200613_161946-f596afb5.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_40k_voc12aug/psanet_r50-d8_512x512_40k_voc12aug_20200613_161946.log.json + Paper: + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 + Framework: PyTorch +- Name: psanet_r101-d8_4xb4-40k_voc12aug-512x512 + In Collection: PSANet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.73 + mIoU(ms+flip): 79.05 + Config: configs/psanet/psanet_r101-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - PSANet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_40k_voc12aug/psanet_r101-d8_512x512_40k_voc12aug_20200613_161946-1f560f9e.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_40k_voc12aug/psanet_r101-d8_512x512_40k_voc12aug_20200613_161946.log.json + Paper: + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 + Framework: PyTorch diff --git a/configs/psanet/psanet.yml b/configs/psanet/psanet.yml deleted file mode 100644 index fca1ac1b40..0000000000 --- a/configs/psanet/psanet.yml +++ /dev/null @@ -1,305 +0,0 @@ -Collections: -- Name: PSANet - Metadata: - Training Data: - - Cityscapes - - ADE20K - - Pascal VOC 2012 + Aug - Paper: - URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf - Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' - README: configs/psanet/README.md - Code: - URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 - Version: v0.17.0 - Converted From: - Code: https://github.com/hszhao/PSANet -Models: -- Name: psanet_r50-d8_4xb2-40k_cityscapes-512x1024 - In Collection: PSANet - Metadata: - backbone: R-50-D8 - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 315.46 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 7.0 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 77.63 - mIoU(ms+flip): 79.04 - Config: configs/psanet/psanet_r50-d8_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_40k_cityscapes/psanet_r50-d8_512x1024_40k_cityscapes_20200606_103117-99fac37c.pth -- Name: psanet_r101-d8_4xb2-40k_cityscapes-512x1024 - In Collection: PSANet - Metadata: - backbone: R-101-D8 - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 454.55 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 10.5 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.14 - mIoU(ms+flip): 80.19 - Config: configs/psanet/psanet_r101-d8_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_40k_cityscapes/psanet_r101-d8_512x1024_40k_cityscapes_20200606_001418-27b9cfa7.pth -- Name: psanet_r50-d8_4xb2-40k_cityscapes-769x769 - In Collection: PSANet - Metadata: - backbone: R-50-D8 - crop size: (769,769) - lr schd: 40000 - inference time (ms/im): - - value: 714.29 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 7.9 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 77.99 - mIoU(ms+flip): 79.64 - Config: configs/psanet/psanet_r50-d8_4xb2-40k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_40k_cityscapes/psanet_r50-d8_769x769_40k_cityscapes_20200530_033717-d5365506.pth -- Name: psanet_r101-d8_4xb2-40k_cityscapes-769x769 - In Collection: PSANet - Metadata: - backbone: R-101-D8 - crop size: (769,769) - lr schd: 40000 - inference time (ms/im): - - value: 1020.41 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 11.9 - 
Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.43 - mIoU(ms+flip): 80.26 - Config: configs/psanet/psanet_r101-d8_4xb2-40k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_40k_cityscapes/psanet_r101-d8_769x769_40k_cityscapes_20200530_035107-997da1e6.pth -- Name: psanet_r50-d8_4xb2-80k_cityscapes-512x1024 - In Collection: PSANet - Metadata: - backbone: R-50-D8 - crop size: (512,1024) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 77.24 - mIoU(ms+flip): 78.69 - Config: configs/psanet/psanet_r50-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_80k_cityscapes/psanet_r50-d8_512x1024_80k_cityscapes_20200606_161842-ab60a24f.pth -- Name: psanet_r101-d8_4xb2-80k_cityscapes-512x1024 - In Collection: PSANet - Metadata: - backbone: R-101-D8 - crop size: (512,1024) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.31 - mIoU(ms+flip): 80.53 - Config: configs/psanet/psanet_r101-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_80k_cityscapes/psanet_r101-d8_512x1024_80k_cityscapes_20200606_161823-0f73a169.pth -- Name: psanet_r50-d8_4xb2-80k_cityscapes-769x769 - In Collection: PSANet - Metadata: - backbone: R-50-D8 - crop size: (769,769) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.31 - mIoU(ms+flip): 80.91 - Config: configs/psanet/psanet_r50-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_80k_cityscapes/psanet_r50-d8_769x769_80k_cityscapes_20200606_225134-fe42f49e.pth -- Name: psanet_r101-d8_4xb2-80k_cityscapes-769x769 - In Collection: PSANet - Metadata: - backbone: R-101-D8 - crop size: (769,769) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.69 - mIoU(ms+flip): 80.89 - Config: configs/psanet/psanet_r101-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_80k_cityscapes/psanet_r101-d8_769x769_80k_cityscapes_20200606_214550-7665827b.pth -- Name: psanet_r50-d8_4xb4-80k_ade20k-512x512 - In Collection: PSANet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 52.88 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 9.0 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 41.14 - mIoU(ms+flip): 41.91 - Config: configs/psanet/psanet_r50-d8_4xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_80k_ade20k/psanet_r50-d8_512x512_80k_ade20k_20200614_144141-835e4b97.pth -- Name: psanet_r101-d8_4xb4-80k_ade20k-512x512 - In Collection: PSANet - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 76.16 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 12.5 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 43.8 - mIoU(ms+flip): 44.75 - Config: configs/psanet/psanet_r101-d8_4xb4-80k_ade20k-512x512.py - Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_80k_ade20k/psanet_r101-d8_512x512_80k_ade20k_20200614_185117-1fab60d4.pth -- Name: psanet_r50-d8_4xb4-160k_ade20k-512x512 - In Collection: PSANet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 41.67 - mIoU(ms+flip): 42.95 - Config: configs/psanet/psanet_r50-d8_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_160k_ade20k/psanet_r50-d8_512x512_160k_ade20k_20200615_161258-148077dd.pth -- Name: psanet_r101-d8_4xb4-160k_ade20k-512x512 - In Collection: PSANet - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 43.74 - mIoU(ms+flip): 45.38 - Config: configs/psanet/psanet_r101-d8_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_160k_ade20k/psanet_r101-d8_512x512_160k_ade20k_20200615_161537-dbfa564c.pth -- Name: psanet_r50-d8_4xb4-20k_voc12aug-512x512 - In Collection: PSANet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 20000 - inference time (ms/im): - - value: 54.82 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 6.9 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 76.39 - mIoU(ms+flip): 77.34 - Config: configs/psanet/psanet_r50-d8_4xb4-20k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_20k_voc12aug/psanet_r50-d8_512x512_20k_voc12aug_20200617_102413-2f1bbaa1.pth -- Name: psanet_r101-d8_4xb4-20k_voc12aug-512x512 - In Collection: PSANet - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 20000 - inference time (ms/im): - - value: 79.18 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 10.4 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 77.91 - mIoU(ms+flip): 79.3 - Config: configs/psanet/psanet_r101-d8_4xb4-20k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_20k_voc12aug/psanet_r101-d8_512x512_20k_voc12aug_20200617_110624-946fef11.pth -- Name: psanet_r50-d8_4xb4-40k_voc12aug-512x512 - In Collection: PSANet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 40000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 76.3 - mIoU(ms+flip): 77.35 - Config: configs/psanet/psanet_r50-d8_4xb4-40k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_40k_voc12aug/psanet_r50-d8_512x512_40k_voc12aug_20200613_161946-f596afb5.pth -- Name: psanet_r101-d8_4xb4-40k_voc12aug-512x512 - In Collection: PSANet - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 40000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 77.73 - mIoU(ms+flip): 79.05 - Config: configs/psanet/psanet_r101-d8_4xb4-40k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_40k_voc12aug/psanet_r101-d8_512x512_40k_voc12aug_20200613_161946-1f560f9e.pth diff --git a/configs/pspnet/README.md b/configs/pspnet/README.md index 
9135091102..4209d259b7 100644 --- a/configs/pspnet/README.md +++ b/configs/pspnet/README.md @@ -1,6 +1,6 @@ # PSPNet -[Pyramid Scene Parsing Network](https://arxiv.org/abs/1612.01105) +> [Pyramid Scene Parsing Network](https://arxiv.org/abs/1612.01105) ## Introduction @@ -27,152 +27,132 @@ Scene parsing is challenging for unrestricted open vocabulary and diverse scenes PSPNet-R50 D8 model structure -## Citation - -```bibtex -@inproceedings{zhao2017pspnet, - title={Pyramid Scene Parsing Network}, - author={Zhao, Hengshuang and Shi, Jianping and Qi, Xiaojuan and Wang, Xiaogang and Jia, Jiaya}, - booktitle={CVPR}, - year={2017} -} -``` - -```bibtex -@article{wightman2021resnet, - title={Resnet strikes back: An improved training procedure in timm}, - author={Wightman, Ross and Touvron, Hugo and J{\'e}gou, Herv{\'e}}, - journal={arXiv preprint arXiv:2110.00476}, - year={2021} -} -``` - ## Results and models ### Cityscapes -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------------- | ------------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| PSPNet | R-50-D8 | 512x1024 | 40000 | 6.1 | 4.07 | 77.85 | 79.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338.log.json) | -| PSPNet | R-101-D8 | 512x1024 | 40000 | 9.6 | 2.68 | 78.34 | 79.74 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751-467e7cf4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751.log.json) | -| PSPNet | R-50-D8 | 769x769 | 40000 | 6.9 | 1.76 | 78.26 | 79.88 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_40k_cityscapes/pspnet_r50-d8_769x769_40k_cityscapes_20200606_112725-86638686.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_40k_cityscapes/pspnet_r50-d8_769x769_40k_cityscapes_20200606_112725.log.json) | -| PSPNet | R-101-D8 | 769x769 | 40000 | 10.9 | 1.15 | 79.08 | 80.28 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r101-d8_4xb2-40k_cityscapes-769x769.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_40k_cityscapes/pspnet_r101-d8_769x769_40k_cityscapes_20200606_112753-61c6f5be.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_40k_cityscapes/pspnet_r101-d8_769x769_40k_cityscapes_20200606_112753.log.json) | -| PSPNet | R-18-D8 | 512x1024 | 80000 | 1.7 | 15.71 | 74.87 | 76.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r18-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes/pspnet_r18-d8_512x1024_80k_cityscapes_20201225_021458-09ffa746.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes/pspnet_r18-d8_512x1024_80k_cityscapes-20201225_021458.log.json) | -| PSPNet | R-50-D8 | 512x1024 | 80000 | - | - | 78.55 | 79.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes/pspnet_r50-d8_512x1024_80k_cityscapes_20200606_112131-2376f12b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes/pspnet_r50-d8_512x1024_80k_cityscapes_20200606_112131.log.json) | -| PSPNet | R-50b-D8 rsb | 512x1024 | 80000 | 6.2 | 3.82 | 78.47 | 79.45 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r50-d8-rsb_4xb2-adamw-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes_20220315_123238-588c30be.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes_20220315_123238.log.json) | -| PSPNet | R-101-D8 | 512x1024 | 80000 | - | - | 79.76 | 81.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes/pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211-e1e1100f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes/pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211.log.json) | -| PSPNet (FP16) | R-101-D8 | 512x1024 | 80000 | 5.34 | 8.77 | 79.46 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_fp16_512x1024_80k_cityscapes/pspnet_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230919-a0875e5c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_fp16_512x1024_80k_cityscapes/pspnet_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230919.log.json) | -| PSPNet | R-18-D8 | 769x769 | 80000 | 1.9 | 6.20 | 75.90 | 77.86 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r18-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_769x769_80k_cityscapes/pspnet_r18-d8_769x769_80k_cityscapes_20201225_021458-3deefc62.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_769x769_80k_cityscapes/pspnet_r18-d8_769x769_80k_cityscapes-20201225_021458.log.json) | -| PSPNet | R-50-D8 | 769x769 | 80000 | - | - | 79.59 | 80.69 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_80k_cityscapes/pspnet_r50-d8_769x769_80k_cityscapes_20200606_210121-5ccf03dd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_80k_cityscapes/pspnet_r50-d8_769x769_80k_cityscapes_20200606_210121.log.json) | -| PSPNet | R-101-D8 | 769x769 | 80000 | - | - | 79.77 | 81.06 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_80k_cityscapes/pspnet_r101-d8_769x769_80k_cityscapes_20200606_225055-dba412fa.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_80k_cityscapes/pspnet_r101-d8_769x769_80k_cityscapes_20200606_225055.log.json) | -| PSPNet | R-18b-D8 | 512x1024 | 80000 | 1.5 | 16.28 | 74.23 | 75.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r18b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_512x1024_80k_cityscapes/pspnet_r18b-d8_512x1024_80k_cityscapes_20201226_063116-26928a60.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_512x1024_80k_cityscapes/pspnet_r18b-d8_512x1024_80k_cityscapes-20201226_063116.log.json) | -| PSPNet | R-50b-D8 | 512x1024 | 80000 | 6.0 | 4.30 | 78.22 | 79.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r50b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_512x1024_80k_cityscapes/pspnet_r50b-d8_512x1024_80k_cityscapes_20201225_094315-6344287a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_512x1024_80k_cityscapes/pspnet_r50b-d8_512x1024_80k_cityscapes-20201225_094315.log.json) | -| PSPNet | R-101b-D8 | 512x1024 | 80000 | 9.5 | 2.76 | 79.69 | 80.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r101b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes_20201226_170012-3a4d38ab.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes-20201226_170012.log.json) | -| PSPNet | R-18b-D8 | 769x769 | 80000 | 1.7 | 6.41 | 74.92 | 76.90 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r18b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_769x769_80k_cityscapes/pspnet_r18b-d8_769x769_80k_cityscapes_20201226_080942-bf98d186.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_769x769_80k_cityscapes/pspnet_r18b-d8_769x769_80k_cityscapes-20201226_080942.log.json) | -| PSPNet | R-50b-D8 | 769x769 | 80000 | 6.8 | 1.88 | 78.50 | 79.96 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r50b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_769x769_80k_cityscapes/pspnet_r50b-d8_769x769_80k_cityscapes_20201225_094316-4c643cf6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_769x769_80k_cityscapes/pspnet_r50b-d8_769x769_80k_cityscapes-20201225_094316.log.json) | -| PSPNet | R-101b-D8 | 769x769 | 80000 | 10.8 | 1.17 | 78.87 | 80.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r101b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_769x769_80k_cityscapes/pspnet_r101b-d8_769x769_80k_cityscapes_20201226_171823-f0e7c293.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_769x769_80k_cityscapes/pspnet_r101b-d8_769x769_80k_cityscapes-20201226_171823.log.json) | -| PSPNet | R-50-D32 | 512x1024 | 80000 | 3.0 | 15.21 | 73.88 | 76.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r50b-d32_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d32_512x1024_80k_cityscapes/pspnet_r50-d32_512x1024_80k_cityscapes_20220316_224840-9092b254.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d32_512x1024_80k_cityscapes/pspnet_r50-d32_512x1024_80k_cityscapes_20220316_224840.log.json) | -| PSPNet | R-50b-D32 rsb | 512x1024 | 80000 | 3.1 | 16.08 | 74.09 | 77.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r50-d32_rsb_4xb2-adamw-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes_20220316_141229-dd9c9610.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes_20220316_141229.log.json) | -| PSPNet | R-50b-D32 | 512x1024 | 80000 | 2.9 | 15.41 | 72.61 | 75.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r50b-d32_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d32_512x1024_80k_cityscapes/pspnet_r50b-d32_512x1024_80k_cityscapes_20220311_152152-23bcaf8c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d32_512x1024_80k_cityscapes/pspnet_r50b-d32_512x1024_80k_cityscapes_20220311_152152.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------------- | ------------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------------------------- | 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| PSPNet | R-50-D8 | 512x1024 | 40000 | 6.1 | 4.07 | V100 | 77.85 | 79.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338.log.json) | +| PSPNet | R-101-D8 | 512x1024 | 40000 | 9.6 | 2.68 | V100 | 78.34 | 79.74 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751-467e7cf4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751.log.json) | +| PSPNet | R-50-D8 | 769x769 | 40000 | 6.9 | 1.76 | V100 | 78.26 | 79.88 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_40k_cityscapes/pspnet_r50-d8_769x769_40k_cityscapes_20200606_112725-86638686.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_40k_cityscapes/pspnet_r50-d8_769x769_40k_cityscapes_20200606_112725.log.json) | +| PSPNet | R-101-D8 | 769x769 | 40000 | 10.9 | 1.15 | V100 | 79.08 | 80.28 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_40k_cityscapes/pspnet_r101-d8_769x769_40k_cityscapes_20200606_112753-61c6f5be.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_40k_cityscapes/pspnet_r101-d8_769x769_40k_cityscapes_20200606_112753.log.json) | +| PSPNet | R-18-D8 | 512x1024 | 80000 | 1.7 | 15.71 | V100 | 74.87 | 76.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r18-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes/pspnet_r18-d8_512x1024_80k_cityscapes_20201225_021458-09ffa746.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes/pspnet_r18-d8_512x1024_80k_cityscapes-20201225_021458.log.json) | +| PSPNet | R-50-D8 | 512x1024 | 80000 | - | - | V100 | 78.55 | 79.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes/pspnet_r50-d8_512x1024_80k_cityscapes_20200606_112131-2376f12b.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes/pspnet_r50-d8_512x1024_80k_cityscapes_20200606_112131.log.json) | +| PSPNet | R-50b-D8 rsb | 512x1024 | 80000 | 6.2 | 3.82 | V100 | 78.47 | 79.45 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8-rsb_4xb2-adamw-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes_20220315_123238-588c30be.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes_20220315_123238.log.json) | +| PSPNet | R-101-D8 | 512x1024 | 80000 | - | - | V100 | 79.76 | 81.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes/pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211-e1e1100f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes/pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211.log.json) | +| PSPNet (FP16) | R-101-D8 | 512x1024 | 80000 | 5.34 | 8.77 | V100 | 79.46 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_fp16_512x1024_80k_cityscapes/pspnet_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230919-a0875e5c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_fp16_512x1024_80k_cityscapes/pspnet_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230919.log.json) | +| PSPNet | R-18-D8 | 769x769 | 80000 | 1.9 | 6.20 | V100 | 75.90 | 77.86 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r18-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_769x769_80k_cityscapes/pspnet_r18-d8_769x769_80k_cityscapes_20201225_021458-3deefc62.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_769x769_80k_cityscapes/pspnet_r18-d8_769x769_80k_cityscapes-20201225_021458.log.json) | +| PSPNet | R-50-D8 | 769x769 | 80000 | - | - | V100 | 79.59 | 80.69 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_80k_cityscapes/pspnet_r50-d8_769x769_80k_cityscapes_20200606_210121-5ccf03dd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_80k_cityscapes/pspnet_r50-d8_769x769_80k_cityscapes_20200606_210121.log.json) | +| PSPNet | R-101-D8 | 769x769 | 80000 | - | - | V100 | 79.77 | 81.06 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_80k_cityscapes/pspnet_r101-d8_769x769_80k_cityscapes_20200606_225055-dba412fa.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_80k_cityscapes/pspnet_r101-d8_769x769_80k_cityscapes_20200606_225055.log.json) | +| PSPNet | R-18b-D8 |
512x1024 | 80000 | 1.5 | 16.28 | V100 | 74.23 | 75.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r18b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_512x1024_80k_cityscapes/pspnet_r18b-d8_512x1024_80k_cityscapes_20201226_063116-26928a60.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_512x1024_80k_cityscapes/pspnet_r18b-d8_512x1024_80k_cityscapes-20201226_063116.log.json) | +| PSPNet | R-50b-D8 | 512x1024 | 80000 | 6.0 | 4.30 | V100 | 78.22 | 79.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_512x1024_80k_cityscapes/pspnet_r50b-d8_512x1024_80k_cityscapes_20201225_094315-6344287a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_512x1024_80k_cityscapes/pspnet_r50b-d8_512x1024_80k_cityscapes-20201225_094315.log.json) | +| PSPNet | R-101b-D8 | 512x1024 | 80000 | 9.5 | 2.76 | V100 | 79.69 | 80.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes_20201226_170012-3a4d38ab.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes-20201226_170012.log.json) | +| PSPNet | R-18b-D8 | 769x769 | 80000 | 1.7 | 6.41 | V100 | 74.92 | 76.90 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r18b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_769x769_80k_cityscapes/pspnet_r18b-d8_769x769_80k_cityscapes_20201226_080942-bf98d186.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_769x769_80k_cityscapes/pspnet_r18b-d8_769x769_80k_cityscapes-20201226_080942.log.json) | +| PSPNet | R-50b-D8 | 769x769 | 80000 | 6.8 | 1.88 | V100 | 78.50 | 79.96 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_769x769_80k_cityscapes/pspnet_r50b-d8_769x769_80k_cityscapes_20201225_094316-4c643cf6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_769x769_80k_cityscapes/pspnet_r50b-d8_769x769_80k_cityscapes-20201225_094316.log.json) | +| PSPNet | R-101b-D8 | 769x769 | 80000 | 10.8 | 1.17 | V100 | 78.87 | 80.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101b-d8_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_769x769_80k_cityscapes/pspnet_r101b-d8_769x769_80k_cityscapes_20201226_171823-f0e7c293.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_769x769_80k_cityscapes/pspnet_r101b-d8_769x769_80k_cityscapes-20201226_171823.log.json) | +| PSPNet | R-50-D32 | 512x1024 | 80000 | 3.0 | 15.21 | V100 | 73.88 | 76.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50b-d32_4xb2-80k_cityscapes-512x1024.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d32_512x1024_80k_cityscapes/pspnet_r50-d32_512x1024_80k_cityscapes_20220316_224840-9092b254.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d32_512x1024_80k_cityscapes/pspnet_r50-d32_512x1024_80k_cityscapes_20220316_224840.log.json) | +| PSPNet | R-50b-D32 rsb | 512x1024 | 80000 | 3.1 | 16.08 | V100 | 74.09 | 77.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d32_rsb_4xb2-adamw-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes_20220316_141229-dd9c9610.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes_20220316_141229.log.json) | +| PSPNet | R-50b-D32 | 512x1024 | 80000 | 2.9 | 15.41 | V100 | 72.61 | 75.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50b-d32_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d32_512x1024_80k_cityscapes/pspnet_r50b-d32_512x1024_80k_cityscapes_20220311_152152-23bcaf8c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d32_512x1024_80k_cityscapes/pspnet_r50b-d32_512x1024_80k_cityscapes_20220311_152152.log.json) | ### ADE20K -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| PSPNet | R-50-D8 | 512x512 | 80000 | 8.5 | 23.53 | 41.13 | 41.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_ade20k/pspnet_r50-d8_512x512_80k_ade20k_20200615_014128-15a8b914.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_ade20k/pspnet_r50-d8_512x512_80k_ade20k_20200615_014128.log.json) | -| PSPNet | R-101-D8 | 512x512 | 80000 | 12 | 15.30 | 43.57 | 44.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r101-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_ade20k/pspnet_r101-d8_512x512_80k_ade20k_20200614_031423-b6e782f0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_ade20k/pspnet_r101-d8_512x512_80k_ade20k_20200614_031423.log.json) | -| PSPNet | R-50-D8 | 512x512 | 160000 | - | - | 42.48 | 43.44 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r50-d8_4xb4-160k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_160k_ade20k/pspnet_r50-d8_512x512_160k_ade20k_20200615_184358-1890b0bd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_160k_ade20k/pspnet_r50-d8_512x512_160k_ade20k_20200615_184358.log.json) | -| PSPNet | R-101-D8 | 512x512 | 160000 | - | - | 44.39 | 45.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_160k_ade20k/pspnet_r101-d8_512x512_160k_ade20k_20200615_100650-967c316f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_160k_ade20k/pspnet_r101-d8_512x512_160k_ade20k_20200615_100650.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSPNet | R-50-D8 | 512x512 | 80000 | 8.5 | 23.53 | V100 | 41.13 | 41.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_ade20k/pspnet_r50-d8_512x512_80k_ade20k_20200615_014128-15a8b914.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_ade20k/pspnet_r50-d8_512x512_80k_ade20k_20200615_014128.log.json) | +| PSPNet | R-101-D8 | 512x512 | 80000 | 12 | 15.30 | V100 | 43.57 | 44.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_ade20k/pspnet_r101-d8_512x512_80k_ade20k_20200614_031423-b6e782f0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_ade20k/pspnet_r101-d8_512x512_80k_ade20k_20200614_031423.log.json) | +| PSPNet | R-50-D8 | 512x512 | 160000 | - | - | V100 | 42.48 | 43.44 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_160k_ade20k/pspnet_r50-d8_512x512_160k_ade20k_20200615_184358-1890b0bd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_160k_ade20k/pspnet_r50-d8_512x512_160k_ade20k_20200615_184358.log.json) | +| PSPNet | R-101-D8 | 512x512 | 160000 | - | - | V100 | 44.39 | 45.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_160k_ade20k/pspnet_r101-d8_512x512_160k_ade20k_20200615_100650-967c316f.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_160k_ade20k/pspnet_r101-d8_512x512_160k_ade20k_20200615_100650.log.json) | ### Pascal VOC 2012 + Aug -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| PSPNet | R-50-D8 | 512x512 | 20000 | 6.1 | 23.59 | 76.78 | 77.61 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r50-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_20k_voc12aug/pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958-ed5dfbd9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_20k_voc12aug/pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958.log.json) | -| PSPNet | R-101-D8 | 512x512 | 20000 | 9.6 | 15.02 | 78.47 | 79.25 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r101-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_20k_voc12aug/pspnet_r101-d8_512x512_20k_voc12aug_20200617_102003-4aef3c9a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_20k_voc12aug/pspnet_r101-d8_512x512_20k_voc12aug_20200617_102003.log.json) | -| PSPNet | R-50-D8 | 512x512 | 40000 | - | - | 77.29 | 78.48 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r50-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_40k_voc12aug/pspnet_r50-d8_512x512_40k_voc12aug_20200613_161222-ae9c1b8c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_40k_voc12aug/pspnet_r50-d8_512x512_40k_voc12aug_20200613_161222.log.json) | -| PSPNet | R-101-D8 | 512x512 | 40000 | - | - | 78.52 | 79.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r101-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_40k_voc12aug/pspnet_r101-d8_512x512_40k_voc12aug_20200613_161222-bc933b18.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_40k_voc12aug/pspnet_r101-d8_512x512_40k_voc12aug_20200613_161222.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------- | 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSPNet | R-50-D8 | 512x512 | 20000 | 6.1 | 23.59 | V100 | 76.78 | 77.61 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_20k_voc12aug/pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958-ed5dfbd9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_20k_voc12aug/pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958.log.json) | +| PSPNet | R-101-D8 | 512x512 | 20000 | 9.6 | 15.02 | V100 | 78.47 | 79.25 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_20k_voc12aug/pspnet_r101-d8_512x512_20k_voc12aug_20200617_102003-4aef3c9a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_20k_voc12aug/pspnet_r101-d8_512x512_20k_voc12aug_20200617_102003.log.json) | +| PSPNet | R-50-D8 | 512x512 | 40000 | - | - | V100 | 77.29 | 78.48 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_40k_voc12aug/pspnet_r50-d8_512x512_40k_voc12aug_20200613_161222-ae9c1b8c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_40k_voc12aug/pspnet_r50-d8_512x512_40k_voc12aug_20200613_161222.log.json) | +| PSPNet | R-101-D8 | 512x512 | 40000 | - | - | V100 | 78.52 | 79.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_40k_voc12aug/pspnet_r101-d8_512x512_40k_voc12aug_20200613_161222-bc933b18.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_40k_voc12aug/pspnet_r101-d8_512x512_40k_voc12aug_20200613_161222.log.json) | ### Pascal Context -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| PSPNet | R-101-D8 | 480x480 | 40000 | 8.8 | 9.68 | 46.60 | 47.78 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r101-d8_4xb4-40k_pascal-context-480x480.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context/pspnet_r101-d8_480x480_40k_pascal_context_20200911_211210-bf0f5d7c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context/pspnet_r101-d8_480x480_40k_pascal_context-20200911_211210.log.json) | -| PSPNet | R-101-D8 | 480x480 | 80000 | - | - | 46.03 | 47.15 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r101-d8_4xb4-80k_pascal-context-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context/pspnet_r101-d8_480x480_80k_pascal_context_20200911_190530-c86d6233.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context/pspnet_r101-d8_480x480_80k_pascal_context-20200911_190530.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSPNet | R-101-D8 | 480x480 | 40000 | 8.8 | 9.68 | V100 | 46.60 | 47.78 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb4-40k_pascal-context-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context/pspnet_r101-d8_480x480_40k_pascal_context_20200911_211210-bf0f5d7c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context/pspnet_r101-d8_480x480_40k_pascal_context-20200911_211210.log.json) | +| PSPNet | R-101-D8 | 480x480 | 80000 | - | - | V100 | 46.03 | 47.15 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb4-80k_pascal-context-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context/pspnet_r101-d8_480x480_80k_pascal_context_20200911_190530-c86d6233.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context/pspnet_r101-d8_480x480_80k_pascal_context-20200911_190530.log.json) | ### Pascal Context 59 -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| PSPNet | R-101-D8 | 480x480 | 40000 
| - | - | 52.02 | 53.54 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r101-d8_4xb4-40k_pascal-context-59-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context_59/pspnet_r101-d8_480x480_40k_pascal_context_59_20210416_114524-86d44cd4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context_59/pspnet_r101-d8_480x480_40k_pascal_context_59-20210416_114524.log.json) | -| PSPNet | R-101-D8 | 480x480 | 80000 | - | - | 52.47 | 53.99 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r101-d8_4xb4-80k_pascal-context-59-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context_59/pspnet_r101-d8_480x480_80k_pascal_context_59_20210416_114418-fa6caaa2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context_59/pspnet_r101-d8_480x480_80k_pascal_context_59-20210416_114418.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSPNet | R-101-D8 | 480x480 | 40000 | - | - | V100 | 52.02 | 53.54 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb4-40k_pascal-context-59-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context_59/pspnet_r101-d8_480x480_40k_pascal_context_59_20210416_114524-86d44cd4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context_59/pspnet_r101-d8_480x480_40k_pascal_context_59-20210416_114524.log.json) | +| PSPNet | R-101-D8 | 480x480 | 80000 | - | - | V100 | 52.47 | 53.99 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb4-80k_pascal-context-59-480x480.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context_59/pspnet_r101-d8_480x480_80k_pascal_context_59_20210416_114418-fa6caaa2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context_59/pspnet_r101-d8_480x480_80k_pascal_context_59-20210416_114418.log.json) | ### Dark Zurich and Nighttime Driving We provide evaluation results on these two datasets for the models above, which were trained on the Cityscapes training set.
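Each row in the table below pairs a Cityscapes-trained checkpoint with an evaluation-only config whose test dataloader points at Dark Zurich or Nighttime Driving; no retraining is involved. As a minimal sketch of loading such a pair for single-image inference (assuming MMSegmentation 1.x is installed and the checkpoint has been downloaded locally; `night_frame.png` is a placeholder input path, not a file shipped with the repo):

```python
# Minimal sketch (not the repository's test pipeline): reuse a
# Cityscapes-trained PSPNet checkpoint with the Dark Zurich evaluation
# config. Config and checkpoint names are taken from the tables in this
# README; 'night_frame.png' is a placeholder image path.
from mmseg.apis import inference_model, init_model

config = 'configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024_dark-zurich-1920x1080.py'
checkpoint = 'pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'

# Build the model and load the Cityscapes weights unchanged.
model = init_model(config, checkpoint, device='cuda:0')

# Single-image inference. The full-dataset mIoU figures in the table are
# obtained by passing the same config/checkpoint pair to tools/test.py.
result = inference_model(model, 'night_frame.png')
print(result.pred_sem_seg.data.shape)  # predicted label map, (1, H, W)
```

The resulting numbers for each checkpoint are listed below.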
-| Method | Backbone | Training Dataset | Test Dataset | mIoU | config | evaluation checkpoint | -| ------ | --------- | ----------------------- | ------------------------- | ----- | ---------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| PSPNet | R-50-D8 | Cityscapes Training set | Dark Zurich | 10.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024_dark-zurich-1920x1080.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338.log.json) | -| PSPNet | R-50-D8 | Cityscapes Training set | Nighttime Driving | 23.02 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024_night-driving-1920x1080.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338.log.json) | -| PSPNet | R-50-D8 | Cityscapes Training set | Cityscapes Validation set | 77.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338.log.json) | -| PSPNet | R-101-D8 | Cityscapes Training set | Dark Zurich | 10.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r101-d8_4xb2-40k_cityscapes-512x1024_dark-zurich-1920x1080.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751-467e7cf4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751.log.json) | -| PSPNet | R-101-D8 | Cityscapes Training set | Nighttime Driving | 20.25 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r101-d8_4xb2-40k_cityscapes-512x1024_night-driving-1920x1080.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751-467e7cf4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751.log.json) | -| PSPNet | 
R-101-D8 | Cityscapes Training set | Cityscapes Validation set | 78.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751-467e7cf4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751.log.json) | -| PSPNet | R-101b-D8 | Cityscapes Training set | Dark Zurich | 15.54 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r101b-d8_4xb2-80k_cityscapes-512x1024_dark-zurich-1920x1080.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes_20201226_170012-3a4d38ab.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes-20201226_170012.log.json) | -| PSPNet | R-101b-D8 | Cityscapes Training set | Nighttime Driving | 22.25 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r101b-d8_4xb2-80k_cityscapes-512x1024_night-driving-1920x1080.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes_20201226_170012-3a4d38ab.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes-20201226_170012.log.json) | -| PSPNet | R-101b-D8 | Cityscapes Training set | Cityscapes Validation set | 79.69 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r101b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes_20201226_170012-3a4d38ab.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes-20201226_170012.log.json) | +| Method | Backbone | Training Dataset | Test Dataset | mIoU | config | evaluation checkpoint | +| ------ | --------- | ----------------------- | ------------------------- | ----- | ------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| PSPNet | R-50-D8 | Cityscapes Training set | Dark Zurich | 10.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024_dark-zurich-1920x1080.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338.log.json) | +| PSPNet | 
R-50-D8 | Cityscapes Training set | Nighttime Driving | 23.02 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024_night-driving-1920x1080.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338.log.json) | +| PSPNet | R-50-D8 | Cityscapes Training set | Cityscapes Validation set | 77.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338.log.json) | +| PSPNet | R-101-D8 | Cityscapes Training set | Dark Zurich | 10.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb2-40k_cityscapes-512x1024_dark-zurich-1920x1080.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751-467e7cf4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751.log.json) | +| PSPNet | R-101-D8 | Cityscapes Training set | Nighttime Driving | 20.25 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb2-40k_cityscapes-512x1024_night-driving-1920x1080.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751-467e7cf4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751.log.json) | +| PSPNet | R-101-D8 | Cityscapes Training set | Cityscapes Validation set | 78.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751-467e7cf4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751.log.json) | +| PSPNet | R-101b-D8 | Cityscapes Training set | Dark Zurich | 15.54 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101b-d8_4xb2-80k_cityscapes-512x1024_dark-zurich-1920x1080.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes_20201226_170012-3a4d38ab.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes-20201226_170012.log.json) | +| PSPNet | R-101b-D8 | Cityscapes Training set | Nighttime Driving | 22.25 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101b-d8_4xb2-80k_cityscapes-512x1024_night-driving-1920x1080.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes_20201226_170012-3a4d38ab.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes-20201226_170012.log.json) | +| PSPNet | R-101b-D8 | Cityscapes Training set | Cityscapes Validation set | 79.69 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101b-d8_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes_20201226_170012-3a4d38ab.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes-20201226_170012.log.json) | ### COCO-Stuff 10k -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| PSPNet | R-50-D8 | 512x512 | 20000 | 9.6 | 20.5 | 35.69 | 36.62 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r50-d8_4xb4-20k_coco-stuff10k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k_20210820_203258-b88df27f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k_20210820_203258.log.json) | -| PSPNet | R-101-D8 | 512x512 | 20000 | 13.2 | 11.1 | 37.26 | 38.52 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r101-d8_4xb4-20k_coco-stuff10k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k/pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k_20210820_232135-76aae482.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k/pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k_20210820_232135.log.json) | -| PSPNet | R-50-D8 | 512x512 | 40000 | - | - | 36.33 | 37.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r50-d8_4xb4-40k_coco-stuff10k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k_20210821_030857-92e2902b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k_20210821_030857.log.json) | -| PSPNet | R-101-D8 | 512x512 | 
40000 | - | - | 37.76 | 38.86 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r101-d8_4xb4-40k_coco-stuff10k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k/pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k_20210821_014022-831aec95.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k/pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k_20210821_014022.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSPNet | R-50-D8 | 512x512 | 20000 | 9.6 | 20.5 | V100 | 35.69 | 36.62 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb4-20k_coco-stuff10k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k_20210820_203258-b88df27f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k_20210820_203258.log.json) | +| PSPNet | R-101-D8 | 512x512 | 20000 | 13.2 | 11.1 | V100 | 37.26 | 38.52 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb4-20k_coco-stuff10k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k/pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k_20210820_232135-76aae482.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k/pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k_20210820_232135.log.json) | +| PSPNet | R-50-D8 | 512x512 | 40000 | - | - | V100 | 36.33 | 37.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb4-40k_coco-stuff10k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k_20210821_030857-92e2902b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k_20210821_030857.log.json) | +| PSPNet | R-101-D8 | 512x512 | 40000 | - | - | V100 | 37.76 | 38.86 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb4-40k_coco-stuff10k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k/pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k_20210821_014022-831aec95.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k/pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k_20210821_014022.log.json) | ### COCO-Stuff 164k -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| PSPNet | R-50-D8 | 512x512 | 80000 | 9.6 | 20.5 | 38.80 | 39.19 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r50-d8_4xb4-80k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k_20210707_152034-0e41b2db.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k_20210707_152034.log.json) | -| PSPNet | R-101-D8 | 512x512 | 80000 | 13.2 | 11.1 | 40.34 | 40.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r101-d8_4xb4-80k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k_20210707_152034-7eb41789.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k_20210707_152034.log.json) | -| PSPNet | R-50-D8 | 512x512 | 160000 | - | - | 39.64 | 39.97 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r50-d8_4xb4-160k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k_20210707_152004-51276a57.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k_20210707_152004.log.json) | -| PSPNet | R-101-D8 | 512x512 | 160000 | - | - | 41.28 | 41.66 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r101-d8_4xb4-160k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k_20210707_152004-4af9621b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k_20210707_152004.log.json) | -| PSPNet | R-50-D8 | 512x512 | 320000 | - | - | 40.53 | 40.75 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r50-d8_4xb4-320k_coco-stuff164k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k_20210707_152004-be9610cc.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k_20210707_152004.log.json) | -| PSPNet | R-101-D8 | 512x512 | 320000 | - | - | 41.95 | 42.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r101-d8_4xb4-320k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k_20210707_152004-72220c60.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k_20210707_152004.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSPNet | R-50-D8 | 512x512 | 80000 | 9.6 | 20.5 | V100 | 38.80 | 39.19 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb4-80k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k_20210707_152034-0e41b2db.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k_20210707_152034.log.json) | +| PSPNet | R-101-D8 | 512x512 | 80000 | 13.2 | 11.1 | V100 | 40.34 | 40.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb4-80k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k_20210707_152034-7eb41789.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k_20210707_152034.log.json) | +| PSPNet | R-50-D8 | 512x512 | 160000 | - | - | V100 | 39.64 | 39.97 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb4-160k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k_20210707_152004-51276a57.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k_20210707_152004.log.json) | +| PSPNet | R-101-D8 | 512x512 | 160000 | - | - | V100 | 41.28 | 41.66 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb4-160k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k_20210707_152004-4af9621b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k_20210707_152004.log.json) | +| PSPNet | R-50-D8 | 512x512 | 320000 | - | - | V100 | 40.53 | 40.75 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb4-320k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k_20210707_152004-be9610cc.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k_20210707_152004.log.json) | +| PSPNet | R-101-D8 | 512x512 | 320000 | - | - | V100 | 41.95 | 42.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb4-320k_coco-stuff164k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k_20210707_152004-72220c60.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k_20210707_152004.log.json) | ### LoveDA -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| PSPNet | R-18-D8 | 512x512 | 80000 | 1.45 | 26.87 | 48.62 | 47.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r18-d8_4xb4-80k_loveda-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x512_80k_loveda/pspnet_r18-d8_512x512_80k_loveda_20211105_052100-b97697f1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x512_80k_loveda/pspnet_r18-d8_512x512_80k_loveda_20211105_052100.log.json) | -| PSPNet | R-50-D8 | 512x512 | 80000 | 6.14 | 6.60 | 50.46 | 50.19 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r50-d8_4xb4-80k_loveda-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_loveda/pspnet_r50-d8_512x512_80k_loveda_20211104_155728-88610f9f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_loveda/pspnet_r50-d8_512x512_80k_loveda_20211104_155728.log.json) | -| PSPNet | R-101-D8 | 512x512 | 80000 | 9.61 | 4.58 | 51.86 | 51.34 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r101-d8_4xb4-80k_loveda-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_loveda/pspnet_r101-d8_512x512_80k_loveda_20211104_153212-1c06c6a8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_loveda/pspnet_r101-d8_512x512_80k_loveda_20211104_153212.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| PSPNet | R-18-D8 | 512x512 | 80000 | 1.45 | 26.87 | V100 | 48.62 | 47.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r18-d8_4xb4-80k_loveda-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x512_80k_loveda/pspnet_r18-d8_512x512_80k_loveda_20211105_052100-b97697f1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x512_80k_loveda/pspnet_r18-d8_512x512_80k_loveda_20211105_052100.log.json) | +| PSPNet | R-50-D8 | 512x512 | 80000 | 6.14 | 6.60 | V100 | 50.46 | 50.19 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb4-80k_loveda-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_loveda/pspnet_r50-d8_512x512_80k_loveda_20211104_155728-88610f9f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_loveda/pspnet_r50-d8_512x512_80k_loveda_20211104_155728.log.json) | +| PSPNet | R-101-D8 | 512x512 | 80000 | 9.61 | 4.58 | V100 | 51.86 | 51.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb4-80k_loveda-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_loveda/pspnet_r101-d8_512x512_80k_loveda_20211104_153212-1c06c6a8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_loveda/pspnet_r101-d8_512x512_80k_loveda_20211104_153212.log.json) | ### Potsdam -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| PSPNet | R-18-D8 | 512x512 | 80000 | 1.50 | 85.12 | 77.09 | 78.30 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r18-d8_4xb4-80k_potsdam-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_512x512_80k_potsdam/pspnet_r18-d8_4x4_512x512_80k_potsdam_20211220_125612-7cd046e1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_512x512_80k_potsdam/pspnet_r18-d8_4x4_512x512_80k_potsdam_20211220_125612.log.json) | -| PSPNet | R-50-D8 | 512x512 | 80000 | 6.14 | 30.21 | 78.12 | 78.98 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r50-d8_4xb4-80k_potsdam-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_potsdam/pspnet_r50-d8_4x4_512x512_80k_potsdam_20211219_043541-2dd5fe67.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_potsdam/pspnet_r50-d8_4x4_512x512_80k_potsdam_20211219_043541.log.json) | -| PSPNet | R-101-D8 | 512x512 | 80000 | 9.61 | 19.40 | 78.62 | 79.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r101-d8_4xb4-80k_potsdam-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_potsdam/pspnet_r101-d8_4x4_512x512_80k_potsdam_20211220_125612-aed036c4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_potsdam/pspnet_r101-d8_4x4_512x512_80k_potsdam_20211220_125612.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSPNet | R-18-D8 | 512x512 | 80000 | 1.50 | 85.12 | V100 | 77.09 | 78.30 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r18-d8_4xb4-80k_potsdam-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_512x512_80k_potsdam/pspnet_r18-d8_4x4_512x512_80k_potsdam_20211220_125612-7cd046e1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_512x512_80k_potsdam/pspnet_r18-d8_4x4_512x512_80k_potsdam_20211220_125612.log.json) | +| PSPNet | R-50-D8 | 512x512 | 80000 | 6.14 | 30.21 | V100 | 78.12 | 78.98 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb4-80k_potsdam-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_potsdam/pspnet_r50-d8_4x4_512x512_80k_potsdam_20211219_043541-2dd5fe67.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_potsdam/pspnet_r50-d8_4x4_512x512_80k_potsdam_20211219_043541.log.json) | +| PSPNet | R-101-D8 | 512x512 | 80000 | 9.61 | 19.40 | V100 | 78.62 | 79.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb4-80k_potsdam-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_potsdam/pspnet_r101-d8_4x4_512x512_80k_potsdam_20211220_125612-aed036c4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_potsdam/pspnet_r101-d8_4x4_512x512_80k_potsdam_20211220_125612.log.json) | ### Vaihingen -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| PSPNet | R-18-D8 | 512x512 | 80000 | 1.45 | 85.06 | 71.46 | 73.36 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r18-d8_4xb4-80k_vaihingen-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_512x512_80k_vaihingen/pspnet_r18-d8_4x4_512x512_80k_vaihingen_20211228_160355-52a8a6f6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_512x512_80k_vaihingen/pspnet_r18-d8_4x4_512x512_80k_vaihingen_20211228_160355.log.json) | -| PSPNet | R-50-D8 | 512x512 | 80000 | 6.14 | 30.29 | 72.36 | 73.75 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r50-d8_4xb4-80k_vaihingen-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_vaihingen/pspnet_r50-d8_4x4_512x512_80k_vaihingen_20211228_160355-382f8f5b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_vaihingen/pspnet_r50-d8_4x4_512x512_80k_vaihingen_20211228_160355.log.json) | -| PSPNet | R-101-D8 | 512x512 | 80000 | 9.61 | 19.97 | 72.61 | 74.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r101-d8_4xb4-80k_vaihingen-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_vaihingen/pspnet_r101-d8_4x4_512x512_80k_vaihingen_20211231_230806-8eba0a09.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_vaihingen/pspnet_r101-d8_4x4_512x512_80k_vaihingen_20211231_230806.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSPNet | R-18-D8 | 512x512 | 80000 | 1.45 | 85.06 | V100 | 71.46 | 73.36 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r18-d8_4xb4-80k_vaihingen-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_512x512_80k_vaihingen/pspnet_r18-d8_4x4_512x512_80k_vaihingen_20211228_160355-52a8a6f6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_512x512_80k_vaihingen/pspnet_r18-d8_4x4_512x512_80k_vaihingen_20211228_160355.log.json) | +| PSPNet | R-50-D8 | 512x512 | 80000 | 6.14 | 30.29 | V100 | 72.36 | 73.75 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb4-80k_vaihingen-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_vaihingen/pspnet_r50-d8_4x4_512x512_80k_vaihingen_20211228_160355-382f8f5b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_vaihingen/pspnet_r50-d8_4x4_512x512_80k_vaihingen_20211228_160355.log.json) | +| PSPNet | R-101-D8 | 512x512 | 80000 | 9.61 | 19.97 | V100 | 72.61 | 74.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r101-d8_4xb4-80k_vaihingen-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_vaihingen/pspnet_r101-d8_4x4_512x512_80k_vaihingen_20211231_230806-8eba0a09.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_vaihingen/pspnet_r101-d8_4x4_512x512_80k_vaihingen_20211231_230806.log.json) | ### iSAID -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| PSPNet | R-18-D8 | 896x896 | 80000 | 4.52 | 26.91 | 60.22 | 61.25 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r18-d8_4xb4-80k_isaid-896x896.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_896x896_80k_isaid/pspnet_r18-d8_4x4_896x896_80k_isaid_20220110_180526-e84c0b6a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_896x896_80k_isaid/pspnet_r18-d8_4x4_896x896_80k_isaid_20220110_180526.log.json) | -| PSPNet | R-50-D8 | 896x896 | 80000 | 16.58 | 8.88 | 65.36 | 66.48 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/pspnet/pspnet_r50-d8_4xb4-80k_isaid-896x896.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_896x896_80k_isaid/pspnet_r50-d8_4x4_896x896_80k_isaid_20220110_180629-1f21dc32.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_896x896_80k_isaid/pspnet_r50-d8_4x4_896x896_80k_isaid_20220110_180629.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | 
------------: | ----------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSPNet | R-18-D8 | 896x896 | 80000 | 4.52 | 26.91 | V100 | 60.22 | 61.25 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r18-d8_4xb4-80k_isaid-896x896.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_896x896_80k_isaid/pspnet_r18-d8_4x4_896x896_80k_isaid_20220110_180526-e84c0b6a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_896x896_80k_isaid/pspnet_r18-d8_4x4_896x896_80k_isaid_20220110_180526.log.json) | +| PSPNet | R-50-D8 | 896x896 | 80000 | 16.58 | 8.88 | V100 | 65.36 | 66.48 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet/pspnet_r50-d8_4xb4-80k_isaid-896x896.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_896x896_80k_isaid/pspnet_r50-d8_4x4_896x896_80k_isaid_20220110_180629-1f21dc32.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_896x896_80k_isaid/pspnet_r50-d8_4x4_896x896_80k_isaid_20220110_180629.log.json) | Note: @@ -180,3 +160,23 @@ Note: - `896x896` is the Crop Size used for the iSAID dataset, following the implementation of [PointFlow: Flowing Semantics Through Points for Aerial Image Segmentation](https://arxiv.org/pdf/2103.06564.pdf). - `rsb` is short for 'Resnet strikes back'. - The `b` in `R-50b` means ResNetV1b, which is a standard ResNet backbone. In MMSegmentation, the default backbone is ResNetV1c, which usually performs better in semantic segmentation tasks.
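+
+A minimal inference sketch for the checkpoints listed above, assuming the MMSegmentation 1.x Python API (`mmseg.apis`) and a checkpoint downloaded from one of the `model` links; the demo image path is illustrative:
+
+```python
+from mmseg.apis import init_model, inference_model
+
+# Any config/checkpoint pair from the tables above can be swapped in here.
+config_file = 'configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py'
+checkpoint_file = 'pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'
+
+model = init_model(config_file, checkpoint_file, device='cuda:0')
+result = inference_model(model, 'demo/demo.png')  # SegDataSample holding the predicted mask
+```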
+ +## Citation + +```bibtex +@inproceedings{zhao2017pspnet, + title={Pyramid Scene Parsing Network}, + author={Zhao, Hengshuang and Shi, Jianping and Qi, Xiaojuan and Wang, Xiaogang and Jia, Jiaya}, + booktitle={CVPR}, + year={2017} +} +``` + +```bibtex +@article{wightman2021resnet, + title={Resnet strikes back: An improved training procedure in timm}, + author={Wightman, Ross and Touvron, Hugo and J{\'e}gou, Herv{\'e}}, + journal={arXiv preprint arXiv:2110.00476}, + year={2021} +} +``` diff --git a/configs/pspnet/metafile.yaml b/configs/pspnet/metafile.yaml new file mode 100644 index 0000000000..d00b89d5cf --- /dev/null +++ b/configs/pspnet/metafile.yaml @@ -0,0 +1,1303 @@ +Collections: +- Name: PSPNet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + - Pascal Context + - Pascal Context 59 + - Dark Zurich and Nighttime Driving + - COCO-Stuff 10k + - COCO-Stuff 164k + - LoveDA + - Potsdam + - Vaihingen + - iSAID + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + README: configs/pspnet/README.md + Frameworks: + - PyTorch +Models: +- Name: pspnet_r50-d8_4xb2-40k_cityscapes-512x1024 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.85 + mIoU(ms+flip): 79.18 + Config: configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb2-40k_cityscapes-512x1024 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.34 + mIoU(ms+flip): 79.74 + Config: configs/pspnet/pspnet_r101-d8_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751-467e7cf4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8_4xb2-40k_cityscapes-769x769 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.26 + mIoU(ms+flip): 79.88 + Config: configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.9 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_40k_cityscapes/pspnet_r50-d8_769x769_40k_cityscapes_20200606_112725-86638686.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_40k_cityscapes/pspnet_r50-d8_769x769_40k_cityscapes_20200606_112725.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb2-40k_cityscapes-769x769 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.08 + mIoU(ms+flip): 80.28 + Config: configs/pspnet/pspnet_r101-d8_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 10.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_40k_cityscapes/pspnet_r101-d8_769x769_40k_cityscapes_20200606_112753-61c6f5be.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_40k_cityscapes/pspnet_r101-d8_769x769_40k_cityscapes_20200606_112753.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r18-d8_4xb2-80k_cityscapes-512x1024 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.87 + mIoU(ms+flip): 76.04 + Config: configs/pspnet/pspnet_r18-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 1.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes/pspnet_r18-d8_512x1024_80k_cityscapes_20201225_021458-09ffa746.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes/pspnet_r18-d8_512x1024_80k_cityscapes-20201225_021458.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8_4xb2-80k_cityscapes-512x1024 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.55 + mIoU(ms+flip): 79.79 + Config: configs/pspnet/pspnet_r50-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes/pspnet_r50-d8_512x1024_80k_cityscapes_20200606_112131-2376f12b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes/pspnet_r50-d8_512x1024_80k_cityscapes_20200606_112131.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8-rsb_4xb2-adamw-80k_cityscapes-512x1024 + In Collection: PSPNet + Results: + 
Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.47 + mIoU(ms+flip): 79.45 + Config: configs/pspnet/pspnet_r50-d8-rsb_4xb2-adamw-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50b-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes_20220315_123238-588c30be.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes_20220315_123238.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb2-80k_cityscapes-512x1024 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.76 + mIoU(ms+flip): 81.01 + Config: configs/pspnet/pspnet_r101-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes/pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211-e1e1100f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes/pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb2-amp-80k_cityscapes-512x1024 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.46 + Config: configs/pspnet/pspnet_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - PSPNet + - (FP16) + Training Resources: 4x V100 GPUS + Memory (GB): 5.34 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_fp16_512x1024_80k_cityscapes/pspnet_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230919-a0875e5c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_fp16_512x1024_80k_cityscapes/pspnet_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230919.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r18-d8_4xb2-80k_cityscapes-769x769 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.9 + mIoU(ms+flip): 77.86 + Config: configs/pspnet/pspnet_r18-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 1.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_769x769_80k_cityscapes/pspnet_r18-d8_769x769_80k_cityscapes_20201225_021458-3deefc62.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_769x769_80k_cityscapes/pspnet_r18-d8_769x769_80k_cityscapes-20201225_021458.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8_4xb2-80k_cityscapes-769x769 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.59 + mIoU(ms+flip): 80.69 + Config: configs/pspnet/pspnet_r50-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_80k_cityscapes/pspnet_r50-d8_769x769_80k_cityscapes_20200606_210121-5ccf03dd.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_80k_cityscapes/pspnet_r50-d8_769x769_80k_cityscapes_20200606_210121.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb2-80k_cityscapes-769x769 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.77 + mIoU(ms+flip): 81.06 + Config: configs/pspnet/pspnet_r101-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_80k_cityscapes/pspnet_r101-d8_769x769_80k_cityscapes_20200606_225055-dba412fa.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_80k_cityscapes/pspnet_r101-d8_769x769_80k_cityscapes_20200606_225055.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r18b-d8_4xb2-80k_cityscapes-512x1024 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.23 + mIoU(ms+flip): 75.79 + Config: configs/pspnet/pspnet_r18b-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18b-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 1.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_512x1024_80k_cityscapes/pspnet_r18b-d8_512x1024_80k_cityscapes_20201226_063116-26928a60.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_512x1024_80k_cityscapes/pspnet_r18b-d8_512x1024_80k_cityscapes-20201226_063116.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50b-d8_4xb2-80k_cityscapes-512x1024 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.22 + mIoU(ms+flip): 79.46 + Config: configs/pspnet/pspnet_r50b-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data:
Cityscapes + Batch Size: 8 + Architecture: + - R-50b-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_512x1024_80k_cityscapes/pspnet_r50b-d8_512x1024_80k_cityscapes_20201225_094315-6344287a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_512x1024_80k_cityscapes/pspnet_r50b-d8_512x1024_80k_cityscapes-20201225_094315.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101b-d8_4xb2-80k_cityscapes-512x1024 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.69 + mIoU(ms+flip): 80.79 + Config: configs/pspnet/pspnet_r101b-d8_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101b-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes_20201226_170012-3a4d38ab.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes-20201226_170012.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r18b-d8_4xb2-80k_cityscapes-769x769 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.92 + mIoU(ms+flip): 76.9 + Config: configs/pspnet/pspnet_r18b-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-18b-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 1.7 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_769x769_80k_cityscapes/pspnet_r18b-d8_769x769_80k_cityscapes_20201226_080942-bf98d186.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_769x769_80k_cityscapes/pspnet_r18b-d8_769x769_80k_cityscapes-20201226_080942.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50b-d8_4xb2-80k_cityscapes-769x769 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.5 + mIoU(ms+flip): 79.96 + Config: configs/pspnet/pspnet_r50b-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50b-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_769x769_80k_cityscapes/pspnet_r50b-d8_769x769_80k_cityscapes_20201225_094316-4c643cf6.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_769x769_80k_cityscapes/pspnet_r50b-d8_769x769_80k_cityscapes-20201225_094316.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: 
https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101b-d8_4xb2-80k_cityscapes-769x769 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.87 + mIoU(ms+flip): 80.04 + Config: configs/pspnet/pspnet_r101b-d8_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101b-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 10.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_769x769_80k_cityscapes/pspnet_r101b-d8_769x769_80k_cityscapes_20201226_171823-f0e7c293.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_769x769_80k_cityscapes/pspnet_r101b-d8_769x769_80k_cityscapes-20201226_171823.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d32_4xb2-80k_cityscapes-512x1024 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.88 + mIoU(ms+flip): 76.85 + Config: configs/pspnet/pspnet_r50-d32_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50-D32 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 3.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d32_512x1024_80k_cityscapes/pspnet_r50-d32_512x1024_80k_cityscapes_20220316_224840-9092b254.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d32_512x1024_80k_cityscapes/pspnet_r50-d32_512x1024_80k_cityscapes_20220316_224840.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d32_rsb_4xb2-adamw-80k_cityscapes-512x1024 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.09 + mIoU(ms+flip): 77.18 + Config: configs/pspnet/pspnet_r50-d32_rsb_4xb2-adamw-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50b-D32 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 3.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes_20220316_141229-dd9c9610.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes_20220316_141229.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50b-d32_4xb2-80k_cityscapes-512x1024 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 72.61 + mIoU(ms+flip): 75.51 + Config: configs/pspnet/pspnet_r50b-d32_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50b-D32 + - PSPNet + Training Resources: 4x V100
GPUS + Memory (GB): 2.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d32_512x1024_80k_cityscapes/pspnet_r50b-d32_512x1024_80k_cityscapes_20220311_152152-23bcaf8c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d32_512x1024_80k_cityscapes/pspnet_r50b-d32_512x1024_80k_cityscapes_20220311_152152.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8_4xb4-80k_ade20k-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.13 + mIoU(ms+flip): 41.94 + Config: configs/pspnet/pspnet_r50-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 8.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_ade20k/pspnet_r50-d8_512x512_80k_ade20k_20200615_014128-15a8b914.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_ade20k/pspnet_r50-d8_512x512_80k_ade20k_20200615_014128.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb4-80k_ade20k-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.57 + mIoU(ms+flip): 44.35 + Config: configs/pspnet/pspnet_r101-d8_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 12.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_ade20k/pspnet_r101-d8_512x512_80k_ade20k_20200614_031423-b6e782f0.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_ade20k/pspnet_r101-d8_512x512_80k_ade20k_20200614_031423.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8_4xb4-160k_ade20k-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.48 + mIoU(ms+flip): 43.44 + Config: configs/pspnet/pspnet_r50-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_160k_ade20k/pspnet_r50-d8_512x512_160k_ade20k_20200615_184358-1890b0bd.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_160k_ade20k/pspnet_r50-d8_512x512_160k_ade20k_20200615_184358.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb4-160k_ade20k-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 
44.39 + mIoU(ms+flip): 45.35 + Config: configs/pspnet/pspnet_r101-d8_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_160k_ade20k/pspnet_r101-d8_512x512_160k_ade20k_20200615_100650-967c316f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_160k_ade20k/pspnet_r101-d8_512x512_160k_ade20k_20200615_100650.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8_4xb4-20k_voc12aug-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.78 + mIoU(ms+flip): 77.61 + Config: configs/pspnet/pspnet_r50-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_20k_voc12aug/pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958-ed5dfbd9.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_20k_voc12aug/pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb4-20k_voc12aug-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 78.47 + mIoU(ms+flip): 79.25 + Config: configs/pspnet/pspnet_r101-d8_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_20k_voc12aug/pspnet_r101-d8_512x512_20k_voc12aug_20200617_102003-4aef3c9a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_20k_voc12aug/pspnet_r101-d8_512x512_20k_voc12aug_20200617_102003.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8_4xb4-40k_voc12aug-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.29 + mIoU(ms+flip): 78.48 + Config: configs/pspnet/pspnet_r50-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_40k_voc12aug/pspnet_r50-d8_512x512_40k_voc12aug_20200613_161222-ae9c1b8c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_40k_voc12aug/pspnet_r50-d8_512x512_40k_voc12aug_20200613_161222.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: 
https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb4-40k_voc12aug-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 78.52 + mIoU(ms+flip): 79.57 + Config: configs/pspnet/pspnet_r101-d8_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_40k_voc12aug/pspnet_r101-d8_512x512_40k_voc12aug_20200613_161222-bc933b18.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_40k_voc12aug/pspnet_r101-d8_512x512_40k_voc12aug_20200613_161222.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb4-40k_pascal-context-480x480 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Pascal Context + Metrics: + mIoU: 46.6 + mIoU(ms+flip): 47.78 + Config: configs/pspnet/pspnet_r101-d8_4xb4-40k_pascal-context-480x480.py + Metadata: + Training Data: Pascal Context + Batch Size: 16 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 8.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context/pspnet_r101-d8_480x480_40k_pascal_context_20200911_211210-bf0f5d7c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context/pspnet_r101-d8_480x480_40k_pascal_context-20200911_211210.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb4-80k_pascal-context-480x480 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Pascal Context + Metrics: + mIoU: 46.03 + mIoU(ms+flip): 47.15 + Config: configs/pspnet/pspnet_r101-d8_4xb4-80k_pascal-context-480x480.py + Metadata: + Training Data: Pascal Context + Batch Size: 16 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context/pspnet_r101-d8_480x480_80k_pascal_context_20200911_190530-c86d6233.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context/pspnet_r101-d8_480x480_80k_pascal_context-20200911_190530.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb4-40k_pascal-context-59-480x480 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Pascal Context 59 + Metrics: + mIoU: 52.02 + mIoU(ms+flip): 53.54 + Config: configs/pspnet/pspnet_r101-d8_4xb4-40k_pascal-context-59-480x480.py + Metadata: + Training Data: Pascal Context 59 + Batch Size: 16 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context_59/pspnet_r101-d8_480x480_40k_pascal_context_59_20210416_114524-86d44cd4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context_59/pspnet_r101-d8_480x480_40k_pascal_context_59-20210416_114524.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb4-80k_pascal-context-59-480x480 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Pascal Context 59 + Metrics: + mIoU: 52.47 + mIoU(ms+flip): 53.99 + Config: configs/pspnet/pspnet_r101-d8_4xb4-80k_pascal-context-59-480x480.py + Metadata: + Training Data: Pascal Context 59 + Batch Size: 16 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context_59/pspnet_r101-d8_480x480_80k_pascal_context_59_20210416_114418-fa6caaa2.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context_59/pspnet_r101-d8_480x480_80k_pascal_context_59-20210416_114418.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8_4xb4-20k_coco-stuff10k-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 10k + Metrics: + mIoU: 35.69 + mIoU(ms+flip): 36.62 + Config: configs/pspnet/pspnet_r50-d8_4xb4-20k_coco-stuff10k-512x512.py + Metadata: + Training Data: COCO-Stuff 10k + Batch Size: 16 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k_20210820_203258-b88df27f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k_20210820_203258.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb4-20k_coco-stuff10k-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 10k + Metrics: + mIoU: 37.26 + mIoU(ms+flip): 38.52 + Config: configs/pspnet/pspnet_r101-d8_4xb4-20k_coco-stuff10k-512x512.py + Metadata: + Training Data: COCO-Stuff 10k + Batch Size: 16 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 13.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k/pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k_20210820_232135-76aae482.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k/pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k_20210820_232135.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: 
https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8_4xb4-40k_coco-stuff10k-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 10k + Metrics: + mIoU: 36.33 + mIoU(ms+flip): 37.24 + Config: configs/pspnet/pspnet_r50-d8_4xb4-40k_coco-stuff10k-512x512.py + Metadata: + Training Data: COCO-Stuff 10k + Batch Size: 16 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k_20210821_030857-92e2902b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k_20210821_030857.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb4-40k_coco-stuff10k-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 10k + Metrics: + mIoU: 37.76 + mIoU(ms+flip): 38.86 + Config: configs/pspnet/pspnet_r101-d8_4xb4-40k_coco-stuff10k-512x512.py + Metadata: + Training Data: COCO-Stuff 10k + Batch Size: 16 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k/pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k_20210821_014022-831aec95.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k/pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k_20210821_014022.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8_4xb4-80k_coco-stuff164k-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 38.8 + mIoU(ms+flip): 39.19 + Config: configs/pspnet/pspnet_r50-d8_4xb4-80k_coco-stuff164k-512x512.py + Metadata: + Training Data: COCO-Stuff 164k + Batch Size: 16 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k_20210707_152034-0e41b2db.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k_20210707_152034.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb4-80k_coco-stuff164k-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 40.34 + mIoU(ms+flip): 40.79 + Config: configs/pspnet/pspnet_r101-d8_4xb4-80k_coco-stuff164k-512x512.py + Metadata: + Training Data: COCO-Stuff 164k + Batch Size: 16 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 13.2 
+ Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k_20210707_152034-7eb41789.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k_20210707_152034.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8_4xb4-160k_coco-stuff164k-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 39.64 + mIoU(ms+flip): 39.97 + Config: configs/pspnet/pspnet_r50-d8_4xb4-160k_coco-stuff164k-512x512.py + Metadata: + Training Data: COCO-Stuff 164k + Batch Size: 16 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k_20210707_152004-51276a57.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k_20210707_152004.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb4-160k_coco-stuff164k-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 41.28 + mIoU(ms+flip): 41.66 + Config: configs/pspnet/pspnet_r101-d8_4xb4-160k_coco-stuff164k-512x512.py + Metadata: + Training Data: COCO-Stuff 164k + Batch Size: 16 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k_20210707_152004-4af9621b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k_20210707_152004.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8_4xb4-320k_coco-stuff164k-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 40.53 + mIoU(ms+flip): 40.75 + Config: configs/pspnet/pspnet_r50-d8_4xb4-320k_coco-stuff164k-512x512.py + Metadata: + Training Data: COCO-Stuff 164k + Batch Size: 16 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k_20210707_152004-be9610cc.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k_20210707_152004.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: 
https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb4-320k_coco-stuff164k-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 41.95 + mIoU(ms+flip): 42.42 + Config: configs/pspnet/pspnet_r101-d8_4xb4-320k_coco-stuff164k-512x512.py + Metadata: + Training Data: COCO-Stuff 164k + Batch Size: 16 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k_20210707_152004-72220c60.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k_20210707_152004.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r18-d8_4xb4-80k_loveda-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: LoveDA + Metrics: + mIoU: 48.62 + mIoU(ms+flip): 47.57 + Config: configs/pspnet/pspnet_r18-d8_4xb4-80k_loveda-512x512.py + Metadata: + Training Data: LoveDA + Batch Size: 16 + Architecture: + - R-18-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 1.45 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x512_80k_loveda/pspnet_r18-d8_512x512_80k_loveda_20211105_052100-b97697f1.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x512_80k_loveda/pspnet_r18-d8_512x512_80k_loveda_20211105_052100.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8_4xb4-80k_loveda-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: LoveDA + Metrics: + mIoU: 50.46 + mIoU(ms+flip): 50.19 + Config: configs/pspnet/pspnet_r50-d8_4xb4-80k_loveda-512x512.py + Metadata: + Training Data: LoveDA + Batch Size: 16 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.14 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_loveda/pspnet_r50-d8_512x512_80k_loveda_20211104_155728-88610f9f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_loveda/pspnet_r50-d8_512x512_80k_loveda_20211104_155728.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb4-80k_loveda-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: LoveDA + Metrics: + mIoU: 51.86 + mIoU(ms+flip): 51.34 + Config: configs/pspnet/pspnet_r101-d8_4xb4-80k_loveda-512x512.py + Metadata: + Training Data: LoveDA + Batch Size: 16 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.61 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_loveda/pspnet_r101-d8_512x512_80k_loveda_20211104_153212-1c06c6a8.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_loveda/pspnet_r101-d8_512x512_80k_loveda_20211104_153212.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r18-d8_4xb4-80k_potsdam-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Potsdam + Metrics: + mIoU: 77.09 + mIoU(ms+flip): 78.3 + Config: configs/pspnet/pspnet_r18-d8_4xb4-80k_potsdam-512x512.py + Metadata: + Training Data: Potsdam + Batch Size: 16 + Architecture: + - R-18-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 1.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_512x512_80k_potsdam/pspnet_r18-d8_4x4_512x512_80k_potsdam_20211220_125612-7cd046e1.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_512x512_80k_potsdam/pspnet_r18-d8_4x4_512x512_80k_potsdam_20211220_125612.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8_4xb4-80k_potsdam-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Potsdam + Metrics: + mIoU: 78.12 + mIoU(ms+flip): 78.98 + Config: configs/pspnet/pspnet_r50-d8_4xb4-80k_potsdam-512x512.py + Metadata: + Training Data: Potsdam + Batch Size: 16 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.14 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_potsdam/pspnet_r50-d8_4x4_512x512_80k_potsdam_20211219_043541-2dd5fe67.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_potsdam/pspnet_r50-d8_4x4_512x512_80k_potsdam_20211219_043541.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb4-80k_potsdam-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Potsdam + Metrics: + mIoU: 78.62 + mIoU(ms+flip): 79.47 + Config: configs/pspnet/pspnet_r101-d8_4xb4-80k_potsdam-512x512.py + Metadata: + Training Data: Potsdam + Batch Size: 16 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.61 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_potsdam/pspnet_r101-d8_4x4_512x512_80k_potsdam_20211220_125612-aed036c4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_potsdam/pspnet_r101-d8_4x4_512x512_80k_potsdam_20211220_125612.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r18-d8_4xb4-80k_vaihingen-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: 
Vaihingen + Metrics: + mIoU: 71.46 + mIoU(ms+flip): 73.36 + Config: configs/pspnet/pspnet_r18-d8_4xb4-80k_vaihingen-512x512.py + Metadata: + Training Data: Vaihingen + Batch Size: 16 + Architecture: + - R-18-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 1.45 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_512x512_80k_vaihingen/pspnet_r18-d8_4x4_512x512_80k_vaihingen_20211228_160355-52a8a6f6.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_512x512_80k_vaihingen/pspnet_r18-d8_4x4_512x512_80k_vaihingen_20211228_160355.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8_4xb4-80k_vaihingen-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Vaihingen + Metrics: + mIoU: 72.36 + mIoU(ms+flip): 73.75 + Config: configs/pspnet/pspnet_r50-d8_4xb4-80k_vaihingen-512x512.py + Metadata: + Training Data: Vaihingen + Batch Size: 16 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.14 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_vaihingen/pspnet_r50-d8_4x4_512x512_80k_vaihingen_20211228_160355-382f8f5b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_vaihingen/pspnet_r50-d8_4x4_512x512_80k_vaihingen_20211228_160355.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r101-d8_4xb4-80k_vaihingen-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Vaihingen + Metrics: + mIoU: 72.61 + mIoU(ms+flip): 74.18 + Config: configs/pspnet/pspnet_r101-d8_4xb4-80k_vaihingen-512x512.py + Metadata: + Training Data: Vaihingen + Batch Size: 16 + Architecture: + - R-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.61 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_vaihingen/pspnet_r101-d8_4x4_512x512_80k_vaihingen_20211231_230806-8eba0a09.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_vaihingen/pspnet_r101-d8_4x4_512x512_80k_vaihingen_20211231_230806.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r18-d8_4xb4-80k_isaid-896x896 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: iSAID + Metrics: + mIoU: 60.22 + mIoU(ms+flip): 61.25 + Config: configs/pspnet/pspnet_r18-d8_4xb4-80k_isaid-896x896.py + Metadata: + Training Data: iSAID + Batch Size: 16 + Architecture: + - R-18-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 4.52 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_896x896_80k_isaid/pspnet_r18-d8_4x4_896x896_80k_isaid_20220110_180526-e84c0b6a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_896x896_80k_isaid/pspnet_r18-d8_4x4_896x896_80k_isaid_20220110_180526.log.json + Paper: + 
Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch +- Name: pspnet_r50-d8_4xb4-80k_isaid-896x896 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: iSAID + Metrics: + mIoU: 65.36 + mIoU(ms+flip): 66.48 + Config: configs/pspnet/pspnet_r50-d8_4xb4-80k_isaid-896x896.py + Metadata: + Training Data: iSAID + Batch Size: 16 + Architecture: + - R-50-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 16.58 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_896x896_80k_isaid/pspnet_r50-d8_4x4_896x896_80k_isaid_20220110_180629-1f21dc32.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_896x896_80k_isaid/pspnet_r50-d8_4x4_896x896_80k_isaid_20220110_180629.log.json + Paper: + Title: Pyramid Scene Parsing Network + URL: https://arxiv.org/abs/1612.01105 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Framework: PyTorch
diff --git a/configs/pspnet/pspnet.yml b/configs/pspnet/pspnet.yml
deleted file mode 100644
index 7f811efd0f..0000000000
--- a/configs/pspnet/pspnet.yml
+++ /dev/null
@@ -1,1077 +0,0 @@
-Collections: -- Name: PSPNet - Metadata: - Training Data: - - Cityscapes - - ADE20K - - Pascal VOC 2012 + Aug - - Pascal Context - - Pascal Context 59 - - Dark Zurich and Nighttime Driving - - COCO-Stuff 10k - - COCO-Stuff 164k - - LoveDA - - Potsdam - - Vaihingen - - iSAID - Paper: - URL: https://arxiv.org/abs/1612.01105 - Title: Pyramid Scene Parsing Network - README: configs/pspnet/README.md - Code: - URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 - Version: v0.17.0 - Converted From: - Code: https://github.com/hszhao/PSPNet -Models: -- Name: pspnet_r50-d8_4xb2-40k_cityscapes-512x1024 - In Collection: PSPNet - Metadata: - backbone: R-50-D8 - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 245.7 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 6.1 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 77.85 - mIoU(ms+flip): 79.18 - Config: configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth -- Name: pspnet_r101-d8_4xb2-40k_cityscapes-512x1024 - In Collection: PSPNet - Metadata: - backbone: R-101-D8 - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 373.13 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 9.6 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.34 - mIoU(ms+flip): 79.74 - Config: configs/pspnet/pspnet_r101-d8_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751-467e7cf4.pth -- Name: pspnet_r50-d8_4xb2-40k_cityscapes-769x769 - In Collection: PSPNet - Metadata: - backbone: R-50-D8 - crop size: (769,769) - lr schd: 40000 - inference time (ms/im): - - value: 568.18 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: 
(769,769) - Training Memory (GB): 6.9 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.26 - mIoU(ms+flip): 79.88 - Config: configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_40k_cityscapes/pspnet_r50-d8_769x769_40k_cityscapes_20200606_112725-86638686.pth -- Name: pspnet_r101-d8_4xb2-40k_cityscapes-769x769 - In Collection: PSPNet - Metadata: - backbone: R-101-D8 - crop size: (769,769) - lr schd: 40000 - inference time (ms/im): - - value: 869.57 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 10.9 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.08 - mIoU(ms+flip): 80.28 - Config: configs/pspnet/pspnet_r101-d8_4xb2-40k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_40k_cityscapes/pspnet_r101-d8_769x769_40k_cityscapes_20200606_112753-61c6f5be.pth -- Name: pspnet_r18-d8_4xb2-80k_cityscapes-512x1024 - In Collection: PSPNet - Metadata: - backbone: R-18-D8 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 63.65 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 1.7 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 74.87 - mIoU(ms+flip): 76.04 - Config: configs/pspnet/pspnet_r18-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes/pspnet_r18-d8_512x1024_80k_cityscapes_20201225_021458-09ffa746.pth -- Name: pspnet_r50-d8_4xb2-80k_cityscapes-512x1024 - In Collection: PSPNet - Metadata: - backbone: R-50-D8 - crop size: (512,1024) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.55 - mIoU(ms+flip): 79.79 - Config: configs/pspnet/pspnet_r50-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes/pspnet_r50-d8_512x1024_80k_cityscapes_20200606_112131-2376f12b.pth -- Name: pspnet_r50-d8-rsb_4xb2-adamw-80k_cityscapes-512x1024 - In Collection: PSPNet - Metadata: - backbone: R-50b-D8 rsb - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 261.78 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 6.2 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.47 - mIoU(ms+flip): 79.45 - Config: configs/pspnet/pspnet_r50-d8-rsb_4xb2-adamw-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes_20220315_123238-588c30be.pth -- Name: pspnet_r101-d8_4xb2-80k_cityscapes-512x1024 - In Collection: PSPNet - Metadata: - backbone: R-101-D8 - crop size: (512,1024) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.76 - mIoU(ms+flip): 81.01 - Config: configs/pspnet/pspnet_r101-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes/pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211-e1e1100f.pth -- Name: pspnet_r101-d8_4xb2-amp-80k_cityscapes-512x1024 - In Collection: PSPNet - Metadata: 
- backbone: R-101-D8 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 114.03 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: AMP - resolution: (512,1024) - Training Memory (GB): 5.34 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.46 - Config: configs/pspnet/pspnet_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_fp16_512x1024_80k_cityscapes/pspnet_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230919-a0875e5c.pth -- Name: pspnet_r18-d8_4xb2-80k_cityscapes-769x769 - In Collection: PSPNet - Metadata: - backbone: R-18-D8 - crop size: (769,769) - lr schd: 80000 - inference time (ms/im): - - value: 161.29 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 1.9 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 75.9 - mIoU(ms+flip): 77.86 - Config: configs/pspnet/pspnet_r18-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_769x769_80k_cityscapes/pspnet_r18-d8_769x769_80k_cityscapes_20201225_021458-3deefc62.pth -- Name: pspnet_r50-d8_4xb2-80k_cityscapes-769x769 - In Collection: PSPNet - Metadata: - backbone: R-50-D8 - crop size: (769,769) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.59 - mIoU(ms+flip): 80.69 - Config: configs/pspnet/pspnet_r50-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_80k_cityscapes/pspnet_r50-d8_769x769_80k_cityscapes_20200606_210121-5ccf03dd.pth -- Name: pspnet_r101-d8_4xb2-80k_cityscapes-769x769 - In Collection: PSPNet - Metadata: - backbone: R-101-D8 - crop size: (769,769) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.77 - mIoU(ms+flip): 81.06 - Config: configs/pspnet/pspnet_r101-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_80k_cityscapes/pspnet_r101-d8_769x769_80k_cityscapes_20200606_225055-dba412fa.pth -- Name: pspnet_r18b-d8_4xb2-80k_cityscapes-512x1024 - In Collection: PSPNet - Metadata: - backbone: R-18b-D8 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 61.43 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 1.5 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 74.23 - mIoU(ms+flip): 75.79 - Config: configs/pspnet/pspnet_r18b-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_512x1024_80k_cityscapes/pspnet_r18b-d8_512x1024_80k_cityscapes_20201226_063116-26928a60.pth -- Name: pspnet_r50b-d8_4xb2-80k_cityscapes-512x1024 - In Collection: PSPNet - Metadata: - backbone: R-50b-D8 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 232.56 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 6.0 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.22 - mIoU(ms+flip): 79.46 - Config: configs/pspnet/pspnet_r50b-d8_4xb2-80k_cityscapes-512x1024.py - Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_512x1024_80k_cityscapes/pspnet_r50b-d8_512x1024_80k_cityscapes_20201225_094315-6344287a.pth -- Name: pspnet_r101b-d8_4xb2-80k_cityscapes-512x1024 - In Collection: PSPNet - Metadata: - backbone: R-101b-D8 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 362.32 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 9.5 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.69 - mIoU(ms+flip): 80.79 - Config: configs/pspnet/pspnet_r101b-d8_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes_20201226_170012-3a4d38ab.pth -- Name: pspnet_r18b-d8_4xb2-80k_cityscapes-769x769 - In Collection: PSPNet - Metadata: - backbone: R-18b-D8 - crop size: (769,769) - lr schd: 80000 - inference time (ms/im): - - value: 156.01 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 1.7 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 74.92 - mIoU(ms+flip): 76.9 - Config: configs/pspnet/pspnet_r18b-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_769x769_80k_cityscapes/pspnet_r18b-d8_769x769_80k_cityscapes_20201226_080942-bf98d186.pth -- Name: pspnet_r50b-d8_4xb2-80k_cityscapes-769x769 - In Collection: PSPNet - Metadata: - backbone: R-50b-D8 - crop size: (769,769) - lr schd: 80000 - inference time (ms/im): - - value: 531.91 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 6.8 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.5 - mIoU(ms+flip): 79.96 - Config: configs/pspnet/pspnet_r50b-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_769x769_80k_cityscapes/pspnet_r50b-d8_769x769_80k_cityscapes_20201225_094316-4c643cf6.pth -- Name: pspnet_r101b-d8_4xb2-80k_cityscapes-769x769 - In Collection: PSPNet - Metadata: - backbone: R-101b-D8 - crop size: (769,769) - lr schd: 80000 - inference time (ms/im): - - value: 854.7 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 10.8 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.87 - mIoU(ms+flip): 80.04 - Config: configs/pspnet/pspnet_r101b-d8_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_769x769_80k_cityscapes/pspnet_r101b-d8_769x769_80k_cityscapes_20201226_171823-f0e7c293.pth -- Name: pspnet_r50b-d32_4xb2-80k_cityscapes-512x1024 - In Collection: PSPNet - Metadata: - backbone: R-50-D32 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 65.75 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 3.0 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 73.88 - mIoU(ms+flip): 76.85 - Config: configs/pspnet/pspnet_r50b-d32_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d32_512x1024_80k_cityscapes/pspnet_r50-d32_512x1024_80k_cityscapes_20220316_224840-9092b254.pth -- Name: 
pspnet_r50-d32_rsb_4xb2-adamw-80k_cityscapes-512x1024 - In Collection: PSPNet - Metadata: - backbone: R-50b-D32 rsb - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 62.19 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 3.1 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 74.09 - mIoU(ms+flip): 77.18 - Config: configs/pspnet/pspnet_r50-d32_rsb_4xb2-adamw-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes_20220316_141229-dd9c9610.pth -- Name: pspnet_r50b-d32_4xb2-80k_cityscapes-512x1024 - In Collection: PSPNet - Metadata: - backbone: R-50b-D32 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 64.89 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 2.9 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 72.61 - mIoU(ms+flip): 75.51 - Config: configs/pspnet/pspnet_r50b-d32_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d32_512x1024_80k_cityscapes/pspnet_r50b-d32_512x1024_80k_cityscapes_20220311_152152-23bcaf8c.pth -- Name: pspnet_r50-d8_4xb4-80k_ade20k-512x512 - In Collection: PSPNet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 42.5 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 8.5 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 41.13 - mIoU(ms+flip): 41.94 - Config: configs/pspnet/pspnet_r50-d8_4xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_ade20k/pspnet_r50-d8_512x512_80k_ade20k_20200615_014128-15a8b914.pth -- Name: pspnet_r101-d8_4xb4-80k_ade20k-512x512 - In Collection: PSPNet - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 65.36 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 12.0 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 43.57 - mIoU(ms+flip): 44.35 - Config: configs/pspnet/pspnet_r101-d8_4xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_ade20k/pspnet_r101-d8_512x512_80k_ade20k_20200614_031423-b6e782f0.pth -- Name: pspnet_r50-d8_4xb4-160k_ade20k-512x512 - In Collection: PSPNet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 42.48 - mIoU(ms+flip): 43.44 - Config: configs/pspnet/pspnet_r50-d8_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_160k_ade20k/pspnet_r50-d8_512x512_160k_ade20k_20200615_184358-1890b0bd.pth -- Name: pspnet_r101-d8_4xb4-160k_ade20k-512x512 - In Collection: PSPNet - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 44.39 - mIoU(ms+flip): 45.35 - Config: configs/pspnet/pspnet_r101-d8_4xb4-160k_ade20k-512x512.py - Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_160k_ade20k/pspnet_r101-d8_512x512_160k_ade20k_20200615_100650-967c316f.pth -- Name: pspnet_r50-d8_4xb4-20k_voc12aug-512x512 - In Collection: PSPNet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 20000 - inference time (ms/im): - - value: 42.39 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 6.1 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 76.78 - mIoU(ms+flip): 77.61 - Config: configs/pspnet/pspnet_r50-d8_4xb4-20k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_20k_voc12aug/pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958-ed5dfbd9.pth -- Name: pspnet_r101-d8_4xb4-20k_voc12aug-512x512 - In Collection: PSPNet - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 20000 - inference time (ms/im): - - value: 66.58 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 9.6 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 78.47 - mIoU(ms+flip): 79.25 - Config: configs/pspnet/pspnet_r101-d8_4xb4-20k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_20k_voc12aug/pspnet_r101-d8_512x512_20k_voc12aug_20200617_102003-4aef3c9a.pth -- Name: pspnet_r50-d8_4xb4-40k_voc12aug-512x512 - In Collection: PSPNet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 40000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 77.29 - mIoU(ms+flip): 78.48 - Config: configs/pspnet/pspnet_r50-d8_4xb4-40k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_40k_voc12aug/pspnet_r50-d8_512x512_40k_voc12aug_20200613_161222-ae9c1b8c.pth -- Name: pspnet_r101-d8_4xb4-40k_voc12aug-512x512 - In Collection: PSPNet - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 40000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 78.52 - mIoU(ms+flip): 79.57 - Config: configs/pspnet/pspnet_r101-d8_4xb4-40k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_40k_voc12aug/pspnet_r101-d8_512x512_40k_voc12aug_20200613_161222-bc933b18.pth -- Name: pspnet_r101-d8_4xb4-40k_pascal-context-480x480 - In Collection: PSPNet - Metadata: - backbone: R-101-D8 - crop size: (480,480) - lr schd: 40000 - inference time (ms/im): - - value: 103.31 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (480,480) - Training Memory (GB): 8.8 - Results: - - Task: Semantic Segmentation - Dataset: Pascal Context - Metrics: - mIoU: 46.6 - mIoU(ms+flip): 47.78 - Config: configs/pspnet/pspnet_r101-d8_4xb4-40k_pascal-context-480x480.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context/pspnet_r101-d8_480x480_40k_pascal_context_20200911_211210-bf0f5d7c.pth -- Name: pspnet_r101-d8_4xb4-80k_pascal-context-480x480 - In Collection: PSPNet - Metadata: - backbone: R-101-D8 - crop size: (480,480) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal Context - Metrics: - mIoU: 46.03 - mIoU(ms+flip): 47.15 - Config: configs/pspnet/pspnet_r101-d8_4xb4-80k_pascal-context-480x480.py - Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context/pspnet_r101-d8_480x480_80k_pascal_context_20200911_190530-c86d6233.pth -- Name: pspnet_r101-d8_4xb4-40k_pascal-context-59-480x480 - In Collection: PSPNet - Metadata: - backbone: R-101-D8 - crop size: (480,480) - lr schd: 40000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal Context 59 - Metrics: - mIoU: 52.02 - mIoU(ms+flip): 53.54 - Config: configs/pspnet/pspnet_r101-d8_4xb4-40k_pascal-context-59-480x480.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context_59/pspnet_r101-d8_480x480_40k_pascal_context_59_20210416_114524-86d44cd4.pth -- Name: pspnet_r101-d8_4xb4-80k_pascal-context-59-480x480 - In Collection: PSPNet - Metadata: - backbone: R-101-D8 - crop size: (480,480) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal Context 59 - Metrics: - mIoU: 52.47 - mIoU(ms+flip): 53.99 - Config: configs/pspnet/pspnet_r101-d8_4xb4-80k_pascal-context-59-480x480.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context_59/pspnet_r101-d8_480x480_80k_pascal_context_59_20210416_114418-fa6caaa2.pth -- Name: pspnet_r50-d8_4xb4-20k_coco-stuff10k-512x512 - In Collection: PSPNet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 20000 - inference time (ms/im): - - value: 48.78 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 9.6 - Results: - - Task: Semantic Segmentation - Dataset: COCO-Stuff 10k - Metrics: - mIoU: 35.69 - mIoU(ms+flip): 36.62 - Config: configs/pspnet/pspnet_r50-d8_4xb4-20k_coco-stuff10k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k_20210820_203258-b88df27f.pth -- Name: pspnet_r101-d8_4xb4-20k_coco-stuff10k-512x512 - In Collection: PSPNet - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 20000 - inference time (ms/im): - - value: 90.09 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 13.2 - Results: - - Task: Semantic Segmentation - Dataset: COCO-Stuff 10k - Metrics: - mIoU: 37.26 - mIoU(ms+flip): 38.52 - Config: configs/pspnet/pspnet_r101-d8_4xb4-20k_coco-stuff10k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k/pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k_20210820_232135-76aae482.pth -- Name: pspnet_r50-d8_4xb4-40k_coco-stuff10k-512x512 - In Collection: PSPNet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 40000 - Results: - - Task: Semantic Segmentation - Dataset: COCO-Stuff 10k - Metrics: - mIoU: 36.33 - mIoU(ms+flip): 37.24 - Config: configs/pspnet/pspnet_r50-d8_4xb4-40k_coco-stuff10k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k_20210821_030857-92e2902b.pth -- Name: pspnet_r101-d8_4xb4-40k_coco-stuff10k-512x512 - In Collection: PSPNet - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 40000 - Results: - - Task: Semantic Segmentation - Dataset: COCO-Stuff 10k - Metrics: - mIoU: 37.76 - mIoU(ms+flip): 38.86 - Config: configs/pspnet/pspnet_r101-d8_4xb4-40k_coco-stuff10k-512x512.py - Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k/pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k_20210821_014022-831aec95.pth -- Name: pspnet_r50-d8_4xb4-80k_coco-stuff164k-512x512 - In Collection: PSPNet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 48.78 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 9.6 - Results: - - Task: Semantic Segmentation - Dataset: COCO-Stuff 164k - Metrics: - mIoU: 38.8 - mIoU(ms+flip): 39.19 - Config: configs/pspnet/pspnet_r50-d8_4xb4-80k_coco-stuff164k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k_20210707_152034-0e41b2db.pth -- Name: pspnet_r101-d8_4xb4-80k_coco-stuff164k-512x512 - In Collection: PSPNet - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 90.09 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 13.2 - Results: - - Task: Semantic Segmentation - Dataset: COCO-Stuff 164k - Metrics: - mIoU: 40.34 - mIoU(ms+flip): 40.79 - Config: configs/pspnet/pspnet_r101-d8_4xb4-80k_coco-stuff164k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k_20210707_152034-7eb41789.pth -- Name: pspnet_r50-d8_4xb4-160k_coco-stuff164k-512x512 - In Collection: PSPNet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: COCO-Stuff 164k - Metrics: - mIoU: 39.64 - mIoU(ms+flip): 39.97 - Config: configs/pspnet/pspnet_r50-d8_4xb4-160k_coco-stuff164k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k_20210707_152004-51276a57.pth -- Name: pspnet_r101-d8_4xb4-160k_coco-stuff164k-512x512 - In Collection: PSPNet - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: COCO-Stuff 164k - Metrics: - mIoU: 41.28 - mIoU(ms+flip): 41.66 - Config: configs/pspnet/pspnet_r101-d8_4xb4-160k_coco-stuff164k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k_20210707_152004-4af9621b.pth -- Name: pspnet_r50-d8_4xb4-320k_coco-stuff164k-512x512 - In Collection: PSPNet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 320000 - Results: - - Task: Semantic Segmentation - Dataset: COCO-Stuff 164k - Metrics: - mIoU: 40.53 - mIoU(ms+flip): 40.75 - Config: configs/pspnet/pspnet_r50-d8_4xb4-320k_coco-stuff164k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k_20210707_152004-be9610cc.pth -- Name: pspnet_r101-d8_4xb4-320k_coco-stuff164k-512x512 - In Collection: PSPNet - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 320000 - Results: - - Task: Semantic Segmentation - Dataset: COCO-Stuff 164k - Metrics: - mIoU: 41.95 - mIoU(ms+flip): 42.42 - Config: configs/pspnet/pspnet_r101-d8_4xb4-320k_coco-stuff164k-512x512.py - Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k_20210707_152004-72220c60.pth -- Name: pspnet_r18-d8_4xb4-80k_loveda-512x512 - In Collection: PSPNet - Metadata: - backbone: R-18-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 37.22 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 1.45 - Results: - - Task: Semantic Segmentation - Dataset: LoveDA - Metrics: - mIoU: 48.62 - mIoU(ms+flip): 47.57 - Config: configs/pspnet/pspnet_r18-d8_4xb4-80k_loveda-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x512_80k_loveda/pspnet_r18-d8_512x512_80k_loveda_20211105_052100-b97697f1.pth -- Name: pspnet_r50-d8_4xb4-80k_loveda-512x512 - In Collection: PSPNet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 151.52 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 6.14 - Results: - - Task: Semantic Segmentation - Dataset: LoveDA - Metrics: - mIoU: 50.46 - mIoU(ms+flip): 50.19 - Config: configs/pspnet/pspnet_r50-d8_4xb4-80k_loveda-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_loveda/pspnet_r50-d8_512x512_80k_loveda_20211104_155728-88610f9f.pth -- Name: pspnet_r101-d8_4xb4-80k_loveda-512x512 - In Collection: PSPNet - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 218.34 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 9.61 - Results: - - Task: Semantic Segmentation - Dataset: LoveDA - Metrics: - mIoU: 51.86 - mIoU(ms+flip): 51.34 - Config: configs/pspnet/pspnet_r101-d8_4xb4-80k_loveda-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_loveda/pspnet_r101-d8_512x512_80k_loveda_20211104_153212-1c06c6a8.pth -- Name: pspnet_r18-d8_4xb4-80k_potsdam-512x512 - In Collection: PSPNet - Metadata: - backbone: R-18-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 11.75 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 1.5 - Results: - - Task: Semantic Segmentation - Dataset: Potsdam - Metrics: - mIoU: 77.09 - mIoU(ms+flip): 78.3 - Config: configs/pspnet/pspnet_r18-d8_4xb4-80k_potsdam-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_512x512_80k_potsdam/pspnet_r18-d8_4x4_512x512_80k_potsdam_20211220_125612-7cd046e1.pth -- Name: pspnet_r50-d8_4xb4-80k_potsdam-512x512 - In Collection: PSPNet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 33.1 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 6.14 - Results: - - Task: Semantic Segmentation - Dataset: Potsdam - Metrics: - mIoU: 78.12 - mIoU(ms+flip): 78.98 - Config: configs/pspnet/pspnet_r50-d8_4xb4-80k_potsdam-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_potsdam/pspnet_r50-d8_4x4_512x512_80k_potsdam_20211219_043541-2dd5fe67.pth -- Name: pspnet_r101-d8_4xb4-80k_potsdam-512x512 - In Collection: PSPNet - Metadata: - backbone: R-101-D8 - crop size: (512,512) 
- lr schd: 80000 - inference time (ms/im): - - value: 51.55 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 9.61 - Results: - - Task: Semantic Segmentation - Dataset: Potsdam - Metrics: - mIoU: 78.62 - mIoU(ms+flip): 79.47 - Config: configs/pspnet/pspnet_r101-d8_4xb4-80k_potsdam-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_potsdam/pspnet_r101-d8_4x4_512x512_80k_potsdam_20211220_125612-aed036c4.pth -- Name: pspnet_r18-d8_4xb4-80k_vaihingen-512x512 - In Collection: PSPNet - Metadata: - backbone: R-18-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 11.76 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 1.45 - Results: - - Task: Semantic Segmentation - Dataset: Vaihingen - Metrics: - mIoU: 71.46 - mIoU(ms+flip): 73.36 - Config: configs/pspnet/pspnet_r18-d8_4xb4-80k_vaihingen-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_512x512_80k_vaihingen/pspnet_r18-d8_4x4_512x512_80k_vaihingen_20211228_160355-52a8a6f6.pth -- Name: pspnet_r50-d8_4xb4-80k_vaihingen-512x512 - In Collection: PSPNet - Metadata: - backbone: R-50-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 33.01 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 6.14 - Results: - - Task: Semantic Segmentation - Dataset: Vaihingen - Metrics: - mIoU: 72.36 - mIoU(ms+flip): 73.75 - Config: configs/pspnet/pspnet_r50-d8_4xb4-80k_vaihingen-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_vaihingen/pspnet_r50-d8_4x4_512x512_80k_vaihingen_20211228_160355-382f8f5b.pth -- Name: pspnet_r101-d8_4xb4-80k_vaihingen-512x512 - In Collection: PSPNet - Metadata: - backbone: R-101-D8 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 50.08 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 9.61 - Results: - - Task: Semantic Segmentation - Dataset: Vaihingen - Metrics: - mIoU: 72.61 - mIoU(ms+flip): 74.18 - Config: configs/pspnet/pspnet_r101-d8_4xb4-80k_vaihingen-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_vaihingen/pspnet_r101-d8_4x4_512x512_80k_vaihingen_20211231_230806-8eba0a09.pth -- Name: pspnet_r18-d8_4xb4-80k_isaid-896x896 - In Collection: PSPNet - Metadata: - backbone: R-18-D8 - crop size: (896,896) - lr schd: 80000 - inference time (ms/im): - - value: 37.16 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (896,896) - Training Memory (GB): 4.52 - Results: - - Task: Semantic Segmentation - Dataset: iSAID - Metrics: - mIoU: 60.22 - mIoU(ms+flip): 61.25 - Config: configs/pspnet/pspnet_r18-d8_4xb4-80k_isaid-896x896.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_896x896_80k_isaid/pspnet_r18-d8_4x4_896x896_80k_isaid_20220110_180526-e84c0b6a.pth -- Name: pspnet_r50-d8_4xb4-80k_isaid-896x896 - In Collection: PSPNet - Metadata: - backbone: R-50-D8 - crop size: (896,896) - lr schd: 80000 - inference time (ms/im): - - value: 112.61 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (896,896) - Training Memory (GB): 16.58 - Results: - - Task: Semantic Segmentation - Dataset: iSAID - 
Metrics: - mIoU: 65.36 - mIoU(ms+flip): 66.48 - Config: configs/pspnet/pspnet_r50-d8_4xb4-80k_isaid-896x896.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_896x896_80k_isaid/pspnet_r50-d8_4x4_896x896_80k_isaid_20220110_180629-1f21dc32.pth
diff --git a/configs/resnest/README.md b/configs/resnest/README.md
index 7f07d147b7..304791abe9 100644
--- a/configs/resnest/README.md
+++ b/configs/resnest/README.md
@@ -1,6 +1,6 @@
 # ResNeSt
 
-[ResNeSt: Split-Attention Networks](https://arxiv.org/abs/2004.08955)
+> [ResNeSt: Split-Attention Networks](https://arxiv.org/abs/2004.08955)
 
 ## Introduction
 
@@ -22,6 +22,26 @@ It is well known that featuremap attention and multi-path representation are imp
 
 
 
+## Results and models
+
+### Cityscapes
+
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download |
+| ---------- | -------- | --------- | ------: | -------: | -------------- | ------ | ----: | ------------- | ------ | -------- |
+| FCN | S-101-D8 | 512x1024 | 80000 | 11.4 | 2.39 | V100 | 77.56 | 78.98 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/resnest/resnest_s101-d8_fcn_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x1024_80k_cityscapes/fcn_s101-d8_512x1024_80k_cityscapes_20200807_140631-f8d155b3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x1024_80k_cityscapes/fcn_s101-d8_512x1024_80k_cityscapes-20200807_140631.log.json) |
+| PSPNet | S-101-D8 | 512x1024 | 80000 | 11.8 | 2.52 | V100 | 78.57 | 79.19 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/resnest/resnest_s101-d8_pspnet_4xb2-80k_cityscapes512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x1024_80k_cityscapes/pspnet_s101-d8_512x1024_80k_cityscapes_20200807_140631-c75f3b99.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x1024_80k_cityscapes/pspnet_s101-d8_512x1024_80k_cityscapes-20200807_140631.log.json) |
+| DeepLabV3 | S-101-D8 | 512x1024 | 80000 | 11.9 | 1.88 | V100 | 79.67 | 80.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/resnest/resnest_s101-d8_deeplabv3_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x1024_80k_cityscapes/deeplabv3_s101-d8_512x1024_80k_cityscapes_20200807_144429-b73c4270.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x1024_80k_cityscapes/deeplabv3_s101-d8_512x1024_80k_cityscapes-20200807_144429.log.json) |
+| DeepLabV3+ | S-101-D8 | 512x1024 | 80000 | 13.2 | 2.36 | V100 | 79.62 | 80.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/resnest/resnest_s101-d8_deeplabv3plus_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x1024_80k_cityscapes/deeplabv3plus_s101-d8_512x1024_80k_cityscapes_20200807_144429-1239eb43.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x1024_80k_cityscapes/deeplabv3plus_s101-d8_512x1024_80k_cityscapes-20200807_144429.log.json) |
+
+### ADE20K
+
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download |
+| ---------- | -------- | --------- | ------: | -------: | -------------- | ------ | ----: | ------------- | ------ | -------- |
+| FCN | S-101-D8 | 512x512 | 160000 | 14.2 | 12.86 | V100 | 45.62 | 46.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/resnest/resnest_s101-d8_fcn_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x512_160k_ade20k/fcn_s101-d8_512x512_160k_ade20k_20200807_145416-d3160329.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x512_160k_ade20k/fcn_s101-d8_512x512_160k_ade20k-20200807_145416.log.json) |
+| PSPNet | S-101-D8 | 512x512 | 160000 | 14.2 | 13.02 | V100 | 45.44 | 46.28 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/resnest/resnest_s101-d8_pspnet_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x512_160k_ade20k/pspnet_s101-d8_512x512_160k_ade20k_20200807_145416-a6daa92a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x512_160k_ade20k/pspnet_s101-d8_512x512_160k_ade20k-20200807_145416.log.json) |
+| DeepLabV3 | S-101-D8 | 512x512 | 160000 | 14.6 | 9.28 | V100 | 45.71 | 46.59 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/resnest/resnest_s101-d8_deeplabv3_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x512_160k_ade20k/deeplabv3_s101-d8_512x512_160k_ade20k_20200807_144503-17ecabe5.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x512_160k_ade20k/deeplabv3_s101-d8_512x512_160k_ade20k-20200807_144503.log.json) |
+| DeepLabV3+ | S-101-D8 | 512x512 | 160000 | 16.2 | 11.96 | V100 | 46.47 | 47.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/resnest/resnest_s101-d8_deeplabv3plus_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x512_160k_ade20k/deeplabv3plus_s101-d8_512x512_160k_ade20k_20200807_144503-27b26226.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x512_160k_ade20k/deeplabv3plus_s101-d8_512x512_160k_ade20k-20200807_144503.log.json) |
+
 ## Citation
 
 ```bibtex
@@ -32,23 +32,3 @@ journal={arXiv preprint arXiv:2004.08955},
 year={2020}
 }
 ```
-
-## Results and models
-
-### Cityscapes
-
-| Method | Backbone | Crop Size | Lr schd | Mem
(GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ---------- | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| FCN | S-101-D8 | 512x1024 | 80000 | 11.4 | 2.39 | 77.56 | 78.98 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/resnest/resnest_s101-d8_fcn_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x1024_80k_cityscapes/fcn_s101-d8_512x1024_80k_cityscapes_20200807_140631-f8d155b3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x1024_80k_cityscapes/fcn_s101-d8_512x1024_80k_cityscapes-20200807_140631.log.json) | -| PSPNet | S-101-D8 | 512x1024 | 80000 | 11.8 | 2.52 | 78.57 | 79.19 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/resnest/resnest_s101-d8_pspnet_4xb2-80k_cityscapes512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x1024_80k_cityscapes/pspnet_s101-d8_512x1024_80k_cityscapes_20200807_140631-c75f3b99.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x1024_80k_cityscapes/pspnet_s101-d8_512x1024_80k_cityscapes-20200807_140631.log.json) | -| DeepLabV3 | S-101-D8 | 512x1024 | 80000 | 11.9 | 1.88 | 79.67 | 80.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/resnest/resnest_s101-d8_deeplabv3_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x1024_80k_cityscapes/deeplabv3_s101-d8_512x1024_80k_cityscapes_20200807_144429-b73c4270.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x1024_80k_cityscapes/deeplabv3_s101-d8_512x1024_80k_cityscapes-20200807_144429.log.json) | -| DeepLabV3+ | S-101-D8 | 512x1024 | 80000 | 13.2 | 2.36 | 79.62 | 80.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/resnest/resnest_s101-d8_deeplabv3plus_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x1024_80k_cityscapes/deeplabv3plus_s101-d8_512x1024_80k_cityscapes_20200807_144429-1239eb43.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x1024_80k_cityscapes/deeplabv3plus_s101-d8_512x1024_80k_cityscapes-20200807_144429.log.json) | - -### ADE20K - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ---------- | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | --------------------------------------------------------------------------------------------------------------------------------------------- | 
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| FCN | S-101-D8 | 512x512 | 160000 | 14.2 | 12.86 | 45.62 | 46.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/resnest/resnest_s101-d8_fcn_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x512_160k_ade20k/fcn_s101-d8_512x512_160k_ade20k_20200807_145416-d3160329.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x512_160k_ade20k/fcn_s101-d8_512x512_160k_ade20k-20200807_145416.log.json) | -| PSPNet | S-101-D8 | 512x512 | 160000 | 14.2 | 13.02 | 45.44 | 46.28 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/resnest/resnest_s101-d8_pspnet_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x512_160k_ade20k/pspnet_s101-d8_512x512_160k_ade20k_20200807_145416-a6daa92a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x512_160k_ade20k/pspnet_s101-d8_512x512_160k_ade20k-20200807_145416.log.json) | -| DeepLabV3 | S-101-D8 | 512x512 | 160000 | 14.6 | 9.28 | 45.71 | 46.59 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/resnest/resnest_s101-d8_deeplabv3_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x512_160k_ade20k/deeplabv3_s101-d8_512x512_160k_ade20k_20200807_144503-17ecabe5.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x512_160k_ade20k/deeplabv3_s101-d8_512x512_160k_ade20k-20200807_144503.log.json) | -| DeepLabV3+ | S-101-D8 | 512x512 | 160000 | 16.2 | 11.96 | 46.47 | 47.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/resnest/resnest_s101-d8_deeplabv3plus_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x512_160k_ade20k/deeplabv3plus_s101-d8_512x512_160k_ade20k_20200807_144503-27b26226.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x512_160k_ade20k/deeplabv3plus_s101-d8_512x512_160k_ade20k-20200807_144503.log.json) | diff --git a/configs/resnest/metafile.yaml b/configs/resnest/metafile.yaml new file mode 100644 index 0000000000..0b8d41ebfd --- /dev/null +++ b/configs/resnest/metafile.yaml @@ -0,0 +1,193 @@ +Models: +- Name: resnest_s101-d8_fcn_4xb2-80k_cityscapes-512x1024 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.56 + mIoU(ms+flip): 78.98 + Config: configs/resnest/resnest_s101-d8_fcn_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - S-101-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 11.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x1024_80k_cityscapes/fcn_s101-d8_512x1024_80k_cityscapes_20200807_140631-f8d155b3.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x1024_80k_cityscapes/fcn_s101-d8_512x1024_80k_cityscapes-20200807_140631.log.json + Paper: + Title: 
'ResNeSt: Split-Attention Networks' + URL: https://arxiv.org/abs/2004.08955 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/resnest.py#L271 + Framework: PyTorch +- Name: resnest_s101-d8_pspnet_4xb2-80k_cityscapes512x1024 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.57 + mIoU(ms+flip): 79.19 + Config: configs/resnest/resnest_s101-d8_pspnet_4xb2-80k_cityscapes512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - S-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 11.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x1024_80k_cityscapes/pspnet_s101-d8_512x1024_80k_cityscapes_20200807_140631-c75f3b99.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x1024_80k_cityscapes/pspnet_s101-d8_512x1024_80k_cityscapes-20200807_140631.log.json + Paper: + Title: 'ResNeSt: Split-Attention Networks' + URL: https://arxiv.org/abs/2004.08955 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/resnest.py#L271 + Framework: PyTorch +- Name: resnest_s101-d8_deeplabv3_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.67 + mIoU(ms+flip): 80.51 + Config: configs/resnest/resnest_s101-d8_deeplabv3_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - S-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 11.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x1024_80k_cityscapes/deeplabv3_s101-d8_512x1024_80k_cityscapes_20200807_144429-b73c4270.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x1024_80k_cityscapes/deeplabv3_s101-d8_512x1024_80k_cityscapes-20200807_144429.log.json + Paper: + Title: 'ResNeSt: Split-Attention Networks' + URL: https://arxiv.org/abs/2004.08955 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/resnest.py#L271 + Framework: PyTorch +- Name: resnest_s101-d8_deeplabv3plus_4xb2-80k_cityscapes-512x1024 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.62 + mIoU(ms+flip): 80.27 + Config: configs/resnest/resnest_s101-d8_deeplabv3plus_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - S-101-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 13.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x1024_80k_cityscapes/deeplabv3plus_s101-d8_512x1024_80k_cityscapes_20200807_144429-1239eb43.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x1024_80k_cityscapes/deeplabv3plus_s101-d8_512x1024_80k_cityscapes-20200807_144429.log.json + Paper: + Title: 'ResNeSt: Split-Attention Networks' + URL: https://arxiv.org/abs/2004.08955 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/resnest.py#L271 + Framework: PyTorch +- Name: resnest_s101-d8_fcn_4xb4-160k_ade20k-512x512 + In Collection: FCN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.62 + mIoU(ms+flip): 46.16 + Config: configs/resnest/resnest_s101-d8_fcn_4xb4-160k_ade20k-512x512.py + Metadata: + 
Training Data: ADE20K + Batch Size: 16 + Architecture: + - S-101-D8 + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 14.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x512_160k_ade20k/fcn_s101-d8_512x512_160k_ade20k_20200807_145416-d3160329.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x512_160k_ade20k/fcn_s101-d8_512x512_160k_ade20k-20200807_145416.log.json + Paper: + Title: 'ResNeSt: Split-Attention Networks' + URL: https://arxiv.org/abs/2004.08955 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/resnest.py#L271 + Framework: PyTorch +- Name: resnest_s101-d8_pspnet_4xb4-160k_ade20k-512x512 + In Collection: PSPNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.44 + mIoU(ms+flip): 46.28 + Config: configs/resnest/resnest_s101-d8_pspnet_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - S-101-D8 + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 14.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x512_160k_ade20k/pspnet_s101-d8_512x512_160k_ade20k_20200807_145416-a6daa92a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x512_160k_ade20k/pspnet_s101-d8_512x512_160k_ade20k-20200807_145416.log.json + Paper: + Title: 'ResNeSt: Split-Attention Networks' + URL: https://arxiv.org/abs/2004.08955 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/resnest.py#L271 + Framework: PyTorch +- Name: resnest_s101-d8_deeplabv3_4xb4-160k_ade20k-512x512 + In Collection: DeepLabV3 + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.71 + mIoU(ms+flip): 46.59 + Config: configs/resnest/resnest_s101-d8_deeplabv3_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - S-101-D8 + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 14.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x512_160k_ade20k/deeplabv3_s101-d8_512x512_160k_ade20k_20200807_144503-17ecabe5.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x512_160k_ade20k/deeplabv3_s101-d8_512x512_160k_ade20k-20200807_144503.log.json + Paper: + Title: 'ResNeSt: Split-Attention Networks' + URL: https://arxiv.org/abs/2004.08955 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/resnest.py#L271 + Framework: PyTorch +- Name: resnest_s101-d8_deeplabv3plus_4xb4-160k_ade20k-512x512 + In Collection: DeepLabV3+ + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 46.47 + mIoU(ms+flip): 47.27 + Config: configs/resnest/resnest_s101-d8_deeplabv3plus_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - S-101-D8 + - DeepLabV3+ + Training Resources: 4x V100 GPUS + Memory (GB): 16.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x512_160k_ade20k/deeplabv3plus_s101-d8_512x512_160k_ade20k_20200807_144503-27b26226.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x512_160k_ade20k/deeplabv3plus_s101-d8_512x512_160k_ade20k-20200807_144503.log.json + Paper: + Title: 'ResNeSt: Split-Attention Networks' + URL: https://arxiv.org/abs/2004.08955 + Code: 
https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/resnest.py#L271 + Framework: PyTorch diff --git a/configs/resnest/resnest.yml b/configs/resnest/resnest.yml deleted file mode 100644 index ab897e3bd5..0000000000 --- a/configs/resnest/resnest.yml +++ /dev/null @@ -1,177 +0,0 @@ -Models: -- Name: resnest_s101-d8_fcn_4xb2-80k_cityscapes-512x1024 - In Collection: FCN - Metadata: - backbone: S-101-D8 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 418.41 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 11.4 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 77.56 - mIoU(ms+flip): 78.98 - Config: configs/resnest/resnest_s101-d8_fcn_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x1024_80k_cityscapes/fcn_s101-d8_512x1024_80k_cityscapes_20200807_140631-f8d155b3.pth -- Name: resnest_s101-d8_pspnet_4xb2-80k_cityscapes512x1024 - In Collection: PSPNet - Metadata: - backbone: S-101-D8 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 396.83 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 11.8 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.57 - mIoU(ms+flip): 79.19 - Config: configs/resnest/resnest_s101-d8_pspnet_4xb2-80k_cityscapes512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x1024_80k_cityscapes/pspnet_s101-d8_512x1024_80k_cityscapes_20200807_140631-c75f3b99.pth -- Name: resnest_s101-d8_deeplabv3_4xb2-80k_cityscapes-512x1024 - In Collection: DeepLabV3 - Metadata: - backbone: S-101-D8 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 531.91 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 11.9 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.67 - mIoU(ms+flip): 80.51 - Config: configs/resnest/resnest_s101-d8_deeplabv3_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x1024_80k_cityscapes/deeplabv3_s101-d8_512x1024_80k_cityscapes_20200807_144429-b73c4270.pth -- Name: resnest_s101-d8_deeplabv3plus_4xb2-80k_cityscapes-512x1024 - In Collection: DeepLabV3+ - Metadata: - backbone: S-101-D8 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 423.73 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 13.2 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.62 - mIoU(ms+flip): 80.27 - Config: configs/resnest/resnest_s101-d8_deeplabv3plus_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x1024_80k_cityscapes/deeplabv3plus_s101-d8_512x1024_80k_cityscapes_20200807_144429-1239eb43.pth -- Name: resnest_s101-d8_fcn_4xb4-160k_ade20k-512x512 - In Collection: FCN - Metadata: - backbone: S-101-D8 - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 77.76 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 14.2 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 45.62 - mIoU(ms+flip): 46.16 - Config: 
configs/resnest/resnest_s101-d8_fcn_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x512_160k_ade20k/fcn_s101-d8_512x512_160k_ade20k_20200807_145416-d3160329.pth -- Name: resnest_s101-d8_pspnet_4xb4-160k_ade20k-512x512 - In Collection: PSPNet - Metadata: - backbone: S-101-D8 - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 76.8 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 14.2 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 45.44 - mIoU(ms+flip): 46.28 - Config: configs/resnest/resnest_s101-d8_pspnet_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x512_160k_ade20k/pspnet_s101-d8_512x512_160k_ade20k_20200807_145416-a6daa92a.pth -- Name: resnest_s101-d8_deeplabv3_4xb4-160k_ade20k-512x512 - In Collection: DeepLabV3 - Metadata: - backbone: S-101-D8 - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 107.76 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 14.6 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 45.71 - mIoU(ms+flip): 46.59 - Config: configs/resnest/resnest_s101-d8_deeplabv3_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x512_160k_ade20k/deeplabv3_s101-d8_512x512_160k_ade20k_20200807_144503-17ecabe5.pth -- Name: resnest_s101-d8_deeplabv3plus_4xb4-160k_ade20k-512x512 - In Collection: DeepLabV3+ - Metadata: - backbone: S-101-D8 - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 83.61 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 16.2 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 46.47 - mIoU(ms+flip): 47.27 - Config: configs/resnest/resnest_s101-d8_deeplabv3plus_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x512_160k_ade20k/deeplabv3plus_s101-d8_512x512_160k_ade20k_20200807_144503-27b26226.pth diff --git a/configs/segformer/README.md b/configs/segformer/README.md index be64099da3..f8999b0efa 100644 --- a/configs/segformer/README.md +++ b/configs/segformer/README.md @@ -1,6 +1,6 @@ # SegFormer -[SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) +> [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) ## Introduction @@ -22,17 +22,6 @@ We present SegFormer, a simple, efficient yet powerful semantic segmentation fra -## Citation - -```bibtex -@article{xie2021segformer, - title={SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers}, - author={Xie, Enze and Wang, Wenhai and Yu, Zhiding and Anandkumar, Anima and Alvarez, Jose M and Luo, Ping}, - journal={arXiv preprint arXiv:2105.15203}, - year={2021} -} -``` - ## Usage To use other repositories' pre-trained models, it is necessary to convert keys. 
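[Editor's illustration] The Usage section above says other repositories' pre-trained models need their keys converted before loading. As a rough sketch of what that entails (the file names and the rename rule here are hypothetical; the authoritative mapping lives in the repository's converter script referenced in the next hunk), the operation amounts to loading a checkpoint's state dict, renaming its parameters, and saving the result:

```python
# Hypothetical sketch only: the rename rule below is made up, and the real
# mapping is defined by the repository's converter script. This just shows
# the shape of the operation the README describes.
import torch

# Stands in for PRETRAIN_PATH (example file name).
ckpt = torch.load('segformer_official.pth', map_location='cpu')
# Some checkpoints nest the weights under a 'state_dict' key.
state_dict = ckpt.get('state_dict', ckpt)

converted = {}
for key, weight in state_dict.items():
    # Example (hypothetical) rename: upstream block-style parameter names
    # mapped onto the names the MMSegmentation backbone expects.
    converted[key.replace('block', 'layers')] = weight

# Stands in for STORE_PATH (example file name).
torch.save(converted, 'segformer_mmseg.pth')
```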
@@ -49,15 +38,15 @@ This script convert model from `PRETRAIN_PATH` and store the converted model in ### ADE20K -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| --------- | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | ---------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| Segformer | MIT-B0 | 512x512 | 160000 | 2.1 | 51.32 | 37.41 | 38.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/segformer/segformer_mit-b0_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_512x512_160k_ade20k/segformer_mit-b0_512x512_160k_ade20k_20210726_101530-8ffa8fda.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_512x512_160k_ade20k/segformer_mit-b0_512x512_160k_ade20k_20210726_101530.log.json) | -| Segformer | MIT-B1 | 512x512 | 160000 | 2.6 | 47.66 | 40.97 | 42.54 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/segformer/segformer_mit-b1_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_512x512_160k_ade20k/segformer_mit-b1_512x512_160k_ade20k_20210726_112106-d70e859d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_512x512_160k_ade20k/segformer_mit-b1_512x512_160k_ade20k_20210726_112106.log.json) | -| Segformer | MIT-B2 | 512x512 | 160000 | 3.6 | 30.88 | 45.58 | 47.03 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/segformer/segformer_mit-b2_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_512x512_160k_ade20k/segformer_mit-b2_512x512_160k_ade20k_20210726_112103-cbd414ac.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_512x512_160k_ade20k/segformer_mit-b2_512x512_160k_ade20k_20210726_112103.log.json) | -| Segformer | MIT-B3 | 512x512 | 160000 | 4.8 | 22.11 | 47.82 | 48.81 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/segformer/segformer_mit-b3_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_512x512_160k_ade20k/segformer_mit-b3_512x512_160k_ade20k_20210726_081410-962b98d2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_512x512_160k_ade20k/segformer_mit-b3_512x512_160k_ade20k_20210726_081410.log.json) | -| Segformer | MIT-B4 | 512x512 | 160000 | 6.1 | 15.45 | 48.46 | 49.76 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/segformer/segformer_mit-b4_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_512x512_160k_ade20k/segformer_mit-b4_512x512_160k_ade20k_20210728_183055-7f509d7d.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_512x512_160k_ade20k/segformer_mit-b4_512x512_160k_ade20k_20210728_183055.log.json) | -| Segformer | MIT-B5 | 512x512 | 160000 | 7.2 | 11.89 | 49.13 | 50.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/segformer/segformer_mit-b5_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_512x512_160k_ade20k/segformer_mit-b5_512x512_160k_ade20k_20210726_145235-94cedf59.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_512x512_160k_ade20k/segformer_mit-b5_512x512_160k_ade20k_20210726_145235.log.json) | -| Segformer | MIT-B5 | 640x640 | 160000 | 11.5 | 11.30 | 49.62 | 50.36 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/segformer/segformer_mit-b5_8xb2-160k_ade20k-640x640.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_640x640_160k_ade20k/segformer_mit-b5_640x640_160k_ade20k_20210801_121243-41d2845b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_640x640_160k_ade20k/segformer_mit-b5_640x640_160k_ade20k_20210801_121243.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------: | -------------- | -------- | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| Segformer | MIT-B0 | 512x512 | 160000 | 2.1 | 51.32 | 1080 Ti | 37.41 | 38.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segformer/segformer_mit-b0_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_512x512_160k_ade20k/segformer_mit-b0_512x512_160k_ade20k_20210726_101530-8ffa8fda.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_512x512_160k_ade20k/segformer_mit-b0_512x512_160k_ade20k_20210726_101530.log.json) | +| Segformer | MIT-B1 | 512x512 | 160000 | 2.6 | 47.66 | TITAN Xp | 40.97 | 42.54 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segformer/segformer_mit-b1_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_512x512_160k_ade20k/segformer_mit-b1_512x512_160k_ade20k_20210726_112106-d70e859d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_512x512_160k_ade20k/segformer_mit-b1_512x512_160k_ade20k_20210726_112106.log.json) | +| Segformer | MIT-B2 | 512x512 | 160000 | 3.6 | 30.88 | TITAN Xp | 45.58 | 47.03 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segformer/segformer_mit-b2_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_512x512_160k_ade20k/segformer_mit-b2_512x512_160k_ade20k_20210726_112103-cbd414ac.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_512x512_160k_ade20k/segformer_mit-b2_512x512_160k_ade20k_20210726_112103.log.json) | +| Segformer | MIT-B3 | 512x512 | 160000 | 4.8 | 22.11 | V100 | 47.82 | 48.81 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segformer/segformer_mit-b3_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_512x512_160k_ade20k/segformer_mit-b3_512x512_160k_ade20k_20210726_081410-962b98d2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_512x512_160k_ade20k/segformer_mit-b3_512x512_160k_ade20k_20210726_081410.log.json) | +| Segformer | MIT-B4 | 512x512 | 160000 | 6.1 | 15.45 | V100 | 48.46 | 49.76 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segformer/segformer_mit-b4_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_512x512_160k_ade20k/segformer_mit-b4_512x512_160k_ade20k_20210728_183055-7f509d7d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_512x512_160k_ade20k/segformer_mit-b4_512x512_160k_ade20k_20210728_183055.log.json) | +| Segformer | MIT-B5 | 512x512 | 160000 | 7.2 | 11.89 | V100 | 49.13 | 50.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segformer/segformer_mit-b5_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_512x512_160k_ade20k/segformer_mit-b5_512x512_160k_ade20k_20210726_145235-94cedf59.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_512x512_160k_ade20k/segformer_mit-b5_512x512_160k_ade20k_20210726_145235.log.json) | +| Segformer | MIT-B5 | 640x640 | 160000 | 11.5 | 11.30 | V100 | 49.62 | 50.36 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segformer/segformer_mit-b5_8xb2-160k_ade20k-640x640.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_640x640_160k_ade20k/segformer_mit-b5_640x640_160k_ade20k_20210801_121243-41d2845b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_640x640_160k_ade20k/segformer_mit-b5_640x640_160k_ade20k_20210801_121243.log.json) | Evaluation with AlignedResize: @@ -91,11 +80,22 @@ test_pipeline = [ The lower fps result is caused by the sliding window inference scheme (window size:1024x1024). 
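[Editor's illustration] The hunk above references the README's `test_pipeline` for evaluation with AlignedResize. A minimal sketch of what such a pipeline looks like, assuming the 0.x-era MMSegmentation config conventions and the `ResizeToMultiple` transform (the README's actual block may differ):

```python
# A minimal sketch, assuming MMSegmentation's 0.x-era config conventions;
# the README's actual test_pipeline block may differ.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 512),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            # Resize both sides to a multiple of 32 so feature maps stay
            # aligned with the backbone's downsampling stages; this is the
            # "AlignedResize" trick.
            dict(type='ResizeToMultiple', size_divisor=32),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
```

By contrast, the Cityscapes numbers in the table below are obtained with sliding-window inference over a 1024x1024 window, which is why their reported fps is lower.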
-| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| --------- | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | ---------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| Segformer | MIT-B0 | 1024x1024 | 160000 | 3.64 | 4.74 | 76.54 | 78.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/segformer/segformer_mit-b0_8xb1-160k_cityscapes-1024x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes/segformer_mit-b0_8x1_1024x1024_160k_cityscapes_20211208_101857-e7f88502.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes/segformer_mit-b0_8x1_1024x1024_160k_cityscapes_20211208_101857.log.json) | -| Segformer | MIT-B1 | 1024x1024 | 160000 | 4.49 | 4.3 | 78.56 | 79.73 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/segformer/segformer_mit-b1_8xb1-160k_cityscapes-1024x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes/segformer_mit-b1_8x1_1024x1024_160k_cityscapes_20211208_064213-655c7b3f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes/segformer_mit-b1_8x1_1024x1024_160k_cityscapes_20211208_064213.log.json) | -| Segformer | MIT-B2 | 1024x1024 | 160000 | 7.42 | 3.36 | 81.08 | 82.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/segformer/segformer_mit-b2_8xb1-160k_cityscapes-1024x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes/segformer_mit-b2_8x1_1024x1024_160k_cityscapes_20211207_134205-6096669a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes/segformer_mit-b2_8x1_1024x1024_160k_cityscapes_20211207_134205.log.json) | -| Segformer | MIT-B3 | 1024x1024 | 160000 | 10.86 | 2.53 | 81.94 | 83.14 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/segformer/segformer_mit-b3_8xb1-160k_cityscapes-1024x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes/segformer_mit-b3_8x1_1024x1024_160k_cityscapes_20211206_224823-a8f8a177.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes/segformer_mit-b3_8x1_1024x1024_160k_cityscapes_20211206_224823.log.json) | -| Segformer | MIT-B4 | 1024x1024 | 160000 | 15.07 | 1.88 | 81.89 | 83.38 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/segformer/segformer_mit-b4_8xb1-160k_cityscapes-1024x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes/segformer_mit-b4_8x1_1024x1024_160k_cityscapes_20211207_080709-07f6c333.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes/segformer_mit-b4_8x1_1024x1024_160k_cityscapes_20211207_080709.log.json) | -| Segformer | MIT-B5 | 1024x1024 | 160000 | 18.00 | 1.39 | 82.25 | 83.48 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/segformer/segformer_mit-b5_8xb1-160k_cityscapes-1024x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes/segformer_mit-b5_8x1_1024x1024_160k_cityscapes_20211206_072934-87a052ec.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes/segformer_mit-b5_8x1_1024x1024_160k_cityscapes_20211206_072934.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------: | -------------- | ------ | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Segformer | MIT-B0 | 1024x1024 | 160000 | 3.64 | 4.74 | V100 | 76.54 | 78.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segformer/segformer_mit-b0_8xb1-160k_cityscapes-1024x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes/segformer_mit-b0_8x1_1024x1024_160k_cityscapes_20211208_101857-e7f88502.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes/segformer_mit-b0_8x1_1024x1024_160k_cityscapes_20211208_101857.log.json) | +| Segformer | MIT-B1 | 1024x1024 | 160000 | 4.49 | 4.3 | V100 | 78.56 | 79.73 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segformer/segformer_mit-b1_8xb1-160k_cityscapes-1024x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes/segformer_mit-b1_8x1_1024x1024_160k_cityscapes_20211208_064213-655c7b3f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes/segformer_mit-b1_8x1_1024x1024_160k_cityscapes_20211208_064213.log.json) | +| Segformer | MIT-B2 | 1024x1024 | 160000 | 7.42 | 3.36 | V100 | 81.08 | 82.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segformer/segformer_mit-b2_8xb1-160k_cityscapes-1024x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes/segformer_mit-b2_8x1_1024x1024_160k_cityscapes_20211207_134205-6096669a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes/segformer_mit-b2_8x1_1024x1024_160k_cityscapes_20211207_134205.log.json) | +| Segformer | MIT-B3 | 1024x1024 | 160000 | 10.86 | 2.53 | V100 | 81.94 | 83.14 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segformer/segformer_mit-b3_8xb1-160k_cityscapes-1024x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes/segformer_mit-b3_8x1_1024x1024_160k_cityscapes_20211206_224823-a8f8a177.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes/segformer_mit-b3_8x1_1024x1024_160k_cityscapes_20211206_224823.log.json) | +| Segformer | MIT-B4 | 1024x1024 | 160000 | 15.07 | 1.88 | V100 | 81.89 | 83.38 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segformer/segformer_mit-b4_8xb1-160k_cityscapes-1024x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes/segformer_mit-b4_8x1_1024x1024_160k_cityscapes_20211207_080709-07f6c333.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes/segformer_mit-b4_8x1_1024x1024_160k_cityscapes_20211207_080709.log.json) | +| Segformer | MIT-B5 | 1024x1024 | 160000 | 18.00 | 1.39 | V100 | 82.25 | 83.48 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segformer/segformer_mit-b5_8xb1-160k_cityscapes-1024x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes/segformer_mit-b5_8x1_1024x1024_160k_cityscapes_20211206_072934-87a052ec.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes/segformer_mit-b5_8x1_1024x1024_160k_cityscapes_20211206_072934.log.json) | + +## Citation + +```bibtex +@article{xie2021segformer, + title={SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers}, + author={Xie, Enze and Wang, Wenhai and Yu, Zhiding and Anandkumar, Anima and Alvarez, Jose M and Luo, Ping}, + journal={arXiv preprint arXiv:2105.15203}, + year={2021} +} +``` diff --git a/configs/segformer/metafile.yaml b/configs/segformer/metafile.yaml new file mode 100644 index 0000000000..7fb38d745b --- /dev/null +++ b/configs/segformer/metafile.yaml @@ -0,0 +1,340 @@ +Collections: +- Name: Segformer + License: Apache License 2.0 + Metadata: + Training Data: + - ADE20K + - Cityscapes + Paper: + Title: 'SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers' + URL: https://arxiv.org/abs/2105.15203 + README: configs/segformer/README.md + Frameworks: + - PyTorch +Models: +- Name: segformer_mit-b0_8xb2-160k_ade20k-512x512 + In Collection: Segformer + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 37.41 + mIoU(ms+flip): 38.34 + Config: configs/segformer/segformer_mit-b0_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - MIT-B0 + - Segformer + Training Resources: 8x 1080 Ti GPUS + Memory (GB): 2.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_512x512_160k_ade20k/segformer_mit-b0_512x512_160k_ade20k_20210726_101530-8ffa8fda.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_512x512_160k_ade20k/segformer_mit-b0_512x512_160k_ade20k_20210726_101530.log.json + Paper: + Title: 'SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers' + URL: https://arxiv.org/abs/2105.15203 + Code: 
https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mit.py#L246 + Framework: PyTorch +- Name: segformer_mit-b1_8xb2-160k_ade20k-512x512 + In Collection: Segformer + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 40.97 + mIoU(ms+flip): 42.54 + Config: configs/segformer/segformer_mit-b1_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - MIT-B1 + - Segformer + Training Resources: 8x TITAN Xp GPUS + Memory (GB): 2.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_512x512_160k_ade20k/segformer_mit-b1_512x512_160k_ade20k_20210726_112106-d70e859d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_512x512_160k_ade20k/segformer_mit-b1_512x512_160k_ade20k_20210726_112106.log.json + Paper: + Title: 'SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers' + URL: https://arxiv.org/abs/2105.15203 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mit.py#L246 + Framework: PyTorch +- Name: segformer_mit-b2_8xb2-160k_ade20k-512x512 + In Collection: Segformer + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.58 + mIoU(ms+flip): 47.03 + Config: configs/segformer/segformer_mit-b2_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - MIT-B2 + - Segformer + Training Resources: 8x TITAN Xp GPUS + Memory (GB): 3.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_512x512_160k_ade20k/segformer_mit-b2_512x512_160k_ade20k_20210726_112103-cbd414ac.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_512x512_160k_ade20k/segformer_mit-b2_512x512_160k_ade20k_20210726_112103.log.json + Paper: + Title: 'SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers' + URL: https://arxiv.org/abs/2105.15203 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mit.py#L246 + Framework: PyTorch +- Name: segformer_mit-b3_8xb2-160k_ade20k-512x512 + In Collection: Segformer + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 47.82 + mIoU(ms+flip): 48.81 + Config: configs/segformer/segformer_mit-b3_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - MIT-B3 + - Segformer + Training Resources: 8x V100 GPUS + Memory (GB): 4.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_512x512_160k_ade20k/segformer_mit-b3_512x512_160k_ade20k_20210726_081410-962b98d2.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_512x512_160k_ade20k/segformer_mit-b3_512x512_160k_ade20k_20210726_081410.log.json + Paper: + Title: 'SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers' + URL: https://arxiv.org/abs/2105.15203 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mit.py#L246 + Framework: PyTorch +- Name: segformer_mit-b4_8xb2-160k_ade20k-512x512 + In Collection: Segformer + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 48.46 + mIoU(ms+flip): 49.76 + Config: configs/segformer/segformer_mit-b4_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - MIT-B4 + - Segformer + 
Training Resources: 8x V100 GPUS + Memory (GB): 6.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_512x512_160k_ade20k/segformer_mit-b4_512x512_160k_ade20k_20210728_183055-7f509d7d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_512x512_160k_ade20k/segformer_mit-b4_512x512_160k_ade20k_20210728_183055.log.json + Paper: + Title: 'SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers' + URL: https://arxiv.org/abs/2105.15203 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mit.py#L246 + Framework: PyTorch +- Name: segformer_mit-b5_8xb2-160k_ade20k-512x512 + In Collection: Segformer + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 49.13 + mIoU(ms+flip): 50.22 + Config: configs/segformer/segformer_mit-b5_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - MIT-B5 + - Segformer + Training Resources: 8x V100 GPUS + Memory (GB): 7.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_512x512_160k_ade20k/segformer_mit-b5_512x512_160k_ade20k_20210726_145235-94cedf59.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_512x512_160k_ade20k/segformer_mit-b5_512x512_160k_ade20k_20210726_145235.log.json + Paper: + Title: 'SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers' + URL: https://arxiv.org/abs/2105.15203 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mit.py#L246 + Framework: PyTorch +- Name: segformer_mit-b5_8xb2-160k_ade20k-640x640 + In Collection: Segformer + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 49.62 + mIoU(ms+flip): 50.36 + Config: configs/segformer/segformer_mit-b5_8xb2-160k_ade20k-640x640.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - MIT-B5 + - Segformer + Training Resources: 8x V100 GPUS + Memory (GB): 11.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_640x640_160k_ade20k/segformer_mit-b5_640x640_160k_ade20k_20210801_121243-41d2845b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_640x640_160k_ade20k/segformer_mit-b5_640x640_160k_ade20k_20210801_121243.log.json + Paper: + Title: 'SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers' + URL: https://arxiv.org/abs/2105.15203 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mit.py#L246 + Framework: PyTorch +- Name: segformer_mit-b0_8xb1-160k_cityscapes-1024x1024 + In Collection: Segformer + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.54 + mIoU(ms+flip): 78.22 + Config: configs/segformer/segformer_mit-b0_8xb1-160k_cityscapes-1024x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - MIT-B0 + - Segformer + Training Resources: 8x V100 GPUS + Memory (GB): 3.64 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes/segformer_mit-b0_8x1_1024x1024_160k_cityscapes_20211208_101857-e7f88502.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes/segformer_mit-b0_8x1_1024x1024_160k_cityscapes_20211208_101857.log.json + Paper: + Title: 
'SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers' + URL: https://arxiv.org/abs/2105.15203 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mit.py#L246 + Framework: PyTorch +- Name: segformer_mit-b1_8xb1-160k_cityscapes-1024x1024 + In Collection: Segformer + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.56 + mIoU(ms+flip): 79.73 + Config: configs/segformer/segformer_mit-b1_8xb1-160k_cityscapes-1024x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - MIT-B1 + - Segformer + Training Resources: 8x V100 GPUS + Memory (GB): 4.49 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes/segformer_mit-b1_8x1_1024x1024_160k_cityscapes_20211208_064213-655c7b3f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes/segformer_mit-b1_8x1_1024x1024_160k_cityscapes_20211208_064213.log.json + Paper: + Title: 'SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers' + URL: https://arxiv.org/abs/2105.15203 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mit.py#L246 + Framework: PyTorch +- Name: segformer_mit-b2_8xb1-160k_cityscapes-1024x1024 + In Collection: Segformer + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 81.08 + mIoU(ms+flip): 82.18 + Config: configs/segformer/segformer_mit-b2_8xb1-160k_cityscapes-1024x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - MIT-B2 + - Segformer + Training Resources: 8x V100 GPUS + Memory (GB): 7.42 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes/segformer_mit-b2_8x1_1024x1024_160k_cityscapes_20211207_134205-6096669a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes/segformer_mit-b2_8x1_1024x1024_160k_cityscapes_20211207_134205.log.json + Paper: + Title: 'SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers' + URL: https://arxiv.org/abs/2105.15203 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mit.py#L246 + Framework: PyTorch +- Name: segformer_mit-b3_8xb1-160k_cityscapes-1024x1024 + In Collection: Segformer + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 81.94 + mIoU(ms+flip): 83.14 + Config: configs/segformer/segformer_mit-b3_8xb1-160k_cityscapes-1024x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - MIT-B3 + - Segformer + Training Resources: 8x V100 GPUS + Memory (GB): 10.86 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes/segformer_mit-b3_8x1_1024x1024_160k_cityscapes_20211206_224823-a8f8a177.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes/segformer_mit-b3_8x1_1024x1024_160k_cityscapes_20211206_224823.log.json + Paper: + Title: 'SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers' + URL: https://arxiv.org/abs/2105.15203 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mit.py#L246 + Framework: PyTorch +- Name: 
segformer_mit-b4_8xb1-160k_cityscapes-1024x1024 + In Collection: Segformer + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 81.89 + mIoU(ms+flip): 83.38 + Config: configs/segformer/segformer_mit-b4_8xb1-160k_cityscapes-1024x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - MIT-B4 + - Segformer + Training Resources: 8x V100 GPUS + Memory (GB): 15.07 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes/segformer_mit-b4_8x1_1024x1024_160k_cityscapes_20211207_080709-07f6c333.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes/segformer_mit-b4_8x1_1024x1024_160k_cityscapes_20211207_080709.log.json + Paper: + Title: 'SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers' + URL: https://arxiv.org/abs/2105.15203 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mit.py#L246 + Framework: PyTorch +- Name: segformer_mit-b5_8xb1-160k_cityscapes-1024x1024 + In Collection: Segformer + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 82.25 + mIoU(ms+flip): 83.48 + Config: configs/segformer/segformer_mit-b5_8xb1-160k_cityscapes-1024x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - MIT-B5 + - Segformer + Training Resources: 8x V100 GPUS + Memory (GB): 18.0 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes/segformer_mit-b5_8x1_1024x1024_160k_cityscapes_20211206_072934-87a052ec.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes/segformer_mit-b5_8x1_1024x1024_160k_cityscapes_20211206_072934.log.json + Paper: + Title: 'SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers' + URL: https://arxiv.org/abs/2105.15203 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mit.py#L246 + Framework: PyTorch diff --git a/configs/segformer/segformer.yml b/configs/segformer/segformer.yml deleted file mode 100644 index 4a3818e16e..0000000000 --- a/configs/segformer/segformer.yml +++ /dev/null @@ -1,303 +0,0 @@ -Collections: -- Name: Segformer - Metadata: - Training Data: - - ADE20K - - Cityscapes - Paper: - URL: https://arxiv.org/abs/2105.15203 - Title: 'SegFormer: Simple and Efficient Design for Semantic Segmentation with - Transformers' - README: configs/segformer/README.md - Code: - URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mit.py#L246 - Version: v0.17.0 - Converted From: - Code: https://github.com/NVlabs/SegFormer -Models: -- Name: segformer_mit-b0_8xb2-160k_ade20k-512x512 - In Collection: Segformer - Metadata: - backbone: MIT-B0 - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 19.49 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 2.1 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 37.41 - mIoU(ms+flip): 38.34 - Config: configs/segformer/segformer_mit-b0_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_512x512_160k_ade20k/segformer_mit-b0_512x512_160k_ade20k_20210726_101530-8ffa8fda.pth -- Name: segformer_mit-b1_8xb2-160k_ade20k-512x512 - In 
Collection: Segformer - Metadata: - backbone: MIT-B1 - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 20.98 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 2.6 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 40.97 - mIoU(ms+flip): 42.54 - Config: configs/segformer/segformer_mit-b1_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_512x512_160k_ade20k/segformer_mit-b1_512x512_160k_ade20k_20210726_112106-d70e859d.pth -- Name: segformer_mit-b2_8xb2-160k_ade20k-512x512 - In Collection: Segformer - Metadata: - backbone: MIT-B2 - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 32.38 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 3.6 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 45.58 - mIoU(ms+flip): 47.03 - Config: configs/segformer/segformer_mit-b2_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_512x512_160k_ade20k/segformer_mit-b2_512x512_160k_ade20k_20210726_112103-cbd414ac.pth -- Name: segformer_mit-b3_8xb2-160k_ade20k-512x512 - In Collection: Segformer - Metadata: - backbone: MIT-B3 - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 45.23 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 4.8 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 47.82 - mIoU(ms+flip): 48.81 - Config: configs/segformer/segformer_mit-b3_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_512x512_160k_ade20k/segformer_mit-b3_512x512_160k_ade20k_20210726_081410-962b98d2.pth -- Name: segformer_mit-b4_8xb2-160k_ade20k-512x512 - In Collection: Segformer - Metadata: - backbone: MIT-B4 - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 64.72 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 6.1 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 48.46 - mIoU(ms+flip): 49.76 - Config: configs/segformer/segformer_mit-b4_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_512x512_160k_ade20k/segformer_mit-b4_512x512_160k_ade20k_20210728_183055-7f509d7d.pth -- Name: segformer_mit-b5_8xb2-160k_ade20k-512x512 - In Collection: Segformer - Metadata: - backbone: MIT-B5 - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 84.1 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 7.2 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 49.13 - mIoU(ms+flip): 50.22 - Config: configs/segformer/segformer_mit-b5_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_512x512_160k_ade20k/segformer_mit-b5_512x512_160k_ade20k_20210726_145235-94cedf59.pth -- Name: segformer_mit-b5_8xb2-160k_ade20k-640x640 - In Collection: Segformer - Metadata: - backbone: MIT-B5 - crop size: (640,640) - lr schd: 160000 - inference time (ms/im): - - value: 88.5 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: 
(640,640) - Training Memory (GB): 11.5 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 49.62 - mIoU(ms+flip): 50.36 - Config: configs/segformer/segformer_mit-b5_8xb2-160k_ade20k-640x640.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_640x640_160k_ade20k/segformer_mit-b5_640x640_160k_ade20k_20210801_121243-41d2845b.pth -- Name: segformer_mit-b0_8xb1-160k_cityscapes-1024x1024 - In Collection: Segformer - Metadata: - backbone: MIT-B0 - crop size: (1024,1024) - lr schd: 160000 - inference time (ms/im): - - value: 210.97 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (1024,1024) - Training Memory (GB): 3.64 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 76.54 - mIoU(ms+flip): 78.22 - Config: configs/segformer/segformer_mit-b0_8xb1-160k_cityscapes-1024x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes/segformer_mit-b0_8x1_1024x1024_160k_cityscapes_20211208_101857-e7f88502.pth -- Name: segformer_mit-b1_8xb1-160k_cityscapes-1024x1024 - In Collection: Segformer - Metadata: - backbone: MIT-B1 - crop size: (1024,1024) - lr schd: 160000 - inference time (ms/im): - - value: 232.56 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (1024,1024) - Training Memory (GB): 4.49 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.56 - mIoU(ms+flip): 79.73 - Config: configs/segformer/segformer_mit-b1_8xb1-160k_cityscapes-1024x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes/segformer_mit-b1_8x1_1024x1024_160k_cityscapes_20211208_064213-655c7b3f.pth -- Name: segformer_mit-b2_8xb1-160k_cityscapes-1024x1024 - In Collection: Segformer - Metadata: - backbone: MIT-B2 - crop size: (1024,1024) - lr schd: 160000 - inference time (ms/im): - - value: 297.62 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (1024,1024) - Training Memory (GB): 7.42 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 81.08 - mIoU(ms+flip): 82.18 - Config: configs/segformer/segformer_mit-b2_8xb1-160k_cityscapes-1024x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes/segformer_mit-b2_8x1_1024x1024_160k_cityscapes_20211207_134205-6096669a.pth -- Name: segformer_mit-b3_8xb1-160k_cityscapes-1024x1024 - In Collection: Segformer - Metadata: - backbone: MIT-B3 - crop size: (1024,1024) - lr schd: 160000 - inference time (ms/im): - - value: 395.26 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (1024,1024) - Training Memory (GB): 10.86 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 81.94 - mIoU(ms+flip): 83.14 - Config: configs/segformer/segformer_mit-b3_8xb1-160k_cityscapes-1024x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes/segformer_mit-b3_8x1_1024x1024_160k_cityscapes_20211206_224823-a8f8a177.pth -- Name: segformer_mit-b4_8xb1-160k_cityscapes-1024x1024 - In Collection: Segformer - Metadata: - backbone: MIT-B4 - crop size: (1024,1024) - lr schd: 160000 - inference time (ms/im): - - value: 531.91 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (1024,1024) - Training Memory (GB): 
15.07 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 81.89 - mIoU(ms+flip): 83.38 - Config: configs/segformer/segformer_mit-b4_8xb1-160k_cityscapes-1024x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes/segformer_mit-b4_8x1_1024x1024_160k_cityscapes_20211207_080709-07f6c333.pth -- Name: segformer_mit-b5_8xb1-160k_cityscapes-1024x1024 - In Collection: Segformer - Metadata: - backbone: MIT-B5 - crop size: (1024,1024) - lr schd: 160000 - inference time (ms/im): - - value: 719.42 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (1024,1024) - Training Memory (GB): 18.0 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 82.25 - mIoU(ms+flip): 83.48 - Config: configs/segformer/segformer_mit-b5_8xb1-160k_cityscapes-1024x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes/segformer_mit-b5_8x1_1024x1024_160k_cityscapes_20211206_072934-87a052ec.pth diff --git a/configs/segmenter/README.md b/configs/segmenter/README.md index 984ef9f510..103b125472 100644 --- a/configs/segmenter/README.md +++ b/configs/segmenter/README.md @@ -1,6 +1,6 @@ # Segmenter -[Segmenter: Transformer for Semantic Segmentation](https://arxiv.org/abs/2105.05633) +> [Segmenter: Transformer for Semantic Segmentation](https://arxiv.org/abs/2105.05633) ## Introduction @@ -22,16 +22,6 @@ Image segmentation is often ambiguous at the level of individual image patches a -```bibtex -@inproceedings{strudel2021segmenter, - title={Segmenter: Transformer for semantic segmentation}, - author={Strudel, Robin and Garcia, Ricardo and Laptev, Ivan and Schmid, Cordelia}, - booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision}, - pages={7262--7272}, - year={2021} -} -``` - ## Usage We have provided pretrained models converted from [ViT-AugReg](https://github.com/rwightman/pytorch-image-models/blob/f55c22bebf9d8afc449d317a723231ef72e0d662/timm/models/vision_transformer.py#L54-L106). 
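For reference, a converted checkpoint is typically wired into a config through the backbone's `init_cfg`, the same `Pretrained` pattern the SegNeXt configs later in this diff use. A minimal, hypothetical sketch (the local path is an assumption; substitute whatever `STORE_PATH` you converted to):

```python
# Hypothetical config excerpt: point a backbone at a checkpoint converted
# from ViT-AugReg. 'pretrain/vit_base_p16_384.pth' is an illustrative path.
checkpoint = 'pretrain/vit_base_p16_384.pth'

model = dict(
    backbone=dict(
        init_cfg=dict(type='Pretrained', checkpoint=checkpoint)))
```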
@@ -54,21 +44,33 @@ This script convert model from `PRETRAIN_PATH` and store the converted model in In our default setting, pretrained models and their corresponding [ViT-AugReg](https://github.com/rwightman/pytorch-image-models/blob/f55c22bebf9d8afc449d317a723231ef72e0d662/timm/models/vision_transformer.py#L54-L106) models could be defined below: -| pretrained models | original models | -| --------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| vit_tiny_p16_384.pth | ['vit_tiny_patch16_384'](https://storage.googleapis.com/vit_models/augreg/Ti_16-i21k-300ep-lr_0.001-aug_none-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npz) | -| vit_small_p16_384.pth | ['vit_small_patch16_384'](https://storage.googleapis.com/vit_models/augreg/S_16-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npz) | -| vit_base_p16_384.pth | ['vit_base_patch16_384'](https://storage.googleapis.com/vit_models/augreg/B_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_384.npz) | -| vit_large_p16_384.pth | ['vit_large_patch16_384'](https://storage.googleapis.com/vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz) | +| pretrained models | original models | +| --------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| vit_tiny_p16_384.pth | [vit_tiny_patch16_384](https://storage.googleapis.com/vit_models/augreg/Ti_16-i21k-300ep-lr_0.001-aug_none-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npz) | +| vit_small_p16_384.pth | [vit_small_patch16_384](https://storage.googleapis.com/vit_models/augreg/S_16-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npz) | +| vit_base_p16_384.pth | [vit_base_patch16_384](https://storage.googleapis.com/vit_models/augreg/B_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_384.npz) | +| vit_large_p16_384.pth | [vit_large_patch16_384](https://storage.googleapis.com/vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz) | ## Results and models ### ADE20K -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ---------------- | -------- | --------- | ------- | -------- | -------------- | ----- | ------------- | -------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| Segmenter Mask | ViT-T_16 | 512x512 | 160000 | 1.21 | 27.98 | 39.99 | 40.83 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/segmenter/segmenter_vit-t_mask_8xb1-160k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-t_mask_8x1_512x512_160k_ade20k/segmenter_vit-t_mask_8x1_512x512_160k_ade20k_20220105_151706-ffcf7509.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-t_mask_8x1_512x512_160k_ade20k/segmenter_vit-t_mask_8x1_512x512_160k_ade20k_20220105_151706.log.json) | -| Segmenter Linear | ViT-S_16 | 512x512 | 160000 | 1.78 | 28.07 | 45.75 | 46.82 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/segmenter/segmenter_vit-s_fcn_8xb1-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-s_linear_8x1_512x512_160k_ade20k/segmenter_vit-s_linear_8x1_512x512_160k_ade20k_20220105_151713-39658c46.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-s_linear_8x1_512x512_160k_ade20k/segmenter_vit-s_linear_8x1_512x512_160k_ade20k_20220105_151713.log.json) | -| Segmenter Mask | ViT-S_16 | 512x512 | 160000 | 2.03 | 24.80 | 46.19 | 47.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/segmenter/segmenter_vit-s_mask_8xb1-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-s_mask_8x1_512x512_160k_ade20k/segmenter_vit-s_mask_8x1_512x512_160k_ade20k_20220105_151706-511bb103.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-s_mask_8x1_512x512_160k_ade20k/segmenter_vit-s_mask_8x1_512x512_160k_ade20k_20220105_151706.log.json) | -| Segmenter Mask | ViT-B_16 | 512x512 | 160000 | 4.20 | 13.20 | 49.60 | 51.07 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/segmenter/segmenter_vit-b_mask_8xb1-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-b_mask_8x1_512x512_160k_ade20k/segmenter_vit-b_mask_8x1_512x512_160k_ade20k_20220105_151706-bc533b08.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-b_mask_8x1_512x512_160k_ade20k/segmenter_vit-b_mask_8x1_512x512_160k_ade20k_20220105_151706.log.json) | -| Segmenter Mask | ViT-L_16 | 640x640 | 160000 | 16.56 | 2.62 | 52.16 | 53.65 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/segmenter/segmenter_vit-l_mask_8xb1-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-l_mask_8x1_512x512_160k_ade20k/segmenter_vit-l_mask_8x1_512x512_160k_ade20k_20220105_162750-7ef345be.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-l_mask_8x1_512x512_160k_ade20k/segmenter_vit-l_mask_8x1_512x512_160k_ade20k_20220105_162750.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------------- | -------- | --------- | ------- | -------- | -------------- | ------ | ----- | ------------- | ----------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Segmenter Mask | ViT-T_16 
| 512x512 | 160000 | 1.21 | 27.98 | V100 | 39.99 | 40.83 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segmenter/segmenter_vit-t_mask_8xb1-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-t_mask_8x1_512x512_160k_ade20k/segmenter_vit-t_mask_8x1_512x512_160k_ade20k_20220105_151706-ffcf7509.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-t_mask_8x1_512x512_160k_ade20k/segmenter_vit-t_mask_8x1_512x512_160k_ade20k_20220105_151706.log.json) | +| Segmenter Linear | ViT-S_16 | 512x512 | 160000 | 1.78 | 28.07 | V100 | 45.75 | 46.82 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segmenter/segmenter_vit-s_fcn_8xb1-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-s_linear_8x1_512x512_160k_ade20k/segmenter_vit-s_linear_8x1_512x512_160k_ade20k_20220105_151713-39658c46.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-s_linear_8x1_512x512_160k_ade20k/segmenter_vit-s_linear_8x1_512x512_160k_ade20k_20220105_151713.log.json) | +| Segmenter Mask | ViT-S_16 | 512x512 | 160000 | 2.03 | 24.80 | V100 | 46.19 | 47.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segmenter/segmenter_vit-s_mask_8xb1-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-s_mask_8x1_512x512_160k_ade20k/segmenter_vit-s_mask_8x1_512x512_160k_ade20k_20220105_151706-511bb103.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-s_mask_8x1_512x512_160k_ade20k/segmenter_vit-s_mask_8x1_512x512_160k_ade20k_20220105_151706.log.json) | +| Segmenter Mask | ViT-B_16 | 512x512 | 160000 | 4.20 | 13.20 | V100 | 49.60 | 51.07 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segmenter/segmenter_vit-b_mask_8xb1-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-b_mask_8x1_512x512_160k_ade20k/segmenter_vit-b_mask_8x1_512x512_160k_ade20k_20220105_151706-bc533b08.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-b_mask_8x1_512x512_160k_ade20k/segmenter_vit-b_mask_8x1_512x512_160k_ade20k_20220105_151706.log.json) | +| Segmenter Mask | ViT-L_16 | 640x640 | 160000 | 16.56 | 2.62 | V100 | 52.16 | 53.65 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segmenter/segmenter_vit-l_mask_8xb1-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-l_mask_8x1_512x512_160k_ade20k/segmenter_vit-l_mask_8x1_512x512_160k_ade20k_20220105_162750-7ef345be.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-l_mask_8x1_512x512_160k_ade20k/segmenter_vit-l_mask_8x1_512x512_160k_ade20k_20220105_162750.log.json) | + +## Citation + +```bibtex +@inproceedings{strudel2021segmenter, + title={Segmenter: Transformer for semantic segmentation}, + author={Strudel, Robin and Garcia, Ricardo and Laptev, Ivan and Schmid, Cordelia}, + booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision}, + pages={7262--7272}, + year={2021} +} +``` diff --git a/configs/segmenter/metafile.yaml b/configs/segmenter/metafile.yaml new file mode 100644 index 0000000000..ff2aa448bb --- /dev/null +++ b/configs/segmenter/metafile.yaml @@ -0,0 +1,138 @@ +Collections: +- Name: Segmenter + 
License: Apache License 2.0 + Metadata: + Training Data: + - ADE20K + Paper: + Title: 'Segmenter: Transformer for Semantic Segmentation' + URL: https://arxiv.org/abs/2105.05633 + README: configs/segmenter/README.md + Frameworks: + - PyTorch +Models: +- Name: segmenter_vit-t_mask_8xb1-160k_ade20k-512x512 + In Collection: Segmenter + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 39.99 + mIoU(ms+flip): 40.83 + Config: configs/segmenter/segmenter_vit-t_mask_8xb1-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 8 + Architecture: + - ViT-T_16 + - Segmenter + - Mask + Training Resources: 8x V100 GPUS + Memory (GB): 1.21 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-t_mask_8x1_512x512_160k_ade20k/segmenter_vit-t_mask_8x1_512x512_160k_ade20k_20220105_151706-ffcf7509.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-t_mask_8x1_512x512_160k_ade20k/segmenter_vit-t_mask_8x1_512x512_160k_ade20k_20220105_151706.log.json + Paper: + Title: 'Segmenter: Transformer for Semantic Segmentation' + URL: https://arxiv.org/abs/2105.05633 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.21.0/mmseg/models/decode_heads/segmenter_mask_head.py#L15 + Framework: PyTorch +- Name: segmenter_vit-s_fcn_8xb1-160k_ade20k-512x512 + In Collection: Segmenter + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.75 + mIoU(ms+flip): 46.82 + Config: configs/segmenter/segmenter_vit-s_fcn_8xb1-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 8 + Architecture: + - ViT-S_16 + - Segmenter + - Linear + Training Resources: 8x V100 GPUS + Memory (GB): 1.78 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-s_linear_8x1_512x512_160k_ade20k/segmenter_vit-s_linear_8x1_512x512_160k_ade20k_20220105_151713-39658c46.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-s_linear_8x1_512x512_160k_ade20k/segmenter_vit-s_linear_8x1_512x512_160k_ade20k_20220105_151713.log.json + Paper: + Title: 'Segmenter: Transformer for Semantic Segmentation' + URL: https://arxiv.org/abs/2105.05633 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.21.0/mmseg/models/decode_heads/segmenter_mask_head.py#L15 + Framework: PyTorch +- Name: segmenter_vit-s_mask_8xb1-160k_ade20k-512x512 + In Collection: Segmenter + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 46.19 + mIoU(ms+flip): 47.85 + Config: configs/segmenter/segmenter_vit-s_mask_8xb1-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 8 + Architecture: + - ViT-S_16 + - Segmenter + - Mask + Training Resources: 8x V100 GPUS + Memory (GB): 2.03 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-s_mask_8x1_512x512_160k_ade20k/segmenter_vit-s_mask_8x1_512x512_160k_ade20k_20220105_151706-511bb103.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-s_mask_8x1_512x512_160k_ade20k/segmenter_vit-s_mask_8x1_512x512_160k_ade20k_20220105_151706.log.json + Paper: + Title: 'Segmenter: Transformer for Semantic Segmentation' + URL: https://arxiv.org/abs/2105.05633 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.21.0/mmseg/models/decode_heads/segmenter_mask_head.py#L15 + Framework: PyTorch +- Name: segmenter_vit-b_mask_8xb1-160k_ade20k-512x512 + In Collection: Segmenter + Results: + Task: Semantic Segmentation + 
Dataset: ADE20K + Metrics: + mIoU: 49.6 + mIoU(ms+flip): 51.07 + Config: configs/segmenter/segmenter_vit-b_mask_8xb1-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 8 + Architecture: + - ViT-B_16 + - Segmenter + - Mask + Training Resources: 8x V100 GPUS + Memory (GB): 4.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-b_mask_8x1_512x512_160k_ade20k/segmenter_vit-b_mask_8x1_512x512_160k_ade20k_20220105_151706-bc533b08.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-b_mask_8x1_512x512_160k_ade20k/segmenter_vit-b_mask_8x1_512x512_160k_ade20k_20220105_151706.log.json + Paper: + Title: 'Segmenter: Transformer for Semantic Segmentation' + URL: https://arxiv.org/abs/2105.05633 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.21.0/mmseg/models/decode_heads/segmenter_mask_head.py#L15 + Framework: PyTorch +- Name: segmenter_vit-l_mask_8xb1-160k_ade20k-512x512 + In Collection: Segmenter + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 52.16 + mIoU(ms+flip): 53.65 + Config: configs/segmenter/segmenter_vit-l_mask_8xb1-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 8 + Architecture: + - ViT-L_16 + - Segmenter + - Mask + Training Resources: 8x V100 GPUS + Memory (GB): 16.56 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-l_mask_8x1_512x512_160k_ade20k/segmenter_vit-l_mask_8x1_512x512_160k_ade20k_20220105_162750-7ef345be.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-l_mask_8x1_512x512_160k_ade20k/segmenter_vit-l_mask_8x1_512x512_160k_ade20k_20220105_162750.log.json + Paper: + Title: 'Segmenter: Transformer for Semantic Segmentation' + URL: https://arxiv.org/abs/2105.05633 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.21.0/mmseg/models/decode_heads/segmenter_mask_head.py#L15 + Framework: PyTorch diff --git a/configs/segmenter/segmenter.yml b/configs/segmenter/segmenter.yml deleted file mode 100644 index 1069f003b7..0000000000 --- a/configs/segmenter/segmenter.yml +++ /dev/null @@ -1,125 +0,0 @@ -Collections: -- Name: Segmenter - Metadata: - Training Data: - - ADE20K - Paper: - URL: https://arxiv.org/abs/2105.05633 - Title: 'Segmenter: Transformer for Semantic Segmentation' - README: configs/segmenter/README.md - Code: - URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.21.0/mmseg/models/decode_heads/segmenter_mask_head.py#L15 - Version: v0.21.0 - Converted From: - Code: https://github.com/rstrudel/segmenter -Models: -- Name: segmenter_vit-t_mask_8xb1-160k_ade20k-512x512 - In Collection: Segmenter - Metadata: - backbone: ViT-T_16 - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 35.74 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 1.21 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 39.99 - mIoU(ms+flip): 40.83 - Config: configs/segmenter/segmenter_vit-t_mask_8xb1-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-t_mask_8x1_512x512_160k_ade20k/segmenter_vit-t_mask_8x1_512x512_160k_ade20k_20220105_151706-ffcf7509.pth -- Name: segmenter_vit-s_fcn_8xb1-160k_ade20k-512x512 - In Collection: Segmenter - Metadata: - backbone: ViT-S_16 - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 35.63 - hardware: V100 - backend: 
PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 1.78 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 45.75 - mIoU(ms+flip): 46.82 - Config: configs/segmenter/segmenter_vit-s_fcn_8xb1-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-s_linear_8x1_512x512_160k_ade20k/segmenter_vit-s_linear_8x1_512x512_160k_ade20k_20220105_151713-39658c46.pth -- Name: segmenter_vit-s_mask_8xb1-160k_ade20k-512x512 - In Collection: Segmenter - Metadata: - backbone: ViT-S_16 - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 40.32 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 2.03 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 46.19 - mIoU(ms+flip): 47.85 - Config: configs/segmenter/segmenter_vit-s_mask_8xb1-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-s_mask_8x1_512x512_160k_ade20k/segmenter_vit-s_mask_8x1_512x512_160k_ade20k_20220105_151706-511bb103.pth -- Name: segmenter_vit-b_mask_8xb1-160k_ade20k-512x512 - In Collection: Segmenter - Metadata: - backbone: ViT-B_16 - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 75.76 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 4.2 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 49.6 - mIoU(ms+flip): 51.07 - Config: configs/segmenter/segmenter_vit-b_mask_8xb1-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-b_mask_8x1_512x512_160k_ade20k/segmenter_vit-b_mask_8x1_512x512_160k_ade20k_20220105_151706-bc533b08.pth -- Name: segmenter_vit-l_mask_8xb1-160k_ade20k-512x512 - In Collection: Segmenter - Metadata: - backbone: ViT-L_16 - crop size: (640,640) - lr schd: 160000 - inference time (ms/im): - - value: 381.68 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (640,640) - Training Memory (GB): 16.56 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 52.16 - mIoU(ms+flip): 53.65 - Config: configs/segmenter/segmenter_vit-l_mask_8xb1-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-l_mask_8x1_512x512_160k_ade20k/segmenter_vit-l_mask_8x1_512x512_160k_ade20k_20220105_162750-7ef345be.pth diff --git a/configs/segnext/README.md b/configs/segnext/README.md new file mode 100644 index 0000000000..d7434a0621 --- /dev/null +++ b/configs/segnext/README.md @@ -0,0 +1,63 @@ +# SegNeXt + +> [SegNeXt: Rethinking Convolutional Attention Design for Semantic Segmentation](https://arxiv.org/abs/2209.08575) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +We present SegNeXt, a simple convolutional network architecture for semantic segmentation. Recent transformer-based models have dominated the field of semantic segmentation due to the efficiency of self-attention in encoding spatial information. In this paper, we show that convolutional attention is a more efficient and effective way to encode contextual information than the self-attention mechanism in transformers. By re-examining the characteristics owned by successful segmentation models, we discover several key components leading to the performance improvement of segmentation models. 
This motivates us to design a novel convolutional attention network that uses cheap convolutional operations. Without bells and whistles, our SegNeXt significantly improves the performance of previous state-of-the-art methods on popular benchmarks, including ADE20K, Cityscapes, COCO-Stuff, Pascal VOC, Pascal Context, and iSAID. Notably, SegNeXt outperforms EfficientNet-L2 w/ NAS-FPN and achieves 90.6% mIoU on the Pascal VOC 2012 test leaderboard using only 1/10 parameters of it. On average, SegNeXt achieves about 2.0% mIoU improvements compared to the state-of-the-art methods on the ADE20K datasets with the same or fewer computations. Code is available at [this https URL](https://github.com/uyzhang/JSeg) (Jittor) and [this https URL](https://github.com/Visual-Attention-Network/SegNeXt) (Pytorch). + + + +
+ +
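The convolutional attention the abstract describes is built from inexpensive depthwise convolutions: per the `attention_kernel_sizes=[5, [1, 7], [1, 11], [1, 21]]` setting in the MSCAN configs added below, a 5x5 local convolution feeds three pairs of 1xk/kx1 strip convolutions, and the summed result gates the input. A minimal PyTorch sketch of that pattern (an illustration under those assumptions, not the actual `MSCAN` module in `mmseg/models/backbones/mscan.py`):

```python
import torch
import torch.nn as nn


class MSCASketch(nn.Module):
    """Sketch of multi-scale convolutional attention: a 5x5 depthwise conv
    plus 1xk/kx1 depthwise strip-conv branches (k in {7, 11, 21}), mirroring
    the kernel sizes in the configs below. Simplified for illustration."""

    def __init__(self, channels: int) -> None:
        super().__init__()
        # Local context via a 5x5 depthwise convolution.
        self.local = nn.Conv2d(channels, channels, 5, padding=2,
                               groups=channels)
        # Depthwise strip convolutions approximate large square kernels cheaply.
        self.branches = nn.ModuleList([
            nn.Sequential(
                nn.Conv2d(channels, channels, (1, k), padding=(0, k // 2),
                          groups=channels),
                nn.Conv2d(channels, channels, (k, 1), padding=(k // 2, 0),
                          groups=channels),
            ) for k in (7, 11, 21)
        ])
        # A 1x1 conv mixes channels; the result gates the input as attention.
        self.mix = nn.Conv2d(channels, channels, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        attn = self.local(x)
        attn = attn + sum(branch(attn) for branch in self.branches)
        return self.mix(attn) * x


feat = torch.randn(1, 32, 64, 64)
assert MSCASketch(32)(feat).shape == feat.shape  # spatial shape is preserved
```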
+ +## Results and models + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------- | -------- | --------- | ------- | -------- | -------------- | ------ | ----- | ------------- | ----------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| SegNeXt | MSCAN-T | 512x512 | 160000 | 17.88 | 52.38 | A100 | 41.50 | 42.59 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segnext/segnext_mscan-t_1xb16-adamw-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segnext/segnext_mscan-t_1x16_512x512_adamw_160k_ade20k/segnext_mscan-t_1x16_512x512_adamw_160k_ade20k_20230210_140244-05bd8466.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segnext/segnext_mscan-t_1x16_512x512_adamw_160k_ade20k/segnext_mscan-t_1x16_512x512_adamw_160k_ade20k_20230210_140244.log.json) | +| SegNeXt | MSCAN-S | 512x512 | 160000 | 21.47 | 42.27 | A100 | 44.16 | 45.81 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segnext/segnext_mscan-s_1xb16-adamw-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segnext/segnext_mscan-s_1x16_512x512_adamw_160k_ade20k/segnext_mscan-s_1x16_512x512_adamw_160k_ade20k_20230214_113014-43013668.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segnext/segnext_mscan-s_1x16_512x512_adamw_160k_ade20k/segnext_mscan-s_1x16_512x512_adamw_160k_ade20k_20230214_113014.log.json) | +| SegNeXt | MSCAN-B | 512x512 | 160000 | 31.03 | 35.15 | A100 | 48.03 | 49.68 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segnext/segnext_mscan-b_1xb16-adamw-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segnext/segnext_mscan-b_1x16_512x512_adamw_160k_ade20k/segnext_mscan-b_1x16_512x512_adamw_160k_ade20k_20230209_172053-b6f6c70c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segnext/segnext_mscan-b_1x16_512x512_adamw_160k_ade20k/segnext_mscan-b_1x16_512x512_adamw_160k_ade20k_20230209_172053.log.json) | +| SegNeXt | MSCAN-L | 512x512 | 160000 | 43.32 | 22.91 | A100 | 50.99 | 52.10 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segnext/segnext_mscan-l_1xb16-adamw-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segnext/segnext_mscan-l_1x16_512x512_adamw_160k_ade20k/segnext_mscan-l_1x16_512x512_adamw_160k_ade20k_20230209_172055-19b14b63.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segnext/segnext_mscan-l_1x16_512x512_adamw_160k_ade20k/segnext_mscan-l_1x16_512x512_adamw_160k_ade20k_20230209_172055.log.json) | + +Note: + +- When we integrated SegNeXt into MMSegmentation, we modified some layers' names to make them more precise and concise without changing the model architecture. Therefore, the keys of pre-trained weights are different from the [original weights](https://cloud.tsinghua.edu.cn/d/c15b25a6745946618462/), but don't worry about these changes. 
We have converted them and uploaded the checkpoints; you can find the URLs of the pre-trained checkpoints in the config files and use them directly for training. + +- The total batch size is 16. We trained SegNeXt with a single GPU because performance degrades significantly with PyTorch 1.9's `SyncBN` (mainly in the `OverlapPatchEmbed` modules of `MSCAN`). + +- There will be subtle differences during model testing because the Non-negative Matrix Factorization (NMF) in `LightHamHead` is initialized randomly. To control this randomness, please set the random seed during model testing. You can modify [`./tools/test.py`](https://github.com/open-mmlab/mmsegmentation/blob/main/tools/test.py) like: + +```python +def main(): + from mmengine.runner import set_random_seed + random_seed = xxx # set random seed recorded in training log + set_random_seed(random_seed, deterministic=False) + ... +``` + +- The model's performance is sensitive to the seed value used; please refer to the log file for the specific seed setting. If you choose a different seed, the results might differ from those in the table. Taking SegNeXt Large as an example, its results range from 49.60 to 51.0. + +## Citation + +```bibtex +@article{guo2022segnext, + title={SegNeXt: Rethinking Convolutional Attention Design for Semantic Segmentation}, + author={Guo, Meng-Hao and Lu, Cheng-Ze and Hou, Qibin and Liu, Zhengning and Cheng, Ming-Ming and Hu, Shi-Min}, + journal={arXiv preprint arXiv:2209.08575}, + year={2022} +} +``` diff --git a/configs/segnext/metafile.yaml b/configs/segnext/metafile.yaml new file mode 100644 index 0000000000..3c8ff5bb92 --- /dev/null +++ b/configs/segnext/metafile.yaml @@ -0,0 +1,109 @@ +Collections: +- Name: SegNeXt + License: Apache License 2.0 + Metadata: + Training Data: + - ADE20K + Paper: + Title: 'SegNeXt: Rethinking Convolutional Attention Design for Semantic Segmentation' + URL: https://arxiv.org/abs/2209.08575 + README: configs/segnext/README.md + Frameworks: + - PyTorch +Models: +- Name: segnext_mscan-t_1xb16-adamw-160k_ade20k-512x512 + In Collection: SegNeXt + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.5 + mIoU(ms+flip): 42.59 + Config: configs/segnext/segnext_mscan-t_1xb16-adamw-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - MSCAN-T + - SegNeXt + Training Resources: 1x A100 GPUS + Memory (GB): 17.88 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segnext/segnext_mscan-t_1x16_512x512_adamw_160k_ade20k/segnext_mscan-t_1x16_512x512_adamw_160k_ade20k_20230210_140244-05bd8466.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segnext/segnext_mscan-t_1x16_512x512_adamw_160k_ade20k/segnext_mscan-t_1x16_512x512_adamw_160k_ade20k_20230210_140244.log.json + Paper: + Title: 'SegNeXt: Rethinking Convolutional Attention Design for Semantic Segmentation' + URL: https://arxiv.org/abs/2209.08575 + Code: https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/models/backbones/mscan.py#L328 + Framework: PyTorch +- Name: segnext_mscan-s_1xb16-adamw-160k_ade20k-512x512 + In Collection: SegNeXt + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 44.16 + mIoU(ms+flip): 45.81 + Config: configs/segnext/segnext_mscan-s_1xb16-adamw-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - MSCAN-S + - SegNeXt + Training Resources: 1x A100 GPUS + Memory (GB): 21.47 + Weights:
https://download.openmmlab.com/mmsegmentation/v0.5/segnext/segnext_mscan-s_1x16_512x512_adamw_160k_ade20k/segnext_mscan-s_1x16_512x512_adamw_160k_ade20k_20230214_113014-43013668.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segnext/segnext_mscan-s_1x16_512x512_adamw_160k_ade20k/segnext_mscan-s_1x16_512x512_adamw_160k_ade20k_20230214_113014.log.json + Paper: + Title: 'SegNeXt: Rethinking Convolutional Attention Design for Semantic Segmentation' + URL: https://arxiv.org/abs/2209.08575 + Code: https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/models/backbones/mscan.py#L328 + Framework: PyTorch +- Name: segnext_mscan-b_1xb16-adamw-160k_ade20k-512x512 + In Collection: SegNeXt + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 48.03 + mIoU(ms+flip): 49.68 + Config: configs/segnext/segnext_mscan-b_1xb16-adamw-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - MSCAN-B + - SegNeXt + Training Resources: 1x A100 GPUS + Memory (GB): 31.03 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segnext/segnext_mscan-b_1x16_512x512_adamw_160k_ade20k/segnext_mscan-b_1x16_512x512_adamw_160k_ade20k_20230209_172053-b6f6c70c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segnext/segnext_mscan-b_1x16_512x512_adamw_160k_ade20k/segnext_mscan-b_1x16_512x512_adamw_160k_ade20k_20230209_172053.log.json + Paper: + Title: 'SegNeXt: Rethinking Convolutional Attention Design for Semantic Segmentation' + URL: https://arxiv.org/abs/2209.08575 + Code: https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/models/backbones/mscan.py#L328 + Framework: PyTorch +- Name: segnext_mscan-l_1xb16-adamw-160k_ade20k-512x512 + In Collection: SegNeXt + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 50.99 + mIoU(ms+flip): 52.1 + Config: configs/segnext/segnext_mscan-l_1xb16-adamw-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - MSCAN-L + - SegNeXt + Training Resources: 1x A100 GPUS + Memory (GB): 43.32 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segnext/segnext_mscan-l_1x16_512x512_adamw_160k_ade20k/segnext_mscan-l_1x16_512x512_adamw_160k_ade20k_20230209_172055-19b14b63.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/segnext/segnext_mscan-l_1x16_512x512_adamw_160k_ade20k/segnext_mscan-l_1x16_512x512_adamw_160k_ade20k_20230209_172055.log.json + Paper: + Title: 'SegNeXt: Rethinking Convolutional Attention Design for Semantic Segmentation' + URL: https://arxiv.org/abs/2209.08575 + Code: https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/models/backbones/mscan.py#L328 + Framework: PyTorch diff --git a/configs/segnext/segnext_mscan-b_1xb16-adamw-160k_ade20k-512x512.py b/configs/segnext/segnext_mscan-b_1xb16-adamw-160k_ade20k-512x512.py new file mode 100644 index 0000000000..000f448483 --- /dev/null +++ b/configs/segnext/segnext_mscan-b_1xb16-adamw-160k_ade20k-512x512.py @@ -0,0 +1,28 @@ +_base_ = './segnext_mscan-t_1xb16-adamw-160k_ade20k-512x512.py' + +# model settings +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segnext/mscan_b_20230227-3ab7d230.pth' # noqa +ham_norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + embed_dims=[64, 128, 320, 512], + depths=[3, 3, 12, 3], + init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), + drop_path_rate=0.1, + 
norm_cfg=dict(type='BN', requires_grad=True)), + decode_head=dict( + type='LightHamHead', + in_channels=[128, 320, 512], + in_index=[1, 2, 3], + channels=512, + ham_channels=512, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=ham_norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/segnext/segnext_mscan-l_1xb16-adamw-160k_ade20k-512x512.py b/configs/segnext/segnext_mscan-l_1xb16-adamw-160k_ade20k-512x512.py new file mode 100644 index 0000000000..212d0a8557 --- /dev/null +++ b/configs/segnext/segnext_mscan-l_1xb16-adamw-160k_ade20k-512x512.py @@ -0,0 +1,27 @@ +_base_ = './segnext_mscan-t_1xb16-adamw-160k_ade20k-512x512.py' +# model settings +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segnext/mscan_l_20230227-cef260d4.pth' # noqa +ham_norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + embed_dims=[64, 128, 320, 512], + depths=[3, 5, 27, 3], + init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), + drop_path_rate=0.3, + norm_cfg=dict(type='BN', requires_grad=True)), + decode_head=dict( + type='LightHamHead', + in_channels=[128, 320, 512], + in_index=[1, 2, 3], + channels=1024, + ham_channels=1024, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=ham_norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/segnext/segnext_mscan-s_1xb16-adamw-160k_ade20k-512x512.py b/configs/segnext/segnext_mscan-s_1xb16-adamw-160k_ade20k-512x512.py new file mode 100644 index 0000000000..9a90779a60 --- /dev/null +++ b/configs/segnext/segnext_mscan-s_1xb16-adamw-160k_ade20k-512x512.py @@ -0,0 +1,27 @@ +_base_ = './segnext_mscan-t_1xb16-adamw-160k_ade20k-512x512.py' +# model settings +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segnext/mscan_s_20230227-f33ccdf2.pth' # noqa +ham_norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + embed_dims=[64, 128, 320, 512], + depths=[2, 2, 4, 2], + init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), + norm_cfg=dict(type='BN', requires_grad=True)), + decode_head=dict( + type='LightHamHead', + in_channels=[128, 320, 512], + in_index=[1, 2, 3], + channels=256, + ham_channels=256, + ham_kwargs=dict(MD_R=16), + dropout_ratio=0.1, + num_classes=150, + norm_cfg=ham_norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/segnext/segnext_mscan-t_1xb16-adamw-160k_ade20k-512x512.py b/configs/segnext/segnext_mscan-t_1xb16-adamw-160k_ade20k-512x512.py new file mode 100644 index 0000000000..c8d6da85ff --- /dev/null +++ b/configs/segnext/segnext_mscan-t_1xb16-adamw-160k_ade20k-512x512.py @@ -0,0 +1,84 @@ +_base_ = [ + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py', + '../_base_/datasets/ade20k.py' +] +# model settings +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segnext/mscan_t_20230227-119e8c9f.pth' # noqa +ham_norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +crop_size = (512, 
512) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255, + size=(512, 512), + test_cfg=dict(size_divisor=32)) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained=None, + backbone=dict( + type='MSCAN', + init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), + embed_dims=[32, 64, 160, 256], + mlp_ratios=[8, 8, 4, 4], + drop_rate=0.0, + drop_path_rate=0.1, + depths=[3, 3, 5, 2], + attention_kernel_sizes=[5, [1, 7], [1, 11], [1, 21]], + attention_kernel_paddings=[2, [0, 3], [0, 5], [0, 10]], + act_cfg=dict(type='GELU'), + norm_cfg=dict(type='BN', requires_grad=True)), + decode_head=dict( + type='LightHamHead', + in_channels=[64, 160, 256], + in_index=[1, 2, 3], + channels=256, + ham_channels=256, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=ham_norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ham_kwargs=dict( + MD_S=1, + MD_R=16, + train_steps=6, + eval_steps=7, + inv_t=100, + rand_init=True)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) + +# dataset settings +train_dataloader = dict(batch_size=16) + +# optimizer +optim_wrapper = dict( + _delete_=True, + type='OptimWrapper', + optimizer=dict( + type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), + paramwise_cfg=dict( + custom_keys={ + 'pos_block': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.), + 'head': dict(lr_mult=10.) + })) + +param_scheduler = [ + dict( + type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), + dict( + type='PolyLR', + power=1.0, + begin=1500, + end=160000, + eta_min=0.0, + by_epoch=False, + ) +] diff --git a/configs/sem_fpn/README.md b/configs/sem_fpn/README.md index fcef72d2a5..697cf506e2 100644 --- a/configs/sem_fpn/README.md +++ b/configs/sem_fpn/README.md @@ -1,6 +1,6 @@ # Semantic FPN -[Panoptic Feature Pyramid Networks](https://arxiv.org/abs/1901.02446) +> [Panoptic Feature Pyramid Networks](https://arxiv.org/abs/1901.02446) ## Introduction @@ -22,6 +22,22 @@ The recently introduced panoptic segmentation task has renewed our community's i +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------: | -------------- | ------ | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FPN | R-50 | 512x1024 | 80000 | 2.8 | 13.54 | V100 | 74.52 | 76.08 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/sem_fpn/fpn_r50_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x1024_80k_cityscapes/fpn_r50_512x1024_80k_cityscapes_20200717_021437-94018a0d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x1024_80k_cityscapes/fpn_r50_512x1024_80k_cityscapes-20200717_021437.log.json) | +| FPN | R-101 | 512x1024 | 80000 | 3.9 | 
10.29 | V100 | 75.80 | 77.40 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/sem_fpn/fpn_r101_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x1024_80k_cityscapes/fpn_r101_512x1024_80k_cityscapes_20200717_012416-c5800d4c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x1024_80k_cityscapes/fpn_r101_512x1024_80k_cityscapes-20200717_012416.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------: | -------------- | ------ | ----: | ------------- | --------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| FPN | R-50 | 512x512 | 160000 | 4.9 | 55.77 | V100 | 37.49 | 39.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/sem_fpn/fpn_r50_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x512_160k_ade20k/fpn_r50_512x512_160k_ade20k_20200718_131734-5b5a6ab9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x512_160k_ade20k/fpn_r50_512x512_160k_ade20k-20200718_131734.log.json) | +| FPN | R-101 | 512x512 | 160000 | 5.9 | 40.58 | V100 | 39.35 | 40.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/sem_fpn/fpn_r101_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x512_160k_ade20k/fpn_r101_512x512_160k_ade20k_20200718_131734-306b5004.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x512_160k_ade20k/fpn_r101_512x512_160k_ade20k-20200718_131734.log.json) | + ## Citation ```bibtex @@ -33,19 +49,3 @@ The recently introduced panoptic segmentation task has renewed our community's i year={2019} } ``` - -## Results and models - -### Cityscapes - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | ---------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| FPN | R-50 | 512x1024 | 80000 | 2.8 | 13.54 | 74.52 | 76.08 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/sem_fpn/fpn_r50_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x1024_80k_cityscapes/fpn_r50_512x1024_80k_cityscapes_20200717_021437-94018a0d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x1024_80k_cityscapes/fpn_r50_512x1024_80k_cityscapes-20200717_021437.log.json) | -| FPN | R-101 | 512x1024 | 80000 | 
3.9 | 10.29 | 75.80 | 77.40 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/sem_fpn/fpn_r101_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x1024_80k_cityscapes/fpn_r101_512x1024_80k_cityscapes_20200717_012416-c5800d4c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x1024_80k_cityscapes/fpn_r101_512x1024_80k_cityscapes-20200717_012416.log.json) | - -### ADE20K - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| FPN | R-50 | 512x512 | 160000 | 4.9 | 55.77 | 37.49 | 39.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/sem_fpn/fpn_r50_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x512_160k_ade20k/fpn_r50_512x512_160k_ade20k_20200718_131734-5b5a6ab9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x512_160k_ade20k/fpn_r50_512x512_160k_ade20k-20200718_131734.log.json) | -| FPN | R-101 | 512x512 | 160000 | 5.9 | 40.58 | 39.35 | 40.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/sem_fpn/fpn_r101_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x512_160k_ade20k/fpn_r101_512x512_160k_ade20k_20200718_131734-306b5004.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x512_160k_ade20k/fpn_r101_512x512_160k_ade20k-20200718_131734.log.json) | diff --git a/configs/sem_fpn/metafile.yaml b/configs/sem_fpn/metafile.yaml new file mode 100644 index 0000000000..e734897245 --- /dev/null +++ b/configs/sem_fpn/metafile.yaml @@ -0,0 +1,110 @@ +Collections: +- Name: FPN + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + Paper: + Title: Panoptic Feature Pyramid Networks + URL: https://arxiv.org/abs/1901.02446 + README: configs/sem_fpn/README.md + Frameworks: + - PyTorch +Models: +- Name: fpn_r50_4xb2-80k_cityscapes-512x1024 + In Collection: FPN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.52 + mIoU(ms+flip): 76.08 + Config: configs/sem_fpn/fpn_r50_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50 + - FPN + Training Resources: 4x V100 GPUS + Memory (GB): 2.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x1024_80k_cityscapes/fpn_r50_512x1024_80k_cityscapes_20200717_021437-94018a0d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x1024_80k_cityscapes/fpn_r50_512x1024_80k_cityscapes-20200717_021437.log.json + Paper: + Title: Panoptic Feature Pyramid Networks + URL: https://arxiv.org/abs/1901.02446 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fpn_head.py#L12 + Framework: PyTorch +- Name: 
fpn_r101_4xb2-80k_cityscapes-512x1024 + In Collection: FPN + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.8 + mIoU(ms+flip): 77.4 + Config: configs/sem_fpn/fpn_r101_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101 + - FPN + Training Resources: 4x V100 GPUS + Memory (GB): 3.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x1024_80k_cityscapes/fpn_r101_512x1024_80k_cityscapes_20200717_012416-c5800d4c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x1024_80k_cityscapes/fpn_r101_512x1024_80k_cityscapes-20200717_012416.log.json + Paper: + Title: Panoptic Feature Pyramid Networks + URL: https://arxiv.org/abs/1901.02446 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fpn_head.py#L12 + Framework: PyTorch +- Name: fpn_r50_4xb4-160k_ade20k-512x512 + In Collection: FPN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 37.49 + mIoU(ms+flip): 39.09 + Config: configs/sem_fpn/fpn_r50_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50 + - FPN + Training Resources: 4x V100 GPUS + Memory (GB): 4.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x512_160k_ade20k/fpn_r50_512x512_160k_ade20k_20200718_131734-5b5a6ab9.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x512_160k_ade20k/fpn_r50_512x512_160k_ade20k-20200718_131734.log.json + Paper: + Title: Panoptic Feature Pyramid Networks + URL: https://arxiv.org/abs/1901.02446 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fpn_head.py#L12 + Framework: PyTorch +- Name: fpn_r101_4xb4-160k_ade20k-512x512 + In Collection: FPN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 39.35 + mIoU(ms+flip): 40.72 + Config: configs/sem_fpn/fpn_r101_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101 + - FPN + Training Resources: 4x V100 GPUS + Memory (GB): 5.9 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x512_160k_ade20k/fpn_r101_512x512_160k_ade20k_20200718_131734-306b5004.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x512_160k_ade20k/fpn_r101_512x512_160k_ade20k-20200718_131734.log.json + Paper: + Title: Panoptic Feature Pyramid Networks + URL: https://arxiv.org/abs/1901.02446 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fpn_head.py#L12 + Framework: PyTorch diff --git a/configs/sem_fpn/sem_fpn.yml b/configs/sem_fpn/sem_fpn.yml deleted file mode 100644 index 79ed0b81c4..0000000000 --- a/configs/sem_fpn/sem_fpn.yml +++ /dev/null @@ -1,104 +0,0 @@ -Collections: -- Name: FPN - Metadata: - Training Data: - - Cityscapes - - ADE20K - Paper: - URL: https://arxiv.org/abs/1901.02446 - Title: Panoptic Feature Pyramid Networks - README: configs/sem_fpn/README.md - Code: - URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fpn_head.py#L12 - Version: v0.17.0 - Converted From: - Code: https://github.com/facebookresearch/detectron2 -Models: -- Name: fpn_r50_4xb2-80k_cityscapes-512x1024 - In Collection: FPN - Metadata: - backbone: R-50 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 73.86 - hardware: 
V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 2.8 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 74.52 - mIoU(ms+flip): 76.08 - Config: configs/sem_fpn/fpn_r50_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x1024_80k_cityscapes/fpn_r50_512x1024_80k_cityscapes_20200717_021437-94018a0d.pth -- Name: fpn_r101_4xb2-80k_cityscapes-512x1024 - In Collection: FPN - Metadata: - backbone: R-101 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 97.18 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 3.9 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 75.8 - mIoU(ms+flip): 77.4 - Config: configs/sem_fpn/fpn_r101_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x1024_80k_cityscapes/fpn_r101_512x1024_80k_cityscapes_20200717_012416-c5800d4c.pth -- Name: fpn_r50_4xb4-160k_ade20k-512x512 - In Collection: FPN - Metadata: - backbone: R-50 - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 17.93 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 4.9 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 37.49 - mIoU(ms+flip): 39.09 - Config: configs/sem_fpn/fpn_r50_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x512_160k_ade20k/fpn_r50_512x512_160k_ade20k_20200718_131734-5b5a6ab9.pth -- Name: fpn_r101_4xb4-160k_ade20k-512x512 - In Collection: FPN - Metadata: - backbone: R-101 - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 24.64 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 5.9 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 39.35 - mIoU(ms+flip): 40.72 - Config: configs/sem_fpn/fpn_r101_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x512_160k_ade20k/fpn_r101_512x512_160k_ade20k_20200718_131734-306b5004.pth diff --git a/configs/setr/README.md b/configs/setr/README.md index 1aa3f245a7..15be6ec099 100644 --- a/configs/setr/README.md +++ b/configs/setr/README.md @@ -1,6 +1,6 @@ # SETR -[Rethinking Semantic Segmentation from a Sequence-to-Sequence Perspective with Transformers](https://arxiv.org/abs/2012.15840) +> [Rethinking Semantic Segmentation from a Sequence-to-Sequence Perspective with Transformers](https://arxiv.org/abs/2012.15840) ## Introduction @@ -26,17 +26,6 @@ Most recent semantic segmentation methods adopt a fully-convolutional network (F This head has two version head. ``` -## Citation - -```bibtex -@article{zheng2020rethinking, - title={Rethinking Semantic Segmentation from a Sequence-to-Sequence Perspective with Transformers}, - author={Zheng, Sixiao and Lu, Jiachen and Zhao, Hengshuang and Zhu, Xiatian and Luo, Zekun and Wang, Yabiao and Fu, Yanwei and Feng, Jianfeng and Xiang, Tao and Torr, Philip HS and others}, - journal={arXiv preprint arXiv:2012.15840}, - year={2020} -} -``` - ## Usage You can download the pretrain from [here](https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_p16_384-b3be5167.pth). 
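The next step converts the checkpoint's keys with `vit2mmseg.py`; under the hood, converters of this kind simply rename the entries of the downloaded state dict into MMSegmentation's naming scheme. A minimal sketch of that idea, assuming a timm-style ViT checkpoint and an illustrative `blocks.` -> `layers.` rename rule (the authoritative mapping is the one implemented in the script itself):

```python
# Illustrative sketch only: the real key mapping lives in
# tools/model_converters/vit2mmseg.py; the rename rule below is assumed.
from collections import OrderedDict

import torch


def convert_keys(src_path: str, dst_path: str) -> None:
    checkpoint = torch.load(src_path, map_location='cpu')
    # Some checkpoints nest the weights under a 'state_dict' key.
    state_dict = checkpoint.get('state_dict', checkpoint)
    converted = OrderedDict()
    for key, value in state_dict.items():
        # Hypothetical rename: timm-style 'blocks.N.' -> MMSeg-style 'layers.N.'.
        converted[key.replace('blocks.', 'layers.')] = value
    torch.save(converted, dst_path)


convert_keys('jx_vit_large_p16_384-b3be5167.pth', 'vit_large_p16_384_mmseg.pth')
```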
Then you can convert its keys with the script `vit2mmseg.py` in the tools directory. @@ -58,17 +47,28 @@ This script converts the model from `PRETRAIN_PATH` and stores the converted model in `STORE_PATH`. ### ADE20K -| Method | Backbone | Crop Size | Batch Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ---------- | -------- | --------- | ---------- | ------- | -------- | -------------- | ----- | ------------: | ----------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| SETR Naive | ViT-L | 512x512 | 16 | 160000 | 18.40 | 4.72 | 48.28 | 49.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/setr/setr_vit-l_naive_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_512x512_160k_b16_ade20k/setr_naive_512x512_160k_b16_ade20k_20210619_191258-061f24f5.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_512x512_160k_b16_ade20k/setr_naive_512x512_160k_b16_ade20k_20210619_191258.log.json) | -| SETR PUP | ViT-L | 512x512 | 16 | 160000 | 19.54 | 4.50 | 48.24 | 49.99 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/setr/setr_vit-l_pup_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_512x512_160k_b16_ade20k/setr_pup_512x512_160k_b16_ade20k_20210619_191343-7e0ce826.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_512x512_160k_b16_ade20k/setr_pup_512x512_160k_b16_ade20k_20210619_191343.log.json) | -| SETR MLA | ViT-L | 512x512 | 8 | 160000 | 10.96 | - | 47.34 | 49.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/setr/setr_vit-l-mla_8xb1-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b8_ade20k/setr_mla_512x512_160k_b8_ade20k_20210619_191118-c6d21df0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b8_ade20k/setr_mla_512x512_160k_b8_ade20k_20210619_191118.log.json) | -| SETR MLA | ViT-L | 512x512 | 16 | 160000 | 17.30 | 5.25 | 47.39 | 49.37 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/setr/setr_vit-l_mla_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b16_ade20k/setr_mla_512x512_160k_b16_ade20k_20210619_191057-f9741de7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b16_ade20k/setr_mla_512x512_160k_b16_ade20k_20210619_191057.log.json) | +| Method | Backbone | Crop Size | Batch Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ---------- | ------- | -------- | -------------- | ------ | ----- | ------------: | -------------------------------------------------------------------------------------------------------------------------- |
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| SETR Naive | ViT-L | 512x512 | 16 | 160000 | 18.40 | 4.72 | V100 | 48.28 | 49.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/setr/setr_vit-l_naive_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_512x512_160k_b16_ade20k/setr_naive_512x512_160k_b16_ade20k_20210619_191258-061f24f5.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_512x512_160k_b16_ade20k/setr_naive_512x512_160k_b16_ade20k_20210619_191258.log.json) | +| SETR PUP | ViT-L | 512x512 | 16 | 160000 | 19.54 | 4.50 | V100 | 48.24 | 49.99 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/setr/setr_vit-l_pup_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_512x512_160k_b16_ade20k/setr_pup_512x512_160k_b16_ade20k_20210619_191343-7e0ce826.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_512x512_160k_b16_ade20k/setr_pup_512x512_160k_b16_ade20k_20210619_191343.log.json) | +| SETR MLA | ViT-L | 512x512 | 8 | 160000 | 10.96 | - | V100 | 47.34 | 49.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/setr/setr_vit-l-mla_8xb1-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b8_ade20k/setr_mla_512x512_160k_b8_ade20k_20210619_191118-c6d21df0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b8_ade20k/setr_mla_512x512_160k_b8_ade20k_20210619_191118.log.json) | +| SETR MLA | ViT-L | 512x512 | 16 | 160000 | 17.30 | 5.25 | V100 | 47.39 | 49.37 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/setr/setr_vit-l_mla_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b16_ade20k/setr_mla_512x512_160k_b16_ade20k_20210619_191057-f9741de7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b16_ade20k/setr_mla_512x512_160k_b16_ade20k_20210619_191057.log.json) | ### Cityscapes -| Method | Backbone | Crop Size | Batch Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ---------- | -------- | --------- | ---------- | ------- | -------- | -------------- | ----- | ------------: | -------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| SETR Naive | ViT-L | 768x768 | 8 | 80000 | 24.06 | 0.39 | 78.10 | 80.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/setr/setr_vit-l_naive_8xb1-80k_cityscapes-768x768.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_vit-large_8x1_768x768_80k_cityscapes/setr_naive_vit-large_8x1_768x768_80k_cityscapes_20211123_000505-20728e80.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_vit-large_8x1_768x768_80k_cityscapes/setr_naive_vit-large_8x1_768x768_80k_cityscapes_20211123_000505.log.json) | -| SETR PUP | ViT-L | 768x768 | 8 | 80000 | 27.96 | 0.37 | 79.21 | 81.02 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/setr/setr_vit-l_pup_8xb1-80k_cityscapes-768x768.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_vit-large_8x1_768x768_80k_cityscapes/setr_pup_vit-large_8x1_768x768_80k_cityscapes_20211122_155115-f6f37b8f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_vit-large_8x1_768x768_80k_cityscapes/setr_pup_vit-large_8x1_768x768_80k_cityscapes_20211122_155115.log.json) | -| SETR MLA | ViT-L | 768x768 | 8 | 80000 | 24.10 | 0.41 | 77.00 | 79.59 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/setr/setr_vit-l_mla_8xb1-80k_cityscapes-768x768.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_vit-large_8x1_768x768_80k_cityscapes/setr_mla_vit-large_8x1_768x768_80k_cityscapes_20211119_101003-7f8dccbe.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_vit-large_8x1_768x768_80k_cityscapes/setr_mla_vit-large_8x1_768x768_80k_cityscapes_20211119_101003.log.json) | +| Method | Backbone | Crop Size | Batch Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ---------- | ------- | -------- | -------------- | ------ | ----- | ------------: | ----------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| SETR Naive | ViT-L | 768x768 | 8 | 80000 | 24.06 | 0.39 | V100 | 78.10 | 80.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/setr/setr_vit-l_naive_8xb1-80k_cityscapes-768x768.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_vit-large_8x1_768x768_80k_cityscapes/setr_naive_vit-large_8x1_768x768_80k_cityscapes_20211123_000505-20728e80.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_vit-large_8x1_768x768_80k_cityscapes/setr_naive_vit-large_8x1_768x768_80k_cityscapes_20211123_000505.log.json) | +| SETR PUP | ViT-L | 768x768 | 8 | 80000 | 27.96 | 0.37 | V100 | 79.21 | 81.02 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/setr/setr_vit-l_pup_8xb1-80k_cityscapes-768x768.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_vit-large_8x1_768x768_80k_cityscapes/setr_pup_vit-large_8x1_768x768_80k_cityscapes_20211122_155115-f6f37b8f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_vit-large_8x1_768x768_80k_cityscapes/setr_pup_vit-large_8x1_768x768_80k_cityscapes_20211122_155115.log.json) | +| SETR MLA | ViT-L | 768x768 | 8 | 80000 | 24.10 | 0.41 | V100 | 77.00 | 79.59 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/setr/setr_vit-l_mla_8xb1-80k_cityscapes-768x768.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_vit-large_8x1_768x768_80k_cityscapes/setr_mla_vit-large_8x1_768x768_80k_cityscapes_20211119_101003-7f8dccbe.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_vit-large_8x1_768x768_80k_cityscapes/setr_mla_vit-large_8x1_768x768_80k_cityscapes_20211119_101003.log.json) | + +## Citation + +```bibtex +@article{zheng2020rethinking, + title={Rethinking Semantic Segmentation from a Sequence-to-Sequence Perspective with Transformers}, + author={Zheng, Sixiao and Lu, Jiachen and Zhao, Hengshuang and Zhu, Xiatian and Luo, Zekun and Wang, Yabiao and Fu, Yanwei and Feng, Jianfeng and Xiang, Tao and Torr, Philip HS and others}, + journal={arXiv preprint arXiv:2012.15840}, + year={2020} +} +``` diff --git a/configs/setr/metafile.yaml b/configs/setr/metafile.yaml new file mode 100644 index 0000000000..8e6bc087dd --- /dev/null +++ b/configs/setr/metafile.yaml @@ -0,0 +1,197 @@ +Collections: +- Name: SETR + License: Apache License 2.0 + Metadata: + Training Data: + - ADE20K + - Cityscapes + Paper: + Title: Rethinking Semantic Segmentation from a Sequence-to-Sequence Perspective + with Transformers + URL: https://arxiv.org/abs/2012.15840 + README: configs/setr/README.md + Frameworks: + - PyTorch +Models: +- Name: setr_vit-l_naive_8xb2-160k_ade20k-512x512 + In Collection: SETR + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 48.28 + mIoU(ms+flip): 49.56 + Config: configs/setr/setr_vit-l_naive_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - ViT-L + - SETR + - Naive + Training Resources: 8x V100 GPUS + Memory (GB): 18.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_512x512_160k_b16_ade20k/setr_naive_512x512_160k_b16_ade20k_20210619_191258-061f24f5.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_512x512_160k_b16_ade20k/setr_naive_512x512_160k_b16_ade20k_20210619_191258.log.json + Paper: + Title: Rethinking Semantic Segmentation from a Sequence-to-Sequence Perspective + with Transformers + URL: https://arxiv.org/abs/2012.15840 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/setr_up_head.py#L11 + Framework: PyTorch +- Name: setr_vit-l_pup_8xb2-160k_ade20k-512x512 + In Collection: SETR + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 48.24 + mIoU(ms+flip): 49.99 + Config: configs/setr/setr_vit-l_pup_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - ViT-L + - SETR + - PUP + Training Resources: 8x V100 GPUS + Memory (GB): 19.54 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_512x512_160k_b16_ade20k/setr_pup_512x512_160k_b16_ade20k_20210619_191343-7e0ce826.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_512x512_160k_b16_ade20k/setr_pup_512x512_160k_b16_ade20k_20210619_191343.log.json + Paper: + Title: Rethinking Semantic Segmentation from a Sequence-to-Sequence Perspective + with Transformers + URL: https://arxiv.org/abs/2012.15840 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/setr_up_head.py#L11 + Framework: PyTorch +- Name: setr_vit-l-mla_8xb1-160k_ade20k-512x512 + In Collection: SETR + Results: + Task: Semantic 
Segmentation + Dataset: ADE20K + Metrics: + mIoU: 47.34 + mIoU(ms+flip): 49.05 + Config: configs/setr/setr_vit-l-mla_8xb1-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 8 + Architecture: + - ViT-L + - SETR + - MLA + Training Resources: 8x V100 GPUS + Memory (GB): 10.96 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b8_ade20k/setr_mla_512x512_160k_b8_ade20k_20210619_191118-c6d21df0.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b8_ade20k/setr_mla_512x512_160k_b8_ade20k_20210619_191118.log.json + Paper: + Title: Rethinking Semantic Segmentation from a Sequence-to-Sequence Perspective + with Transformers + URL: https://arxiv.org/abs/2012.15840 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/setr_up_head.py#L11 + Framework: PyTorch +- Name: setr_vit-l_mla_8xb2-160k_ade20k-512x512 + In Collection: SETR + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 47.39 + mIoU(ms+flip): 49.37 + Config: configs/setr/setr_vit-l_mla_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - ViT-L + - SETR + - MLA + Training Resources: 8x V100 GPUS + Memory (GB): 17.3 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b16_ade20k/setr_mla_512x512_160k_b16_ade20k_20210619_191057-f9741de7.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b16_ade20k/setr_mla_512x512_160k_b16_ade20k_20210619_191057.log.json + Paper: + Title: Rethinking Semantic Segmentation from a Sequence-to-Sequence Perspective + with Transformers + URL: https://arxiv.org/abs/2012.15840 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/setr_up_head.py#L11 + Framework: PyTorch +- Name: setr_vit-l_naive_8xb1-80k_cityscapes-768x768 + In Collection: SETR + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.1 + mIoU(ms+flip): 80.22 + Config: configs/setr/setr_vit-l_naive_8xb1-80k_cityscapes-768x768.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - ViT-L + - SETR + - Naive + Training Resources: 8x V100 GPUS + Memory (GB): 24.06 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_vit-large_8x1_768x768_80k_cityscapes/setr_naive_vit-large_8x1_768x768_80k_cityscapes_20211123_000505-20728e80.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_vit-large_8x1_768x768_80k_cityscapes/setr_naive_vit-large_8x1_768x768_80k_cityscapes_20211123_000505.log.json + Paper: + Title: Rethinking Semantic Segmentation from a Sequence-to-Sequence Perspective + with Transformers + URL: https://arxiv.org/abs/2012.15840 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/setr_up_head.py#L11 + Framework: PyTorch +- Name: setr_vit-l_pup_8xb1-80k_cityscapes-768x768 + In Collection: SETR + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.21 + mIoU(ms+flip): 81.02 + Config: configs/setr/setr_vit-l_pup_8xb1-80k_cityscapes-768x768.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - ViT-L + - SETR + - PUP + Training Resources: 8x V100 GPUS + Memory (GB): 27.96 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_vit-large_8x1_768x768_80k_cityscapes/setr_pup_vit-large_8x1_768x768_80k_cityscapes_20211122_155115-f6f37b8f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_vit-large_8x1_768x768_80k_cityscapes/setr_pup_vit-large_8x1_768x768_80k_cityscapes_20211122_155115.log.json + Paper: + Title: Rethinking Semantic Segmentation from a Sequence-to-Sequence Perspective + with Transformers + URL: https://arxiv.org/abs/2012.15840 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/setr_up_head.py#L11 + Framework: PyTorch +- Name: setr_vit-l_mla_8xb1-80k_cityscapes-768x768 + In Collection: SETR + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.0 + mIoU(ms+flip): 79.59 + Config: configs/setr/setr_vit-l_mla_8xb1-80k_cityscapes-768x768.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - ViT-L + - SETR + - MLA + Training Resources: 8x V100 GPUS + Memory (GB): 24.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_vit-large_8x1_768x768_80k_cityscapes/setr_mla_vit-large_8x1_768x768_80k_cityscapes_20211119_101003-7f8dccbe.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_vit-large_8x1_768x768_80k_cityscapes/setr_mla_vit-large_8x1_768x768_80k_cityscapes_20211119_101003.log.json + Paper: + Title: Rethinking Semantic Segmentation from a Sequence-to-Sequence Perspective + with Transformers + URL: https://arxiv.org/abs/2012.15840 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/setr_up_head.py#L11 + Framework: PyTorch diff --git a/configs/setr/setr.yml b/configs/setr/setr.yml deleted file mode 100644 index 6a9987089d..0000000000 --- a/configs/setr/setr.yml +++ /dev/null @@ -1,164 +0,0 @@ -Collections: -- Name: SETR - Metadata: - Training Data: - - ADE20K - - Cityscapes - Paper: - URL: https://arxiv.org/abs/2012.15840 - Title: Rethinking Semantic Segmentation from a Sequence-to-Sequence Perspective - with Transformers - README: configs/setr/README.md - Code: - URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/setr_up_head.py#L11 - Version: v0.17.0 - Converted From: - Code: https://github.com/fudan-zvg/SETR -Models: -- Name: setr_vit-l_naive_8xb2-160k_ade20k-512x512 - In Collection: SETR - Metadata: - backbone: ViT-L - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 211.86 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 18.4 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 48.28 - mIoU(ms+flip): 49.56 - Config: configs/setr/setr_vit-l_naive_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_512x512_160k_b16_ade20k/setr_naive_512x512_160k_b16_ade20k_20210619_191258-061f24f5.pth -- Name: setr_vit-l_pup_8xb2-160k_ade20k-512x512 - In Collection: SETR - Metadata: - backbone: ViT-L - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 222.22 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 19.54 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 48.24 - mIoU(ms+flip): 49.99 - Config: configs/setr/setr_vit-l_pup_8xb2-160k_ade20k-512x512.py - Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_512x512_160k_b16_ade20k/setr_pup_512x512_160k_b16_ade20k_20210619_191343-7e0ce826.pth -- Name: setr_vit-l-mla_8xb1-160k_ade20k-512x512 - In Collection: SETR - Metadata: - backbone: ViT-L - crop size: (512,512) - lr schd: 160000 - Training Memory (GB): 10.96 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 47.34 - mIoU(ms+flip): 49.05 - Config: configs/setr/setr_vit-l-mla_8xb1-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b8_ade20k/setr_mla_512x512_160k_b8_ade20k_20210619_191118-c6d21df0.pth -- Name: setr_vit-l_mla_8xb2-160k_ade20k-512x512 - In Collection: SETR - Metadata: - backbone: ViT-L - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 190.48 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 17.3 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 47.39 - mIoU(ms+flip): 49.37 - Config: configs/setr/setr_vit-l_mla_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b16_ade20k/setr_mla_512x512_160k_b16_ade20k_20210619_191057-f9741de7.pth -- Name: setr_vit-l_naive_8xb1-80k_cityscapes-768x768 - In Collection: SETR - Metadata: - backbone: ViT-L - crop size: (768,768) - lr schd: 80000 - inference time (ms/im): - - value: 2564.1 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (768,768) - Training Memory (GB): 24.06 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.1 - mIoU(ms+flip): 80.22 - Config: configs/setr/setr_vit-l_naive_8xb1-80k_cityscapes-768x768.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_vit-large_8x1_768x768_80k_cityscapes/setr_naive_vit-large_8x1_768x768_80k_cityscapes_20211123_000505-20728e80.pth -- Name: setr_vit-l_pup_8xb1-80k_cityscapes-768x768 - In Collection: SETR - Metadata: - backbone: ViT-L - crop size: (768,768) - lr schd: 80000 - inference time (ms/im): - - value: 2702.7 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (768,768) - Training Memory (GB): 27.96 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.21 - mIoU(ms+flip): 81.02 - Config: configs/setr/setr_vit-l_pup_8xb1-80k_cityscapes-768x768.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_vit-large_8x1_768x768_80k_cityscapes/setr_pup_vit-large_8x1_768x768_80k_cityscapes_20211122_155115-f6f37b8f.pth -- Name: setr_vit-l_mla_8xb1-80k_cityscapes-768x768 - In Collection: SETR - Metadata: - backbone: ViT-L - crop size: (768,768) - lr schd: 80000 - inference time (ms/im): - - value: 2439.02 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (768,768) - Training Memory (GB): 24.1 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 77.0 - mIoU(ms+flip): 79.59 - Config: configs/setr/setr_vit-l_mla_8xb1-80k_cityscapes-768x768.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_vit-large_8x1_768x768_80k_cityscapes/setr_mla_vit-large_8x1_768x768_80k_cityscapes_20211119_101003-7f8dccbe.pth diff --git a/configs/stdc/README.md b/configs/stdc/README.md index 639e6b6986..3e8bf60688 100644 --- a/configs/stdc/README.md +++ b/configs/stdc/README.md @@ -1,6 +1,6 @@ # STDC -[Rethinking BiSeNet For Real-time 
Semantic Segmentation](https://arxiv.org/abs/2104.13188) +> [Rethinking BiSeNet For Real-time Semantic Segmentation](https://arxiv.org/abs/2104.13188) ## Introduction @@ -22,18 +22,6 @@ BiSeNet has been proved to be a popular two-stream network for real-time segment -## Citation - -```bibtex -@inproceedings{fan2021rethinking, - title={Rethinking BiSeNet For Real-time Semantic Segmentation}, - author={Fan, Mingyuan and Lai, Shenqi and Huang, Junshi and Wei, Xiaoming and Chai, Zhenhua and Luo, Junfeng and Wei, Xiaolin}, - booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, - pages={9716--9725}, - year={2021} -} -``` - ## Usage We have provided [ImageNet Pretrained STDCNet Weights](https://drive.google.com/drive/folders/1wROFwRt8qWHD4jSo8Zu1gp1d6oYJ3ns1) models converted from [official repo](https://github.com/MichaelFan01/STDC-Seg). @@ -58,12 +46,12 @@ This script converts the model from `PRETRAIN_PATH` and stores the converted model in `STORE_PATH`. ### Cityscapes -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| -------------------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------- | -------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| STDC 1 (No Pretrain) | STDC1 | 512x1024 | 80000 | 7.15 | 23.06 | 71.82 | 73.89 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/stdc/stdc1_4xb12-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc1_512x1024_80k_cityscapes/stdc1_512x1024_80k_cityscapes_20220224_073048-74e6920a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc1_512x1024_80k_cityscapes/stdc1_512x1024_80k_cityscapes_20220224_073048.log.json) | -| STDC 1 | STDC1 | 512x1024 | 80000 | - | - | 74.94 | 76.97 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/stdc/stdc1_in1k-pre_4xb12-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc1_in1k-pre_512x1024_80k_cityscapes/stdc1_in1k-pre_512x1024_80k_cityscapes_20220224_141648-3d4c2981.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc1_in1k-pre_512x1024_80k_cityscapes/stdc1_in1k-pre_512x1024_80k_cityscapes_20220224_141648.log.json) | -| STDC 2 (No Pretrain) | STDC2 | 512x1024 | 80000 | 8.27 | 23.71 | 73.15 | 76.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/stdc/stdc2_4xb12-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc2_512x1024_80k_cityscapes/stdc2_512x1024_80k_cityscapes_20220222_132015-fb1e3a1a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc2_512x1024_80k_cityscapes/stdc2_512x1024_80k_cityscapes_20220222_132015.log.json) | -| STDC 2 | STDC2 | 512x1024 | 80000 | - | - | 76.67 | 78.67 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/stdc/stdc2_in1k-pre_4xb12-80k_cityscapes-512x1024.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc2_in1k-pre_512x1024_80k_cityscapes/stdc2_in1k-pre_512x1024_80k_cityscapes_20220224_073048-1f8f0f6c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc2_in1k-pre_512x1024_80k_cityscapes/stdc2_in1k-pre_512x1024_80k_cityscapes_20220224_073048.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------------------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------- | ----------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| STDC | STDC1 (No Pretrain) | 512x1024 | 80000 | 7.15 | 23.06 | V100 | 71.82 | 73.89 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/stdc/stdc1_4xb12-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc1_512x1024_80k_cityscapes/stdc1_512x1024_80k_cityscapes_20220224_073048-74e6920a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc1_512x1024_80k_cityscapes/stdc1_512x1024_80k_cityscapes_20220224_073048.log.json) | +| STDC | STDC1 | 512x1024 | 80000 | - | - | V100 | 74.94 | 76.97 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/stdc/stdc1_in1k-pre_4xb12-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc1_in1k-pre_512x1024_80k_cityscapes/stdc1_in1k-pre_512x1024_80k_cityscapes_20220224_141648-3d4c2981.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc1_in1k-pre_512x1024_80k_cityscapes/stdc1_in1k-pre_512x1024_80k_cityscapes_20220224_141648.log.json) | +| STDC | STDC2 (No Pretrain) | 512x1024 | 80000 | 8.27 | 23.71 | V100 | 73.15 | 76.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/stdc/stdc2_4xb12-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc2_512x1024_80k_cityscapes/stdc2_512x1024_80k_cityscapes_20220222_132015-fb1e3a1a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc2_512x1024_80k_cityscapes/stdc2_512x1024_80k_cityscapes_20220222_132015.log.json) | +| STDC | STDC2 | 512x1024 | 80000 | - | - | V100 | 76.67 | 78.67 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/stdc/stdc2_in1k-pre_4xb12-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc2_in1k-pre_512x1024_80k_cityscapes/stdc2_in1k-pre_512x1024_80k_cityscapes_20220224_073048-1f8f0f6c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc2_in1k-pre_512x1024_80k_cityscapes/stdc2_in1k-pre_512x1024_80k_cityscapes_20220224_073048.log.json) | Note: @@ -71,3 +59,15 @@ Note: - `No Pretrain` means the model is trained from scratch. - The FPS is for reference only. The environment is also different from paper setting, whose input size is `512x1024` and `768x1536`, i.e., 50% and 75% of our input size, respectively and using TensorRT. - The parameter `fusion_kernel` in `STDCHead` is not learnable. 
In official repo, `find_unused_parameters=True` is set [here](https://github.com/MichaelFan01/STDC-Seg/blob/59ff37fbd693b99972c76fcefe97caa14aeb619f/train.py#L220). You may check it by printing model parameters of original repo on your own. + +## Citation + +```bibtex +@inproceedings{fan2021rethinking, + title={Rethinking BiSeNet For Real-time Semantic Segmentation}, + author={Fan, Mingyuan and Lai, Shenqi and Huang, Junshi and Wei, Xiaoming and Chai, Zhenhua and Luo, Junfeng and Wei, Xiaolin}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages={9716--9725}, + year={2021} +} +``` diff --git a/configs/stdc/metafile.yaml b/configs/stdc/metafile.yaml new file mode 100644 index 0000000000..93cb14f50b --- /dev/null +++ b/configs/stdc/metafile.yaml @@ -0,0 +1,107 @@ +Collections: +- Name: STDC + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + Paper: + Title: Rethinking BiSeNet For Real-time Semantic Segmentation + URL: https://arxiv.org/abs/2104.13188 + README: configs/stdc/README.md + Frameworks: + - PyTorch +Models: +- Name: stdc1_4xb12-80k_cityscapes-512x1024 + In Collection: STDC + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 71.82 + mIoU(ms+flip): 73.89 + Config: configs/stdc/stdc1_4xb12-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 48 + Architecture: + - STDC1 + - STDC + Training Resources: 4x V100 GPUS + Memory (GB): 7.15 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc1_512x1024_80k_cityscapes/stdc1_512x1024_80k_cityscapes_20220224_073048-74e6920a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc1_512x1024_80k_cityscapes/stdc1_512x1024_80k_cityscapes_20220224_073048.log.json + Paper: + Title: Rethinking BiSeNet For Real-time Semantic Segmentation + URL: https://arxiv.org/abs/2104.13188 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/stdc.py#L394 + Framework: PyTorch +- Name: stdc1_in1k-pre_4xb12-80k_cityscapes-512x1024 + In Collection: STDC + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.94 + mIoU(ms+flip): 76.97 + Config: configs/stdc/stdc1_in1k-pre_4xb12-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 48 + Architecture: + - STDC1 + - STDC + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc1_in1k-pre_512x1024_80k_cityscapes/stdc1_in1k-pre_512x1024_80k_cityscapes_20220224_141648-3d4c2981.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc1_in1k-pre_512x1024_80k_cityscapes/stdc1_in1k-pre_512x1024_80k_cityscapes_20220224_141648.log.json + Paper: + Title: Rethinking BiSeNet For Real-time Semantic Segmentation + URL: https://arxiv.org/abs/2104.13188 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/stdc.py#L394 + Framework: PyTorch +- Name: stdc2_4xb12-80k_cityscapes-512x1024 + In Collection: STDC + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.15 + mIoU(ms+flip): 76.13 + Config: configs/stdc/stdc2_4xb12-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 48 + Architecture: + - STDC2 + - STDC + Training Resources: 4x V100 GPUS + Memory (GB): 8.27 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc2_512x1024_80k_cityscapes/stdc2_512x1024_80k_cityscapes_20220222_132015-fb1e3a1a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc2_512x1024_80k_cityscapes/stdc2_512x1024_80k_cityscapes_20220222_132015.log.json + Paper: + Title: Rethinking BiSeNet For Real-time Semantic Segmentation + URL: https://arxiv.org/abs/2104.13188 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/stdc.py#L394 + Framework: PyTorch +- Name: stdc2_in1k-pre_4xb12-80k_cityscapes-512x1024 + In Collection: STDC + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.67 + mIoU(ms+flip): 78.67 + Config: configs/stdc/stdc2_in1k-pre_4xb12-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 48 + Architecture: + - STDC2 + - STDC + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc2_in1k-pre_512x1024_80k_cityscapes/stdc2_in1k-pre_512x1024_80k_cityscapes_20220224_073048-1f8f0f6c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc2_in1k-pre_512x1024_80k_cityscapes/stdc2_in1k-pre_512x1024_80k_cityscapes_20220224_073048.log.json + Paper: + Title: Rethinking BiSeNet For Real-time Semantic Segmentation + URL: https://arxiv.org/abs/2104.13188 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/stdc.py#L394 + Framework: PyTorch diff --git a/configs/stdc/stdc.yml b/configs/stdc/stdc.yml deleted file mode 100644 index 22fb37eeba..0000000000 --- a/configs/stdc/stdc.yml +++ /dev/null @@ -1,87 +0,0 @@ -Collections: -- Name: STDC - Metadata: - Training Data: - - Cityscapes - Paper: - URL: https://arxiv.org/abs/2104.13188 - Title: Rethinking BiSeNet For Real-time Semantic Segmentation - README: configs/stdc/README.md - Code: - URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/stdc.py#L394 - Version: v0.20.0 - Converted From: - Code: https://github.com/MichaelFan01/STDC-Seg -Models: -- Name: stdc1_4xb12-80k_cityscapes-512x1024 - In Collection: STDC - Metadata: - backbone: STDC1 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 43.37 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 7.15 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 71.82 - mIoU(ms+flip): 73.89 - Config: configs/stdc/stdc1_4xb12-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc1_512x1024_80k_cityscapes/stdc1_512x1024_80k_cityscapes_20220224_073048-74e6920a.pth -- Name: stdc1_in1k-pre_4xb12-80k_cityscapes-512x1024 - In Collection: STDC - Metadata: - backbone: STDC1 - crop size: (512,1024) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 74.94 - mIoU(ms+flip): 76.97 - Config: configs/stdc/stdc1_in1k-pre_4xb12-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc1_in1k-pre_512x1024_80k_cityscapes/stdc1_in1k-pre_512x1024_80k_cityscapes_20220224_141648-3d4c2981.pth -- Name: stdc2_4xb12-80k_cityscapes-512x1024 - In Collection: STDC - Metadata: - backbone: STDC2 - crop size: (512,1024) - lr schd: 80000 - inference time (ms/im): - - value: 42.18 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 8.27 - Results: - - 
Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 73.15 - mIoU(ms+flip): 76.13 - Config: configs/stdc/stdc2_4xb12-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc2_512x1024_80k_cityscapes/stdc2_512x1024_80k_cityscapes_20220222_132015-fb1e3a1a.pth -- Name: stdc2_in1k-pre_4xb12-80k_cityscapes-512x1024 - In Collection: STDC - Metadata: - backbone: STDC2 - crop size: (512,1024) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 76.67 - mIoU(ms+flip): 78.67 - Config: configs/stdc/stdc2_in1k-pre_4xb12-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc2_in1k-pre_512x1024_80k_cityscapes/stdc2_in1k-pre_512x1024_80k_cityscapes_20220224_073048-1f8f0f6c.pth diff --git a/configs/swin/README.md b/configs/swin/README.md index 4ab20e80b8..18fcbae8bc 100644 --- a/configs/swin/README.md +++ b/configs/swin/README.md @@ -1,6 +1,6 @@ # Swin Transformer -[Swin Transformer: Hierarchical Vision Transformer using Shifted Windows](https://arxiv.org/abs/2103.14030) +> [Swin Transformer: Hierarchical Vision Transformer using Shifted Windows](https://arxiv.org/abs/2103.14030) ## Introduction @@ -22,17 +22,6 @@ This paper presents a new vision Transformer, called Swin Transformer, that capa -## Citation - -```bibtex -@article{liu2021Swin, - title={Swin Transformer: Hierarchical Vision Transformer using Shifted Windows}, - author={Liu, Ze and Lin, Yutong and Cao, Yue and Hu, Han and Wei, Yixuan and Zhang, Zheng and Lin, Stephen and Guo, Baining}, - journal={arXiv preprint arXiv:2103.14030}, - year={2021} -} -``` - ## Usage We have provided pretrained models converted from [official repo](https://github.com/microsoft/Swin-Transformer). 
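Whether you use the provided checkpoints or convert one yourself, a quick sanity check is to load the weights non-strictly and inspect the keys that failed to match. A minimal sketch, assuming nothing beyond a plain `torch.nn.Module` (build the model however you normally do):

```python
# Sanity-check a (converted) checkpoint: load it non-strictly and report
# any keys that do not line up with the model's own state dict.
import torch
from torch import nn


def check_checkpoint(model: nn.Module, ckpt_path: str) -> None:
    checkpoint = torch.load(ckpt_path, map_location='cpu')
    # Some checkpoints nest the weights under a 'state_dict' key.
    state_dict = checkpoint.get('state_dict', checkpoint)
    missing, unexpected = model.load_state_dict(state_dict, strict=False)
    print(f'missing keys: {missing}')        # expected by the model, absent in the file
    print(f'unexpected keys: {unexpected}')  # present in the file, unknown to the model
```

An empty list on both lines means the checkpoint and the model architecture agree key-for-key.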
@@ -66,11 +55,22 @@ In our default setting, pretrained models and their corresponding [original mode ### ADE20K -| Method | Backbone | Crop Size | pretrain | pretrain img size | Batch Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------- | -------- | --------- | ------------ | ----------------- | ---------- | ------- | -------- | -------------- | ----- | ------------: | --------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| UPerNet | Swin-T | 512x512 | ImageNet-1K | 224x224 | 16 | 160000 | 5.02 | 21.06 | 44.41 | 45.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210531_112542-e380ad3e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210531_112542.log.json) | -| UPerNet | Swin-S | 512x512 | ImageNet-1K | 224x224 | 16 | 160000 | 6.17 | 14.72 | 47.72 | 49.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/swin-small-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192015-ee2fff1c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192015.log.json) | -| UPerNet | Swin-B | 512x512 | ImageNet-1K | 224x224 | 16 | 160000 | 7.61 | 12.65 | 47.99 | 49.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192340-593b0e13.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192340.log.json) | -| UPerNet | Swin-B | 512x512 | ImageNet-22K | 224x224 | 16 | 160000 | - | - | 50.13 | 51.9 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/swin-base-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K_20210526_211650-762e2178.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K_20210526_211650.log.json) | -| UPerNet | Swin-B | 512x512 | ImageNet-1K | 384x384 | 16 | 160000 | 8.52 | 12.10 | 48.35 | 49.65 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/swin-base-patch4-window12-in1k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K_20210531_132020-05b22ea4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K_20210531_132020.log.json) | -| UPerNet | Swin-B | 512x512 | ImageNet-22K | 384x384 | 16 | 160000 | - | - | 50.76 | 52.4 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/swin-base-patch4-window12-in22k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K_20210531_125459-429057bf.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K_20210531_125459.log.json) | +| Method | Backbone | Crop Size | pretrain | pretrain img size | Batch Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------- | -------- | --------- | ------------ | ----------------- | ---------- | ------- | -------- | -------------- | ------ | ----- | ------------: | --------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| UPerNet | Swin-T | 512x512 | ImageNet-1K | 224x224 | 16 | 160000 | 5.02 | 21.06 | V100 | 44.41 | 45.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210531_112542-e380ad3e.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210531_112542.log.json) | +| UPerNet | Swin-S | 512x512 | ImageNet-1K | 224x224 | 16 | 160000 | 6.17 | 14.72 | V100 | 47.72 | 49.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/swin-small-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192015-ee2fff1c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192015.log.json) | +| UPerNet | Swin-B | 512x512 | ImageNet-1K | 224x224 | 16 | 160000 | 7.61 | 12.65 | V100 | 47.99 | 49.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192340-593b0e13.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192340.log.json) | +| UPerNet | Swin-B | 512x512 | ImageNet-22K | 224x224 | 16 | 160000 | - | - | V100 | 50.13 | 51.9 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/swin-base-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K_20210526_211650-762e2178.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K_20210526_211650.log.json) | +| UPerNet | Swin-B | 512x512 | ImageNet-1K | 384x384 | 16 | 160000 | 8.52 | 12.10 | V100 | 48.35 | 49.65 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/swin-base-patch4-window12-in1k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K_20210531_132020-05b22ea4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K_20210531_132020.log.json) | +| UPerNet | Swin-B | 512x512 | ImageNet-22K | 384x384 | 16 | 160000 | - | - | V100 | 50.76 | 52.4 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/swin-base-patch4-window12-in22k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K_20210531_125459-429057bf.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K_20210531_125459.log.json) | + +## Citation + +```bibtex +@article{liu2021Swin, + title={Swin Transformer: Hierarchical Vision Transformer using Shifted Windows}, + author={Liu, Ze and Lin, Yutong and Cao, Yue and Hu, Han and Wei, Yixuan and Zhang, Zheng and Lin, Stephen and Guo, Baining}, + journal={arXiv preprint arXiv:2103.14030}, + year={2021} +} +``` diff --git a/configs/swin/metafile.yaml b/configs/swin/metafile.yaml new file mode 100644 index 0000000000..67a4e07551 --- /dev/null +++ b/configs/swin/metafile.yaml @@ -0,0 +1,143 @@ +Models: +- Name: swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 44.41 + mIoU(ms+flip): 45.79 + Config: configs/swin/swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Swin-T + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 5.02 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210531_112542-e380ad3e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210531_112542.log.json + Paper: + Title: 'Swin Transformer: Hierarchical Vision Transformer using Shifted Windows' + URL: https://arxiv.org/abs/2103.14030 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/swin.py#L524 + Framework: PyTorch +- Name: swin-small-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 47.72 + mIoU(ms+flip): 49.24 + Config: configs/swin/swin-small-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Swin-S + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 6.17 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192015-ee2fff1c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192015.log.json + Paper: + Title: 'Swin Transformer: Hierarchical Vision Transformer using Shifted Windows' + URL: https://arxiv.org/abs/2103.14030 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/swin.py#L524 + Framework: PyTorch +- Name: swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + 
Metrics: + mIoU: 47.99 + mIoU(ms+flip): 49.57 + Config: configs/swin/swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Swin-B + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 7.61 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192340-593b0e13.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192340.log.json + Paper: + Title: 'Swin Transformer: Hierarchical Vision Transformer using Shifted Windows' + URL: https://arxiv.org/abs/2103.14030 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/swin.py#L524 + Framework: PyTorch +- Name: swin-base-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 50.13 + mIoU(ms+flip): 51.9 + Config: configs/swin/swin-base-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Swin-B + - UPerNet + Training Resources: 8x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K_20210526_211650-762e2178.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K_20210526_211650.log.json + Paper: + Title: 'Swin Transformer: Hierarchical Vision Transformer using Shifted Windows' + URL: https://arxiv.org/abs/2103.14030 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/swin.py#L524 + Framework: PyTorch +- Name: swin-base-patch4-window12-in1k-384x384-pre_upernet_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 48.35 + mIoU(ms+flip): 49.65 + Config: configs/swin/swin-base-patch4-window12-in1k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Swin-B + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 8.52 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K_20210531_132020-05b22ea4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K_20210531_132020.log.json + Paper: + Title: 'Swin Transformer: Hierarchical Vision Transformer using Shifted Windows' + URL: https://arxiv.org/abs/2103.14030 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/swin.py#L524 + Framework: PyTorch +- Name: swin-base-patch4-window12-in22k-384x384-pre_upernet_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic 
Segmentation + Dataset: ADE20K + Metrics: + mIoU: 50.76 + mIoU(ms+flip): 52.4 + Config: configs/swin/swin-base-patch4-window12-in22k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Swin-B + - UPerNet + Training Resources: 8x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K_20210531_125459-429057bf.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K_20210531_125459.log.json + Paper: + Title: 'Swin Transformer: Hierarchical Vision Transformer using Shifted Windows' + URL: https://arxiv.org/abs/2103.14030 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/swin.py#L524 + Framework: PyTorch diff --git a/configs/swin/swin.yml b/configs/swin/swin.yml deleted file mode 100644 index 783d839c8d..0000000000 --- a/configs/swin/swin.yml +++ /dev/null @@ -1,117 +0,0 @@ -Models: -- Name: swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512 - In Collection: UPerNet - Metadata: - backbone: Swin-T - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 47.48 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 5.02 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 44.41 - mIoU(ms+flip): 45.79 - Config: configs/swin/swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210531_112542-e380ad3e.pth -- Name: swin-small-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512 - In Collection: UPerNet - Metadata: - backbone: Swin-S - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 67.93 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 6.17 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 47.72 - mIoU(ms+flip): 49.24 - Config: configs/swin/swin-small-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192015-ee2fff1c.pth -- Name: swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512 - In Collection: UPerNet - Metadata: - backbone: Swin-B - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 79.05 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 7.61 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 47.99 - mIoU(ms+flip): 49.57 - Config: configs/swin/swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py - Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192340-593b0e13.pth -- Name: swin-base-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512 - In Collection: UPerNet - Metadata: - backbone: Swin-B - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 50.13 - mIoU(ms+flip): 51.9 - Config: configs/swin/swin-base-patch4-window7-in22k-pre_upernet_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K_20210526_211650-762e2178.pth -- Name: swin-base-patch4-window12-in1k-384x384-pre_upernet_8xb2-160k_ade20k-512x512 - In Collection: UPerNet - Metadata: - backbone: Swin-B - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 82.64 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 8.52 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 48.35 - mIoU(ms+flip): 49.65 - Config: configs/swin/swin-base-patch4-window12-in1k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K_20210531_132020-05b22ea4.pth -- Name: swin-base-patch4-window12-in22k-384x384-pre_upernet_8xb2-160k_ade20k-512x512 - In Collection: UPerNet - Metadata: - backbone: Swin-B - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 50.76 - mIoU(ms+flip): 52.4 - Config: configs/swin/swin-base-patch4-window12-in22k-384x384-pre_upernet_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K_20210531_125459-429057bf.pth diff --git a/configs/twins/README.md b/configs/twins/README.md index 3e741802e6..e4b3735b00 100644 --- a/configs/twins/README.md +++ b/configs/twins/README.md @@ -1,6 +1,6 @@ # Twins -[Twins: Revisiting the Design of Spatial Attention in Vision Transformers](https://arxiv.org/pdf/2104.13840.pdf) +> [Twins: Revisiting the Design of Spatial Attention in Vision Transformers](https://arxiv.org/pdf/2104.13840.pdf) ## Introduction @@ -22,17 +22,6 @@ Very recently, a variety of vision transformer architectures for dense predictio -## Citation - -```bibtex -@article{chu2021twins, - title={Twins: Revisiting spatial attention design in vision transformers}, - author={Chu, Xiangxiang and Tian, Zhi and Wang, Yuqing and Zhang, Bo and Ren, Haibing and Wei, Xiaolin and Xia, Huaxia and Shen, Chunhua}, - journal={arXiv preprint arXiv:2104.13840}, - year={2021}altgvt -} -``` - ## Usage We have provided pretrained models converted from [official repo](https://github.com/Meituan-AutoML/Twins). 
@@ -55,22 +44,33 @@ python tools/model_converters/twins2mmseg.py ./alt_gvt_base.pth ./pretrained/alt ### ADE20K -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------------------- | -------- | --------- | ------- | -------- | -------------- | ----- | ------------- | -------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| Twins-FPN | PCPVT-S | 512x512 | 80000 | 6.60 | 27.15 | 43.26 | 44.11 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/twins/twins_pcpvt-s_fpn_fpnhead_8xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_204132-41acd132.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_204132.log.json) | -| Twins-UPerNet | PCPVT-S | 512x512 | 160000 | 9.67 | 14.24 | 46.04 | 46.92 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/twins/twins_pcpvt-s_uperhead_8xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k_20211201_233537-8e99c07a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k_20211201_233537.log.json) | -| Twins-FPN | PCPVT-B | 512x512 | 80000 | 8.41 | 19.67 | 45.66 | 46.48 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/twins/twins_pcpvt-b_fpn_fpnhead_8xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141019-d396db72.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141019.log.json) | -| Twins-UPerNet (8x2) | PCPVT-B | 512x512 | 160000 | 6.46 | 12.04 | 47.91 | 48.64 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/twins/twins_pcpvt-b_uperhead_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k_20211130_141020-02094ea5.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k_20211130_141020.log.json) | -| Twins-FPN | PCPVT-L | 512x512 | 80000 | 10.78 | 14.32 | 45.94 | 46.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/twins/twins_pcpvt-l_fpn_fpnhead_8xb4-80k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_105226-bc6d61dc.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_105226.log.json) | -| Twins-UPerNet (8x2) | PCPVT-L | 512x512 | 160000 | 7.82 | 10.70 | 49.35 | 50.08 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/twins/twins_pcpvt-l_uperhead_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k_20211201_075053-c6095c07.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k_20211201_075053.log.json) | -| Twins-FPN | SVT-S | 512x512 | 80000 | 5.80 | 29.79 | 44.47 | 45.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/twins/twins_svt-s_fpn_fpnhead_8xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141006-0a0d3317.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141006.log.json) | -| Twins-UPerNet (8x2) | SVT-S | 512x512 | 160000 | 4.93 | 15.09 | 46.08 | 46.96 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/twins/twins_svt-s_uperhead_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k/twins_svt-s_uperhead_8x2_512x512_160k_ade20k_20211130_141005-e48a2d94.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k/twins_svt-s_uperhead_8x2_512x512_160k_ade20k_20211130_141005.log.json) | -| Twins-FPN | SVT-B | 512x512 | 80000 | 8.75 | 21.10 | 46.77 | 47.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/twins/twins_svt-b_fpn_fpnhead_8xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_113849-88b2907c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_113849.log.json) | -| Twins-UPerNet (8x2) | SVT-B | 512x512 | 160000 | 6.77 | 12.66 | 48.04 | 48.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/twins/twins_svt-b_uperhead_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-b_uperhead_8x2_512x512_160k_ade20k/twins_svt-b_uperhead_8x2_512x512_160k_ade20k_20211202_040826-0943a1f1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-b_uperhead_8x2_512x512_160k_ade20k/twins_svt-b_uperhead_8x2_512x512_160k_ade20k_20211202_040826.log.json) | -| Twins-FPN | SVT-L | 512x512 | 80000 | 11.20 | 17.80 | 46.55 | 47.74 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/twins/twins_svt-l_fpn_fpnhead_8xb4-80k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141005-1d59bee2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141005.log.json) | -| Twins-UPerNet (8x2) | SVT-L | 512x512 | 160000 | 8.41 | 10.73 | 49.65 | 50.63 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/twins/twins_pcpvt-l_uperhead_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-l_uperhead_8x2_512x512_160k_ade20k/twins_svt-l_uperhead_8x2_512x512_160k_ade20k_20211130_141005-3e2cae61.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-l_uperhead_8x2_512x512_160k_ade20k/twins_svt-l_uperhead_8x2_512x512_160k_ade20k_20211130_141005.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------- | ------------------- | --------- | ------- | -------- | -------------- | ------ | ----- | ------------- | ----------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FPN | Twins-PCPVT-S | 512x512 | 80000 | 6.60 | 27.15 | V100 | 43.26 | 44.11 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/twins/twins_pcpvt-s_fpn_fpnhead_8xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_204132-41acd132.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_204132.log.json) | +| UPerNet | Twins-PCPVT-S | 512x512 | 160000 | 9.67 | 14.24 | V100 | 46.04 | 46.92 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/twins/twins_pcpvt-s_uperhead_8xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k_20211201_233537-8e99c07a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k_20211201_233537.log.json) | +| FPN | Twins-PCPVT-B | 512x512 | 80000 | 8.41 | 19.67 | V100 | 45.66 | 46.48 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/twins/twins_pcpvt-b_fpn_fpnhead_8xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141019-d396db72.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141019.log.json) | +| UPerNet | Twins-PCPVT-B (8x2) 
| 512x512 | 160000 | 6.46 | 12.04 | V100 | 47.91 | 48.64 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/twins/twins_pcpvt-b_uperhead_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k_20211130_141020-02094ea5.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k_20211130_141020.log.json) | +| FPN | Twins-PCPVT-L | 512x512 | 80000 | 10.78 | 14.32 | V100 | 45.94 | 46.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/twins/twins_pcpvt-l_fpn_fpnhead_8xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_105226-bc6d61dc.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_105226.log.json) | +| UPerNet | Twins-PCPVT-L (8x2) | 512x512 | 160000 | 7.82 | 10.70 | V100 | 49.35 | 50.08 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/twins/twins_pcpvt-l_uperhead_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k_20211201_075053-c6095c07.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k_20211201_075053.log.json) | +| FPN | Twins-SVT-S | 512x512 | 80000 | 5.80 | 29.79 | V100 | 44.47 | 45.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/twins/twins_svt-s_fpn_fpnhead_8xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141006-0a0d3317.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141006.log.json) | +| UPerNet | Twins-SVT-S (8x2) | 512x512 | 160000 | 4.93 | 15.09 | V100 | 46.08 | 46.96 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/twins/twins_svt-s_uperhead_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k/twins_svt-s_uperhead_8x2_512x512_160k_ade20k_20211130_141005-e48a2d94.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k/twins_svt-s_uperhead_8x2_512x512_160k_ade20k_20211130_141005.log.json) | +| FPN | Twins-SVT-B | 512x512 | 80000 | 8.75 | 21.10 | V100 | 46.77 | 47.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/twins/twins_svt-b_fpn_fpnhead_8xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_113849-88b2907c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_113849.log.json) | +| UPerNet | Twins-SVT-B
(8x2) | 512x512 | 160000 | 6.77 | 12.66 | V100 | 48.04 | 48.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/twins/twins_svt-b_uperhead_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-b_uperhead_8x2_512x512_160k_ade20k/twins_svt-b_uperhead_8x2_512x512_160k_ade20k_20211202_040826-0943a1f1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-b_uperhead_8x2_512x512_160k_ade20k/twins_svt-b_uperhead_8x2_512x512_160k_ade20k_20211202_040826.log.json) | +| FPN | Twins-SVT-L | 512x512 | 80000 | 11.20 | 17.80 | V100 | 46.55 | 47.74 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/twins/twins_svt-l_fpn_fpnhead_8xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141005-1d59bee2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141005.log.json) | +| UPerNet | Twins-SVT-L (8x2) | 512x512 | 160000 | 8.41 | 10.73 | V100 | 49.65 | 50.63 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/twins/twins_pcpvt-l_uperhead_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-l_uperhead_8x2_512x512_160k_ade20k/twins_svt-l_uperhead_8x2_512x512_160k_ade20k_20211130_141005-3e2cae61.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-l_uperhead_8x2_512x512_160k_ade20k/twins_svt-l_uperhead_8x2_512x512_160k_ade20k_20211130_141005.log.json) | Note: - `8x2` means 8 GPUs with 2 samples per GPU in training. The default setting of Twins on ADE20K is 8 GPUs with 4 samples per GPU in training. - `UPerNet` and `FPN` are the decoder heads used in the corresponding Twins models, namely `UPerHead` and `FPNHead`, respectively. Specifically, models in the [official repo](https://github.com/Meituan-AutoML/Twins) all use `UPerHead`.
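The `8x2` tag corresponds directly to the dataloader settings in the config files. The fragment below is a minimal sketch (assuming the MMEngine-style configs used in this repo, with the `_base_` file taken from one of the `8xb4` entries in the table above) of how an `8x2` variant overrides only the per-GPU batch size:

```python
# Hypothetical config fragment illustrating the `8x2` setting. Launched on
# 8 GPUs, `batch_size` is samples per GPU, so the effective batch size is
# 8 * 2 = 16 instead of the default 8 * 4 = 32 used by Twins on ADE20K.
_base_ = './twins_pcpvt-s_uperhead_8xb4-160k_ade20k-512x512.py'

train_dataloader = dict(batch_size=2)  # 2 samples per GPU instead of 4
```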
+ +## Citation + +```bibtex +@article{chu2021twins, + title={Twins: Revisiting spatial attention design in vision transformers}, + author={Chu, Xiangxiang and Tian, Zhi and Wang, Yuqing and Zhang, Bo and Ren, Haibing and Wei, Xiaolin and Xia, Huaxia and Shen, Chunhua}, + journal={arXiv preprint arXiv:2104.13840}, + year={2021} +} +``` diff --git a/configs/twins/metafile.yaml b/configs/twins/metafile.yaml new file mode 100644 index 0000000000..0de78d9d2e --- /dev/null +++ b/configs/twins/metafile.yaml @@ -0,0 +1,289 @@ +Models: +- Name: twins_pcpvt-s_fpn_fpnhead_8xb4-80k_ade20k-512x512 + In Collection: FPN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.26 + mIoU(ms+flip): 44.11 + Config: configs/twins/twins_pcpvt-s_fpn_fpnhead_8xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 32 + Architecture: + - Twins-PCPVT-S + - FPN + Training Resources: 8x V100 GPUS + Memory (GB): 6.6 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_204132-41acd132.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_204132.log.json + Paper: + Title: 'Twins: Revisiting the Design of Spatial Attention in Vision Transformers' + URL: https://arxiv.org/pdf/2104.13840.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/twins.py#L352 + Framework: PyTorch +- Name: twins_pcpvt-s_uperhead_8xb4-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 46.04 + mIoU(ms+flip): 46.92 + Config: configs/twins/twins_pcpvt-s_uperhead_8xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 32 + Architecture: + - Twins-PCPVT-S + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 9.67 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k_20211201_233537-8e99c07a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k_20211201_233537.log.json + Paper: + Title: 'Twins: Revisiting the Design of Spatial Attention in Vision Transformers' + URL: https://arxiv.org/pdf/2104.13840.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/twins.py#L352 + Framework: PyTorch +- Name: twins_pcpvt-b_fpn_fpnhead_8xb4-80k_ade20k-512x512 + In Collection: FPN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.66 + mIoU(ms+flip): 46.48 + Config: configs/twins/twins_pcpvt-b_fpn_fpnhead_8xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 32 + Architecture: + - Twins-PCPVT-B + - FPN + Training Resources: 8x V100 GPUS + Memory (GB): 8.41 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141019-d396db72.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141019.log.json + Paper: + Title: 'Twins: Revisiting the Design of Spatial Attention in Vision
Transformers' + URL: https://arxiv.org/pdf/2104.13840.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/twins.py#L352 + Framework: PyTorch +- Name: twins_pcpvt-b_uperhead_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 47.91 + mIoU(ms+flip): 48.64 + Config: configs/twins/twins_pcpvt-b_uperhead_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Twins-PCPVT-B + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 6.46 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k_20211130_141020-02094ea5.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k_20211130_141020.log.json + Paper: + Title: 'Twins: Revisiting the Design of Spatial Attention in Vision Transformers' + URL: https://arxiv.org/pdf/2104.13840.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/twins.py#L352 + Framework: PyTorch +- Name: twins_pcpvt-l_fpn_fpnhead_8xb4-80k_ade20k-512x512 + In Collection: FPN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.94 + mIoU(ms+flip): 46.7 + Config: configs/twins/twins_pcpvt-l_fpn_fpnhead_8xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 32 + Architecture: + - Twins-PCPVT-L + - FPN + Training Resources: 8x V100 GPUS + Memory (GB): 10.78 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_105226-bc6d61dc.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_105226.log.json + Paper: + Title: 'Twins: Revisiting the Design of Spatial Attention in Vision Transformers' + URL: https://arxiv.org/pdf/2104.13840.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/twins.py#L352 + Framework: PyTorch +- Name: twins_pcpvt-l_uperhead_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 49.35 + mIoU(ms+flip): 50.08 + Config: configs/twins/twins_pcpvt-l_uperhead_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Twins-PCPVT-L + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 7.82 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k_20211201_075053-c6095c07.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k_20211201_075053.log.json + Paper: + Title: 'Twins: Revisiting the Design of Spatial Attention in Vision Transformers' + URL: https://arxiv.org/pdf/2104.13840.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/twins.py#L352 + Framework: PyTorch +- Name: twins_svt-s_fpn_fpnhead_8xb4-80k_ade20k-512x512 + In Collection: FPN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 44.47 + mIoU(ms+flip): 
45.42 + Config: configs/twins/twins_svt-s_fpn_fpnhead_8xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 32 + Architecture: + - Twins-SVT-S + - FPN + Training Resources: 8x V100 GPUS + Memory (GB): 5.8 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141006-0a0d3317.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141006.log.json + Paper: + Title: 'Twins: Revisiting the Design of Spatial Attention in Vision Transformers' + URL: https://arxiv.org/pdf/2104.13840.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/twins.py#L352 + Framework: PyTorch +- Name: twins_svt-s_uperhead_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 46.08 + mIoU(ms+flip): 46.96 + Config: configs/twins/twins_svt-s_uperhead_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Twins-SVT-S + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 4.93 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k/twins_svt-s_uperhead_8x2_512x512_160k_ade20k_20211130_141005-e48a2d94.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k/twins_svt-s_uperhead_8x2_512x512_160k_ade20k_20211130_141005.log.json + Paper: + Title: 'Twins: Revisiting the Design of Spatial Attention in Vision Transformers' + URL: https://arxiv.org/pdf/2104.13840.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/twins.py#L352 + Framework: PyTorch +- Name: twins_svt-b_fpn_fpnhead_8xb4-80k_ade20k-512x512 + In Collection: FPN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 46.77 + mIoU(ms+flip): 47.47 + Config: configs/twins/twins_svt-b_fpn_fpnhead_8xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 32 + Architecture: + - Twins-SVT-B + - FPN + Training Resources: 8x V100 GPUS + Memory (GB): 8.75 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_113849-88b2907c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_113849.log.json + Paper: + Title: 'Twins: Revisiting the Design of Spatial Attention in Vision Transformers' + URL: https://arxiv.org/pdf/2104.13840.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/twins.py#L352 + Framework: PyTorch +- Name: twins_svt-b_uperhead_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 48.04 + mIoU(ms+flip): 48.87 + Config: configs/twins/twins_svt-b_uperhead_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Twins-SVT-B + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 6.77 + Weights:
https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-b_uperhead_8x2_512x512_160k_ade20k/twins_svt-b_uperhead_8x2_512x512_160k_ade20k_20211202_040826-0943a1f1.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-b_uperhead_8x2_512x512_160k_ade20k/twins_svt-b_uperhead_8x2_512x512_160k_ade20k_20211202_040826.log.json + Paper: + Title: 'Twins: Revisiting the Design of Spatial Attention in Vision Transformers' + URL: https://arxiv.org/pdf/2104.13840.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/twins.py#L352 + Framework: PyTorch +- Name: twins_svt-l_fpn_fpnhead_8xb4-80k_ade20k-512x512 + In Collection: FPN + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 46.55 + mIoU(ms+flip): 47.74 + Config: configs/twins/twins_svt-l_fpn_fpnhead_8xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 32 + Architecture: + - Twins-SVT-L + - FPN + Training Resources: 8x V100 GPUS + Memory (GB): 11.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141005-1d59bee2.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141005.log.json + Paper: + Title: 'Twins: Revisiting the Design of Spatial Attention in Vision Transformers' + URL: https://arxiv.org/pdf/2104.13840.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/twins.py#L352 + Framework: PyTorch +- Name: twins_pcpvt-l_uperhead_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 49.65 + mIoU(ms+flip): 50.63 + Config: configs/twins/twins_pcpvt-l_uperhead_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - Twins-SVT-L + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 8.41 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-l_uperhead_8x2_512x512_160k_ade20k/twins_svt-l_uperhead_8x2_512x512_160k_ade20k_20211130_141005-3e2cae61.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-l_uperhead_8x2_512x512_160k_ade20k/twins_svt-l_uperhead_8x2_512x512_160k_ade20k_20211130_141005.log.json + Paper: + Title: 'Twins: Revisiting the Design of Spatial Attention in Vision Transformers' + URL: https://arxiv.org/pdf/2104.13840.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/twins.py#L352 + Framework: PyTorch diff --git a/configs/twins/twins.yml b/configs/twins/twins.yml deleted file mode 100644 index 48d25c682f..0000000000 --- a/configs/twins/twins.yml +++ /dev/null @@ -1,265 +0,0 @@ -Models: -- Name: twins_pcpvt-s_fpn_fpnhead_8xb4-80k_ade20k-512x512 - In Collection: FPN - Metadata: - backbone: PCPVT-S - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 36.83 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 6.6 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 43.26 - mIoU(ms+flip): 44.11 - Config: configs/twins/twins_pcpvt-s_fpn_fpnhead_8xb4-80k_ade20k-512x512.py - Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_204132-41acd132.pth -- Name: twins_pcpvt-s_uperhead_8xb4-160k_ade20k-512x512 - In Collection: UPerNet - Metadata: - backbone: PCPVT-S - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 70.22 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 9.67 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 46.04 - mIoU(ms+flip): 46.92 - Config: configs/twins/twins_pcpvt-s_uperhead_8xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k_20211201_233537-8e99c07a.pth -- Name: twins_pcpvt-b_fpn_fpnhead_8xb4-80k_ade20k-512x512 - In Collection: FPN - Metadata: - backbone: PCPVT-B - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 50.84 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 8.41 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 45.66 - mIoU(ms+flip): 46.48 - Config: configs/twins/twins_pcpvt-b_fpn_fpnhead_8xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141019-d396db72.pth -- Name: twins_pcpvt-b_uperhead_8xb2-160k_ade20k-512x512 - In Collection: UPerNet - Metadata: - backbone: PCPVT-B - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 83.06 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 6.46 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 47.91 - mIoU(ms+flip): 48.64 - Config: configs/twins/twins_pcpvt-b_uperhead_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k_20211130_141020-02094ea5.pth -- Name: twins_pcpvt-l_fpn_fpnhead_8xb4-80k_ade20k-512x512 - In Collection: FPN - Metadata: - backbone: PCPVT-L - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 69.83 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 10.78 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 45.94 - mIoU(ms+flip): 46.7 - Config: configs/twins/twins_pcpvt-l_fpn_fpnhead_8xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_105226-bc6d61dc.pth -- Name: twins_pcpvt-l_uperhead_8xb2-160k_ade20k-512x512 - In Collection: UPerNet - Metadata: - backbone: PCPVT-L - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 93.46 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 7.82 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 49.35 - mIoU(ms+flip): 50.08 - Config: configs/twins/twins_pcpvt-l_uperhead_8xb2-160k_ade20k-512x512.py - Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k_20211201_075053-c6095c07.pth -- Name: twins_svt-s_fpn_fpnhead_8xb4-80k_ade20k-512x512 - In Collection: FPN - Metadata: - backbone: SVT-S - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 33.57 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 5.8 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 44.47 - mIoU(ms+flip): 45.42 - Config: configs/twins/twins_svt-s_fpn_fpnhead_8xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141006-0a0d3317.pth -- Name: twins_svt-s_uperhead_8xb2-160k_ade20k-512x512 - In Collection: UPerNet - Metadata: - backbone: SVT-S - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 66.27 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 4.93 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 46.08 - mIoU(ms+flip): 46.96 - Config: configs/twins/twins_svt-s_uperhead_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k/twins_svt-s_uperhead_8x2_512x512_160k_ade20k_20211130_141005-e48a2d94.pth -- Name: twins_svt-b_fpn_fpnhead_8xb4-80k_ade20k-512x512 - In Collection: FPN - Metadata: - backbone: SVT-B - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 47.39 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 8.75 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 46.77 - mIoU(ms+flip): 47.47 - Config: configs/twins/twins_svt-b_fpn_fpnhead_8xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_113849-88b2907c.pth -- Name: twins_svt-b_uperhead_8xb2-160k_ade20k-512x512 - In Collection: UPerNet - Metadata: - backbone: SVT-B - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 78.99 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 6.77 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 48.04 - mIoU(ms+flip): 48.87 - Config: configs/twins/twins_svt-b_uperhead_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-b_uperhead_8x2_512x512_160k_ade20k/twins_svt-b_uperhead_8x2_512x512_160k_ade20k_20211202_040826-0943a1f1.pth -- Name: twins_svt-l_fpn_fpnhead_8xb4-80k_ade20k-512x512 - In Collection: FPN - Metadata: - backbone: SVT-L - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 56.18 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 11.2 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 46.55 - mIoU(ms+flip): 47.74 - Config: configs/twins/twins_svt-l_fpn_fpnhead_8xb4-80k_ade20k-512x512.py - Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141005-1d59bee2.pth -- Name: twins_pcpvt-l_uperhead_8xb2-160k_ade20k-512x512 - In Collection: UPerNet - Metadata: - backbone: SVT-L - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 93.2 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 8.41 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 49.65 - mIoU(ms+flip): 50.63 - Config: configs/twins/twins_pcpvt-l_uperhead_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-l_uperhead_8x2_512x512_160k_ade20k/twins_svt-l_uperhead_8x2_512x512_160k_ade20k_20211130_141005-3e2cae61.pth diff --git a/configs/unet/README.md b/configs/unet/README.md index f3dc261c22..7225fbbf68 100644 --- a/configs/unet/README.md +++ b/configs/unet/README.md @@ -1,6 +1,6 @@ # UNet -[U-Net: Convolutional Networks for Biomedical Image Segmentation](https://arxiv.org/abs/1505.04597) +> [U-Net: Convolutional Networks for Biomedical Image Segmentation](https://arxiv.org/abs/1505.04597) ## Introduction @@ -22,71 +22,71 @@ There is large consent that successful training of deep networks requires many t -## Citation - -```bibtex -@inproceedings{ronneberger2015u, - title={U-net: Convolutional networks for biomedical image segmentation}, - author={Ronneberger, Olaf and Fischer, Philipp and Brox, Thomas}, - booktitle={International Conference on Medical image computing and computer-assisted intervention}, - pages={234--241}, - year={2015}, - organization={Springer} -} -``` - ## Results and models ### Cityscapes -| Method | Backbone | Loss | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ---------- | ----------- | ------------- | --------- | ------: | -------- | -------------- | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| UNet + FCN | UNet-S5-D16 | Cross Entropy | 512x1024 | 160000 | 17.91 | 3.05 | 69.10 | 71.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes_20211210_145204-6860854e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes_20211210_145204.log.json) | +| Method | Backbone | Loss | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ---------- | ----------- | ------------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------ | 
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| UNet + FCN | UNet-S5-D16 | Cross Entropy | 512x1024 | 160000 | 17.91 | 3.05 | V100 | 69.10 | 71.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes_20211210_145204-6860854e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes_20211210_145204.log.json) | ### DRIVE -| Method | Backbone | Loss | Image Size | Crop Size | Stride | Lr schd | Mem (GB) | Inf time (fps) | mDice | Dice | config | download | -| ---------------- | ----------- | -------------------- | ---------- | --------- | -----: | ------- | -------- | -------------: | ----: | ----: | ---------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| UNet + FCN | UNet-S5-D16 | Cross Entropy | 584x565 | 64x64 | 42x42 | 40000 | 0.680 | - | 88.38 | 78.67 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/unet/unet-s5-d16_fcn_4xb4-40k_drive-64x64.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_64x64_40k_drive/fcn_unet_s5-d16_64x64_40k_drive_20201223_191051-5daf6d3b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/unet_s5-d16_64x64_40k_drive/unet_s5-d16_64x64_40k_drive-20201223_191051.log.json) | -| UNet + FCN | UNet-S5-D16 | Cross Entropy + Dice | 584x565 | 64x64 | 42x42 | 40000 | 0.582 | - | 88.71 | 79.32 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_drive-64x64.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201820-785de5c2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201820.log.json) | -| UNet + PSPNet | UNet-S5-D16 | Cross Entropy | 584x565 | 64x64 | 42x42 | 40000 | 0.599 | - | 88.35 | 78.62 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/unet/unet-s5-d16_pspnet_4xb4-40k_drive-64x64.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_64x64_40k_drive/pspnet_unet_s5-d16_64x64_40k_drive_20201227_181818-aac73387.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_64x64_40k_drive/pspnet_unet_s5-d16_64x64_40k_drive-20201227_181818.log.json) | -| UNet + 
PSPNet | UNet-S5-D16 | Cross Entropy + Dice | 584x565 | 64x64 | 42x42 | 40000 | 0.585 | - | 88.76 | 79.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_drive-64x64.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201821-22b3e3ba.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201821.log.json) | -| UNet + DeepLabV3 | UNet-S5-D16 | Cross Entropy | 584x565 | 64x64 | 42x42 | 40000 | 0.596 | - | 88.38 | 78.69 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/unet/unet-s5-d16_deeplabv3_4xb4-40k_drive-64x64.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_64x64_40k_drive/deeplabv3_unet_s5-d16_64x64_40k_drive_20201226_094047-0671ff20.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_64x64_40k_drive/deeplabv3_unet_s5-d16_64x64_40k_drive-20201226_094047.log.json) | -| UNet + DeepLabV3 | UNet-S5-D16 | Cross Entropy + Dice | 584x565 | 64x64 | 42x42 | 40000 | 0.582 | - | 88.84 | 79.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_drive-64x64.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201825-6bf0efd7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201825.log.json) | +| Method | Backbone | Loss | Image Size | Crop Size | Stride | Lr schd | Mem (GB) | Inf time (fps) | Device | mDice | Dice | config | download | +| ---------------- | ----------- | -------------------- | ---------- | --------- | -----: | ------- | -------- | -------------: | ------ | ----: | ----: | ------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| UNet + FCN | UNet-S5-D16 | Cross Entropy | 584x565 | 64x64 | 42x42 | 40000 | 0.680 | - | V100 | 88.38 | 78.67 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_fcn_4xb4-40k_drive-64x64.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_64x64_40k_drive/fcn_unet_s5-d16_64x64_40k_drive_20201223_191051-5daf6d3b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/unet_s5-d16_64x64_40k_drive/unet_s5-d16_64x64_40k_drive-20201223_191051.log.json) | +| UNet + FCN | UNet-S5-D16 | Cross Entropy + Dice | 584x565 | 64x64 | 42x42 | 40000 | 0.582 | - | V100 | 88.71 | 79.32 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_drive-64x64.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201820-785de5c2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201820.log.json) | +| UNet + PSPNet | UNet-S5-D16 | Cross Entropy | 584x565 | 64x64 | 42x42 | 40000 | 0.599 | - | V100 | 88.35 | 78.62 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_pspnet_4xb4-40k_drive-64x64.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_64x64_40k_drive/pspnet_unet_s5-d16_64x64_40k_drive_20201227_181818-aac73387.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_64x64_40k_drive/pspnet_unet_s5-d16_64x64_40k_drive-20201227_181818.log.json) | +| UNet + PSPNet | UNet-S5-D16 | Cross Entropy + Dice | 584x565 | 64x64 | 42x42 | 40000 | 0.585 | - | V100 | 88.76 | 79.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_drive-64x64.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201821-22b3e3ba.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201821.log.json) | +| UNet + DeepLabV3 | UNet-S5-D16 | Cross Entropy | 584x565 | 64x64 | 42x42 | 40000 | 0.596 | - | V100 | 88.38 | 78.69 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_deeplabv3_4xb4-40k_drive-64x64.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_64x64_40k_drive/deeplabv3_unet_s5-d16_64x64_40k_drive_20201226_094047-0671ff20.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_64x64_40k_drive/deeplabv3_unet_s5-d16_64x64_40k_drive-20201226_094047.log.json) | +| UNet + DeepLabV3 | UNet-S5-D16 | Cross Entropy + Dice | 584x565 | 64x64 | 42x42 | 40000 | 0.582 | - | V100 | 88.84 | 79.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_drive-64x64.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201825-6bf0efd7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201825.log.json) | ### STARE -| Method | Backbone | Loss | Image Size | Crop Size | Stride | Lr schd | Mem (GB) | Inf time (fps) | mDice | Dice | config | download | -| ---------------- | ----------- | -------------------- | ---------- | --------- | -----: | ------- | -------- | -------------: | ----: | ----: | ------------------------------------------------------------------------------------------------------------------------------------------------ | 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| UNet + FCN | UNet-S5-D16 | Cross Entropy | 605x700 | 128x128 | 85x85 | 40000 | 0.968 | - | 89.78 | 81.02 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/unet/unet-s5-d16_fcn_4xb4-40k_stare-128x128.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_128x128_40k_stare/fcn_unet_s5-d16_128x128_40k_stare_20201223_191051-7d77e78b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/unet_s5-d16_128x128_40k_stare/unet_s5-d16_128x128_40k_stare-20201223_191051.log.json) | -| UNet + FCN | UNet-S5-D16 | Cross Entropy + Dice | 605x700 | 128x128 | 85x85 | 40000 | 0.986 | - | 90.65 | 82.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_stare-128x128.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201821-f75705a9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201821.log.json) | -| UNet + PSPNet | UNet-S5-D16 | Cross Entropy | 605x700 | 128x128 | 85x85 | 40000 | 0.982 | - | 89.89 | 81.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/unet/unet-s5-d16_pspnet_4xb4-40k_stare-128x128.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_stare/pspnet_unet_s5-d16_128x128_40k_stare_20201227_181818-3c2923c4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_stare/pspnet_unet_s5-d16_128x128_40k_stare-20201227_181818.log.json) | -| UNet + PSPNet | UNet-S5-D16 | Cross Entropy + Dice | 605x700 | 128x128 | 85x85 | 40000 | 1.028 | - | 90.72 | 82.84 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_stare-128x128.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201823-f1063ef7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201823.log.json) | -| UNet + DeepLabV3 | UNet-S5-D16 | Cross Entropy | 605x700 | 128x128 | 85x85 | 40000 | 0.999 | - | 89.73 | 80.93 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/unet/unet-s5-d16_deeplabv3_4xb4-40k_stare-128x128.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_stare/deeplabv3_unet_s5-d16_128x128_40k_stare_20201226_094047-93dcb93c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_stare/deeplabv3_unet_s5-d16_128x128_40k_stare-20201226_094047.log.json) | -| UNet + DeepLabV3 | UNet-S5-D16 | Cross Entropy + Dice | 605x700 | 128x128 | 85x85 | 40000 | 1.010 | - | 90.65 | 
82.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_stare-128x128.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201825-21db614c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201825.log.json) | +| Method | Backbone | Loss | Image Size | Crop Size | Stride | Lr schd | Mem (GB) | Inf time (fps) | Device | mDice | Dice | config | download | +| ---------------- | ----------- | -------------------- | ---------- | --------- | -----: | ------- | -------- | -------------: | ------ | ----: | ----: | --------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| UNet + FCN | UNet-S5-D16 | Cross Entropy | 605x700 | 128x128 | 85x85 | 40000 | 0.968 | - | V100 | 89.78 | 81.02 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_fcn_4xb4-40k_stare-128x128.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_128x128_40k_stare/fcn_unet_s5-d16_128x128_40k_stare_20201223_191051-7d77e78b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/unet_s5-d16_128x128_40k_stare/unet_s5-d16_128x128_40k_stare-20201223_191051.log.json) | +| UNet + FCN | UNet-S5-D16 | Cross Entropy + Dice | 605x700 | 128x128 | 85x85 | 40000 | 0.986 | - | V100 | 90.65 | 82.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_stare-128x128.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201821-f75705a9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201821.log.json) | +| UNet + PSPNet | UNet-S5-D16 | Cross Entropy | 605x700 | 128x128 | 85x85 | 40000 | 0.982 | - | V100 | 89.89 | 81.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_pspnet_4xb4-40k_stare-128x128.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_stare/pspnet_unet_s5-d16_128x128_40k_stare_20201227_181818-3c2923c4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_stare/pspnet_unet_s5-d16_128x128_40k_stare-20201227_181818.log.json) | +| UNet + PSPNet | UNet-S5-D16 | Cross Entropy + Dice | 605x700 | 128x128 | 85x85 | 40000 | 1.028 | - | V100 | 90.72 | 82.84 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_stare-128x128.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201823-f1063ef7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201823.log.json) | +| UNet + DeepLabV3 | UNet-S5-D16 | Cross Entropy | 605x700 | 128x128 | 85x85 | 40000 | 0.999 | - | V100 | 89.73 | 80.93 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_deeplabv3_4xb4-40k_stare-128x128.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_stare/deeplabv3_unet_s5-d16_128x128_40k_stare_20201226_094047-93dcb93c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_stare/deeplabv3_unet_s5-d16_128x128_40k_stare-20201226_094047.log.json) | +| UNet + DeepLabV3 | UNet-S5-D16 | Cross Entropy + Dice | 605x700 | 128x128 | 85x85 | 40000 | 1.010 | - | V100 | 90.65 | 82.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_stare-128x128.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201825-21db614c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201825.log.json) | ### CHASE_DB1 -| Method | Backbone | Loss | Image Size | Crop Size | Stride | Lr schd | Mem (GB) | Inf time (fps) | mDice | Dice | config | download | -| ---------------- | ----------- | -------------------- | ---------- | --------- | -----: | ------- | -------- | -------------: | ----: | ----: | ---------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| UNet + FCN | UNet-S5-D16 | Cross Entropy | 960x999 | 128x128 | 85x85 | 40000 | 0.968 | - | 89.46 | 80.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/unet/unet-s5-d16_fcn_4xb4-40k_chase-db1-128x128.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_128x128_40k_chase_db1/fcn_unet_s5-d16_128x128_40k_chase_db1_20201223_191051-11543527.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/unet_s5-d16_128x128_40k_chase_db1/unet_s5-d16_128x128_40k_chase_db1-20201223_191051.log.json) | -| UNet + FCN | UNet-S5-D16 | Cross Entropy + Dice | 960x999 | 128x128 | 85x85 | 40000 | 0.986 | - | 89.52 | 80.40 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201821-1c4eb7cf.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201821.log.json) | -| UNet + PSPNet | UNet-S5-D16 | Cross Entropy | 960x999 | 128x128 | 85x85 | 40000 | 0.982 | - | 89.52 | 80.36 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/unet/unet-s5-d16_pspnet_4xb4-40k_chase-db1-128x128.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1/pspnet_unet_s5-d16_128x128_40k_chase_db1_20201227_181818-68d4e609.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1/pspnet_unet_s5-d16_128x128_40k_chase_db1-20201227_181818.log.json) | -| UNet + PSPNet | UNet-S5-D16 | Cross Entropy + Dice | 960x999 | 128x128 | 85x85 | 40000 | 1.028 | - | 89.45 | 80.28 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201823-c0802c4d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201823.log.json) | -| UNet + DeepLabV3 | UNet-S5-D16 | Cross Entropy | 960x999 | 128x128 | 85x85 | 40000 | 0.999 | - | 89.57 | 80.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/unet/unet_s5-d16_deeplabv3_4xb4-40k_chase-db1-128x128.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1/deeplabv3_unet_s5-d16_128x128_40k_chase_db1_20201226_094047-4c5aefa3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1/deeplabv3_unet_s5-d16_128x128_40k_chase_db1-20201226_094047.log.json) | -| UNet + DeepLabV3 | UNet-S5-D16 | Cross Entropy + Dice | 960x999 | 128x128 | 85x85 | 40000 | 1.010 | - | 89.49 | 80.37 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201825-4ef29df5.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201825.log.json) | +| Method | Backbone | Loss | Image Size | Crop Size | Stride | Lr schd | Mem (GB) | Inf time (fps) | Device | mDice | Dice | config | download | +| ---------------- | ----------- | -------------------- | ---------- | --------- | -----: | ------- | -------- | -------------: | ------ | ----: | ----: | ------------------------------------------------------------------------------------------------------------------------------------------------- | 
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| UNet + FCN | UNet-S5-D16 | Cross Entropy | 960x999 | 128x128 | 85x85 | 40000 | 0.968 | - | V100 | 89.46 | 80.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_fcn_4xb4-40k_chase-db1-128x128.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_128x128_40k_chase_db1/fcn_unet_s5-d16_128x128_40k_chase_db1_20201223_191051-11543527.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/unet_s5-d16_128x128_40k_chase_db1/unet_s5-d16_128x128_40k_chase_db1-20201223_191051.log.json) | +| UNet + FCN | UNet-S5-D16 | Cross Entropy + Dice | 960x999 | 128x128 | 85x85 | 40000 | 0.986 | - | V100 | 89.52 | 80.40 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201821-1c4eb7cf.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201821.log.json) | +| UNet + PSPNet | UNet-S5-D16 | Cross Entropy | 960x999 | 128x128 | 85x85 | 40000 | 0.982 | - | V100 | 89.52 | 80.36 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_pspnet_4xb4-40k_chase-db1-128x128.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1/pspnet_unet_s5-d16_128x128_40k_chase_db1_20201227_181818-68d4e609.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1/pspnet_unet_s5-d16_128x128_40k_chase_db1-20201227_181818.log.json) | +| UNet + PSPNet | UNet-S5-D16 | Cross Entropy + Dice | 960x999 | 128x128 | 85x85 | 40000 | 1.028 | - | V100 | 89.45 | 80.28 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201823-c0802c4d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201823.log.json) | +| UNet + DeepLabV3 | UNet-S5-D16 | Cross Entropy | 960x999 | 128x128 | 85x85 | 40000 | 0.999 | - | V100 | 89.57 | 80.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet_s5-d16_deeplabv3_4xb4-40k_chase-db1-128x128.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1/deeplabv3_unet_s5-d16_128x128_40k_chase_db1_20201226_094047-4c5aefa3.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1/deeplabv3_unet_s5-d16_128x128_40k_chase_db1-20201226_094047.log.json) | +| UNet + DeepLabV3 | UNet-S5-D16 | Cross Entropy + Dice | 960x999 | 128x128 | 85x85 | 40000 | 1.010 | - | V100 | 89.49 | 80.37 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201825-4ef29df5.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201825.log.json) | ### HRF -| Method | Backbone | Loss | Image Size | Crop Size | Stride | Lr schd | Mem (GB) | Inf time (fps) | mDice | Dice | config | download | -| ---------------- | ----------- | -------------------- | ---------- | --------- | ------: | ------- | -------- | -------------: | ----: | ----: | ---------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| UNet + FCN | UNet-S5-D16 | Cross Entropy | 2336x3504 | 256x256 | 170x170 | 40000 | 2.525 | - | 88.92 | 79.45 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/unet/unet-s5-d16_fcn_4xb4-40k_hrf-256x256.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_256x256_40k_hrf/fcn_unet_s5-d16_256x256_40k_hrf_20201223_173724-d89cf1ed.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/unet_s5-d16_256x256_40k_hrf/unet_s5-d16_256x256_40k_hrf-20201223_173724.log.json) | -| UNet + FCN | UNet-S5-D16 | Cross Entropy + Dice | 2336x3504 | 256x256 | 170x170 | 40000 | 2.623 | - | 89.64 | 80.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_201821-c314da8a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_201821.log.json) | -| UNet + PSPNet | UNet-S5-D16 | Cross Entropy | 2336x3504 | 256x256 | 170x170 | 40000 | 2.588 | - | 89.24 | 80.07 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/unet/unet-s5-d16_pspnet_4xb4-40k_hrf-256x256.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_256x256_40k_hrf/pspnet_unet_s5-d16_256x256_40k_hrf_20201227_181818-fdb7e29b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_256x256_40k_hrf/pspnet_unet_s5-d16_256x256_40k_hrf-20201227_181818.log.json) | -| UNet + PSPNet | UNet-S5-D16 | Cross Entropy + Dice | 2336x3504 | 256x256 | 
170x170 | 40000 | 2.798 | - | 89.69 | 80.96 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_201823-53d492fa.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_201823.log.json) | -| UNet + DeepLabV3 | UNet-S5-D16 | Cross Entropy | 2336x3504 | 256x256 | 170x170 | 40000 | 2.604 | - | 89.32 | 80.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/unet/unet-s5-d16_deeplabv3_4xb4-40k_hrf-256x256.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf/deeplabv3_unet_s5-d16_256x256_40k_hrf_20201226_094047-3a1fdf85.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf/deeplabv3_unet_s5-d16_256x256_40k_hrf-20201226_094047.log.json) | -| UNet + DeepLabV3 | UNet-S5-D16 | Cross Entropy + Dice | 2336x3504 | 256x256 | 170x170 | 40000 | 2.607 | - | 89.56 | 80.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_202032-59daf7a4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_202032.log.json) | +| Method | Backbone | Loss | Image Size | Crop Size | Stride | Lr schd | Mem (GB) | Inf time (fps) | Device | mDice | Dice | config | download | +| ---------------- | ----------- | -------------------- | ---------- | --------- | ------: | ------- | -------- | -------------: | ------ | ----: | ----: | ------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| UNet + FCN | UNet-S5-D16 | Cross Entropy | 2336x3504 | 256x256 | 170x170 | 40000 | 2.525 | - | V100 | 88.92 | 79.45 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_fcn_4xb4-40k_hrf-256x256.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_256x256_40k_hrf/fcn_unet_s5-d16_256x256_40k_hrf_20201223_173724-d89cf1ed.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/unet_s5-d16_256x256_40k_hrf/unet_s5-d16_256x256_40k_hrf-20201223_173724.log.json) | +| UNet + FCN | UNet-S5-D16 | Cross Entropy + Dice | 2336x3504 | 256x256 | 170x170 | 40000 | 2.623 | - | V100 | 89.64 | 80.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_201821-c314da8a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_201821.log.json) | +| UNet + PSPNet | UNet-S5-D16 | Cross Entropy | 2336x3504 | 256x256 | 170x170 | 40000 | 2.588 | - | V100 | 89.24 | 80.07 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_pspnet_4xb4-40k_hrf-256x256.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_256x256_40k_hrf/pspnet_unet_s5-d16_256x256_40k_hrf_20201227_181818-fdb7e29b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_256x256_40k_hrf/pspnet_unet_s5-d16_256x256_40k_hrf-20201227_181818.log.json) | +| UNet + PSPNet | UNet-S5-D16 | Cross Entropy + Dice | 2336x3504 | 256x256 | 170x170 | 40000 | 2.798 | - | V100 | 89.69 | 80.96 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_201823-53d492fa.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_201823.log.json) | +| UNet + DeepLabV3 | UNet-S5-D16 | Cross Entropy | 2336x3504 | 256x256 | 170x170 | 40000 | 2.604 | - | V100 | 89.32 | 80.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_deeplabv3_4xb4-40k_hrf-256x256.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf/deeplabv3_unet_s5-d16_256x256_40k_hrf_20201226_094047-3a1fdf85.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf/deeplabv3_unet_s5-d16_256x256_40k_hrf-20201226_094047.log.json) | +| UNet + DeepLabV3 | UNet-S5-D16 | Cross Entropy + Dice | 2336x3504 | 256x256 | 170x170 | 40000 | 2.607 | - | V100 | 89.56 | 80.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_202032-59daf7a4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_202032.log.json) | Note: - In the `DRIVE`, `STARE`, `CHASE_DB1`, and `HRF` datasets, `mDice` is the mean Dice of the background and vessel classes, while `Dice` is the Dice score of the vessel (foreground) class only.
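The distinction drawn in the note above is easy to state in code. The following is a minimal sketch (not part of this PR; the array shapes, the toy random masks, and the 0 = background / 1 = vessel class indexing are illustrative assumptions) showing how the `Dice` and `mDice` columns relate for a binary vessel-segmentation map:

```python
import numpy as np


def dice_score(pred: np.ndarray, gt: np.ndarray, cls: int) -> float:
    """Dice coefficient of a single class: 2|P ∩ G| / (|P| + |G|)."""
    p = pred == cls
    g = gt == cls
    denom = p.sum() + g.sum()
    return 2.0 * np.logical_and(p, g).sum() / denom if denom else 1.0


rng = np.random.default_rng(0)
pred = rng.integers(0, 2, size=(64, 64))  # toy prediction map
gt = rng.integers(0, 2, size=(64, 64))    # toy ground-truth map

dice_vessel = dice_score(pred, gt, cls=1)  # the `Dice` column: foreground only
mdice = np.mean([dice_score(pred, gt, c) for c in (0, 1)])  # the `mDice` column
print(f'Dice (vessel only): {dice_vessel:.2%}, mDice: {mdice:.2%}')
```

Because `mDice` averages the (typically easy) background class in, it sits above the vessel-only `Dice` in every row of the tables above.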
+ +## Citation + +```bibtex +@inproceedings{ronneberger2015u, + title={U-net: Convolutional networks for biomedical image segmentation}, + author={Ronneberger, Olaf and Fischer, Philipp and Brox, Thomas}, + booktitle={International Conference on Medical image computing and computer-assisted intervention}, + pages={234--241}, + year={2015}, + organization={Springer} +} +``` diff --git a/configs/unet/metafile.yaml b/configs/unet/metafile.yaml new file mode 100644 index 0000000000..1eafbc6d08 --- /dev/null +++ b/configs/unet/metafile.yaml @@ -0,0 +1,642 @@ +Collections: +- Name: UNet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - DRIVE + - STARE + - CHASE_DB1 + - HRF + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + README: configs/unet/README.md + Frameworks: + - PyTorch +Models: +- Name: unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 69.1 + mIoU(ms+flip): 71.05 + Config: configs/unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 17.91 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes_20211210_145204-6860854e.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes_20211210_145204.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_fcn_4xb4-40k_drive-64x64 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: DRIVE + Metrics: + mDice: 88.38 + Dice: 78.67 + Config: configs/unet/unet-s5-d16_fcn_4xb4-40k_drive-64x64.py + Metadata: + Training Data: DRIVE + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 0.68 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_64x64_40k_drive/fcn_unet_s5-d16_64x64_40k_drive_20201223_191051-5daf6d3b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/unet_s5-d16_64x64_40k_drive/unet_s5-d16_64x64_40k_drive-20201223_191051.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_drive-64x64 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: DRIVE + Metrics: + mDice: 88.71 + Dice: 79.32 + Config: configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_drive-64x64.py + Metadata: + Training Data: DRIVE + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 0.582 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201820-785de5c2.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201820.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_pspnet_4xb4-40k_drive-64x64 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: DRIVE + Metrics: + mDice: 88.35 + Dice: 78.62 + Config: configs/unet/unet-s5-d16_pspnet_4xb4-40k_drive-64x64.py + Metadata: + Training Data: DRIVE + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 0.599 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_64x64_40k_drive/pspnet_unet_s5-d16_64x64_40k_drive_20201227_181818-aac73387.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_64x64_40k_drive/pspnet_unet_s5-d16_64x64_40k_drive-20201227_181818.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_drive-64x64 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: DRIVE + Metrics: + mDice: 88.76 + Dice: 79.42 + Config: configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_drive-64x64.py + Metadata: + Training Data: DRIVE + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 0.585 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201821-22b3e3ba.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201821.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_deeplabv3_4xb4-40k_drive-64x64 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: DRIVE + Metrics: + mDice: 88.38 + Dice: 78.69 + Config: configs/unet/unet-s5-d16_deeplabv3_4xb4-40k_drive-64x64.py + Metadata: + Training Data: DRIVE + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 0.596 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_64x64_40k_drive/deeplabv3_unet_s5-d16_64x64_40k_drive_20201226_094047-0671ff20.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_64x64_40k_drive/deeplabv3_unet_s5-d16_64x64_40k_drive-20201226_094047.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_drive-64x64 + In Collection: 
UNet + Results: + Task: Semantic Segmentation + Dataset: DRIVE + Metrics: + mDice: 88.84 + Dice: 79.56 + Config: configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_drive-64x64.py + Metadata: + Training Data: DRIVE + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 0.582 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201825-6bf0efd7.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201825.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_fcn_4xb4-40k_stare-128x128 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: STARE + Metrics: + mDice: 89.78 + Dice: 81.02 + Config: configs/unet/unet-s5-d16_fcn_4xb4-40k_stare-128x128.py + Metadata: + Training Data: STARE + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 0.968 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_128x128_40k_stare/fcn_unet_s5-d16_128x128_40k_stare_20201223_191051-7d77e78b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/unet_s5-d16_128x128_40k_stare/unet_s5-d16_128x128_40k_stare-20201223_191051.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_stare-128x128 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: STARE + Metrics: + mDice: 90.65 + Dice: 82.7 + Config: configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_stare-128x128.py + Metadata: + Training Data: STARE + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 0.986 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201821-f75705a9.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201821.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_pspnet_4xb4-40k_stare-128x128 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: STARE + Metrics: + mDice: 89.89 + Dice: 81.22 + Config: configs/unet/unet-s5-d16_pspnet_4xb4-40k_stare-128x128.py + Metadata: + Training Data: STARE + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 0.982 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_stare/pspnet_unet_s5-d16_128x128_40k_stare_20201227_181818-3c2923c4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_stare/pspnet_unet_s5-d16_128x128_40k_stare-20201227_181818.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_stare-128x128 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: STARE + Metrics: + mDice: 90.72 + Dice: 82.84 + Config: configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_stare-128x128.py + Metadata: + Training Data: STARE + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 1.028 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201823-f1063ef7.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201823.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_deeplabv3_4xb4-40k_stare-128x128 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: STARE + Metrics: + mDice: 89.73 + Dice: 80.93 + Config: configs/unet/unet-s5-d16_deeplabv3_4xb4-40k_stare-128x128.py + Metadata: + Training Data: STARE + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 0.999 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_stare/deeplabv3_unet_s5-d16_128x128_40k_stare_20201226_094047-93dcb93c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_stare/deeplabv3_unet_s5-d16_128x128_40k_stare-20201226_094047.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_stare-128x128 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: STARE + Metrics: + mDice: 90.65 + Dice: 82.71 + Config: configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_stare-128x128.py + Metadata: + Training Data: STARE + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 1.01 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201825-21db614c.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201825.log.json + Paper: + Title: 'U-Net: 
Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_fcn_4xb4-40k_chase-db1-128x128 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: CHASE_DB1 + Metrics: + mDice: 89.46 + Dice: 80.24 + Config: configs/unet/unet-s5-d16_fcn_4xb4-40k_chase-db1-128x128.py + Metadata: + Training Data: CHASE_DB1 + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 0.968 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_128x128_40k_chase_db1/fcn_unet_s5-d16_128x128_40k_chase_db1_20201223_191051-11543527.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/unet_s5-d16_128x128_40k_chase_db1/unet_s5-d16_128x128_40k_chase_db1-20201223_191051.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: CHASE_DB1 + Metrics: + mDice: 89.52 + Dice: 80.4 + Config: configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128.py + Metadata: + Training Data: CHASE_DB1 + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 0.986 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201821-1c4eb7cf.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201821.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_pspnet_4xb4-40k_chase-db1-128x128 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: CHASE_DB1 + Metrics: + mDice: 89.52 + Dice: 80.36 + Config: configs/unet/unet-s5-d16_pspnet_4xb4-40k_chase-db1-128x128.py + Metadata: + Training Data: CHASE_DB1 + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 0.982 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1/pspnet_unet_s5-d16_128x128_40k_chase_db1_20201227_181818-68d4e609.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1/pspnet_unet_s5-d16_128x128_40k_chase_db1-20201227_181818.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: CHASE_DB1 + Metrics: + mDice: 89.45 + Dice: 80.28 + Config: 
configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128.py + Metadata: + Training Data: CHASE_DB1 + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 1.028 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201823-c0802c4d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201823.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet_s5-d16_deeplabv3_4xb4-40k_chase-db1-128x128 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: CHASE_DB1 + Metrics: + mDice: 89.57 + Dice: 80.47 + Config: configs/unet/unet_s5-d16_deeplabv3_4xb4-40k_chase-db1-128x128.py + Metadata: + Training Data: CHASE_DB1 + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 0.999 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1/deeplabv3_unet_s5-d16_128x128_40k_chase_db1_20201226_094047-4c5aefa3.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1/deeplabv3_unet_s5-d16_128x128_40k_chase_db1-20201226_094047.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: CHASE_DB1 + Metrics: + mDice: 89.49 + Dice: 80.37 + Config: configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128.py + Metadata: + Training Data: CHASE_DB1 + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 1.01 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201825-4ef29df5.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201825.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_fcn_4xb4-40k_hrf-256x256 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: HRF + Metrics: + mDice: 88.92 + Dice: 79.45 + Config: configs/unet/unet-s5-d16_fcn_4xb4-40k_hrf-256x256.py + Metadata: + Training Data: HRF + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 2.525 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_256x256_40k_hrf/fcn_unet_s5-d16_256x256_40k_hrf_20201223_173724-d89cf1ed.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/unet_s5-d16_256x256_40k_hrf/unet_s5-d16_256x256_40k_hrf-20201223_173724.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: HRF + Metrics: + mDice: 89.64 + Dice: 80.87 + Config: configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256.py + Metadata: + Training Data: HRF + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - FCN + Training Resources: 4x V100 GPUS + Memory (GB): 2.623 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_201821-c314da8a.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_201821.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_pspnet_4xb4-40k_hrf-256x256 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: HRF + Metrics: + mDice: 89.24 + Dice: 80.07 + Config: configs/unet/unet-s5-d16_pspnet_4xb4-40k_hrf-256x256.py + Metadata: + Training Data: HRF + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 2.588 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_256x256_40k_hrf/pspnet_unet_s5-d16_256x256_40k_hrf_20201227_181818-fdb7e29b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_256x256_40k_hrf/pspnet_unet_s5-d16_256x256_40k_hrf-20201227_181818.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: HRF + Metrics: + mDice: 89.69 + Dice: 80.96 + Config: configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256.py + Metadata: + Training Data: HRF + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - PSPNet + Training Resources: 4x V100 GPUS + Memory (GB): 2.798 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_201823-53d492fa.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_201823.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: 
https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_deeplabv3_4xb4-40k_hrf-256x256 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: HRF + Metrics: + mDice: 89.32 + Dice: 80.21 + Config: configs/unet/unet-s5-d16_deeplabv3_4xb4-40k_hrf-256x256.py + Metadata: + Training Data: HRF + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 2.604 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf/deeplabv3_unet_s5-d16_256x256_40k_hrf_20201226_094047-3a1fdf85.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf/deeplabv3_unet_s5-d16_256x256_40k_hrf-20201226_094047.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch +- Name: unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256 + In Collection: UNet + Results: + Task: Semantic Segmentation + Dataset: HRF + Metrics: + mDice: 89.56 + Dice: 80.71 + Config: configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256.py + Metadata: + Training Data: HRF + Batch Size: 16 + Architecture: + - UNet-S5-D16 + - UNet + - DeepLabV3 + Training Resources: 4x V100 GPUS + Memory (GB): 2.607 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_202032-59daf7a4.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_202032.log.json + Paper: + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + URL: https://arxiv.org/abs/1505.04597 + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Framework: PyTorch diff --git a/configs/unet/unet.yml b/configs/unet/unet.yml deleted file mode 100644 index 4a01ce33e2..0000000000 --- a/configs/unet/unet.yml +++ /dev/null @@ -1,377 +0,0 @@ -Collections: -- Name: UNet - Metadata: - Training Data: - - Cityscapes - - DRIVE - - STARE - - CHASE_DB1 - - HRF - Paper: - URL: https://arxiv.org/abs/1505.04597 - Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' - README: configs/unet/README.md - Code: - URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 - Version: v0.17.0 - Converted From: - Code: http://lmb.informatik.uni-freiburg.de/people/ronneber/u-net -Models: -- Name: unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024 - In Collection: UNet - Metadata: - backbone: UNet-S5-D16 - crop size: (512,1024) - lr schd: 160000 - inference time (ms/im): - - value: 327.87 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 17.91 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 69.1 - mIoU(ms+flip): 71.05 - Config: configs/unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes_20211210_145204-6860854e.pth -- 
Name: unet-s5-d16_fcn_4xb4-40k_drive-64x64 - In Collection: UNet - Metadata: - backbone: UNet-S5-D16 - crop size: (64,64) - lr schd: 40000 - Training Memory (GB): 0.68 - Results: - - Task: Semantic Segmentation - Dataset: DRIVE - Metrics: - Dice: 78.67 - Config: configs/unet/unet-s5-d16_fcn_4xb4-40k_drive-64x64.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_64x64_40k_drive/fcn_unet_s5-d16_64x64_40k_drive_20201223_191051-5daf6d3b.pth -- Name: unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_drive-64x64 - In Collection: UNet - Metadata: - backbone: UNet-S5-D16 - crop size: (64,64) - lr schd: 40000 - Training Memory (GB): 0.582 - Results: - - Task: Semantic Segmentation - Dataset: DRIVE - Metrics: - Dice: 79.32 - Config: configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_drive-64x64.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201820-785de5c2.pth -- Name: unet-s5-d16_pspnet_4xb4-40k_drive-64x64 - In Collection: UNet - Metadata: - backbone: UNet-S5-D16 - crop size: (64,64) - lr schd: 40000 - Training Memory (GB): 0.599 - Results: - - Task: Semantic Segmentation - Dataset: DRIVE - Metrics: - Dice: 78.62 - Config: configs/unet/unet-s5-d16_pspnet_4xb4-40k_drive-64x64.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_64x64_40k_drive/pspnet_unet_s5-d16_64x64_40k_drive_20201227_181818-aac73387.pth -- Name: unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_drive-64x64 - In Collection: UNet - Metadata: - backbone: UNet-S5-D16 - crop size: (64,64) - lr schd: 40000 - Training Memory (GB): 0.585 - Results: - - Task: Semantic Segmentation - Dataset: DRIVE - Metrics: - Dice: 79.42 - Config: configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_drive-64x64.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201821-22b3e3ba.pth -- Name: unet-s5-d16_deeplabv3_4xb4-40k_drive-64x64 - In Collection: UNet - Metadata: - backbone: UNet-S5-D16 - crop size: (64,64) - lr schd: 40000 - Training Memory (GB): 0.596 - Results: - - Task: Semantic Segmentation - Dataset: DRIVE - Metrics: - Dice: 78.69 - Config: configs/unet/unet-s5-d16_deeplabv3_4xb4-40k_drive-64x64.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_64x64_40k_drive/deeplabv3_unet_s5-d16_64x64_40k_drive_20201226_094047-0671ff20.pth -- Name: unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_drive-64x64 - In Collection: UNet - Metadata: - backbone: UNet-S5-D16 - crop size: (64,64) - lr schd: 40000 - Training Memory (GB): 0.582 - Results: - - Task: Semantic Segmentation - Dataset: DRIVE - Metrics: - Dice: 79.56 - Config: configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_drive-64x64.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201825-6bf0efd7.pth -- Name: unet-s5-d16_fcn_4xb4-40k_stare-128x128 - In Collection: UNet - Metadata: - backbone: UNet-S5-D16 - crop size: (128,128) - lr schd: 40000 - Training Memory (GB): 0.968 - Results: - - Task: Semantic Segmentation - Dataset: STARE - Metrics: - Dice: 81.02 - Config: configs/unet/unet-s5-d16_fcn_4xb4-40k_stare-128x128.py - Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_128x128_40k_stare/fcn_unet_s5-d16_128x128_40k_stare_20201223_191051-7d77e78b.pth -- Name: unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_stare-128x128 - In Collection: UNet - Metadata: - backbone: UNet-S5-D16 - crop size: (128,128) - lr schd: 40000 - Training Memory (GB): 0.986 - Results: - - Task: Semantic Segmentation - Dataset: STARE - Metrics: - Dice: 82.7 - Config: configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_stare-128x128.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201821-f75705a9.pth -- Name: unet-s5-d16_pspnet_4xb4-40k_stare-128x128 - In Collection: UNet - Metadata: - backbone: UNet-S5-D16 - crop size: (128,128) - lr schd: 40000 - Training Memory (GB): 0.982 - Results: - - Task: Semantic Segmentation - Dataset: STARE - Metrics: - Dice: 81.22 - Config: configs/unet/unet-s5-d16_pspnet_4xb4-40k_stare-128x128.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_stare/pspnet_unet_s5-d16_128x128_40k_stare_20201227_181818-3c2923c4.pth -- Name: unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_stare-128x128 - In Collection: UNet - Metadata: - backbone: UNet-S5-D16 - crop size: (128,128) - lr schd: 40000 - Training Memory (GB): 1.028 - Results: - - Task: Semantic Segmentation - Dataset: STARE - Metrics: - Dice: 82.84 - Config: configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_stare-128x128.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201823-f1063ef7.pth -- Name: unet-s5-d16_deeplabv3_4xb4-40k_stare-128x128 - In Collection: UNet - Metadata: - backbone: UNet-S5-D16 - crop size: (128,128) - lr schd: 40000 - Training Memory (GB): 0.999 - Results: - - Task: Semantic Segmentation - Dataset: STARE - Metrics: - Dice: 80.93 - Config: configs/unet/unet-s5-d16_deeplabv3_4xb4-40k_stare-128x128.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_stare/deeplabv3_unet_s5-d16_128x128_40k_stare_20201226_094047-93dcb93c.pth -- Name: unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_stare-128x128 - In Collection: UNet - Metadata: - backbone: UNet-S5-D16 - crop size: (128,128) - lr schd: 40000 - Training Memory (GB): 1.01 - Results: - - Task: Semantic Segmentation - Dataset: STARE - Metrics: - Dice: 82.71 - Config: configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_stare-128x128.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201825-21db614c.pth -- Name: unet-s5-d16_fcn_4xb4-40k_chase-db1-128x128 - In Collection: UNet - Metadata: - backbone: UNet-S5-D16 - crop size: (128,128) - lr schd: 40000 - Training Memory (GB): 0.968 - Results: - - Task: Semantic Segmentation - Dataset: CHASE_DB1 - Metrics: - Dice: 80.24 - Config: configs/unet/unet-s5-d16_fcn_4xb4-40k_chase-db1-128x128.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_128x128_40k_chase_db1/fcn_unet_s5-d16_128x128_40k_chase_db1_20201223_191051-11543527.pth -- Name: unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128 - In Collection: UNet - Metadata: - backbone: UNet-S5-D16 - crop size: (128,128) - lr schd: 40000 - Training 
Memory (GB): 0.986 - Results: - - Task: Semantic Segmentation - Dataset: CHASE_DB1 - Metrics: - Dice: 80.4 - Config: configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201821-1c4eb7cf.pth -- Name: unet-s5-d16_pspnet_4xb4-40k_chase-db1-128x128 - In Collection: UNet - Metadata: - backbone: UNet-S5-D16 - crop size: (128,128) - lr schd: 40000 - Training Memory (GB): 0.982 - Results: - - Task: Semantic Segmentation - Dataset: CHASE_DB1 - Metrics: - Dice: 80.36 - Config: configs/unet/unet-s5-d16_pspnet_4xb4-40k_chase-db1-128x128.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1/pspnet_unet_s5-d16_128x128_40k_chase_db1_20201227_181818-68d4e609.pth -- Name: unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128 - In Collection: UNet - Metadata: - backbone: UNet-S5-D16 - crop size: (128,128) - lr schd: 40000 - Training Memory (GB): 1.028 - Results: - - Task: Semantic Segmentation - Dataset: CHASE_DB1 - Metrics: - Dice: 80.28 - Config: configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201823-c0802c4d.pth -- Name: unet_s5-d16_deeplabv3_4xb4-40k_chase-db1-128x128 - In Collection: UNet - Metadata: - backbone: UNet-S5-D16 - crop size: (128,128) - lr schd: 40000 - Training Memory (GB): 0.999 - Results: - - Task: Semantic Segmentation - Dataset: CHASE_DB1 - Metrics: - Dice: 80.47 - Config: configs/unet/unet_s5-d16_deeplabv3_4xb4-40k_chase-db1-128x128.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1/deeplabv3_unet_s5-d16_128x128_40k_chase_db1_20201226_094047-4c5aefa3.pth -- Name: unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128 - In Collection: UNet - Metadata: - backbone: UNet-S5-D16 - crop size: (128,128) - lr schd: 40000 - Training Memory (GB): 1.01 - Results: - - Task: Semantic Segmentation - Dataset: CHASE_DB1 - Metrics: - Dice: 80.37 - Config: configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_chase-db1-128x128.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201825-4ef29df5.pth -- Name: unet-s5-d16_fcn_4xb4-40k_hrf-256x256 - In Collection: UNet - Metadata: - backbone: UNet-S5-D16 - crop size: (256,256) - lr schd: 40000 - Training Memory (GB): 2.525 - Results: - - Task: Semantic Segmentation - Dataset: HRF - Metrics: - Dice: 79.45 - Config: configs/unet/unet-s5-d16_fcn_4xb4-40k_hrf-256x256.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_256x256_40k_hrf/fcn_unet_s5-d16_256x256_40k_hrf_20201223_173724-d89cf1ed.pth -- Name: unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256 - In Collection: UNet - Metadata: - backbone: UNet-S5-D16 - crop size: (256,256) - lr schd: 40000 - Training Memory (GB): 2.623 - Results: - - Task: Semantic Segmentation - Dataset: HRF - Metrics: - Dice: 80.87 - Config: configs/unet/unet-s5-d16_fcn_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256.py - Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_201821-c314da8a.pth -- Name: unet-s5-d16_pspnet_4xb4-40k_hrf-256x256 - In Collection: UNet - Metadata: - backbone: UNet-S5-D16 - crop size: (256,256) - lr schd: 40000 - Training Memory (GB): 2.588 - Results: - - Task: Semantic Segmentation - Dataset: HRF - Metrics: - Dice: 80.07 - Config: configs/unet/unet-s5-d16_pspnet_4xb4-40k_hrf-256x256.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_256x256_40k_hrf/pspnet_unet_s5-d16_256x256_40k_hrf_20201227_181818-fdb7e29b.pth -- Name: unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256 - In Collection: UNet - Metadata: - backbone: UNet-S5-D16 - crop size: (256,256) - lr schd: 40000 - Training Memory (GB): 2.798 - Results: - - Task: Semantic Segmentation - Dataset: HRF - Metrics: - Dice: 80.96 - Config: configs/unet/unet-s5-d16_pspnet_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_201823-53d492fa.pth -- Name: unet-s5-d16_deeplabv3_4xb4-40k_hrf-256x256 - In Collection: UNet - Metadata: - backbone: UNet-S5-D16 - crop size: (256,256) - lr schd: 40000 - Training Memory (GB): 2.604 - Results: - - Task: Semantic Segmentation - Dataset: HRF - Metrics: - Dice: 80.21 - Config: configs/unet/unet-s5-d16_deeplabv3_4xb4-40k_hrf-256x256.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf/deeplabv3_unet_s5-d16_256x256_40k_hrf_20201226_094047-3a1fdf85.pth -- Name: unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256 - In Collection: UNet - Metadata: - backbone: UNet-S5-D16 - crop size: (256,256) - lr schd: 40000 - Training Memory (GB): 2.607 - Results: - - Task: Semantic Segmentation - Dataset: HRF - Metrics: - Dice: 80.71 - Config: configs/unet/unet-s5-d16_deeplabv3_4xb4-ce-1.0-dice-3.0-40k_hrf-256x256.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_202032-59daf7a4.pth diff --git a/configs/upernet/README.md b/configs/upernet/README.md index e4a5ee4381..c2babbd2a7 100644 --- a/configs/upernet/README.md +++ b/configs/upernet/README.md @@ -1,6 +1,6 @@ # UPerNet -[Unified Perceptual Parsing for Scene Understanding](https://arxiv.org/pdf/1807.10221.pdf) +> [Unified Perceptual Parsing for Scene Understanding](https://arxiv.org/pdf/1807.10221.pdf) ## Introduction @@ -22,6 +22,39 @@ Humans recognize the visual world at multiple levels: we effortlessly categorize +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| UPerNet | 
R-50 | 512x1024 | 40000 | 6.4 | 4.25 | V100 | 77.10 | 78.37 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet/upernet_r50_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_40k_cityscapes/upernet_r50_512x1024_40k_cityscapes_20200605_094827-aa54cb54.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_40k_cityscapes/upernet_r50_512x1024_40k_cityscapes_20200605_094827.log.json) | +| UPerNet | R-101 | 512x1024 | 40000 | 7.4 | 3.79 | V100 | 78.69 | 80.11 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet/upernet_r101_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_40k_cityscapes/upernet_r101_512x1024_40k_cityscapes_20200605_094933-ebce3b10.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_40k_cityscapes/upernet_r101_512x1024_40k_cityscapes_20200605_094933.log.json) | +| UPerNet | R-50 | 769x769 | 40000 | 7.2 | 1.76 | V100 | 77.98 | 79.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet/upernet_r50_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_40k_cityscapes/upernet_r50_769x769_40k_cityscapes_20200530_033048-92d21539.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_40k_cityscapes/upernet_r50_769x769_40k_cityscapes_20200530_033048.log.json) | +| UPerNet | R-101 | 769x769 | 40000 | 8.4 | 1.56 | V100 | 79.03 | 80.77 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet/upernet_r101_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_40k_cityscapes/upernet_r101_769x769_40k_cityscapes_20200530_040819-83c95d01.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_40k_cityscapes/upernet_r101_769x769_40k_cityscapes_20200530_040819.log.json) | +| UPerNet | R-50 | 512x1024 | 80000 | - | - | V100 | 78.19 | 79.19 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet/upernet_r50_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_80k_cityscapes/upernet_r50_512x1024_80k_cityscapes_20200607_052207-848beca8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_80k_cityscapes/upernet_r50_512x1024_80k_cityscapes_20200607_052207.log.json) | +| UPerNet | R-101 | 512x1024 | 80000 | - | - | V100 | 79.40 | 80.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet/upernet_r101_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_80k_cityscapes/upernet_r101_512x1024_80k_cityscapes_20200607_002403-f05f2345.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_80k_cityscapes/upernet_r101_512x1024_80k_cityscapes_20200607_002403.log.json) | +| UPerNet | R-50 | 769x769 | 80000 | - | - | V100 | 79.39 | 80.92 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet/upernet_r50_4xb2-80k_cityscapes-769x769.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_80k_cityscapes/upernet_r50_769x769_80k_cityscapes_20200607_005107-82ae7d15.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_80k_cityscapes/upernet_r50_769x769_80k_cityscapes_20200607_005107.log.json) | +| UPerNet | R-101 | 769x769 | 80000 | - | - | V100 | 80.10 | 81.49 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet/upernet_r101_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_80k_cityscapes/upernet_r101_769x769_80k_cityscapes_20200607_001014-082fc334.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_80k_cityscapes/upernet_r101_769x769_80k_cityscapes_20200607_001014.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| UPerNet | R-50 | 512x512 | 80000 | 8.1 | 23.40 | V100 | 40.70 | 41.81 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet/upernet_r50_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_80k_ade20k/upernet_r50_512x512_80k_ade20k_20200614_144127-ecc8377b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_80k_ade20k/upernet_r50_512x512_80k_ade20k_20200614_144127.log.json) | +| UPerNet | R-101 | 512x512 | 80000 | 9.1 | 20.34 | V100 | 42.91 | 43.96 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet/upernet_r101_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_80k_ade20k/upernet_r101_512x512_80k_ade20k_20200614_185117-32e4db94.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_80k_ade20k/upernet_r101_512x512_80k_ade20k_20200614_185117.log.json) | +| UPerNet | R-50 | 512x512 | 160000 | - | - | V100 | 42.05 | 42.78 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet/upernet_r50_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_160k_ade20k/upernet_r50_512x512_160k_ade20k_20200615_184328-8534de8d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_160k_ade20k/upernet_r50_512x512_160k_ade20k_20200615_184328.log.json) | +| UPerNet | R-101 | 512x512 | 160000 | - | - | V100 | 43.82 | 44.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet/upernet_r101_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_160k_ade20k/upernet_r101_512x512_160k_ade20k_20200615_161951-91b32684.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_160k_ade20k/upernet_r101_512x512_160k_ade20k_20200615_161951.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------- | -------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| UPerNet | R-50 | 512x512 | 20000 | 6.4 | 23.17 | V100 | 74.82 | 76.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet/upernet_r50_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_20k_voc12aug/upernet_r50_512x512_20k_voc12aug_20200617_165330-5b5890a7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_20k_voc12aug/upernet_r50_512x512_20k_voc12aug_20200617_165330.log.json) | +| UPerNet | R-101 | 512x512 | 20000 | 7.5 | 19.98 | V100 | 77.10 | 78.29 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet/upernet_r101_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_20k_voc12aug/upernet_r101_512x512_20k_voc12aug_20200617_165629-f14e7f27.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_20k_voc12aug/upernet_r101_512x512_20k_voc12aug_20200617_165629.log.json) | +| UPerNet | R-50 | 512x512 | 40000 | - | - | V100 | 75.92 | 77.44 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet/upernet_r50_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_40k_voc12aug/upernet_r50_512x512_40k_voc12aug_20200613_162257-ca9bcc6b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_40k_voc12aug/upernet_r50_512x512_40k_voc12aug_20200613_162257.log.json) | +| UPerNet | R-101 | 512x512 | 40000 | - | - | V100 | 77.43 | 78.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet/upernet_r101_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_40k_voc12aug/upernet_r101_512x512_40k_voc12aug_20200613_163549-e26476ac.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_40k_voc12aug/upernet_r101_512x512_40k_voc12aug_20200613_163549.log.json) | + ## Citation ```bibtex @@ -33,36 +66,3 @@ Humans recognize the visual world at multiple levels: we effortlessly categorize year={2018} } ``` - -## Results and models - -### Cityscapes - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------------- | 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| UPerNet | R-50 | 512x1024 | 40000 | 6.4 | 4.25 | 77.10 | 78.37 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/upernet/upernet_r50_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_40k_cityscapes/upernet_r50_512x1024_40k_cityscapes_20200605_094827-aa54cb54.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_40k_cityscapes/upernet_r50_512x1024_40k_cityscapes_20200605_094827.log.json) | -| UPerNet | R-101 | 512x1024 | 40000 | 7.4 | 3.79 | 78.69 | 80.11 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/upernet/upernet_r101_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_40k_cityscapes/upernet_r101_512x1024_40k_cityscapes_20200605_094933-ebce3b10.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_40k_cityscapes/upernet_r101_512x1024_40k_cityscapes_20200605_094933.log.json) | -| UPerNet | R-50 | 769x769 | 40000 | 7.2 | 1.76 | 77.98 | 79.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/upernet/upernet_r50_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_40k_cityscapes/upernet_r50_769x769_40k_cityscapes_20200530_033048-92d21539.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_40k_cityscapes/upernet_r50_769x769_40k_cityscapes_20200530_033048.log.json) | -| UPerNet | R-101 | 769x769 | 40000 | 8.4 | 1.56 | 79.03 | 80.77 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/upernet/upernet_r101_4xb2-40k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_40k_cityscapes/upernet_r101_769x769_40k_cityscapes_20200530_040819-83c95d01.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_40k_cityscapes/upernet_r101_769x769_40k_cityscapes_20200530_040819.log.json) | -| UPerNet | R-50 | 512x1024 | 80000 | - | - | 78.19 | 79.19 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/upernet/upernet_r50_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_80k_cityscapes/upernet_r50_512x1024_80k_cityscapes_20200607_052207-848beca8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_80k_cityscapes/upernet_r50_512x1024_80k_cityscapes_20200607_052207.log.json) | -| UPerNet | R-101 | 512x1024 | 80000 | - | - | 79.40 | 80.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/upernet/upernet_r101_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_80k_cityscapes/upernet_r101_512x1024_80k_cityscapes_20200607_002403-f05f2345.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_80k_cityscapes/upernet_r101_512x1024_80k_cityscapes_20200607_002403.log.json) | -| 
UPerNet | R-50 | 769x769 | 80000 | - | - | 79.39 | 80.92 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/upernet/upernet_r50_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_80k_cityscapes/upernet_r50_769x769_80k_cityscapes_20200607_005107-82ae7d15.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_80k_cityscapes/upernet_r50_769x769_80k_cityscapes_20200607_005107.log.json) | -| UPerNet | R-101 | 769x769 | 80000 | - | - | 80.10 | 81.49 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/upernet/upernet_r101_4xb2-80k_cityscapes-769x769.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_80k_cityscapes/upernet_r101_769x769_80k_cityscapes_20200607_001014-082fc334.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_80k_cityscapes/upernet_r101_769x769_80k_cityscapes_20200607_001014.log.json) | - -### ADE20K - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| UPerNet | R-50 | 512x512 | 80000 | 8.1 | 23.40 | 40.70 | 41.81 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/upernet/upernet_r50_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_80k_ade20k/upernet_r50_512x512_80k_ade20k_20200614_144127-ecc8377b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_80k_ade20k/upernet_r50_512x512_80k_ade20k_20200614_144127.log.json) | -| UPerNet | R-101 | 512x512 | 80000 | 9.1 | 20.34 | 42.91 | 43.96 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/upernet/upernet_r101_4xb4-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_80k_ade20k/upernet_r101_512x512_80k_ade20k_20200614_185117-32e4db94.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_80k_ade20k/upernet_r101_512x512_80k_ade20k_20200614_185117.log.json) | -| UPerNet | R-50 | 512x512 | 160000 | - | - | 42.05 | 42.78 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/upernet/upernet_r50_4xb4-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_160k_ade20k/upernet_r50_512x512_160k_ade20k_20200615_184328-8534de8d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_160k_ade20k/upernet_r50_512x512_160k_ade20k_20200615_184328.log.json) | -| UPerNet | R-101 | 512x512 | 160000 | - | - | 43.82 | 44.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/upernet/upernet_r101_4xb4-160k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_160k_ade20k/upernet_r101_512x512_160k_ade20k_20200615_161951-91b32684.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_160k_ade20k/upernet_r101_512x512_160k_ade20k_20200615_161951.log.json) | - -### Pascal VOC 2012 + Aug - -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| UPerNet | R-50 | 512x512 | 20000 | 6.4 | 23.17 | 74.82 | 76.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/upernet/upernet_r50_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_20k_voc12aug/upernet_r50_512x512_20k_voc12aug_20200617_165330-5b5890a7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_20k_voc12aug/upernet_r50_512x512_20k_voc12aug_20200617_165330.log.json) | -| UPerNet | R-101 | 512x512 | 20000 | 7.5 | 19.98 | 77.10 | 78.29 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/upernet/upernet_r101_4xb4-20k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_20k_voc12aug/upernet_r101_512x512_20k_voc12aug_20200617_165629-f14e7f27.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_20k_voc12aug/upernet_r101_512x512_20k_voc12aug_20200617_165629.log.json) | -| UPerNet | R-50 | 512x512 | 40000 | - | - | 75.92 | 77.44 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/upernet/upernet_r50_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_40k_voc12aug/upernet_r50_512x512_40k_voc12aug_20200613_162257-ca9bcc6b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_40k_voc12aug/upernet_r50_512x512_40k_voc12aug_20200613_162257.log.json) | -| UPerNet | R-101 | 512x512 | 40000 | - | - | 77.43 | 78.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/upernet/upernet_r101_4xb4-40k_voc12aug-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_40k_voc12aug/upernet_r101_512x512_40k_voc12aug_20200613_163549-e26476ac.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_40k_voc12aug/upernet_r101_512x512_40k_voc12aug_20200613_163549.log.json) | diff --git a/configs/upernet/metafile.yaml b/configs/upernet/metafile.yaml new file mode 100644 index 0000000000..f6ad8187f2 --- /dev/null +++ b/configs/upernet/metafile.yaml @@ -0,0 +1,391 @@ +Collections: +- Name: UPerNet + License: Apache License 2.0 + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + Paper: + Title: Unified Perceptual Parsing for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf 
+ README: configs/upernet/README.md + Frameworks: + - PyTorch +Models: +- Name: upernet_r50_4xb2-40k_cityscapes-512x1024 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.1 + mIoU(ms+flip): 78.37 + Config: configs/upernet/upernet_r50_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50 + - UPerNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_40k_cityscapes/upernet_r50_512x1024_40k_cityscapes_20200605_094827-aa54cb54.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_40k_cityscapes/upernet_r50_512x1024_40k_cityscapes_20200605_094827.log.json + Paper: + Title: Unified Perceptual Parsing for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 + Framework: PyTorch +- Name: upernet_r101_4xb2-40k_cityscapes-512x1024 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.69 + mIoU(ms+flip): 80.11 + Config: configs/upernet/upernet_r101_4xb2-40k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101 + - UPerNet + Training Resources: 4x V100 GPUS + Memory (GB): 7.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_40k_cityscapes/upernet_r101_512x1024_40k_cityscapes_20200605_094933-ebce3b10.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_40k_cityscapes/upernet_r101_512x1024_40k_cityscapes_20200605_094933.log.json + Paper: + Title: Unified Perceptual Parsing for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 + Framework: PyTorch +- Name: upernet_r50_4xb2-40k_cityscapes-769x769 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.98 + mIoU(ms+flip): 79.7 + Config: configs/upernet/upernet_r50_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50 + - UPerNet + Training Resources: 4x V100 GPUS + Memory (GB): 7.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_40k_cityscapes/upernet_r50_769x769_40k_cityscapes_20200530_033048-92d21539.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_40k_cityscapes/upernet_r50_769x769_40k_cityscapes_20200530_033048.log.json + Paper: + Title: Unified Perceptual Parsing for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 + Framework: PyTorch +- Name: upernet_r101_4xb2-40k_cityscapes-769x769 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.03 + mIoU(ms+flip): 80.77 + Config: configs/upernet/upernet_r101_4xb2-40k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101 + - UPerNet + Training Resources: 4x V100 GPUS + Memory (GB): 8.4 + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_40k_cityscapes/upernet_r101_769x769_40k_cityscapes_20200530_040819-83c95d01.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_40k_cityscapes/upernet_r101_769x769_40k_cityscapes_20200530_040819.log.json + Paper: + Title: Unified Perceptual Parsing for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 + Framework: PyTorch +- Name: upernet_r50_4xb2-80k_cityscapes-512x1024 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.19 + mIoU(ms+flip): 79.19 + Config: configs/upernet/upernet_r50_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50 + - UPerNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_80k_cityscapes/upernet_r50_512x1024_80k_cityscapes_20200607_052207-848beca8.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_80k_cityscapes/upernet_r50_512x1024_80k_cityscapes_20200607_052207.log.json + Paper: + Title: Unified Perceptual Parsing for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 + Framework: PyTorch +- Name: upernet_r101_4xb2-80k_cityscapes-512x1024 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.4 + mIoU(ms+flip): 80.46 + Config: configs/upernet/upernet_r101_4xb2-80k_cityscapes-512x1024.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101 + - UPerNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_80k_cityscapes/upernet_r101_512x1024_80k_cityscapes_20200607_002403-f05f2345.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_80k_cityscapes/upernet_r101_512x1024_80k_cityscapes_20200607_002403.log.json + Paper: + Title: Unified Perceptual Parsing for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 + Framework: PyTorch +- Name: upernet_r50_4xb2-80k_cityscapes-769x769 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.39 + mIoU(ms+flip): 80.92 + Config: configs/upernet/upernet_r50_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-50 + - UPerNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_80k_cityscapes/upernet_r50_769x769_80k_cityscapes_20200607_005107-82ae7d15.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_80k_cityscapes/upernet_r50_769x769_80k_cityscapes_20200607_005107.log.json + Paper: + Title: Unified Perceptual Parsing for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 + Framework: PyTorch +- Name: upernet_r101_4xb2-80k_cityscapes-769x769 + In Collection: 
UPerNet + Results: + Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.1 + mIoU(ms+flip): 81.49 + Config: configs/upernet/upernet_r101_4xb2-80k_cityscapes-769x769.py + Metadata: + Training Data: Cityscapes + Batch Size: 8 + Architecture: + - R-101 + - UPerNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_80k_cityscapes/upernet_r101_769x769_80k_cityscapes_20200607_001014-082fc334.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_80k_cityscapes/upernet_r101_769x769_80k_cityscapes_20200607_001014.log.json + Paper: + Title: Unified Perceptual Parsing for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 + Framework: PyTorch +- Name: upernet_r50_4xb4-80k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 40.7 + mIoU(ms+flip): 41.81 + Config: configs/upernet/upernet_r50_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50 + - UPerNet + Training Resources: 4x V100 GPUS + Memory (GB): 8.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_80k_ade20k/upernet_r50_512x512_80k_ade20k_20200614_144127-ecc8377b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_80k_ade20k/upernet_r50_512x512_80k_ade20k_20200614_144127.log.json + Paper: + Title: Unified Perceptual Parsing for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 + Framework: PyTorch +- Name: upernet_r101_4xb4-80k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.91 + mIoU(ms+flip): 43.96 + Config: configs/upernet/upernet_r101_4xb4-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101 + - UPerNet + Training Resources: 4x V100 GPUS + Memory (GB): 9.1 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_80k_ade20k/upernet_r101_512x512_80k_ade20k_20200614_185117-32e4db94.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_80k_ade20k/upernet_r101_512x512_80k_ade20k_20200614_185117.log.json + Paper: + Title: Unified Perceptual Parsing for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 + Framework: PyTorch +- Name: upernet_r50_4xb4-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.05 + mIoU(ms+flip): 42.78 + Config: configs/upernet/upernet_r50_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-50 + - UPerNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_160k_ade20k/upernet_r50_512x512_160k_ade20k_20200615_184328-8534de8d.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_160k_ade20k/upernet_r50_512x512_160k_ade20k_20200615_184328.log.json + Paper: + Title: Unified Perceptual Parsing 
for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 + Framework: PyTorch +- Name: upernet_r101_4xb4-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.82 + mIoU(ms+flip): 44.85 + Config: configs/upernet/upernet_r101_4xb4-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - R-101 + - UPerNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_160k_ade20k/upernet_r101_512x512_160k_ade20k_20200615_161951-91b32684.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_160k_ade20k/upernet_r101_512x512_160k_ade20k_20200615_161951.log.json + Paper: + Title: Unified Perceptual Parsing for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 + Framework: PyTorch +- Name: upernet_r50_4xb4-20k_voc12aug-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 74.82 + mIoU(ms+flip): 76.35 + Config: configs/upernet/upernet_r50_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50 + - UPerNet + Training Resources: 4x V100 GPUS + Memory (GB): 6.4 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_20k_voc12aug/upernet_r50_512x512_20k_voc12aug_20200617_165330-5b5890a7.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_20k_voc12aug/upernet_r50_512x512_20k_voc12aug_20200617_165330.log.json + Paper: + Title: Unified Perceptual Parsing for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 + Framework: PyTorch +- Name: upernet_r101_4xb4-20k_voc12aug-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.1 + mIoU(ms+flip): 78.29 + Config: configs/upernet/upernet_r101_4xb4-20k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101 + - UPerNet + Training Resources: 4x V100 GPUS + Memory (GB): 7.5 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_20k_voc12aug/upernet_r101_512x512_20k_voc12aug_20200617_165629-f14e7f27.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_20k_voc12aug/upernet_r101_512x512_20k_voc12aug_20200617_165629.log.json + Paper: + Title: Unified Perceptual Parsing for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 + Framework: PyTorch +- Name: upernet_r50_4xb4-40k_voc12aug-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 75.92 + mIoU(ms+flip): 77.44 + Config: configs/upernet/upernet_r50_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-50 + - UPerNet + Training Resources: 4x V100 GPUS + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_40k_voc12aug/upernet_r50_512x512_40k_voc12aug_20200613_162257-ca9bcc6b.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_40k_voc12aug/upernet_r50_512x512_40k_voc12aug_20200613_162257.log.json + Paper: + Title: Unified Perceptual Parsing for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 + Framework: PyTorch +- Name: upernet_r101_4xb4-40k_voc12aug-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.43 + mIoU(ms+flip): 78.56 + Config: configs/upernet/upernet_r101_4xb4-40k_voc12aug-512x512.py + Metadata: + Training Data: Pascal VOC 2012 + Aug + Batch Size: 16 + Architecture: + - R-101 + - UPerNet + Training Resources: 4x V100 GPUS + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_40k_voc12aug/upernet_r101_512x512_40k_voc12aug_20200613_163549-e26476ac.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_40k_voc12aug/upernet_r101_512x512_40k_voc12aug_20200613_163549.log.json + Paper: + Title: Unified Perceptual Parsing for Scene Understanding + URL: https://arxiv.org/pdf/1807.10221.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 + Framework: PyTorch diff --git a/configs/upernet/upernet.yml b/configs/upernet/upernet.yml deleted file mode 100644 index 6892fcf06a..0000000000 --- a/configs/upernet/upernet.yml +++ /dev/null @@ -1,305 +0,0 @@ -Collections: -- Name: UPerNet - Metadata: - Training Data: - - Cityscapes - - ADE20K - - Pascal VOC 2012 + Aug - Paper: - URL: https://arxiv.org/pdf/1807.10221.pdf - Title: Unified Perceptual Parsing for Scene Understanding - README: configs/upernet/README.md - Code: - URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 - Version: v0.17.0 - Converted From: - Code: https://github.com/CSAILVision/unifiedparsing -Models: -- Name: upernet_r50_4xb2-40k_cityscapes-512x1024 - In Collection: UPerNet - Metadata: - backbone: R-50 - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 235.29 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 6.4 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 77.1 - mIoU(ms+flip): 78.37 - Config: configs/upernet/upernet_r50_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_40k_cityscapes/upernet_r50_512x1024_40k_cityscapes_20200605_094827-aa54cb54.pth -- Name: upernet_r101_4xb2-40k_cityscapes-512x1024 - In Collection: UPerNet - Metadata: - backbone: R-101 - crop size: (512,1024) - lr schd: 40000 - inference time (ms/im): - - value: 263.85 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,1024) - Training Memory (GB): 7.4 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.69 - mIoU(ms+flip): 80.11 - Config: configs/upernet/upernet_r101_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_40k_cityscapes/upernet_r101_512x1024_40k_cityscapes_20200605_094933-ebce3b10.pth -- Name: 
upernet_r50_4xb2-40k_cityscapes-769x769 - In Collection: UPerNet - Metadata: - backbone: R-50 - crop size: (769,769) - lr schd: 40000 - inference time (ms/im): - - value: 568.18 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 7.2 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 77.98 - mIoU(ms+flip): 79.7 - Config: configs/upernet/upernet_r50_4xb2-40k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_40k_cityscapes/upernet_r50_769x769_40k_cityscapes_20200530_033048-92d21539.pth -- Name: upernet_r101_4xb2-40k_cityscapes-769x769 - In Collection: UPerNet - Metadata: - backbone: R-101 - crop size: (769,769) - lr schd: 40000 - inference time (ms/im): - - value: 641.03 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (769,769) - Training Memory (GB): 8.4 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.03 - mIoU(ms+flip): 80.77 - Config: configs/upernet/upernet_r101_4xb2-40k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_40k_cityscapes/upernet_r101_769x769_40k_cityscapes_20200530_040819-83c95d01.pth -- Name: upernet_r50_4xb2-80k_cityscapes-512x1024 - In Collection: UPerNet - Metadata: - backbone: R-50 - crop size: (512,1024) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 78.19 - mIoU(ms+flip): 79.19 - Config: configs/upernet/upernet_r50_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_80k_cityscapes/upernet_r50_512x1024_80k_cityscapes_20200607_052207-848beca8.pth -- Name: upernet_r101_4xb2-80k_cityscapes-512x1024 - In Collection: UPerNet - Metadata: - backbone: R-101 - crop size: (512,1024) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.4 - mIoU(ms+flip): 80.46 - Config: configs/upernet/upernet_r101_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_80k_cityscapes/upernet_r101_512x1024_80k_cityscapes_20200607_002403-f05f2345.pth -- Name: upernet_r50_4xb2-80k_cityscapes-769x769 - In Collection: UPerNet - Metadata: - backbone: R-50 - crop size: (769,769) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 79.39 - mIoU(ms+flip): 80.92 - Config: configs/upernet/upernet_r50_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_80k_cityscapes/upernet_r50_769x769_80k_cityscapes_20200607_005107-82ae7d15.pth -- Name: upernet_r101_4xb2-80k_cityscapes-769x769 - In Collection: UPerNet - Metadata: - backbone: R-101 - crop size: (769,769) - lr schd: 80000 - Results: - - Task: Semantic Segmentation - Dataset: Cityscapes - Metrics: - mIoU: 80.1 - mIoU(ms+flip): 81.49 - Config: configs/upernet/upernet_r101_4xb2-80k_cityscapes-769x769.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_80k_cityscapes/upernet_r101_769x769_80k_cityscapes_20200607_001014-082fc334.pth -- Name: upernet_r50_4xb4-80k_ade20k-512x512 - In Collection: UPerNet - Metadata: - backbone: R-50 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 42.74 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - 
resolution: (512,512) - Training Memory (GB): 8.1 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 40.7 - mIoU(ms+flip): 41.81 - Config: configs/upernet/upernet_r50_4xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_80k_ade20k/upernet_r50_512x512_80k_ade20k_20200614_144127-ecc8377b.pth -- Name: upernet_r101_4xb4-80k_ade20k-512x512 - In Collection: UPerNet - Metadata: - backbone: R-101 - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 49.16 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 9.1 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 42.91 - mIoU(ms+flip): 43.96 - Config: configs/upernet/upernet_r101_4xb4-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_80k_ade20k/upernet_r101_512x512_80k_ade20k_20200614_185117-32e4db94.pth -- Name: upernet_r50_4xb4-160k_ade20k-512x512 - In Collection: UPerNet - Metadata: - backbone: R-50 - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 42.05 - mIoU(ms+flip): 42.78 - Config: configs/upernet/upernet_r50_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_160k_ade20k/upernet_r50_512x512_160k_ade20k_20200615_184328-8534de8d.pth -- Name: upernet_r101_4xb4-160k_ade20k-512x512 - In Collection: UPerNet - Metadata: - backbone: R-101 - crop size: (512,512) - lr schd: 160000 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 43.82 - mIoU(ms+flip): 44.85 - Config: configs/upernet/upernet_r101_4xb4-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_160k_ade20k/upernet_r101_512x512_160k_ade20k_20200615_161951-91b32684.pth -- Name: upernet_r50_4xb4-20k_voc12aug-512x512 - In Collection: UPerNet - Metadata: - backbone: R-50 - crop size: (512,512) - lr schd: 20000 - inference time (ms/im): - - value: 43.16 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 6.4 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 74.82 - mIoU(ms+flip): 76.35 - Config: configs/upernet/upernet_r50_4xb4-20k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_20k_voc12aug/upernet_r50_512x512_20k_voc12aug_20200617_165330-5b5890a7.pth -- Name: upernet_r101_4xb4-20k_voc12aug-512x512 - In Collection: UPerNet - Metadata: - backbone: R-101 - crop size: (512,512) - lr schd: 20000 - inference time (ms/im): - - value: 50.05 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 7.5 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 77.1 - mIoU(ms+flip): 78.29 - Config: configs/upernet/upernet_r101_4xb4-20k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_20k_voc12aug/upernet_r101_512x512_20k_voc12aug_20200617_165629-f14e7f27.pth -- Name: upernet_r50_4xb4-40k_voc12aug-512x512 - In Collection: UPerNet - Metadata: - backbone: R-50 - crop size: (512,512) - lr schd: 40000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 75.92 - 
mIoU(ms+flip): 77.44 - Config: configs/upernet/upernet_r50_4xb4-40k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_40k_voc12aug/upernet_r50_512x512_40k_voc12aug_20200613_162257-ca9bcc6b.pth -- Name: upernet_r101_4xb4-40k_voc12aug-512x512 - In Collection: UPerNet - Metadata: - backbone: R-101 - crop size: (512,512) - lr schd: 40000 - Results: - - Task: Semantic Segmentation - Dataset: Pascal VOC 2012 + Aug - Metrics: - mIoU: 77.43 - mIoU(ms+flip): 78.56 - Config: configs/upernet/upernet_r101_4xb4-40k_voc12aug-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_40k_voc12aug/upernet_r101_512x512_40k_voc12aug_20200613_163549-e26476ac.pth diff --git a/configs/vit/README.md b/configs/vit/README.md index b7f242549d..f75326e8e4 100644 --- a/configs/vit/README.md +++ b/configs/vit/README.md @@ -1,6 +1,6 @@ # Vision Transformer -[An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale](https://arxiv.org/pdf/2010.11929.pdf) +> [An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale](https://arxiv.org/pdf/2010.11929.pdf) ## Introduction @@ -22,17 +22,6 @@ While the Transformer architecture has become the de-facto standard for natural -## Citation - -```bibtex -@article{dosoViTskiy2020, - title={An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale}, - author={DosoViTskiy, Alexey and Beyer, Lucas and Kolesnikov, Alexander and Weissenborn, Dirk and Zhai, Xiaohua and Unterthiner, Thomas and Dehghani, Mostafa and Minderer, Matthias and Heigold, Georg and Gelly, Sylvain and Uszkoreit, Jakob and Houlsby, Neil}, - journal={arXiv preprint arXiv:2010.11929}, - year={2020} -} -``` - ## Usage To use other repositories' pre-trained models, it is necessary to convert keys. 
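Key conversion is just a systematic renaming of the entries in a checkpoint's state dict. The snippet below is a minimal sketch of that idea, assuming a timm-style ViT checkpoint; the rename rules and file paths are illustrative placeholders, not the exact mapping implemented by the repository's converter scripts.

```python
# Hypothetical sketch of checkpoint key conversion; the rename rules below are
# illustrative and do not reproduce the repository's actual converter logic.
from collections import OrderedDict

import torch


def convert_keys(src_path: str, dst_path: str) -> None:
    checkpoint = torch.load(src_path, map_location='cpu')
    # Some checkpoints nest the weights under a 'state_dict' or 'model' key.
    state_dict = checkpoint.get('state_dict', checkpoint.get('model', checkpoint))

    new_state_dict = OrderedDict()
    for key, value in state_dict.items():
        # Example rename: timm names transformer blocks `blocks.*`, while the
        # target layout here is assumed to use `layers.*`.
        new_state_dict[key.replace('blocks.', 'layers.')] = value

    torch.save(new_state_dict, dst_path)


convert_keys('pretrain/vit_base_p16.pth', 'pretrain/vit_base_p16_converted.pth')
```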
@@ -55,16 +44,27 @@ This script convert model from `PRETRAIN_PATH` and store the converted model in ### ADE20K -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------- | ----------------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| UPerNet | ViT-B + MLN | 512x512 | 80000 | 9.20 | 6.94 | 47.71 | 49.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/vit/vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_80k_ade20k/upernet_vit-b16_mln_512x512_80k_ade20k_20210624_130547-0403cee1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_80k_ade20k/20210624_130547.log.json) | -| UPerNet | ViT-B + MLN | 512x512 | 160000 | 9.20 | 7.58 | 46.75 | 48.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/vit/vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_160k_ade20k/upernet_vit-b16_mln_512x512_160k_ade20k_20210624_130547-852fa768.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_160k_ade20k/20210623_192432.log.json) | -| UPerNet | ViT-B + LN + MLN | 512x512 | 160000 | 9.21 | 6.82 | 47.73 | 49.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/vit/vit_vit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k/upernet_vit-b16_ln_mln_512x512_160k_ade20k_20210621_172828-f444c077.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k/20210621_172828.log.json) | -| UPerNet | DeiT-S | 512x512 | 80000 | 4.68 | 29.85 | 42.96 | 43.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/vit/vit_deit-s16_upernet_8xb2-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_80k_ade20k/upernet_deit-s16_512x512_80k_ade20k_20210624_095228-afc93ec2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_80k_ade20k/20210624_095228.log.json) | -| UPerNet | DeiT-S | 512x512 | 160000 | 4.68 | 29.19 | 42.87 | 43.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/vit/vit_deit-s16_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_160k_ade20k/upernet_deit-s16_512x512_160k_ade20k_20210621_160903-5110d916.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_160k_ade20k/20210621_160903.log.json) | -| UPerNet | DeiT-S + MLN | 512x512 | 160000 | 5.69 | 11.18 | 43.82 | 45.07 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/vit/vit_deit-s16_mln_upernet_8xb2-160k_ade20k-512x512.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_mln_512x512_160k_ade20k/upernet_deit-s16_mln_512x512_160k_ade20k_20210621_161021-fb9a5dfb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_mln_512x512_160k_ade20k/20210621_161021.log.json) | -| UPerNet | DeiT-S + LN + MLN | 512x512 | 160000 | 5.69 | 12.39 | 43.52 | 45.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/vit/vit_deit-s16-ln_mln_upernet_512x512_160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k/upernet_deit-s16_ln_mln_512x512_160k_ade20k_20210621_161021-c0cd652f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k/20210621_161021.log.json) | -| UPerNet | DeiT-B | 512x512 | 80000 | 7.75 | 9.69 | 45.24 | 46.73 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/vit/vit_deit-b16_upernet_8xb2-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_80k_ade20k/upernet_deit-b16_512x512_80k_ade20k_20210624_130529-1e090789.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_80k_ade20k/20210624_130529.log.json) | -| UPerNet | DeiT-B | 512x512 | 160000 | 7.75 | 10.39 | 45.36 | 47.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/vit/vit_deit-b16_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_160k_ade20k/upernet_deit-b16_512x512_160k_ade20k_20210621_180100-828705d7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_160k_ade20k/20210621_180100.log.json) | -| UPerNet | DeiT-B + MLN | 512x512 | 160000 | 9.21 | 7.78 | 45.46 | 47.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/vit/vit_deit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_mln_512x512_160k_ade20k/upernet_deit-b16_mln_512x512_160k_ade20k_20210621_191949-4e1450f3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_mln_512x512_160k_ade20k/20210621_191949.log.json) | -| UPerNet | DeiT-B + LN + MLN | 512x512 | 160000 | 9.21 | 7.75 | 45.37 | 47.23 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/vit/vit_deit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k/upernet_deit-b16_ln_mln_512x512_160k_ade20k_20210623_153535-8a959c14.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k/20210623_153535.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | Device | mIoU | mIoU(ms+flip) | config | download | +| ------- | ----------------- | --------- | ------: | -------- | -------------- | ------ | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------ | 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| UPerNet | ViT-B + MLN | 512x512 | 80000 | 9.20 | 6.94 | V100 | 47.71 | 49.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/vit/vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_80k_ade20k/upernet_vit-b16_mln_512x512_80k_ade20k_20210624_130547-0403cee1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_80k_ade20k/20210624_130547.log.json) | +| UPerNet | ViT-B + MLN | 512x512 | 160000 | 9.20 | 7.58 | V100 | 46.75 | 48.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/vit/vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_160k_ade20k/upernet_vit-b16_mln_512x512_160k_ade20k_20210624_130547-852fa768.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_160k_ade20k/20210623_192432.log.json) | +| UPerNet | ViT-B + LN + MLN | 512x512 | 160000 | 9.21 | 6.82 | V100 | 47.73 | 49.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/vit/vit_vit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k/upernet_vit-b16_ln_mln_512x512_160k_ade20k_20210621_172828-f444c077.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k/20210621_172828.log.json) | +| UPerNet | DeiT-S | 512x512 | 80000 | 4.68 | 29.85 | V100 | 42.96 | 43.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/vit/vit_deit-s16_upernet_8xb2-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_80k_ade20k/upernet_deit-s16_512x512_80k_ade20k_20210624_095228-afc93ec2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_80k_ade20k/20210624_095228.log.json) | +| UPerNet | DeiT-S | 512x512 | 160000 | 4.68 | 29.19 | V100 | 42.87 | 43.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/vit/vit_deit-s16_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_160k_ade20k/upernet_deit-s16_512x512_160k_ade20k_20210621_160903-5110d916.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_160k_ade20k/20210621_160903.log.json) | +| UPerNet | DeiT-S + MLN | 512x512 | 160000 | 5.69 | 11.18 | V100 | 43.82 | 45.07 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/vit/vit_deit-s16_mln_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_mln_512x512_160k_ade20k/upernet_deit-s16_mln_512x512_160k_ade20k_20210621_161021-fb9a5dfb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_mln_512x512_160k_ade20k/20210621_161021.log.json) | +| UPerNet | DeiT-S + LN + MLN | 512x512 | 160000 | 5.69 | 12.39 | V100 | 43.52 | 45.01 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/vit/vit_deit-s16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k/upernet_deit-s16_ln_mln_512x512_160k_ade20k_20210621_161021-c0cd652f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k/20210621_161021.log.json) | +| UPerNet | DeiT-B | 512x512 | 80000 | 7.75 | 9.69 | V100 | 45.24 | 46.73 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/vit/vit_deit-b16_upernet_8xb2-80k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_80k_ade20k/upernet_deit-b16_512x512_80k_ade20k_20210624_130529-1e090789.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_80k_ade20k/20210624_130529.log.json) | +| UPerNet | DeiT-B | 512x512 | 160000 | 7.75 | 10.39 | V100 | 45.36 | 47.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/vit/vit_deit-b16_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_160k_ade20k/upernet_deit-b16_512x512_160k_ade20k_20210621_180100-828705d7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_160k_ade20k/20210621_180100.log.json) | +| UPerNet | DeiT-B + MLN | 512x512 | 160000 | 9.21 | 7.78 | V100 | 45.46 | 47.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/vit/vit_deit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_mln_512x512_160k_ade20k/upernet_deit-b16_mln_512x512_160k_ade20k_20210621_191949-4e1450f3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_mln_512x512_160k_ade20k/20210621_191949.log.json) | +| UPerNet | DeiT-B + LN + MLN | 512x512 | 160000 | 9.21 | 7.75 | V100 | 45.37 | 47.23 | [config](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/vit/vit_deit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k/upernet_deit-b16_ln_mln_512x512_160k_ade20k_20210623_153535-8a959c14.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k/20210623_153535.log.json) | + +## Citation + +```bibtex +@article{dosoViTskiy2020, + title={An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale}, + author={DosoViTskiy, Alexey and Beyer, Lucas and Kolesnikov, Alexander and Weissenborn, Dirk and Zhai, Xiaohua and Unterthiner, Thomas and Dehghani, Mostafa and Minderer, Matthias and Heigold, Georg and Gelly, Sylvain and Uszkoreit, Jakob and Houlsby, Neil}, + journal={arXiv preprint arXiv:2010.11929}, + year={2020} +} +``` diff --git a/configs/vit/metafile.yaml b/configs/vit/metafile.yaml new file mode 100644 index 0000000000..68e254a5f9 --- /dev/null +++ b/configs/vit/metafile.yaml @@ -0,0 +1,265 @@ +Models: +- Name: vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 47.71 + mIoU(ms+flip): 49.51 + Config: configs/vit/vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - ViT-B + - UPerNet + Training Resources: 8x 
V100 GPUS + Memory (GB): 9.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_80k_ade20k/upernet_vit-b16_mln_512x512_80k_ade20k_20210624_130547-0403cee1.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_80k_ade20k/20210624_130547.log.json + Paper: + Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale' + URL: https://arxiv.org/pdf/2010.11929.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98 + Framework: PyTorch +- Name: vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 46.75 + mIoU(ms+flip): 48.46 + Config: configs/vit/vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - ViT-B + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 9.2 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_160k_ade20k/upernet_vit-b16_mln_512x512_160k_ade20k_20210624_130547-852fa768.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_160k_ade20k/20210623_192432.log.json + Paper: + Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale' + URL: https://arxiv.org/pdf/2010.11929.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98 + Framework: PyTorch +- Name: vit_vit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 47.73 + mIoU(ms+flip): 49.95 + Config: configs/vit/vit_vit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - ViT-B + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 9.21 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k/upernet_vit-b16_ln_mln_512x512_160k_ade20k_20210621_172828-f444c077.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k/20210621_172828.log.json + Paper: + Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale' + URL: https://arxiv.org/pdf/2010.11929.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98 + Framework: PyTorch +- Name: vit_deit-s16_upernet_8xb2-80k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.96 + mIoU(ms+flip): 43.79 + Config: configs/vit/vit_deit-s16_upernet_8xb2-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - DeiT-S + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 4.68 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_80k_ade20k/upernet_deit-s16_512x512_80k_ade20k_20210624_095228-afc93ec2.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_80k_ade20k/20210624_095228.log.json + Paper: + Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale' + URL: https://arxiv.org/pdf/2010.11929.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98 + Framework: PyTorch +- Name: 
vit_deit-s16_upernet_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.87 + mIoU(ms+flip): 43.79 + Config: configs/vit/vit_deit-s16_upernet_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - DeiT-S + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 4.68 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_160k_ade20k/upernet_deit-s16_512x512_160k_ade20k_20210621_160903-5110d916.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_160k_ade20k/20210621_160903.log.json + Paper: + Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale' + URL: https://arxiv.org/pdf/2010.11929.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98 + Framework: PyTorch +- Name: vit_deit-s16_mln_upernet_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.82 + mIoU(ms+flip): 45.07 + Config: configs/vit/vit_deit-s16_mln_upernet_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - DeiT-S + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 5.69 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_mln_512x512_160k_ade20k/upernet_deit-s16_mln_512x512_160k_ade20k_20210621_161021-fb9a5dfb.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_mln_512x512_160k_ade20k/20210621_161021.log.json + Paper: + Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale' + URL: https://arxiv.org/pdf/2010.11929.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98 + Framework: PyTorch +- Name: vit_deit-s16-ln_mln_upernet_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.52 + mIoU(ms+flip): 45.01 + Config: configs/vit/vit_deit-s16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - DeiT-S + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 5.69 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k/upernet_deit-s16_ln_mln_512x512_160k_ade20k_20210621_161021-c0cd652f.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k/20210621_161021.log.json + Paper: + Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale' + URL: https://arxiv.org/pdf/2010.11929.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98 + Framework: PyTorch +- Name: vit_deit-b16_upernet_8xb2-80k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.24 + mIoU(ms+flip): 46.73 + Config: configs/vit/vit_deit-b16_upernet_8xb2-80k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - DeiT-B + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 7.75 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_80k_ade20k/upernet_deit-b16_512x512_80k_ade20k_20210624_130529-1e090789.pth + Training log: 
https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_80k_ade20k/20210624_130529.log.json + Paper: + Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale' + URL: https://arxiv.org/pdf/2010.11929.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98 + Framework: PyTorch +- Name: vit_deit-b16_upernet_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.36 + mIoU(ms+flip): 47.16 + Config: configs/vit/vit_deit-b16_upernet_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - DeiT-B + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 7.75 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_160k_ade20k/upernet_deit-b16_512x512_160k_ade20k_20210621_180100-828705d7.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_160k_ade20k/20210621_180100.log.json + Paper: + Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale' + URL: https://arxiv.org/pdf/2010.11929.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98 + Framework: PyTorch +- Name: vit_deit-b16_mln_upernet_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.46 + mIoU(ms+flip): 47.16 + Config: configs/vit/vit_deit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - DeiT-B + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 9.21 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_mln_512x512_160k_ade20k/upernet_deit-b16_mln_512x512_160k_ade20k_20210621_191949-4e1450f3.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_mln_512x512_160k_ade20k/20210621_191949.log.json + Paper: + Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale' + URL: https://arxiv.org/pdf/2010.11929.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98 + Framework: PyTorch +- Name: vit_deit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512 + In Collection: UPerNet + Results: + Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.37 + mIoU(ms+flip): 47.23 + Config: configs/vit/vit_deit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py + Metadata: + Training Data: ADE20K + Batch Size: 16 + Architecture: + - DeiT-B + - UPerNet + Training Resources: 8x V100 GPUS + Memory (GB): 9.21 + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k/upernet_deit-b16_ln_mln_512x512_160k_ade20k_20210623_153535-8a959c14.pth + Training log: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k/20210623_153535.log.json + Paper: + Title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale' + URL: https://arxiv.org/pdf/2010.11929.pdf + Code: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/vit.py#L98 + Framework: PyTorch diff --git a/configs/vit/vit.yml b/configs/vit/vit.yml deleted file mode 100644 index 613d866ac4..0000000000 --- a/configs/vit/vit.yml +++ /dev/null @@ -1,243 +0,0 @@ -Models: -- Name: 
vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512 - In Collection: UPerNet - Metadata: - backbone: ViT-B + MLN - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 144.09 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 9.2 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 47.71 - mIoU(ms+flip): 49.51 - Config: configs/vit/vit_vit-b16_mln_upernet_8xb2-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_80k_ade20k/upernet_vit-b16_mln_512x512_80k_ade20k_20210624_130547-0403cee1.pth -- Name: vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512 - In Collection: UPerNet - Metadata: - backbone: ViT-B + MLN - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 131.93 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 9.2 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 46.75 - mIoU(ms+flip): 48.46 - Config: configs/vit/vit_vit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_160k_ade20k/upernet_vit-b16_mln_512x512_160k_ade20k_20210624_130547-852fa768.pth -- Name: vit_vit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512 - In Collection: UPerNet - Metadata: - backbone: ViT-B + LN + MLN - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 146.63 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 9.21 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 47.73 - mIoU(ms+flip): 49.95 - Config: configs/vit/vit_vit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k/upernet_vit-b16_ln_mln_512x512_160k_ade20k_20210621_172828-f444c077.pth -- Name: vit_deit-s16_upernet_8xb2-80k_ade20k-512x512 - In Collection: UPerNet - Metadata: - backbone: DeiT-S - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 33.5 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 4.68 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 42.96 - mIoU(ms+flip): 43.79 - Config: configs/vit/vit_deit-s16_upernet_8xb2-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_80k_ade20k/upernet_deit-s16_512x512_80k_ade20k_20210624_095228-afc93ec2.pth -- Name: vit_deit-s16_upernet_8xb2-160k_ade20k-512x512 - In Collection: UPerNet - Metadata: - backbone: DeiT-S - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 34.26 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 4.68 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 42.87 - mIoU(ms+flip): 43.79 - Config: configs/vit/vit_deit-s16_upernet_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_160k_ade20k/upernet_deit-s16_512x512_160k_ade20k_20210621_160903-5110d916.pth -- Name: vit_deit-s16_mln_upernet_8xb2-160k_ade20k-512x512 - In Collection: UPerNet - Metadata: - backbone: DeiT-S + MLN - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - 
value: 89.45 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 5.69 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 43.82 - mIoU(ms+flip): 45.07 - Config: configs/vit/vit_deit-s16_mln_upernet_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_mln_512x512_160k_ade20k/upernet_deit-s16_mln_512x512_160k_ade20k_20210621_161021-fb9a5dfb.pth -- Name: vit_deit-s16-ln_mln_upernet_512x512_160k_ade20k-512x512 - In Collection: UPerNet - Metadata: - backbone: DeiT-S + LN + MLN - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 80.71 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 5.69 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 43.52 - mIoU(ms+flip): 45.01 - Config: configs/vit/vit_deit-s16-ln_mln_upernet_512x512_160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k/upernet_deit-s16_ln_mln_512x512_160k_ade20k_20210621_161021-c0cd652f.pth -- Name: vit_deit-b16_upernet_8xb2-80k_ade20k-512x512 - In Collection: UPerNet - Metadata: - backbone: DeiT-B - crop size: (512,512) - lr schd: 80000 - inference time (ms/im): - - value: 103.2 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 7.75 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 45.24 - mIoU(ms+flip): 46.73 - Config: configs/vit/vit_deit-b16_upernet_8xb2-80k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_80k_ade20k/upernet_deit-b16_512x512_80k_ade20k_20210624_130529-1e090789.pth -- Name: vit_deit-b16_upernet_8xb2-160k_ade20k-512x512 - In Collection: UPerNet - Metadata: - backbone: DeiT-B - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 96.25 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 7.75 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 45.36 - mIoU(ms+flip): 47.16 - Config: configs/vit/vit_deit-b16_upernet_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_160k_ade20k/upernet_deit-b16_512x512_160k_ade20k_20210621_180100-828705d7.pth -- Name: vit_deit-b16_mln_upernet_8xb2-160k_ade20k-512x512 - In Collection: UPerNet - Metadata: - backbone: DeiT-B + MLN - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 128.53 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 9.21 - Results: - - Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 45.46 - mIoU(ms+flip): 47.16 - Config: configs/vit/vit_deit-b16_mln_upernet_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_mln_512x512_160k_ade20k/upernet_deit-b16_mln_512x512_160k_ade20k_20210621_191949-4e1450f3.pth -- Name: vit_deit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512 - In Collection: UPerNet - Metadata: - backbone: DeiT-B + LN + MLN - crop size: (512,512) - lr schd: 160000 - inference time (ms/im): - - value: 129.03 - hardware: V100 - backend: PyTorch - batch size: 1 - mode: FP32 - resolution: (512,512) - Training Memory (GB): 9.21 - Results: - - 
Task: Semantic Segmentation - Dataset: ADE20K - Metrics: - mIoU: 45.37 - mIoU(ms+flip): 47.23 - Config: configs/vit/vit_deit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k/upernet_deit-b16_ln_mln_512x512_160k_ade20k_20210623_153535-8a959c14.pth diff --git a/configs/vit/vit_deit-s16-ln_mln_upernet_512x512_160k_ade20k-512x512.py b/configs/vit/vit_deit-s16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py similarity index 100% rename from configs/vit/vit_deit-s16-ln_mln_upernet_512x512_160k_ade20k-512x512.py rename to configs/vit/vit_deit-s16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py diff --git a/demo/MMSegmentation_Tutorial.ipynb b/demo/MMSegmentation_Tutorial.ipynb index 1d92342ae6..ac8601b321 100644 --- a/demo/MMSegmentation_Tutorial.ipynb +++ b/demo/MMSegmentation_Tutorial.ipynb @@ -7,7 +7,7 @@ "id": "view-in-github" }, "source": [ - "\"Open" + "\"Open" ] }, { @@ -89,7 +89,7 @@ "outputs": [], "source": [ "!rm -rf mmsegmentation\n", - "!git clone -b dev-1.x https://github.com/open-mmlab/mmsegmentation.git \n", + "!git clone -b main https://github.com/open-mmlab/mmsegmentation.git \n", "%cd mmsegmentation\n", "!pip install -e ." ] diff --git a/docker/Dockerfile b/docker/Dockerfile index 73a0fac121..982b09bdc6 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -28,7 +28,7 @@ RUN ["/bin/bash", "-c", "mim install mmengine"] RUN ["/bin/bash", "-c", "mim install mmcv==${MMCV}"] # Install MMSegmentation -RUN git clone -b dev-1.x https://github.com/open-mmlab/mmsegmentation.git /mmsegmentation +RUN git clone -b main https://github.com/open-mmlab/mmsegmentation.git /mmsegmentation WORKDIR /mmsegmentation ENV FORCE_CUDA="1" RUN pip install -r requirements.txt diff --git a/docs/en/advanced_guides/add_datasets.md b/docs/en/advanced_guides/add_datasets.md index f33f3d32c6..fbfa36940c 100644 --- a/docs/en/advanced_guides/add_datasets.md +++ b/docs/en/advanced_guides/add_datasets.md @@ -1,4 +1,62 @@ -# \[WIP\] Add New Datasets +# Add New Datasets + +## Add new custom dataset + +Here we show how to develop a new custom dataset. + +1. Create a new file `mmseg/datasets/example.py` + + ```python + from mmseg.registry import DATASETS + from .basesegdataset import BaseSegDataset + + + @DATASETS.register_module() + class ExampleDataset(BaseSegDataset): + + METAINFO = dict( + classes=('xxx', 'xxx', ...), + palette=[[x, x, x], [x, x, x], ...]) + + def __init__(self, arg1, arg2): + pass + ``` + +2. Import the module in `mmseg/datasets/__init__.py` + + ```python + from .example import ExampleDataset + ``` + +3. Use it by creating a new dataset config file `configs/_base_/datasets/example_dataset.py` + + ```python + dataset_type = 'ExampleDataset' + data_root = 'data/example/' + ... + ``` + +4. Add dataset meta information in `mmseg/utils/class_names.py` + + ```python + def example_classes(): + return [ + 'xxx', 'xxx', + ... + ] + + def example_palette(): + return [ + [x, x, x], [x, x, x], + ... + ] + dataset_aliases = { + 'example': ['example', ...], + ... + } + ``` + +**Note:** If the new dataset does not satisfy the mmseg requirements, a data preprocessing script needs to be prepared in `tools/dataset_converters/` ## Customize datasets by reorganizing data @@ -26,26 +84,12 @@ An example of file structure is as followed. A training pair will consist of the files with the same suffix in img_dir/ann_dir. -If `split` argument is given, only part of the files in img_dir/ann_dir will be loaded.
-We may specify the prefix of files we would like to be included in the split txt. +Some datasets don't release the test set or the ground truth of the test set; since we cannot evaluate models locally without it, we set the validation set as the default test set in config files. -More specifically, for a split txt like following, +For more details on how to build your own datasets or implement a new dataset class, please refer to the [datasets guide](./datasets.md). -```none -xxx -zzz -``` - -Only -`data/my_dataset/img_dir/train/xxx{img_suffix}`, -`data/my_dataset/img_dir/train/zzz{img_suffix}`, -`data/my_dataset/ann_dir/train/xxx{seg_map_suffix}`, -`data/my_dataset/ann_dir/train/zzz{seg_map_suffix}` will be loaded. - -:::{note} -The annotations are images of shape (H, W), the value pixel should fall in range `[0, num_classes - 1]`. +**Note:** The annotations are images of shape (H, W), and the pixel values should fall in the range `[0, num_classes - 1]`. You may use `'P'` mode of [pillow](https://pillow.readthedocs.io/en/stable/handbook/concepts.html#palette) to create your annotation image with color. -::: ## Customize datasets by mixing dataset @@ -59,14 +103,14 @@ For example, suppose the original dataset is `Dataset_A`, to repeat it, the conf ```python dataset_A_train = dict( - type='RepeatDataset', - times=N, - dataset=dict( # This is the original config of Dataset_A - type='Dataset_A', - ... - pipeline=train_pipeline - ) + type='RepeatDataset', + times=N, + dataset=dict( # This is the original config of Dataset_A + type='Dataset_A', + ... + pipeline=train_pipeline ) +) ``` ### Concatenate dataset @@ -111,7 +155,9 @@ dataset_B_train = dict( ) ) train_dataloader = dict( - dataset=dict('ConcatDataset', datasets=[dataset_A_train, dataset_B_train])) + dataset=dict( + type='ConcatDataset', + datasets=[dataset_A_train, dataset_B_train])) val_dataloader = dict(dataset=dataset_A_val) test_dataloader = dict(dataset=dataset_A_test) @@ -123,8 +169,7 @@ You can refer base dataset [tutorial](https://mmengine.readthedocs.io/en/latest/ ### Multi-image Mix Dataset We use `MultiImageMixDataset` as a wrapper to mix images from multiple datasets. -`MultiImageMixDataset` can be used by multiple images mixed data augmentation -like mosaic and mixup. +`MultiImageMixDataset` can be used with multi-image mixed data augmentations such as mosaic and mixup. An example of using `MultiImageMixDataset` with `Mosaic` data augmentation: ```python train_pipeline = [ @@ -139,8 +184,6 @@ train_pipeline = [ train_dataset = dict( type='MultiImageMixDataset', dataset=dict( - classes=classes, - palette=palette, type=dataset_type, reduce_zero_label=False, img_dir=data_root + "images/train", diff --git a/docs/en/advanced_guides/add_metrics.md b/docs/en/advanced_guides/add_metrics.md index 0a25a81fc4..0298826f05 100644 --- a/docs/en/advanced_guides/add_metrics.md +++ b/docs/en/advanced_guides/add_metrics.md @@ -1 +1,81 @@ # Add New Metrics + +## Develop with the source code of MMSegmentation + +Here we show how to develop a new metric, using `CustomMetric` as an example. + +1. Create a new file `mmseg/evaluation/metrics/custom_metric.py`.
+ + ```python + from typing import List, Sequence + + from mmengine.evaluator import BaseMetric + + from mmseg.registry import METRICS + + + @METRICS.register_module() + class CustomMetric(BaseMetric): + + def __init__(self, arg1, arg2): + """ + The metric first processes each batch of data_samples and predictions, + and appends the processed results to the results list. Then it + collects all results together from all ranks if distributed training + is used. Finally, it computes the metrics of the entire dataset. + """ + + def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None: + pass + + def compute_metrics(self, results: list) -> dict: + pass + + def evaluate(self, size: int) -> dict: + pass + ``` + + In the above example, `CustomMetric` is a subclass of `BaseMetric`. It has three methods: `process`, `compute_metrics` and `evaluate`. + + - `process()` processes one batch of data samples and predictions. The processed results are stored in `self.results`, which will be used to compute the metrics after all the data samples are processed. Please refer to the [MMEngine documentation](https://github.com/open-mmlab/mmengine/blob/main/docs/en/design/evaluation.md) for more details. + + - `compute_metrics()` is used to compute the metrics from the processed results. + + - `evaluate()` is an interface to compute the metrics and return the results. It will be called by `ValLoop` or `TestLoop` in the `Runner`. In most cases, you don't need to override this method, but you can override it if you want to do some extra work. + + **Note:** You can find the details of how the `Runner` calls the `evaluate()` method [here](https://github.com/open-mmlab/mmengine/blob/main/mmengine/runner/loops.py#L366). The `Runner` is the executor of the training and testing process; you can find more details about it in the [engine document](./engine.md). + +2. Import the new metric in `mmseg/evaluation/metrics/__init__.py`. + + ```python + from .custom_metric import CustomMetric + __all__ = ['CustomMetric', ...] + ``` + +3. Add the new metric to the config file. + + ```python + val_evaluator = dict(type='CustomMetric', arg1=xxx, arg2=xxx) + test_evaluator = dict(type='CustomMetric', arg1=xxx, arg2=xxx) + ``` + +## Develop with the released version of MMSegmentation + +The above example shows how to develop a new metric with the source code of MMSegmentation. If you want to develop a new metric with the released version of MMSegmentation, you can follow the steps below. + +1. Create a new file `/Path/to/metrics/custom_metric.py` and implement the `process`, `compute_metrics` and `evaluate` methods; the `evaluate` method is optional. + +2. Import the new metric in your code or config file. + + ```python + from path.to.metrics import CustomMetric + ``` + + or + + ```python + custom_imports = dict(imports=['/Path/to/metrics'], allow_failed_imports=False) + + val_evaluator = dict(type='CustomMetric', arg1=xxx, arg2=xxx) + test_evaluator = dict(type='CustomMetric', arg1=xxx, arg2=xxx) + ``` diff --git a/docs/en/advanced_guides/add_models.md b/docs/en/advanced_guides/add_models.md index 1f1969db39..ed5c9ce611 100644 --- a/docs/en/advanced_guides/add_models.md +++ b/docs/en/advanced_guides/add_models.md @@ -49,7 +49,7 @@ Here we show how to develop a new backbone with an example of MobileNet. ### Add new heads -In MMSegmentation, we provide a [BaseDecodeHead](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/models/decode_heads/decode_head.py#L17) for developing all segmentation heads.
+In MMSegmentation, we provide a [BaseDecodeHead](https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/models/decode_heads/decode_head.py#L17) for developing all segmentation heads. All newly implemented decode heads should be derived from it. Here we show how to develop a new head with the example of [PSPNet](https://arxiv.org/abs/1612.01105) as follows. @@ -166,7 +166,7 @@ loss_decode=dict(type='MyLoss', loss_weight=1.0)) ### Add new data preprocessor -In MMSegmentation 1.x versions, we use [SegDataPreProcessor](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/models/data_preprocessor.py#L13) to copy data to the target device and preprocess the data into the model input format as default. Here we show how to develop a new data preprocessor. +In MMSegmentation 1.x versions, we use [SegDataPreProcessor](https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/models/data_preprocessor.py#L13) to copy data to the target device and preprocess the data into the model input format by default. Here we show how to develop a new data preprocessor. 1. Create a new file `mmseg/models/my_datapreprocessor.py`. @@ -202,9 +202,9 @@ In MMSegmentation 1.x versions, we use [SegDataPreProcessor](https://github.com/ ## Develop new segmentors -The segmentor is an algorithmic architecture in which users can customize their algorithms by adding customized components and defining the logic of algorithm execution. Please refer to [the model document](https://github.com/open-mmlab/mmsegmentation/blob/1.x/docs/en/advanced_guides/models.md) for more details. +The segmentor is an algorithmic architecture in which users can customize their algorithms by adding customized components and defining the logic of algorithm execution. Please refer to [the model document](./models.md) for more details. -Since the [BaseSegmentor](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/models/segmentors/base.py#L15) in MMSegmentation unifies three modes for a forward process, to develop a new segmentor, users need to overwrite `loss`, `predict` and `_forward` methods corresponding to the `loss`, `predict` and `tensor` modes. +Since the [BaseSegmentor](https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/models/segmentors/base.py#L15) in MMSegmentation unifies three modes for a forward process, to develop a new segmentor, users need to overwrite the `loss`, `predict` and `_forward` methods corresponding to the `loss`, `predict` and `tensor` modes. Here we show how to develop a new segmentor. diff --git a/docs/en/advanced_guides/data_flow.md b/docs/en/advanced_guides/data_flow.md index 20d23084ab..404035aee4 100644 --- a/docs/en/advanced_guides/data_flow.md +++ b/docs/en/advanced_guides/data_flow.md @@ -16,7 +16,7 @@ val_cfg = dict(type='ValLoop') test_cfg = dict(type='TestLoop') ``` -In the above diagram, the red line indicates the [train_step](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/advanced_guides/models.md#train_step). At each training iteration, dataloader loads images from storage and transfer to data preprocessor, data preprocessor would put images to the specific device and stack data to batch, then model accepts the batch data as inputs, finally the outputs of the model would be sent to optimizer. The blue line indicates [val_step](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/advanced_guides/models.md#val_step) and [test_step](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/advanced_guides/models.md#test_step).
The dataflow of these two process is similar to the `train_step` except the outputs of model, since model parameters are freezed when doing evaluation, the model output would be transferred to [Evaluator](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/advanced_guides/evaluation.md#ioumetric) to compute metrics. +In the above diagram, the red line indicates the [train_step](./models.md#train_step). At each training iteration, the dataloader loads images from storage and transfers them to the data preprocessor, which puts the images on the specific device and stacks the data into a batch; the model then accepts the batch data as inputs, and finally the outputs of the model are sent to the optimizer. The blue line indicates [val_step](./models.md#val_step) and [test_step](./models.md#test_step). The dataflow of these two processes is similar to `train_step` except for the outputs of the model: since model parameters are frozen during evaluation, the model output is transferred to [Evaluator](./evaluation.md#ioumetric) to compute metrics. ## Dataflow convention in MMSegmentation @@ -26,7 +26,7 @@ From the diagram above, we could see the basic dataflow. In this section, we wou DataLoader is an essential component in training and testing pipelines of MMEngine. Conceptually, it is derived from and consistent with [PyTorch](https://pytorch.org/). DataLoader loads data from the filesystem, and the original data passes through the data preparation pipeline before being sent to the Data Preprocessor. -MMSegmentation defines the default data format at [PackSegInputs](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/datasets/transforms/formatting.py#L12), it's the last component of `train_pipeline` and `test_pipeline`. Please refer to [data transform documentation](https://mmsegmentation.readthedocs.io/en/dev-1.x/advanced_guides/transforms.html) for more information about data transform `pipeline`. +MMSegmentation defines the default data format at [PackSegInputs](https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/datasets/transforms/formatting.py#L12); it is the last component of `train_pipeline` and `test_pipeline`. Please refer to the [data transform documentation](./transforms.md) for more information about the data transform `pipeline`. Without any modifications, the return value of PackSegInputs is usually a `dict` with only two keys, `inputs` and `data_samples`. The following pseudo-code shows the data types of the data loader output in mmseg, which is a batch of data samples fetched from the dataset and packed by the data loader into a dictionary of lists. `inputs` is the list of input tensors to the model and `data_samples` contains a list of input images' meta information and corresponding ground truth. @@ -37,11 +37,11 @@ dict( ) ``` -**Note:** [SegDataSample](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/structures/seg_data_sample.py) is a data structure interface of MMSegmentation, it is used as an interface between different components. `SegDataSample` implements the abstract data element `mmengine.structures.BaseDataElement`, please refer to [the SegDataSample documentation](https://mmsegmentation.readthedocs.io/en/1.x/advanced_guides/structures.html) and [data element documentation](https://mmengine.readthedocs.io/en/latest/advanced_tutorials/data_element.html) in [MMEngine](https://github.com/open-mmlab/mmengine) for more information.
+**Note:** [SegDataSample](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/structures/seg_data_sample.py) is a data structure interface of MMSegmentation; it is used as an interface between different components. `SegDataSample` implements the abstract data element `mmengine.structures.BaseDataElement`; please refer to [the SegDataSample documentation](./structures.md) and the [data element documentation](https://mmengine.readthedocs.io/en/latest/advanced_tutorials/data_element.html) in [MMEngine](https://github.com/open-mmlab/mmengine) for more information. ### Data Preprocessor to Model -Though drawn separately in the diagram [above](#overview-of-dataflow), data_preprocessor is a part of the model and thus can be found in [Model tutorial](https://mmsegmentation.readthedocs.io/en/dev-1.x/advanced_guides/models.html) at data preprocessor chapter. +Though drawn separately in the diagram [above](#overview-of-dataflow), data_preprocessor is a part of the model and can thus be found in the data preprocessor chapter of the [Model tutorial](./models.md). The return value of the data preprocessor is a dictionary containing `inputs` and `data_samples`: `inputs` is the batched images (a 4D tensor), and some additional meta info used in data preprocessing is added to `data_samples`. When transferred to the network, the dictionary is unpacked into two values. The following pseudo-code shows the return value of the data preprocessor and the input values of the model. @@ -59,21 +59,21 @@ class Network(BaseSegmentor): pass ``` -**Note:** Model forward has 3 kinds of mode, which is controlled by input argument `mode`, please refer [model tutorial](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/advanced_guides/models.md) for more details. +**Note:** Model forward has 3 kinds of mode, controlled by the input argument `mode`; please refer to the [model tutorial](./models.md) for more details. ### Model output -As [model tutorial](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/advanced_guides/models.md#forward) mentioned 3 kinds of mode forward with 3 kinds of output. `train_step` and `test_step` (or `val_step`) correspond to `'loss'` and `'predict'` respectively. +As the [model tutorial](./models.md#forward) mentioned, the 3 kinds of forward mode produce 3 kinds of output; `train_step` and `test_step` (or `val_step`) correspond to `'loss'` and `'predict'` respectively. -In `test_step` or `val_step`, the inference results would be transferred to `Evaluator`. You might read the [evaluation document](https://mmsegmentation.readthedocs.io/en/dev-1.x/advanced_guides/evaluation.html) for more information about `Evaluator`. +In `test_step` or `val_step`, the inference results are transferred to `Evaluator`. You can read the [evaluation document](./evaluation.md) for more information about `Evaluator`. -After inference, the [BaseSegmentor](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/models/segmentors/base.py#L15) in MMSegmentation would do a simple post process to pack inference results, the segmentation logits produced by the neural network, segmentation mask after the `argmax` operation and ground truth(if exists) would be packed into a similar `SegDataSample` instance. The return value of [postprocess_result](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/models/segmentors/base.py#L132) is a **`List` of `SegDataSample`**. Following diagram shows the key properties of these `SegDataSample` instances.
+After inference, the [BaseSegmentor](https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/models/segmentors/base.py#L15) in MMSegmentation does a simple post process to pack the inference results: the segmentation logits produced by the neural network, the segmentation mask after the `argmax` operation, and the ground truth (if it exists) are packed into a similar `SegDataSample` instance. The return value of [postprocess_result](https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/models/segmentors/base.py#L132) is a **`List` of `SegDataSample`**. The following diagram shows the key properties of these `SegDataSample` instances. ![SegDataSample](https://user-images.githubusercontent.com/15952744/209912225-ab46a8d9-904a-43cb-8bf1-8bec4938ed29.png) -The same as Data Preprocessor, loss function is also a part of the model, it's a property of [decode head](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/models/decode_heads/decode_head.py#L142). +Like the Data Preprocessor, the loss function is also a part of the model; it is a property of the [decode head](https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/models/decode_heads/decode_head.py#L142). -In MMSegmentation, the method [loss_by_feat](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/models/decode_heads/decode_head.py#L291) of `decode_head` is an unified interface used to compute loss. +In MMSegmentation, the method [loss_by_feat](https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/models/decode_heads/decode_head.py#L291) of `decode_head` is a unified interface used to compute the loss. Parameters: @@ -84,4 +84,4 @@ Returns: - dict\[str, Tensor\]: a dictionary of loss components -**Note:** The `train_step` transfers the loss into OptimWrapper to update the weights in model, please refer [train_step](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/advanced_guides/models.md#train_step) for more details. +**Note:** The `train_step` transfers the loss into OptimWrapper to update the weights in the model; please refer to [train_step](./models.md#train_step) for more details. diff --git a/docs/en/advanced_guides/datasets.md b/docs/en/advanced_guides/datasets.md index a1b8044b3d..c655a3ff55 100644 --- a/docs/en/advanced_guides/datasets.md +++ b/docs/en/advanced_guides/datasets.md @@ -1,14 +1,14 @@ # Dataset -Dataset classes in MMSegmentation have two functions: (1) load data information after [data preparation](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/user_guides/2_dataset_prepare.md) -and (2) send data into [dataset transform pipeline](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/datasets/basesegdataset.py#L141) to do [data augmentation](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/zh_cn/advanced_guides/transforms.md). +Dataset classes in MMSegmentation have two functions: (1) load data information after [data preparation](../user_guides/2_dataset_prepare.md) +and (2) send data into the [dataset transform pipeline](https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/datasets/basesegdataset.py#L141) to do [data augmentation](./transforms.md). There are 2 kinds of loaded information: (1) meta information, which is original dataset information such as the categories (classes) of the dataset and their corresponding palette information, and (2) data information, which includes the paths of dataset images and labels.
The tutorial includes some main interfaces in MMSegmentation 1.x dataset class: methods of loading data information and modifying dataset classes in base dataset class, and the relationship between dataset and the data transform pipeline.

## Main Interfaces

-Take Cityscapes as an example, if you want to run the example, please download and [preprocess](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/user_guides/2_dataset_prepare.md#cityscapes)
+Take Cityscapes as an example: if you want to run the example, please download and [preprocess](../user_guides/2_dataset_prepare.md#cityscapes)
Cityscapes dataset in `data` directory, before running the demo code:

Instantiate Cityscapes training dataset:

@@ -108,7 +108,7 @@ print(dataset.metainfo) ```

The return value of dataset `__getitem__` method is the output of data samples after data augmentation, whose type is also `dict`. It has two fields: `'inputs'` corresponding to images after data augmentation,
-and `'data_samples'` corresponding to `SegDataSample`\](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/zh_cn/advanced_guides/structures.md) which is new data structures in MMSegmentation 1.x,
+and `'data_samples'` corresponding to [`SegDataSample`](./structures.md), which is a new data structure in MMSegmentation 1.x,
and `gt_sem_seg` of `SegDataSample` has labels after data augmentation operations.

```python @@ -179,19 +179,19 @@ print(dataset[0])

## BaseSegDataset

-As mentioned above, dataset classes have the same functions, we implemented [`BaseSegDataset`](https://mmsegmentation.readthedocs.io/en/dev-1.x/api.html?highlight=BaseSegDataset#mmseg.datasets.BaseSegDataset) to reues the common functions.
+As mentioned above, dataset classes share the same functions, so we implemented [`BaseSegDataset`](https://mmsegmentation.readthedocs.io/en/main/api.html?highlight=BaseSegDataset#mmseg.datasets.BaseSegDataset) to reuse the common functions.

It inherits [`BaseDataset` of MMEngine](https://github.com/open-mmlab/mmengine/blob/main/docs/en/advanced_tutorials/basedataset.md) and follows unified initialization process of OpenMMLab. It supports the highly effective interior storing format, some functions like dataset concatenation and repeatedly sampling. In MMSegmentation `BaseSegDataset`, the **method of loading data information** (`load_data_list`) is redefined and adds new `get_label_map` method to **modify dataset classes information**.

### Loading Dataset Information

The loaded data information includes the path of images samples and annotations samples, the detailed implementation could be found in
-[`load_data_list`](https://github.com/open-mmlab/mmsegmentation/blob/163277bfe0fa8fefb63ee5137917fafada1b301c/mmseg/datasets/basesegdataset.py#L231) of `BaseSegDataset` in MMSegmentation.
+[`load_data_list`](https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/datasets/basesegdataset.py#L231) of `BaseSegDataset` in MMSegmentation.

There are two main methods to acquire the path of images and labels:

1.
Load file paths according to the directory and suffix of input images and annotations

-If the dataset directory structure is organized as below, the [`load_data_list`](https://github.com/open-mmlab/mmsegmentation/blob/163277bfe0fa8fefb63ee5137917fafada1b301c/mmseg/datasets/basesegdataset.py#L231) can parse dataset directory Structure:
+If the dataset directory structure is organized as below, the [`load_data_list`](https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/datasets/basesegdataset.py#L231) can parse the dataset directory structure:

``` ├── data @@ -344,7 +344,7 @@ print(dataset.metainfo) ```

Meta information is different from default setting of Cityscapes dataset. Moreover, `label_map` field is also defined, which is used for modifying label index of each pixel on segmentation mask.
-The segmentation label would re-map class information by `label_map`, [here](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/datasets/basesegdataset.py#L151) is detailed implementation:
+The segmentation label would re-map class information by `label_map`; [here](https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/datasets/basesegdataset.py#L151) is the detailed implementation:

```python
gt_semantic_seg_copy = gt_semantic_seg.copy()

diff --git a/docs/en/advanced_guides/engine.md b/docs/en/advanced_guides/engine.md
index eaa55b0c8c..7acfe5ad64 100644
--- a/docs/en/advanced_guides/engine.md
+++ b/docs/en/advanced_guides/engine.md
@@ -1 +1,279 @@
-# Engine
+# Training Engine
+
+MMEngine defines some [basic loop controllers](https://github.com/open-mmlab/mmengine/blob/main/mmengine/runner/loops.py) such as epoch-based training loop (`EpochBasedTrainLoop`), iteration-based training loop (`IterBasedTrainLoop`), standard validation loop (`ValLoop`), and standard testing loop (`TestLoop`).
+
+OpenMMLab's algorithm libraries like MMSegmentation use `Runner` to handle model training, testing, and inference. Users can use the default `Runner` in MMEngine directly or modify the `Runner` to meet customized needs. This document mainly introduces the basic concepts and usage of running settings, hooks, and optimizers, and how users can configure them.
+
+## Configuring Runtime Settings
+
+### Configuring Training Iterations
+
+Loop controllers refer to the execution process during training, validation, and testing. `train_cfg`, `val_cfg`, and `test_cfg` are used to build these processes in the configuration file. MMSegmentation sets commonly used training iterations in `train_cfg` under the `configs/_base_/schedules` folder.
+For example, to train for 80,000 iterations using the iteration-based training loop (`IterBasedTrainLoop`) and perform validation every 8,000 iterations, you can set it as follows:
+
+```python
+train_cfg = dict(type='IterBasedTrainLoop', max_iters=80000, val_interval=8000)
+```
+
+### Configuring Training Optimizers
+
+Here's an example of an SGD optimizer:
+
+```python
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005),
+    clip_grad=None)
+```
+
+OpenMMLab supports all optimizers in PyTorch. For more details, please refer to the [MMEngine optimizer documentation](https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/optim_wrapper.md).
+
+It is worth emphasizing that `optim_wrapper` is a variable of `runner`, so when configuring the optimizer, the field to configure is the `optim_wrapper` field.
For more information on using optimizers, see the [Optimizer](#optimizer) section below.
+
+### Configuring Training Parameter Schedulers
+
+Before configuring the training parameter scheduler, it is recommended to first understand the basic concepts of parameter schedulers in the [MMEngine documentation](https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md).
+
+Here's an example of a parameter scheduler. During training, a linearly changing learning rate strategy is used for warm-up in the first 1,000 iterations. After the first 1,000 iterations and until the end at iteration 160,000, the default polynomial learning rate decay is used:
+
+```python
+param_scheduler = [
+    dict(type='LinearLR', by_epoch=False, start_factor=0.1, begin=0, end=1000),
+    dict(
+        type='PolyLR',
+        eta_min=1e-4,
+        power=0.9,
+        begin=1000,
+        end=160000,
+        by_epoch=False,
+    )
+]
+```
+
+Note: When modifying the `max_iters` in `train_cfg`, make sure the parameters in the parameter scheduler `param_scheduler` are also modified accordingly.
+
+## Hook
+
+### Introduction
+
+OpenMMLab abstracts the model training and testing process as `Runner`. Inserting hooks can implement the corresponding functionality needed at different training and testing stages (such as "before and after each training iter", "before and after each validation iter", etc.) in `Runner`. For more introduction on hook mechanisms, please refer to [here](https://www.calltutors.com/blog/what-is-hook).
+
+Hooks used in `Runner` are divided into two categories:
+
+- Default hooks:
+
+They implement essential functions during training and are defined in the configuration file by `default_hooks` and passed to `Runner`. `Runner` registers them through the [`register_default_hooks`](https://github.com/open-mmlab/mmengine/blob/main/mmengine/runner/runner.py#L1780) method.
+
+Hooks have corresponding priorities; the higher the priority, the earlier the runner calls them. If the priorities are the same, the calling order is consistent with the hook registration order.
+
+It is not recommended for users to modify the default hook priorities. Please refer to the [MMEngine hooks documentation](https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/hook.md) to understand the hook priority definitions.
+
+The following are the default hooks used in MMSegmentation:
+
| Hook | Function | Priority |
| :---: | :---: | :---: |
| [IterTimerHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/iter_timer_hook.py) | Record the time spent on each iteration. | NORMAL (50) |
| [LoggerHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/logger_hook.py) | Collect log records from different components in `Runner` and output them to terminal, JSON file, tensorboard, wandb, etc. | BELOW_NORMAL (60) |
| [ParamSchedulerHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/param_scheduler_hook.py) | Update some hyperparameters in the optimizer, such as the learning rate and momentum. | LOW (70) |
| [CheckpointHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/checkpoint_hook.py) | Regularly save checkpoint files. | VERY_LOW (90) |
| [DistSamplerSeedHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/sampler_seed_hook.py) | Ensure the distributed sampler shuffle is enabled. | NORMAL (50) |
| [SegVisualizationHook](https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/engine/hooks/visualization_hook.py) | Visualize prediction results during validation and testing. | NORMAL (50) |
+
+MMSegmentation registers some hooks with essential training functions in `default_hooks`:
+
+```python
+default_hooks = dict(
+    timer=dict(type='IterTimerHook'),
+    logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
+    param_scheduler=dict(type='ParamSchedulerHook'),
+    checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=32000),
+    sampler_seed=dict(type='DistSamplerSeedHook'),
+    visualization=dict(type='SegVisualizationHook'))
+```
+
+All the default hooks mentioned above, except for `SegVisualizationHook`, are implemented in MMEngine. The `SegVisualizationHook` is a hook implemented in MMSegmentation, which will be introduced later.
+
+- Modifying default hooks
+
+We will use the `logger` and `checkpoint` in `default_hooks` as examples to demonstrate how to modify the default hooks in `default_hooks`.
+
+(1) Model saving configuration
+
+`default_hooks` uses the `checkpoint` field to initialize the [model saving hook (CheckpointHook)](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/checkpoint_hook.py#L19).
+
+```python
+checkpoint = dict(type='CheckpointHook', interval=1)
+```
+
+Users can set `max_keep_ckpts` to save only a small number of checkpoints or use `save_optimizer` to determine whether to save optimizer information. More details on related parameters can be found [here](https://mmengine.readthedocs.io/en/latest/api/generated/mmengine.hooks.CheckpointHook.html#checkpointhook).
+
+(2) Logging configuration
+
+The `LoggerHook` is used to collect log information from different components in `Runner` and write it to terminal, JSON files, tensorboard, wandb, etc.
+
+```python
+logger=dict(type='LoggerHook', interval=10)
+```
+
+In the latest 1.x version of MMSegmentation, some logger hooks (LoggerHook) such as `TextLoggerHook`, `WandbLoggerHook`, and `TensorboardLoggerHook` will no longer be used. Instead, MMEngine uses `LogProcessor` to handle the information processed by the aforementioned hooks, which are now in [`MessageHub`](https://github.com/open-mmlab/mmengine/blob/main/mmengine/logging/message_hub.py#L17), [`WandbVisBackend`](https://github.com/open-mmlab/mmengine/blob/main/mmengine/visualization/vis_backend.py#L324), and [`TensorboardVisBackend`](https://github.com/open-mmlab/mmengine/blob/main/mmengine/visualization/vis_backend.py#L472).
+
+Detailed usage is as follows: configure the visualizer and specify the visualization backend at the same time; here TensorBoard is used as the visualizer's backend:
+
+```python
+# TensorboardVisBackend
+visualizer = dict(
+    type='SegLocalVisualizer', vis_backends=[dict(type='TensorboardVisBackend')], name='visualizer')
+```
+
+For more related usage, please refer to [MMEngine Visualization Backend User Tutorial](https://github.com/open-mmlab/mmengine/blob/main/docs/en/advanced_tutorials/visualization.md).
+
+- Custom hooks
+
+Custom hooks are defined in the configuration through `custom_hooks`, and `Runner` registers them using the [`register_custom_hooks`](https://github.com/open-mmlab/mmengine/blob/main/mmengine/runner/runner.py#L1820) method.
+
+The priority of custom hooks needs to be set in the configuration file; if not, it will be set to `NORMAL` by default. The following are some custom hooks implemented in MMEngine:
+
| Hook | Usage |
| :---: | :---: |
| [EMAHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/ema_hook.py) | Use Exponential Moving Average (EMA) during model training. |
| [EmptyCacheHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/empty_cache_hook.py) | Release all GPU memory not occupied by the cache during training. |
| [SyncBuffersHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/sync_buffer_hook.py) | Synchronize the parameters in the model buffer, such as `running_mean` and `running_var` in BN, at the end of each training epoch. |
+
+The following is a use case for `EMAHook`, where the config file adds the configuration of the implemented custom hook as a member of the `custom_hooks` list.
+
+```python
+custom_hooks = [
+    dict(type='EMAHook', start_iters=500, priority='NORMAL')
+]
+```
+
+### SegVisualizationHook
+
+MMSegmentation implemented [`SegVisualizationHook`](https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/engine/hooks/visualization_hook.py#L17), which is used to visualize prediction results during validation and testing.
+`SegVisualizationHook` overrides the `_after_iter` method in the base class `Hook`. During validation or testing, it calls the `add_datasample` method of `visualizer` to draw semantic segmentation results according to the specified iteration interval. The specific implementation is as follows:
+
+```python
+...
+@HOOKS.register_module()
+class SegVisualizationHook(Hook):
+...
+    def _after_iter(self,
+                    runner: Runner,
+                    batch_idx: int,
+                    data_batch: dict,
+                    outputs: Sequence[SegDataSample],
+                    mode: str = 'val') -> None:
+...
+        # If it's a training phase or self.draw is False, then skip it
+        if self.draw is False or mode == 'train':
+            return
+...
+        if self.every_n_inner_iters(batch_idx, self.interval):
+            for output in outputs:
+                img_path = output.img_path
+                img_bytes = self.file_client.get(img_path)
+                img = mmcv.imfrombytes(img_bytes, channel_order='rgb')
+                window_name = f'{mode}_{osp.basename(img_path)}'
+
+                self._visualizer.add_datasample(
+                    window_name,
+                    img,
+                    data_sample=output,
+                    show=self.show,
+                    wait_time=self.wait_time,
+                    step=runner.iter)
+
+```
+
+For more details about visualization, you can check [here](../user_guides/visualization.md).
+
+## Optimizer
+
+In the previous configuration and runtime settings, we provided a simple example of configuring the training optimizer. This section introduces in more detail how to configure optimizers in MMSegmentation.
+
+### Optimizer Wrapper
+
+OpenMMLab 2.0 introduces an optimizer wrapper that supports different training strategies, including mixed-precision training, gradient accumulation, and gradient clipping. Users can choose the appropriate training strategy according to their needs. The optimizer wrapper also defines a standard parameter update process, allowing users to switch between different training strategies within the same code. For more information, please refer to the [MMEngine optimizer wrapper documentation](https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/optim_wrapper.md).
+
+Here are some common usage methods in MMSegmentation:
+
+#### Configuring PyTorch Supported Optimizers
+
+OpenMMLab 2.0 supports all native PyTorch optimizers, as referenced [here](https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/optim_wrapper.md).
+
+To set the optimizer used by the `Runner` during training in the configuration file, you need to define `optim_wrapper` instead of `optimizer`. Below is an example of configuring an optimizer during training:
+
+```python
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005),
+    clip_grad=None)
+```
+
+#### Configuring Gradient Clipping
+
+When the model training requires gradient clipping, you can configure it as shown in the following example:
+
+```python
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer,
+                     clip_grad=dict(max_norm=0.01, norm_type=2))
+```
+
+Here, `max_norm` refers to the maximum value of the gradient after clipping, and `norm_type` refers to the norm used when clipping the gradient. Related methods can be found in [torch.nn.utils.clip_grad_norm\_](https://pytorch.org/docs/stable/generated/torch.nn.utils.clip_grad_norm_.html).
+
+#### Configuring Mixed Precision Training
+
+When mixed precision training is needed to reduce memory usage, you can use `AmpOptimWrapper`. The specific configuration is as follows:
+
+```python
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
+optim_wrapper = dict(type='AmpOptimWrapper', optimizer=optimizer)
+```
+
+The default setting for `loss_scale` in [`AmpOptimWrapper`](https://github.com/open-mmlab/mmengine/blob/main/mmengine/optim/optimizer/amp_optimizer_wrapper.py#L20) is `dynamic`.
+
+#### Configuring Hyperparameters for Different Layers of the Model Network
+
+In model training, if you want to set different optimization strategies for different parameters in the optimizer, such as setting different learning rates, weight decay, and other hyperparameters, you can achieve this by setting `paramwise_cfg` in the `optim_wrapper` of the configuration file.
+
+The following config file uses the [ViT `optim_wrapper`](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/vit/vit_vit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py#L15-L27) as an example to introduce the use of `paramwise_cfg` parameters. During training, the weight decay parameter coefficients for the `pos_embed`, `cls_token`, and `norm` modules are set to 0. That is, during training, the weight decay for these modules will be changed to `weight_decay * decay_mult` = 0.
+
+```python
+optimizer = dict(
+    type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01)
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=optimizer,
+    paramwise_cfg=dict(
+        custom_keys={
+            'pos_embed': dict(decay_mult=0.),
+            'cls_token': dict(decay_mult=0.),
+            'norm': dict(decay_mult=0.)
+        }))
+```
+
+Here, `decay_mult` refers to the weight decay coefficient for the corresponding parameters. For more information on the usage of `paramwise_cfg`, please refer to the [MMEngine optimizer wrapper documentation](https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/optim_wrapper.md).
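+
+#### Configuring Gradient Accumulation
+
+Gradient accumulation is the remaining training strategy mentioned above for the optimizer wrapper, besides gradient clipping and mixed precision training. The snippet below is a minimal sketch based on the `accumulative_counts` argument of MMEngine's `OptimWrapper` (the optimizer values are illustrative):
+
+```python
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
+# Accumulate gradients for 4 iterations before each parameter update,
+# which simulates a 4x larger effective batch size.
+optim_wrapper = dict(
+    type='OptimWrapper', optimizer=optimizer, accumulative_counts=4)
+```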
+
+### Optimizer Wrapper Constructor
+
+The default optimizer wrapper constructor [`DefaultOptimWrapperConstructor`](https://github.com/open-mmlab/mmengine/blob/main/mmengine/optim/optimizer/default_constructor.py#L19) builds the optimizer used in training based on the input `optim_wrapper` and `paramwise_cfg` defined in the `optim_wrapper`. When the functionality of [`DefaultOptimWrapperConstructor`](https://github.com/open-mmlab/mmengine/blob/main/mmengine/optim/optimizer/default_constructor.py#L19) does not meet the requirements, you can customize the optimizer wrapper constructor to implement the configuration of hyperparameters.
+
+MMSegmentation has implemented the [`LearningRateDecayOptimizerConstructor`](https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/engine/optimizers/layer_decay_optimizer_constructor.py#L104), which can decay the learning rate of model parameters in the backbone networks of ConvNeXt, BEiT, and MAE models during training according to the defined decay ratio (`decay_rate`). The configuration in the configuration file is as follows:
+
+```python
+optim_wrapper = dict(
+    _delete_=True,
+    type='AmpOptimWrapper',
+    optimizer=dict(
+        type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05),
+    paramwise_cfg={
+        'decay_rate': 0.9,
+        'decay_type': 'stage_wise',
+        'num_layers': 12
+    },
+    constructor='LearningRateDecayOptimizerConstructor',
+    loss_scale='dynamic')
+```
+
+The purpose of `_delete_=True` is to ignore the inherited configuration in the OpenMMLab Config. In this code snippet, the inherited `optim_wrapper` configuration is ignored. For more information on `_delete_` fields, please refer to the [MMEngine documentation](https://github.com/open-mmlab/mmengine/blob/main/docs/en/advanced_tutorials/config.md#delete-key-in-dict).

diff --git a/docs/en/advanced_guides/evaluation.md b/docs/en/advanced_guides/evaluation.md index ee5a927ff7..ca0beeeccf 100644 --- a/docs/en/advanced_guides/evaluation.md +++ b/docs/en/advanced_guides/evaluation.md @@ -1,11 +1,8 @@ # Evaluation

-The evaluation procedure would be executed at [ValLoop](https://github.com/open-mmlab/mmengine/blob/main/mmengine/runner/loops.py#L300) and [TestLoop](https://github.com/open-mmlab/mmengine/blob/main/mmengine/runner/loops.py#L373), users can evaluate model performance during training or using the test script with simple settings in the configuration file. The `ValLoop` and `TestLoop` are properties of [Runner](https://github.com/open-mmlab/mmengine/blob/main/mmengine/runner/runner.py#L59), they will be built the first time they are called. To build the `ValLoop` successfully, the `val_dataloader` and `val_evaluator` must be set when building `Runner` since `dataloder` and `evaluator` are required parameters, and the same goes for `TestLoop`. For more information about the Runner's design, please refer to the [documentoation](https://github.com/open-mmlab/mmengine/blob/main/docs/en/design/runner.md) of [MMEngine](https://github.com/open-mmlab/mmengine).
+The evaluation procedure would be executed at [ValLoop](https://github.com/open-mmlab/mmengine/blob/main/mmengine/runner/loops.py#L300) and [TestLoop](https://github.com/open-mmlab/mmengine/blob/main/mmengine/runner/loops.py#L373); users can evaluate model performance during training or using the test script with simple settings in the configuration file.
The `ValLoop` and `TestLoop` are properties of [Runner](https://github.com/open-mmlab/mmengine/blob/main/mmengine/runner/runner.py#L59); they will be built the first time they are called. To build the `ValLoop` successfully, the `val_dataloader` and `val_evaluator` must be set when building `Runner` since `dataloader` and `evaluator` are required parameters, and the same goes for `TestLoop`. For more information about the Runner's design, please refer to the [documentation](https://github.com/open-mmlab/mmengine/blob/main/docs/en/design/runner.md) of [MMEngine](https://github.com/open-mmlab/mmengine).
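+The following is a minimal sketch of the fields `Runner` needs so that `ValLoop` can be built (hedged: `model`, `train_dataloader`, and `val_dataloader` are placeholders standing for a segmentor and dataloader configurations):
+
+```python
+from mmengine.runner import Runner
+
+runner = Runner(
+    model=model,                        # a segmentor instance or config (placeholder)
+    work_dir='./work_dir',
+    train_dataloader=train_dataloader,  # placeholder dataloader config
+    optim_wrapper=dict(optimizer=dict(type='SGD', lr=0.01)),
+    train_cfg=dict(type='IterBasedTrainLoop', max_iters=80000, val_interval=8000),
+    # `val_dataloader` and `val_evaluator` are required to build `ValLoop`
+    val_dataloader=val_dataloader,      # placeholder dataloader config
+    val_evaluator=dict(type='IoUMetric', iou_metrics=['mIoU']),
+    val_cfg=dict(type='ValLoop'))
+```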
+![test_step/val_step dataflow](https://user-images.githubusercontent.com/15952744/228828179-3269baa3-bebd-4c9a-9787-59e7d785fbcf.png) In MMSegmentation, we write the settings of dataloader and metrics in the config files of datasets and the configuration of the evaluation loop in the `schedule_x` config files by default. @@ -61,7 +58,7 @@ In MMSegmentation, the settings of `test_dataloader` and `test_evaluator` are th ## IoUMetric -MMSegmentation implements [IoUMetric](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/evaluation/metrics/iou_metric.py) and [CitysMetric](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/evaluation/metrics/citys_metric.py) for evaluating the performance of models, based on the [BaseMetric](https://github.com/open-mmlab/mmengine/blob/main/mmengine/evaluator/metric.py) provided by [MMEngine](https://github.com/open-mmlab/mmengine). Please refer to [the documentation](https://mmengine.readthedocs.io/en/latest/tutorials/evaluation.html) for more details about the unified evaluation interface. +MMSegmentation implements [IoUMetric](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/evaluation/metrics/iou_metric.py) and [CityscapesMetric](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/evaluation/metrics/citys_metric.py) for evaluating the performance of models, based on the [BaseMetric](https://github.com/open-mmlab/mmengine/blob/main/mmengine/evaluator/metric.py) provided by [MMEngine](https://github.com/open-mmlab/mmengine). Please refer to [the documentation](https://mmengine.readthedocs.io/en/latest/tutorials/evaluation.html) for more details about the unified evaluation interface. Here we briefly describe the arguments and the two main methods of `IoUMetric`. @@ -102,9 +99,9 @@ Returns: - Dict\[str, float\] - The computed metrics. The keys are the names of the metrics, and the values are corresponding results. The key mainly includes **aAcc**, **mIoU**, **mAcc**, **mDice**, **mFscore**, **mPrecision**, **mRecall**. -## CitysMetric +## CityscapesMetric -`CitysMetric` uses the official [CityscapesScripts](https://github.com/mcordts/cityscapesScripts) provided by Cityscapes to evaluate model performance. +`CityscapesMetric` uses the official [CityscapesScripts](https://github.com/mcordts/cityscapesScripts) provided by Cityscapes to evaluate model performance. ### Usage @@ -114,10 +111,10 @@ Before using it, please install the `cityscapesscripts` package first: pip install cityscapesscripts ``` -Since the `IoUMetric` is used as the default evaluator in MMSegmentation, if you would like to use `CitysMetric`, customizing the config file is required. In your customized config file, you should overwrite the default evaluator as follows. +Since the `IoUMetric` is used as the default evaluator in MMSegmentation, if you would like to use `CityscapesMetric`, customizing the config file is required. In your customized config file, you should overwrite the default evaluator as follows. ```python -val_evaluator = dict(type='CitysMetric', citys_metrics=['cityscapes']) +val_evaluator = dict(type='CityscapesMetric', output_dir='tmp') test_evaluator = val_evaluator ``` @@ -125,27 +122,27 @@ test_evaluator = val_evaluator The arguments of the constructor: +- output_dir (str) - The directory for output prediction - ignore_index (int) - Index that will be ignored in evaluation. Default: 255. -- citys_metrics (list\[str\] | str) - Metrics to be evaluated, Default: \['cityscapes'\]. 
-- to_label_id (bool) - whether convert output to label_id for submission. Default: True. -- suffix (str): The filename prefix of the png files. If the prefix is "somepath/xxx", the png files will be named "somepath/xxx.png". Default: '.format_cityscapes'. -- collect_device (str): Device name used for collecting results from different ranks during distributed training. Must be 'cpu' or 'gpu'. Defaults to 'cpu'. -- prefix (str, optional): The prefix that will be added in the metric names to disambiguate homonymous metrics of different evaluators. If the prefix is not provided in the argument, self.default_prefix will be used instead. Defaults to None. +- format_only (bool) - Only format result for results commit without perform evaluation. It is useful when you want to format the result to a specific format and submit it to the test server. Defaults to False. +- keep_results (bool) - Whether to keep the results. When `format_only` is True, `keep_results` must be True. Defaults to False. +- collect_device (str) - Device name used for collecting results from different ranks during distributed training. Must be 'cpu' or 'gpu'. Defaults to 'cpu'. +- prefix (str, optional) - The prefix that will be added in the metric names to disambiguate homonymous metrics of different evaluators. If prefix is not provided in the argument, self.default_prefix will be used instead. Defaults to None. -#### CitysMetric.process +#### CityscapesMetric.process This method would draw the masks on images and save the painted images to `work_dir`. Parameters: -- data_batch (Any) - A batch of data from the dataloader. +- data_batch (dict) - A batch of data from the dataloader. - data_samples (Sequence\[dict\]) - A batch of outputs from the model. Returns: This method doesn't have returns, the annotations' path would be stored in `self.results`, which will be used to compute the metrics when all batches have been processed. -#### CitysMetric.compute_metrics +#### CityscapesMetric.compute_metrics This method would call `cityscapesscripts.evaluation.evalPixelLevelSemanticLabeling` tool to calculate metrics. diff --git a/docs/en/advanced_guides/models.md b/docs/en/advanced_guides/models.md index 8202e95b7c..b0089869d9 100644 --- a/docs/en/advanced_guides/models.md +++ b/docs/en/advanced_guides/models.md @@ -1,7 +1,5 @@ # Models -# Models - We usually define a neural network in a deep learning task as a model, and this model is the core of an algorithm. [MMEngine](https://github.com/open-mmlab/mmengine) abstracts a unified model [BaseModel](https://github.com/open-mmlab/mmengine/blob/main/mmengine/model/base_model/base_model.py#L16) to standardize the interfaces for training, testing and other processes. All models implemented by MMSegmentation inherit from `BaseModel`, and in MMSegmentation we implemented forward and added some functions for the semantic segmentation algorithm. ## Common components @@ -22,9 +20,9 @@ In MMSegmentation, we abstract the network architecture as a **Segmentor**, it i **Neck** is the part that connects the backbone and heads. It performs some refinements or reconfigurations on the raw feature maps produced by the backbone. An example is **Feature Pyramid Network (FPN)**. -### Decode Head +### Decode head -**Decode Head** is the part that transforms the feature maps into a segmentation mask, such as **PSPNet**. +**Decode head** is the part that transforms the feature maps into a segmentation mask, such as **PSPNet**. 
### Auxiliary head

@@ -36,15 +34,8 @@ MMSegmentation wraps `BaseModel` and implements the [BaseSegmentor](https://gith

### forward
+![EncoderDecoder dataflow](https://user-images.githubusercontent.com/15952744/228827860-c0e34875-d370-4736-84f0-9560c26c9576.png)
+![CascadeEncoderDecoder dataflow](https://user-images.githubusercontent.com/15952744/228827987-aa214507-0c6d-4a08-8ce4-679b2b200b79.png)

The `forward` method returns losses or predictions of training, validation, testing, and a simple inference process.

@@ -110,16 +101,13 @@ Parameters:

- data (dict or tuple or list) - Data sampled from the dataset. In MMSegmentation, the data dict contains `inputs` and `data_samples` two fields.
- optim_wrapper (OptimWrapper) - OptimWrapper instance used to update model parameters.

-**Note:** [OptimWrapper](https://github.com/open-mmlab/mmengine/blob/main/mmengine/optim/optimizer/optimizer_wrapper.py#L17) provides a common interface for updating parameters, please refer to optimizer wrapper [documentation](https://mmengine.readthedocs.io/zh_CN/latest/tutorials/optim_wrapper.html) in [MMEngine](https://github.com/open-mmlab/mmengine) for more information.
+**Note:** [OptimWrapper](https://github.com/open-mmlab/mmengine/blob/main/mmengine/optim/optimizer/optimizer_wrapper.py#L17) provides a common interface for updating parameters; please refer to the optimizer wrapper [documentation](https://mmengine.readthedocs.io/en/latest/tutorials/optim_wrapper.html) in [MMEngine](https://github.com/open-mmlab/mmengine) for more information.

Returns:

- Dict\[str, `torch.Tensor`\]: A `dict` of tensor for logging.
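+As a usage sketch (hedged: `train_dataloader` and `optim_wrapper` are assumed to be built by the runner), one training iteration looks like:
+
+```python
+data = next(iter(train_dataloader))  # {'inputs': ..., 'data_samples': ...}
+# returns a dict of tensors for logging, e.g. keys like 'loss' or 'decode.loss_ce'
+log_vars = model.train_step(data, optim_wrapper)
+```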
+![train_step dataflow](https://user-images.githubusercontent.com/15952744/228828089-a9ae1225-958d-4cf7-99af-9af8576f7ef7.png)

### val_step

@@ -133,10 +121,7 @@ Returns:

- `list` - The predictions of given data.
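+A corresponding usage sketch for validation (hedged: `val_dataloader` and `model` are assumed to exist):
+
+```python
+data = next(iter(val_dataloader))
+outputs = model.val_step(data)  # a list of `SegDataSample` with predictions
+```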
+![test_step/val_step dataflow](https://user-images.githubusercontent.com/15952744/228828179-3269baa3-bebd-4c9a-9787-59e7d785fbcf.png)

### test_step

@@ -157,7 +142,7 @@ The parameters of the `SegDataPreProcessor` constructor:

- pad_val (float, optional) - Padding value. Default: 0.
- seg_pad_val (float, optional) - Padding value of segmentation map. Default: 255.
- bgr_to_rgb (bool) - whether to convert image from BGR to RGB. Defaults to False.
-- rgb_to_bgr (bool) - whether to convert image from RGB to RGB. Defaults to False.
+- rgb_to_bgr (bool) - whether to convert image from RGB to BGR. Defaults to False.
- batch_augments (list\[dict\], optional) - Batch-level augmentations. Default to None.

The data will be processed as follows:

diff --git a/docs/en/advanced_guides/training_tricks.md b/docs/en/advanced_guides/training_tricks.md index 8fa89131d5..bc4f72257d 100644 --- a/docs/en/advanced_guides/training_tricks.md +++ b/docs/en/advanced_guides/training_tricks.md @@ -1,4 +1,4 @@ -# \[WIP\] Training Tricks +# Training Tricks

MMSegmentation support following training tricks out of box.

@@ -9,18 +9,19 @@ In semantic segmentation, some methods make the LR of heads larger than backbone.

In MMSegmentation, you may add following lines to config to make the LR of heads 10 times of backbone.

```python
-optimizer=dict(
+optim_wrapper=dict(
    paramwise_cfg = dict(
        custom_keys={
            'head': dict(lr_mult=10.)}))
```

With this modification, the LR of any parameter group with `'head'` in name will be multiplied by 10.
-You may refer to [MMCV doc](https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.DefaultOptimizerConstructor) for further details.
+You may refer to [MMEngine documentation](https://mmengine.readthedocs.io/en/latest/tutorials/optim_wrapper.html#advanced-usages) for further details.

## Online Hard Example Mining (OHEM)

-We implement pixel sampler [here](https://github.com/open-mmlab/mmsegmentation/tree/master/mmseg/core/seg/sampler) for training sampling.
+We implement pixel sampler for training sampling, like OHEM (Online Hard Example Mining),
+which is used to remove the "easy" examples during model training.
Here is an example config of training PSPNet with OHEM enabled.

```python @@ -58,33 +59,17 @@ For loss calculation, we support multiple losses training concurrently. Here is

```python
_base_ = './fcn_unet_s5-d16_64x64_40k_drive.py'
model = dict(
-    decode_head=dict(loss_decode=[dict(type='CrossEntropyLoss', loss_name='loss_ce', loss_weight=1.0),
-        dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0)]),
-    auxiliary_head=dict(loss_decode=[dict(type='CrossEntropyLoss', loss_name='loss_ce',loss_weight=1.0),
-        dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0)]),
-    )
+    decode_head=dict(loss_decode=[
+        dict(type='CrossEntropyLoss', loss_name='loss_ce', loss_weight=1.0),
+        dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0)
+    ]),
+    auxiliary_head=dict(loss_decode=[
+        dict(type='CrossEntropyLoss', loss_name='loss_ce', loss_weight=1.0),
+        dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0)
+    ]),
+)
```

In this way, `loss_weight` and `loss_name` will be weight and name in training log of corresponding loss, respectively.

Note: If you want this loss item to be included into the backward graph, `loss_` must be the prefix of the name.
-
-## Ignore specified label index in loss calculation
-
-In default setting, `avg_non_ignore=False` which means each pixel counts for loss calculation although some of them belong to ignore-index labels.
-
-For loss calculation, we support ignore index of certain label by `avg_non_ignore` and `ignore_index`. In this way, the average loss would only be calculated in non-ignored labels which may achieve better performance, and here is the [reference](https://github.com/open-mmlab/mmsegmentation/pull/1409). Here is an example config of training `unet` on `Cityscapes` dataset: in loss calculation it would ignore label 0 which is background and loss average is only calculated on non-ignore labels:
-
-```python
-_base_ = './unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py'
-model = dict(
-    decode_head=dict(
-        ignore_index=0,
-        loss_decode=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0, avg_non_ignore=True),
-    auxiliary_head=dict(
-        ignore_index=0,
-        loss_decode=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0, avg_non_ignore=True)),
-    ))
-```

diff --git a/docs/en/advanced_guides/transforms.md b/docs/en/advanced_guides/transforms.md index e0c4155b57..68b1f44bd3 100644 --- a/docs/en/advanced_guides/transforms.md +++ b/docs/en/advanced_guides/transforms.md @@ -12,15 +12,10 @@ The structure of this guide is as follows:

## Design of Data pipelines

-Following typical conventions, we use `Dataset` and `DataLoader` for data loading
-with multiple workers. `Dataset` returns a dict of data items corresponding
-the arguments of models' forward method.
-Since the data in semantic segmentation may not be the same size,
-we introduce a new `DataContainer` type in MMCV to help collect and distribute
-data of different size.
-See [here](https://github.com/open-mmlab/mmcv/blob/master/mmcv/parallel/data_container.py) for more details.
+Following typical conventions, we use `Dataset` and `DataLoader` for data loading with multiple workers. `Dataset` returns a dict of data items corresponding to the arguments of models' forward method. Since the data in semantic segmentation may not be the same size, we introduce a new `DataContainer` type in MMCV to help collect and distribute data of different sizes. See [here](https://github.com/open-mmlab/mmcv/blob/master/mmcv/parallel/data_container.py) for more details.

In 1.x version of MMSegmentation, all data transformations are inherited from [`BaseTransform`](https://github.com/open-mmlab/mmcv/blob/2.x/mmcv/transforms/base.py#L6).
+
The input and output types of transformations are both dict. A simple example is as follows:

```python @@ -38,13 +33,11 @@ The input and output types of transformations are both dict. A simple example is dict_keys(['img_path', 'seg_map_path', 'reduce_zero_label', 'seg_fields', 'gt_seg_map']) ```

-The data preparation pipeline and the dataset are decomposed. Usually a dataset
-defines how to process the annotations and a data pipeline defines all the steps to prepare a data dict.
-A pipeline consists of a sequence of operations. Each operation takes a dict as input and also outputs a dict for the next transform.
+The data preparation pipeline and the dataset are decomposed. Usually a dataset defines how to process the annotations and a data pipeline defines all the steps to prepare a data dict. A pipeline consists of a sequence of operations. Each operation takes a dict as input and also outputs a dict for the next transform.

The operations are categorized into data loading, pre-processing, formatting and test-time augmentation.

-Here is a pipeline example for PSPNet.
+Here is a pipeline example for PSPNet:

```python crop_size = (512, 1024) @@ -71,8 +64,7 @@ test_pipeline = [ ] ```

-For each operation, we list the related dict fields that are `added`/`updated`/`removed`.
-Before pipelines, the information we can directly obtain from the datasets are `img_path` and `seg_map_path`.
+For each operation, we list the related dict fields that are `added`/`updated`/`removed`. Before pipelines, the information we can directly obtain from the datasets are `img_path` and `seg_map_path`.

### Data loading

@@ -98,16 +90,14 @@ Before pipelines, the information we can directly obtain from the datasets are `

`RandomCrop`: Random crop image & segmentation map.

-- update: `img`, `gt_seg_map`, `img_shape`.
+- update: `img`, `gt_seg_map`, `img_shape`

`RandomFlip`: Flip the image & segmentation map.

- add: `flip`, `flip_direction`
- update: `img`, `gt_seg_map`

-`PhotoMetricDistortion`: Apply photometric distortion to image sequentially,
-every transformation is applied with a probability of 0.5.
-The position of random contrast is in second or second to last(mode 0 or 1 below, respectively).
+`PhotoMetricDistortion`: Apply photometric distortion to the image sequentially; every transformation is applied with a probability of 0.5. The position of random contrast is second or second to last (mode 0 or 1 below, respectively).

``` 1. random brightness

diff --git a/docs/en/get_started.md b/docs/en/get_started.md index cf861b1fe8..3cda319065 100644 --- a/docs/en/get_started.md +++ b/docs/en/get_started.md @@ -51,7 +51,7 @@ mim install "mmcv>=2.0.0rc1" Case a: If you develop and run mmseg directly, install it from source:

```shell
-git clone -b dev-1.x https://github.com/open-mmlab/mmsegmentation.git
+git clone -b main https://github.com/open-mmlab/mmsegmentation.git
cd mmsegmentation
pip install -v -e .
# '-v' means verbose, or more output
```

@@ -164,7 +164,7 @@ thus we only need to install MMCV and MMSegmentation with the following commands

```shell
!git clone https://github.com/open-mmlab/mmsegmentation.git
%cd mmsegmentation
-!git checkout dev-1.x
+!git checkout main
!pip install -e .
```

@@ -181,7 +181,7 @@ Within Jupyter, the exclamation mark `!` is used to call external executables an

### Using MMSegmentation with Docker

-We provide a [Dockerfile](https://github.com/open-mmlab/mmsegmentation/blob/master/docker/Dockerfile) to build an image. Ensure that your [docker version](https://docs.docker.com/engine/install/) >=19.03.
+We provide a [Dockerfile](https://github.com/open-mmlab/mmsegmentation/blob/main/docker/Dockerfile) to build an image. Ensure that your [docker version](https://docs.docker.com/engine/install/) >=19.03.

```shell # build an image with PyTorch 1.11, CUDA 11.3 @@ -197,5 +197,5 @@ docker run --gpus all --shm-size=8g -it -v {DATA_DIR}:/mmsegmentation/data mmseg

## Trouble shooting

-If you have some issues during the installation, please first view the [FAQ](faq.md) page.
+If you have some issues during the installation, please first view the [FAQ](notes/faq.md) page.
You may [open an issue](https://github.com/open-mmlab/mmsegmentation/issues/new/choose) on GitHub if no solution is found.
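+Before reporting an issue, it may help to quickly verify the installation with a single inference. The following sketch is hedged: the config and checkpoint file names are examples from the model zoo, e.g. obtained with `mim download mmsegmentation --config pspnet_r50-d8_4xb2-40k_cityscapes-512x1024 --dest .`:
+
+```python
+from mmseg.apis import inference_model, init_model
+
+config_file = 'pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py'
+checkpoint_file = 'pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'
+
+# build the model from a config file and a checkpoint file
+model = init_model(config_file, checkpoint_file, device='cpu')
+# run inference on the demo image shipped with the repository
+result = inference_model(model, 'demo/demo.png')
+```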
diff --git a/docs/en/migration/interface.md b/docs/en/migration/interface.md index d75f8ec3ef..46040d911f 100644 --- a/docs/en/migration/interface.md +++ b/docs/en/migration/interface.md @@ -6,14 +6,17 @@ This guide describes the fundamental differences between MMSegmentation 0.x and

## New dependencies

-MMSegmentation 1.x depends on some new packages, you can prepare a new clean environment and install again according to the [installation tutorial](get_started.md).
+MMSegmentation 1.x depends on some new packages; you can prepare a new clean environment and install them again according to the [installation tutorial](../get_started.md).
+
Or install the below packages manually.

1. [MMEngine](https://github.com/open-mmlab/mmengine): MMEngine is the core of the OpenMMLab 2.0 architecture, and we split many components unrelated to computer vision from MMCV to MMEngine.

2. [MMCV](https://github.com/open-mmlab/mmcv): The computer vision package of OpenMMLab. This is not a new dependency, but you need to upgrade it to version **2.0.0rc1** or above.

-3. [MMClassification](https://github.com/open-mmlab/mmclassification)(Optional): The image classification toolbox and benchmark of OpenMMLab. This is not a new dependency, but you need to upgrade it to above **1.0.0rc0** version.
+3. [MMClassification](https://github.com/open-mmlab/mmclassification)(Optional): The image classification toolbox and benchmark of OpenMMLab. This is not a new dependency, but you need to upgrade it to version **1.0.0rc0** or above.
+
+4. [MMDetection](https://github.com/open-mmlab/mmdetection)(Optional): The object detection toolbox and benchmark of OpenMMLab. This is not a new dependency, but you need to upgrade it to version **3.0.0rc0** or above.

## Train launch

@@ -62,6 +65,35 @@ Compared with MMSeg0.x, MMSeg1.x provides fewer command line arguments in `tools

--cfg-options randomness.deterministic=True

+## Test launch
+
+Similar to the training launch, there are only common arguments in `tools/test.py` of MMSegmentation 1.x.
+Below are the differences in the test scripts;
+please refer to [this documentation](../user_guides/4_train_test.md) for more details about the test launch.
+
| Function | 0.x | 1.x |
| :---: | :---: | :---: |
| Evaluation metrics | `--eval mIoU` | `--cfg-options test_evaluator.type=IoUMetric` |
| Whether to use test-time augmentation | `--aug-test` | `--tta` |
| Whether to save the output results without performing evaluation | `--format-only` | `--cfg-options test_evaluator.format_only=True` |
+ ## Configuration file ### Model settings @@ -86,7 +118,7 @@ Add `model.data_preprocessor` field to configure the `DataPreProcessor`, includi - `bgr_to_rgb` (bool): whether to convert image from BGR to RGB.Defaults to False. -- `rgb_to_bgr` (bool): whether to convert image from RGB to RGB. Defaults to False. +- `rgb_to_bgr` (bool): whether to convert image from RGB to BGR. Defaults to False. **Note:** Please refer [models documentation](../advanced_guides/models.md) for more details. @@ -260,8 +292,7 @@ tta_pipeline = [ Changes in **`evaluation`**: - The **`evaluation`** field is split to `val_evaluator` and `test_evaluator`. And it won't support `interval` and `save_best` arguments. - The `interval` is moved to `train_cfg.val_interval`, and the `save_best` - is moved to `default_hooks.checkpoint.save_best`. `pre_eval` has been removed. + The `interval` is moved to `train_cfg.val_interval`, and the `save_best` is moved to `default_hooks.checkpoint.save_best`. `pre_eval` has been removed. - `'mIoU'` has been changed to `'IoUMetric'`. @@ -291,8 +322,7 @@ test_evaluator = val_evaluator Changes in **`optimizer`** and **`optimizer_config`**: -- Now we use `optim_wrapper` field to specify all configuration about the optimization process. And the - `optimizer` is a sub field of `optim_wrapper` now. +- Now we use `optim_wrapper` field to specify all configuration about the optimization process. And the `optimizer` is a sub field of `optim_wrapper` now. - `paramwise_cfg` is also a sub field of `optim_wrapper`, instead of `optimizer`. - `optimizer_config` is removed now, and all configurations of it are moved to `optim_wrapper`. - `grad_clip` is renamed to `clip_grad`. @@ -326,11 +356,9 @@ optim_wrapper = dict( Changes in **`lr_config`**: - The `lr_config` field is removed and we use new `param_scheduler` to replace it. -- The `warmup` related arguments are removed, since we use schedulers combination to implement this - functionality. +- The `warmup` related arguments are removed, since we use schedulers combination to implement this functionality. -The new schedulers combination mechanism is very flexible, and you can use it to design many kinds of learning -rate / momentum curves. See [the tutorial](TODO) for more details. +The new schedulers combination mechanism is very flexible, and you can use it to design many kinds of learning rate / momentum curves. See [the tutorial](TODO) for more details.
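+For example, a warm-up phase that was previously expressed with `warmup` arguments can be written as a combination of two schedulers (a hedged sketch; the values are illustrative):
+
+```python
+param_scheduler = [
+    # linear warm-up for the first 1,000 iterations
+    dict(type='LinearLR', start_factor=0.1, by_epoch=False, begin=0, end=1000),
+    # polynomial decay for the rest of the schedule
+    dict(type='PolyLR', eta_min=1e-4, power=0.9, begin=1000, end=80000, by_epoch=False)
+]
+```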
@@ -374,8 +402,7 @@ param_scheduler = [

Changes in **`runner`**:

-Most configuration in the original `runner` field is moved to `train_cfg`, `val_cfg` and `test_cfg`, which
-configure the loop in training, validation and test.
+Most configuration in the original `runner` field is moved to `train_cfg`, `val_cfg` and `test_cfg`, which configure the loop in training, validation and test.
@@ -402,8 +429,7 @@ test_cfg = dict(type='TestLoop') # Use the default test loop.
-In fact, in OpenMMLab 2.0, we introduced `Loop` to control the behaviors in training, validation and test. The functionalities of `Runner` are also changed. You can find more details of [runner tutorial](https://github.com/open-mmlab/mmengine/blob/main/docs/en/design/runner.md)
-in [MMEngine](https://github.com/open-mmlab/mmengine/).
+In fact, in OpenMMLab 2.0, we introduced `Loop` to control the behaviors in training, validation and test. The functionalities of `Runner` are also changed. You can find more details in the [runner tutorial](https://github.com/open-mmlab/mmengine/blob/main/docs/en/design/runner.md) of [MMEngine](https://github.com/open-mmlab/mmengine/).

### Runtime settings

@@ -433,8 +459,7 @@ default_hooks = dict(

    visualization=dict(type='SegVisualizationHook'))
```

-In addition, we split the original logger to logger and visualizer. The logger is used to record
-information and the visualizer is used to show the logger in different backends, like terminal and TensorBoard.
+In addition, we split the original logger into logger and visualizer. The logger is used to record information and the visualizer is used to show the logger in different backends, like terminal and TensorBoard.

@@ -478,8 +503,7 @@ Changes in **`load_from`** and **`resume_from`**:

- If `resume=False` and `load_from` is **not None**, only load the checkpoint, not resume training.
- If `resume=False` and `load_from` is **None**, do not load nor resume.

-Changes in **`dist_params`**: The `dist_params` field is a sub field of `env_cfg` now. And there are some new
-configurations in the `env_cfg`.
+Changes in **`dist_params`**: The `dist_params` field is a sub field of `env_cfg` now. And there are some new configurations in the `env_cfg`.

```python
env_cfg = dict(
@@ -496,8 +520,6 @@ env_cfg = dict(

Changes in **`workflow`**: `workflow` related functionalities are removed.

-New field **`visualizer`**: The visualizer is a new design in OpenMMLab 2.0 architecture. We use a
-visualizer instance in the runner to handle results & log visualization and save to different backends.
-See the [visualization tutorial](user_guides/visualization.md) for more details.
+New field **`visualizer`**: The visualizer is a new design in OpenMMLab 2.0 architecture. We use a visualizer instance in the runner to handle results & log visualization and save to different backends. See the [visualization tutorial](../user_guides/visualization.md) for more details.

-New field **`default_scope`**: The start point to search module for all registries. The `default_scope` in MMSegmentation is `mmseg`. See [the registry tutorial](https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/registry.md) for more details.
+New field **`default_scope`**: The start point to search module for all registries. The `default_scope` in MMSegmentation is `mmseg`. See [the registry tutorial](https://github.com/open-mmlab/mmengine/blob/main/docs/en/advanced_tutorials/registry.md) for more details.
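+As a hedged config sketch of the checkpoint loading rules in the runtime settings above (the checkpoint path is hypothetical):
+
+```python
+load_from = 'work_dir/iter_40000.pth'  # hypothetical checkpoint path
+resume = True   # resume training from `load_from`; with resume=False it would
+                # only load the checkpoint without resuming training
+```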
diff --git a/docs/en/migration/package.md b/docs/en/migration/package.md index c0aa1d6e31..728e9a9bb6 100644 --- a/docs/en/migration/package.md +++ b/docs/en/migration/package.md @@ -74,9 +74,8 @@ We moved registry implementations for all kinds of modules in MMSegmentation in

### `mmseg.apis`

-OpenMMLab 2.0 tries to support unified interface for multitasking of Computer Vision,
-and releases much stronger [`Runner`](https://github.com/open-mmlab/mmengine/blob/main/docs/en/design/runner.md),
-so MMSeg 1.x removed modules in `train.py` and `test.py` renamed `init_segmentor` to `init_model` and `inference_segmentor` to `inference_model`
+OpenMMLab 2.0 tries to support a unified interface for multitasking in computer vision and releases a much stronger [`Runner`](https://github.com/open-mmlab/mmengine/blob/main/docs/en/design/runner.md), so MMSeg 1.x removed the modules in `train.py` and `test.py`, renamed `init_segmentor` to `init_model`, and renamed `inference_segmentor` to `inference_model`.
+
Here is the changes of `mmseg.apis`:

| Function | Changes |
| :-------: | :------ |

@@ -92,7 +91,7 @@ Here is the changes of `mmseg.apis`:

### `mmseg.datasets`

-OpenMMLab 2.0 defines the `BaseDataset` to function and interface of dataset, and MMSegmentation 1.x also follow this protocol and defines the `BaseSegDataset` inherited from `BaseDataset`. MMCV 2.x collects general data transforms for multiple tasks e.g. classification, detection, segmentation, so MMSegmentation 1.x uses these data transforms and removes them from mmseg.datasets
+OpenMMLab 2.0 defines the `BaseDataset` to standardize the function and interface of datasets, and MMSegmentation 1.x also follows this protocol and defines the `BaseSegDataset` inherited from `BaseDataset`. MMCV 2.x collects general data transforms for multiple tasks, e.g. classification, detection, and segmentation, so MMSegmentation 1.x uses these data transforms and removes them from `mmseg.datasets`.

| Packages/Modules | Changes |
| :-------------------: | :------ |

diff --git a/docs/en/model_zoo.md b/docs/en/model_zoo.md index 782a47002f..6717df6cc7 100644 --- a/docs/en/model_zoo.md +++ b/docs/en/model_zoo.md @@ -34,123 +34,123 @@

### FCN

-Please refer to [FCN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn) for details.
+Please refer to [FCN](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn) for details.

### PSPNet

-Please refer to [PSPNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet) for details.
+Please refer to [PSPNet](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet) for details.

### DeepLabV3

-Please refer to [DeepLabV3](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3) for details.
+Please refer to [DeepLabV3](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3) for details.

### PSANet

-Please refer to [PSANet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet) for details.
+Please refer to [PSANet](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet) for details.

### DeepLabV3+

-Please refer to [DeepLabV3+](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus) for details.
+Please refer to [DeepLabV3+](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus) for details.

### UPerNet

-Please refer to [UPerNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet) for details.
+Please refer to [UPerNet](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet) for details. ### NonLocal Net -Please refer to [NonLocal Net](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net) for details. +Please refer to [NonLocal Net](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net) for details. ### EncNet -Please refer to [EncNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet) for details. +Please refer to [EncNet](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/encnet) for details. ### CCNet -Please refer to [CCNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet) for details. +Please refer to [CCNet](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ccnet) for details. ### DANet -Please refer to [DANet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet) for details. +Please refer to [DANet](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/danet) for details. ### APCNet -Please refer to [APCNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/apcnet) for details. +Please refer to [APCNet](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/apcnet) for details. ### HRNet -Please refer to [HRNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet) for details. +Please refer to [HRNet](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet) for details. ### GCNet -Please refer to [GCNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet) for details. +Please refer to [GCNet](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet) for details. ### DMNet -Please refer to [DMNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet) for details. +Please refer to [DMNet](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dmnet) for details. ### ANN -Please refer to [ANN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann) for details. +Please refer to [ANN](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ann) for details. ### OCRNet -Please refer to [OCRNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet) for details. +Please refer to [OCRNet](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet) for details. ### Fast-SCNN -Please refer to [Fast-SCNN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastscnn) for details. +Please refer to [Fast-SCNN](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fastscnn) for details. ### ResNeSt -Please refer to [ResNeSt](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/resnest) for details. +Please refer to [ResNeSt](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/resnest) for details. ### Semantic FPN -Please refer to [Semantic FPN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/sem_fpn) for details. +Please refer to [Semantic FPN](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/sem_fpn) for details. ### PointRend -Please refer to [PointRend](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/point_rend) for details. +Please refer to [PointRend](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/point_rend) for details. 
### MobileNetV2

-Please refer to [MobileNetV2](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v2) for details.
+Please refer to [MobileNetV2](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mobilenet_v2) for details.

### MobileNetV3

-Please refer to [MobileNetV3](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v3) for details.
+Please refer to [MobileNetV3](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mobilenet_v3) for details.

### EMANet

-Please refer to [EMANet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/emanet) for details.
+Please refer to [EMANet](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/emanet) for details.

### DNLNet

-Please refer to [DNLNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet) for details.
+Please refer to [DNLNet](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dnlnet) for details.

### CGNet

-Please refer to [CGNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/cgnet) for details.
+Please refer to [CGNet](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/cgnet) for details.

### Mixed Precision (FP16) Training

-Please refer [Mixed Precision (FP16) Training on BiSeNetV2](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv2/bisenetv2_fcn_fp16_4x4_1024x1024_160k_cityscapes.py) for details.
+Please refer to [Mixed Precision (FP16) Training on BiSeNetV2](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/bisenetv2/bisenetv2_fcn_4xb4-160k_cityscapes-1024x1024.py) for details.

### U-Net

-Please refer to [U-Net](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/README.md) for details.
+Please refer to [U-Net](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet/README.md) for details.

### ViT

-Please refer to [ViT](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/README.md) for details.
+Please refer to [ViT](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/vit/README.md) for details.

### Swin

-Please refer to [Swin](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/README.md) for details.
+Please refer to [Swin](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/swin/README.md) for details.

### SETR

-Please refer to [SETR](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/setr/README.md) for details.
+Please refer to [SETR](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/setr/README.md) for details.
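Returning to the `mmseg.apis` renaming described in the migration guide above, here is a minimal before/after sketch. The config and checkpoint paths are placeholders (the checkpoint filename is hypothetical); only the renamed functions themselves come from the migration table.

```python
# MMSeg 0.x (removed API, shown for comparison):
#   from mmseg.apis import init_segmentor, inference_segmentor
#   model = init_segmentor(config_file, checkpoint_file, device='cuda:0')
#   result = inference_segmentor(model, 'demo/demo.png')

# MMSeg 1.x (renamed API):
from mmseg.apis import inference_model, init_model

# placeholder paths; substitute any config/checkpoint pair from the model zoo
config_file = 'configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py'
checkpoint_file = 'checkpoints/pspnet_r50-d8_cityscapes.pth'

model = init_model(config_file, checkpoint_file, device='cuda:0')
result = inference_model(model, 'demo/demo.png')
```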
## Speed benchmark diff --git a/docs/en/modelzoo_statistics.md b/docs/en/modelzoo_statistics.md index c8fa46d013..e5e21a1474 100644 --- a/docs/en/modelzoo_statistics.md +++ b/docs/en/modelzoo_statistics.md @@ -7,96 +7,96 @@ - Number of checkpoints: 612 - - \[ALGORITHM\] [ANN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann) (16 ckpts) + - \[ALGORITHM\] [ANN](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ann) (16 ckpts) - - \[ALGORITHM\] [APCNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/apcnet) (12 ckpts) + - \[ALGORITHM\] [APCNet](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/apcnet) (12 ckpts) - - \[BACKBONE\] [BEiT](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/beit) (2 ckpts) + - \[BACKBONE\] [BEiT](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/beit) (2 ckpts) - - \[ALGORITHM\] [BiSeNetV1](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1) (11 ckpts) + - \[ALGORITHM\] [BiSeNetV1](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/bisenetv1) (11 ckpts) - - \[ALGORITHM\] [BiSeNetV2](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv2) (4 ckpts) + - \[ALGORITHM\] [BiSeNetV2](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/bisenetv2) (4 ckpts) - - \[ALGORITHM\] [CCNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet) (16 ckpts) + - \[ALGORITHM\] [CCNet](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ccnet) (16 ckpts) - - \[ALGORITHM\] [CGNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/cgnet) (2 ckpts) + - \[ALGORITHM\] [CGNet](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/cgnet) (2 ckpts) - - \[BACKBONE\] [ConvNeXt](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/convnext) (6 ckpts) + - \[BACKBONE\] [ConvNeXt](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/convnext) (6 ckpts) - - \[ALGORITHM\] [DANet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet) (16 ckpts) + - \[ALGORITHM\] [DANet](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/danet) (16 ckpts) - - \[ALGORITHM\] [DeepLabV3](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3) (41 ckpts) + - \[ALGORITHM\] [DeepLabV3](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3) (41 ckpts) - - \[ALGORITHM\] [DeepLabV3+](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus) (42 ckpts) + - \[ALGORITHM\] [DeepLabV3+](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/deeplabv3plus) (42 ckpts) - - \[ALGORITHM\] [DMNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet) (12 ckpts) + - \[ALGORITHM\] [DMNet](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dmnet) (12 ckpts) - - \[ALGORITHM\] [DNLNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet) (12 ckpts) + - \[ALGORITHM\] [DNLNet](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dnlnet) (12 ckpts) - - \[ALGORITHM\] [DPT](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dpt) (1 ckpts) + - \[ALGORITHM\] [DPT](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/dpt) (1 ckpts) - - \[ALGORITHM\] [EMANet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/emanet) (4 ckpts) + - \[ALGORITHM\] [EMANet](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/emanet) (4 ckpts) 
- - \[ALGORITHM\] [EncNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet) (12 ckpts) + - \[ALGORITHM\] [EncNet](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/encnet) (12 ckpts) - - \[ALGORITHM\] [ERFNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/erfnet) (1 ckpts) + - \[ALGORITHM\] [ERFNet](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/erfnet) (1 ckpts) - - \[ALGORITHM\] [FastFCN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastfcn) (12 ckpts) + - \[ALGORITHM\] [FastFCN](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fastfcn) (12 ckpts) - - \[ALGORITHM\] [Fast-SCNN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastscnn) (1 ckpts) + - \[ALGORITHM\] [Fast-SCNN](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fastscnn) (1 ckpts) - - \[ALGORITHM\] [FCN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn) (41 ckpts) + - \[ALGORITHM\] [FCN](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn) (41 ckpts) - - \[ALGORITHM\] [GCNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet) (16 ckpts) + - \[ALGORITHM\] [GCNet](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/gcnet) (16 ckpts) - - \[BACKBONE\] [HRNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet) (37 ckpts) + - \[BACKBONE\] [HRNet](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/hrnet) (37 ckpts) - - \[ALGORITHM\] [ICNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet) (12 ckpts) + - \[ALGORITHM\] [ICNet](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/icnet) (12 ckpts) - - \[ALGORITHM\] [ISANet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet) (16 ckpts) + - \[ALGORITHM\] [ISANet](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/isanet) (16 ckpts) - - \[ALGORITHM\] [K-Net](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/knet) (7 ckpts) + - \[ALGORITHM\] [K-Net](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/knet) (7 ckpts) - - \[BACKBONE\] [MAE](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mae) (1 ckpts) + - \[BACKBONE\] [MAE](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mae) (1 ckpts) - - \[ALGORITHM\] [Mask2Former](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mask2former) (13 ckpts) + - \[ALGORITHM\] [Mask2Former](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mask2former) (13 ckpts) - - \[ALGORITHM\] [MaskFormer](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/maskformer) (4 ckpts) + - \[ALGORITHM\] [MaskFormer](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/maskformer) (4 ckpts) - - \[BACKBONE\] [MobileNetV2](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v2) (8 ckpts) + - \[BACKBONE\] [MobileNetV2](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mobilenet_v2) (8 ckpts) - - \[BACKBONE\] [MobileNetV3](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v3) (4 ckpts) + - \[BACKBONE\] [MobileNetV3](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/mobilenet_v3) (4 ckpts) - - \[ALGORITHM\] [NonLocal Net](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net) (16 ckpts) + - \[ALGORITHM\] [NonLocal 
Net](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/nonlocal_net) (16 ckpts) - - \[ALGORITHM\] [OCRNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet) (24 ckpts) + - \[ALGORITHM\] [OCRNet](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/ocrnet) (24 ckpts) - - \[ALGORITHM\] [PointRend](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/point_rend) (4 ckpts) + - \[ALGORITHM\] [PointRend](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/point_rend) (4 ckpts) - - \[BACKBONE\] [PoolFormer](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/poolformer) (5 ckpts) + - \[BACKBONE\] [PoolFormer](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/poolformer) (5 ckpts) - - \[ALGORITHM\] [PSANet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet) (16 ckpts) + - \[ALGORITHM\] [PSANet](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/psanet) (16 ckpts) - - \[ALGORITHM\] [PSPNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet) (54 ckpts) + - \[ALGORITHM\] [PSPNet](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/pspnet) (54 ckpts) - - \[BACKBONE\] [ResNeSt](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/resnest) (8 ckpts) + - \[BACKBONE\] [ResNeSt](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/resnest) (8 ckpts) - - \[ALGORITHM\] [SegFormer](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer) (13 ckpts) + - \[ALGORITHM\] [SegFormer](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segformer) (13 ckpts) - - \[ALGORITHM\] [Segmenter](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segmenter) (5 ckpts) + - \[ALGORITHM\] [Segmenter](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/segmenter) (5 ckpts) - - \[ALGORITHM\] [Semantic FPN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/sem_fpn) (4 ckpts) + - \[ALGORITHM\] [Semantic FPN](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/sem_fpn) (4 ckpts) - - \[ALGORITHM\] [SETR](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/setr) (7 ckpts) + - \[ALGORITHM\] [SETR](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/setr) (7 ckpts) - - \[ALGORITHM\] [STDC](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/stdc) (4 ckpts) + - \[ALGORITHM\] [STDC](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/stdc) (4 ckpts) - - \[BACKBONE\] [Swin Transformer](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin) (6 ckpts) + - \[BACKBONE\] [Swin Transformer](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/swin) (6 ckpts) - - \[BACKBONE\] [Twins](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/twins) (12 ckpts) + - \[BACKBONE\] [Twins](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/twins) (12 ckpts) - - \[ALGORITHM\] [UNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet) (25 ckpts) + - \[ALGORITHM\] [UNet](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/unet) (25 ckpts) - - \[ALGORITHM\] [UPerNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet) (16 ckpts) + - \[ALGORITHM\] [UPerNet](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/upernet) (16 ckpts) - - \[BACKBONE\] [Vision Transformer](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit) (11 ckpts) + 
- \[BACKBONE\] [Vision Transformer](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/vit) (11 ckpts) diff --git a/docs/en/notes/faq.md b/docs/en/notes/faq.md index fe5cac3834..5b9c35339b 100644 --- a/docs/en/notes/faq.md +++ b/docs/en/notes/faq.md @@ -26,5 +26,5 @@ Notes: ## How to know the number of GPUs needed to train the model -- Infer from the name of the config file of the model. You can refer to the `Config Name Style` part of [Learn about Configs](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/tutorials/config.md). For example, for config file with name `segformer_mit-b0_8xb1-160k_cityscapes-1024x1024.py`, `8xb1` means training the model corresponding to it needs 8 GPUs, and the batch size of each GPU is 1. +- Infer from the name of the config file of the model. You can refer to the `Config Name Style` part of [Learn about Configs](../user_guides/1_config.md). For example, for config file with name `segformer_mit-b0_8xb1-160k_cityscapes-1024x1024.py`, `8xb1` means training the model corresponding to it needs 8 GPUs, and the batch size of each GPU is 1. - Infer from the log file. Open the log file of the model and search `nGPU` in the file. The number of figures following `nGPU` is the number of GPUs needed to train the model. For instance, searching for `nGPU` in the log file yields the record `nGPU 0,1,2,3,4,5,6,7`, which indicates that eight GPUs are needed to train the model. diff --git a/docs/en/overview.md b/docs/en/overview.md index 399f343fd4..bbc0b8e32c 100644 --- a/docs/en/overview.md +++ b/docs/en/overview.md @@ -72,8 +72,8 @@ Here is a detailed step-by-step guide to learn more about MMSegmentation: please refer to the below guides to build your own segmentation project: - [Add new models](advanced_guides/add_models.md) - - [Add new datasets](advanced_guides/add_dataset.md) - - [Add new transforms](advanced_guides/add_transform.md) + - [Add new datasets](advanced_guides/add_datasets.md) + - [Add new transforms](advanced_guides/add_transforms.md) - [Customize runtime](advanced_guides/customize_runtime.md) 5. If you are more familiar with MMSegmentation v0.x, there is documentation about migration from MMSegmentation v0.x to v1.x @@ -82,4 +82,4 @@ Here is a detailed step-by-step guide to learn more about MMSegmentation: ## References -- https://paperswithcode.com/task/semantic-segmentation/codeless#task-home +- [Paper with code](https://paperswithcode.com/task/semantic-segmentation/codeless#task-home) diff --git a/docs/en/switch_language.md b/docs/en/switch_language.md index 80e30dc3ae..d6973b6c05 100644 --- a/docs/en/switch_language.md +++ b/docs/en/switch_language.md @@ -1,3 +1,3 @@ -## English +## English -## 简体中文 +## 简体中文 diff --git a/docs/en/user_guides/2_dataset_prepare.md b/docs/en/user_guides/2_dataset_prepare.md index 5d36061d89..d4fbfbf5d0 100644 --- a/docs/en/user_guides/2_dataset_prepare.md +++ b/docs/en/user_guides/2_dataset_prepare.md @@ -1,4 +1,4 @@ -## Prepare datasets +# Tutorial 2: Prepare datasets It is recommended to symlink the dataset root to `$MMSEGMENTATION/data`. If your folder structure is different, you may need to change the corresponding paths in config files. 
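As a concrete illustration of the symlink recommendation above — a minimal sketch, assuming your datasets actually live under `/data/datasets` (a hypothetical path) and that you run it from the repository root:

```python
import os
import os.path as osp

# Hypothetical dataset location; adjust to your environment.
src = '/data/datasets'
dst = osp.join(os.getcwd(), 'data')  # $MMSEGMENTATION/data

# Equivalent to `ln -s /data/datasets data` on the command line.
if not osp.exists(dst):
    os.symlink(src, dst)
```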
@@ -154,22 +154,44 @@ mmsegmentation
│   │   │   ├── training
│   │   │   ├── validation
│   │   │   ├── test
+│   ├── mapillary
+│   │   ├── training
+│   │   │   ├── images
+│   │   │   ├── v1.2
+│   │   │   │   ├── instances
+│   │   │   │   ├── labels
+│   │   │   │   └── panoptic
+│   │   │   ├── v2.0
+│   │   │   │   ├── instances
+│   │   │   │   ├── labels
+│   │   │   │   ├── panoptic
+│   │   │   │   └── polygons
+│   │   ├── validation
+│   │   │   ├── images
+│   │   │   ├── v1.2
+│   │   │   │   ├── instances
+│   │   │   │   ├── labels
+│   │   │   │   └── panoptic
+│   │   │   ├── v2.0
+│   │   │   │   ├── instances
+│   │   │   │   ├── labels
+│   │   │   │   ├── panoptic
+│   │   │   │   └── polygons
```

-### Cityscapes
+## Cityscapes

The data could be found [here](https://www.cityscapes-dataset.com/downloads/) after registration.

By convention, `**labelTrainIds.png` are used for cityscapes training.
-We provided a [scripts](https://github.com/open-mmlab/mmsegmentation/blob/1.x/tools/dataset_converters/cityscapes.py) based on [cityscapesscripts](https://github.com/mcordts/cityscapesScripts)
-to generate `**labelTrainIds.png`.
+We provide a [script](https://github.com/open-mmlab/mmsegmentation/blob/1.x/tools/dataset_converters/cityscapes.py) based on [cityscapesscripts](https://github.com/mcordts/cityscapesScripts) to generate `**labelTrainIds.png`.

```shell
# --nproc means 8 process for conversion, which could be omitted as well.
python tools/dataset_converters/cityscapes.py data/cityscapes --nproc 8
```

-### Pascal VOC
+## Pascal VOC

Pascal VOC 2012 could be downloaded from [here](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar).
Beside, most recent works on Pascal VOC dataset usually exploit extra augmentation data, which could be found [here](http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz).
@@ -181,14 +203,14 @@ If you would like to use augmented VOC dataset, please run following command to
python tools/dataset_converters/voc_aug.py data/VOCdevkit data/VOCdevkit/VOCaug --nproc 8
```

-Please refer to [concat dataset](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/advanced_guides/datasets.md) for details about how to concatenate them and train them together.
+Please refer to [concat dataset](../advanced_guides/add_datasets.md#concatenate-dataset) and [voc_aug config example](../../../configs/_base_/datasets/pascal_voc12_aug.py) for details about how to concatenate them and train them together (a minimal config sketch also follows at the end of this section).

-### ADE20K
+## ADE20K

The training and validation set of ADE20K could be download from this [link](http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip).
We may also download test set from [here](http://data.csail.mit.edu/places/ADEchallenge/release_test.zip).

-### Pascal Context
+## Pascal Context

The training and validation set of Pascal Context could be download from [here](http://host.robots.ox.ac.uk/pascal/VOC/voc2010/VOCtrainval_03-May-2010.tar). You may also download test set from [here](http://host.robots.ox.ac.uk:8080/eval/downloads/VOC2010test.tar) after registration.

If you would like to use Pascal Context dataset, please install [Detail](https://github.com/zhanghang1989/detail-api) and then run the following command to convert annotations into proper format.

```shell
python tools/dataset_converters/pascal_context.py data/VOCdevkit data/VOCdevkit/VOC2010/trainval_merged.json
```

-### COCO Stuff 10k
+## COCO Stuff 10k

The data could be downloaded [here](http://calvin.inf.ed.ac.uk/wp-content/uploads/data/cocostuffdataset/cocostuff-10k-v1.1.zip) by wget.
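As promised above, here is a minimal, hypothetical sketch of concatenating the original Pascal VOC annotations with the augmented SBD ones; it only loosely follows the referenced `pascal_voc12_aug.py` config, and `train_pipeline` is assumed to be defined elsewhere in the config file.

```python
dataset_type = 'PascalVOCDataset'
data_root = 'data/VOCdevkit/VOC2012'

# Original VOC 2012 training annotations.
voc_train = dict(
    type=dataset_type,
    data_root=data_root,
    data_prefix=dict(img_path='JPEGImages', seg_map_path='SegmentationClass'),
    ann_file='ImageSets/Segmentation/train.txt',
    pipeline=train_pipeline)

# Augmented SBD annotations produced by tools/dataset_converters/voc_aug.py.
voc_aug_train = dict(
    type=dataset_type,
    data_root=data_root,
    data_prefix=dict(img_path='JPEGImages', seg_map_path='SegmentationClassAug'),
    ann_file='ImageSets/Segmentation/aug.txt',
    pipeline=train_pipeline)

# Train on both annotation sets by wrapping them in a ConcatDataset.
train_dataloader = dict(
    dataset=dict(type='ConcatDataset', datasets=[voc_train, voc_aug_train]))
```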
@@ -220,7 +242,7 @@ python tools/dataset_converters/coco_stuff10k.py /path/to/coco_stuff10k --nproc By convention, mask labels in `/path/to/coco_stuff164k/annotations/*2014/*_labelTrainIds.png` are used for COCO Stuff 10k training and testing. -### COCO Stuff 164k +## COCO Stuff 164k For COCO Stuff 164k dataset, please run the following commands to download and convert the augmented dataset. @@ -244,7 +266,7 @@ By convention, mask labels in `/path/to/coco_stuff164k/annotations/*2017/*_label The details of this dataset could be found at [here](https://github.com/nightrome/cocostuff#downloads). -### CHASE DB1 +## CHASE DB1 The training and validation set of CHASE DB1 could be download from [here](https://staffnet.kingston.ac.uk/~ku15565/CHASE_DB1/assets/CHASEDB1.zip). @@ -256,7 +278,7 @@ python tools/dataset_converters/chase_db1.py /path/to/CHASEDB1.zip The script will make directory structure automatically. -### DRIVE +## DRIVE The training and validation set of DRIVE could be download from [here](https://drive.grand-challenge.org/). Before that, you should register an account. Currently '1st_manual' is not provided officially. @@ -268,7 +290,7 @@ python tools/dataset_converters/drive.py /path/to/training.zip /path/to/test.zip The script will make directory structure automatically. -### HRF +## HRF First, download [healthy.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/healthy.zip), [glaucoma.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/glaucoma.zip), [diabetic_retinopathy.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/diabetic_retinopathy.zip), [healthy_manualsegm.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/healthy_manualsegm.zip), [glaucoma_manualsegm.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/glaucoma_manualsegm.zip) and [diabetic_retinopathy_manualsegm.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/diabetic_retinopathy_manualsegm.zip). @@ -280,7 +302,7 @@ python tools/dataset_converters/hrf.py /path/to/healthy.zip /path/to/healthy_man The script will make directory structure automatically. -### STARE +## STARE First, download [stare-images.tar](http://cecas.clemson.edu/~ahoover/stare/probing/stare-images.tar), [labels-ah.tar](http://cecas.clemson.edu/~ahoover/stare/probing/labels-ah.tar) and [labels-vk.tar](http://cecas.clemson.edu/~ahoover/stare/probing/labels-vk.tar). @@ -292,15 +314,15 @@ python tools/dataset_converters/stare.py /path/to/stare-images.tar /path/to/labe The script will make directory structure automatically. -### Dark Zurich +## Dark Zurich Since we only support test models on this dataset, you may only download [the validation set](https://data.vision.ee.ethz.ch/csakarid/shared/GCMA_UIoU/Dark_Zurich_val_anon.zip). -### Nighttime Driving +## Nighttime Driving Since we only support test models on this dataset, you may only download [the test set](http://data.vision.ee.ethz.ch/daid/NighttimeDriving/NighttimeDrivingTest.zip). -### LoveDA +## LoveDA The data could be downloaded from Google Drive [here](https://drive.google.com/drive/folders/1ibYV0qwn4yuuh068Rnc-w4tPi0U0c-ti?usp=sharing). @@ -315,25 +337,24 @@ wget https://zenodo.org/record/5706578/files/Val.zip wget https://zenodo.org/record/5706578/files/Test.zip ``` -For LoveDA dataset, please run the following command to download and re-organize the dataset. +For LoveDA dataset, please run the following command to re-organize the dataset. 
```shell
python tools/dataset_converters/loveda.py /path/to/loveDA
```

-Using trained model to predict test set of LoveDA and submit it to server can be found [here](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/user_guides/3_inference.md).
+Instructions for using a trained model to predict the LoveDA test set and submit the results to the server can be found [here](https://codalab.lisn.upsaclay.fr/competitions/421).

More details about LoveDA can be found [here](https://github.com/Junjue-Wang/LoveDA).

-### ISPRS Potsdam
+## ISPRS Potsdam

-The [Potsdam](https://www2.isprs.org/commissions/comm2/wg4/benchmark/2d-sem-label-potsdam/)
-dataset is for urban semantic segmentation used in the 2D Semantic Labeling Contest - Potsdam.
+The [Potsdam](https://www2.isprs.org/commissions/comm2/wg4/benchmark/2d-sem-label-potsdam/) dataset is for urban semantic segmentation used in the 2D Semantic Labeling Contest - Potsdam.

The dataset can be requested at the challenge [homepage](https://www2.isprs.org/commissions/comm2/wg4/benchmark/data-request-form/).
The '2_Ortho_RGB.zip' and '5_Labels_all_noBoundary.zip' are required.

-For Potsdam dataset, please run the following command to download and re-organize the dataset.
+For Potsdam dataset, please run the following command to re-organize the dataset.

```shell
python tools/dataset_converters/potsdam.py /path/to/potsdam
@@ -341,29 +362,28 @@ python tools/dataset_converters/potsdam.py /path/to/potsdam

In our default setting, it will generate 3456 images for training and 2016 images for validation.

-### ISPRS Vaihingen
+## ISPRS Vaihingen

-The [Vaihingen](https://www2.isprs.org/commissions/comm2/wg4/benchmark/2d-sem-label-vaihingen/)
-dataset is for urban semantic segmentation used in the 2D Semantic Labeling Contest - Vaihingen.
+The [Vaihingen](https://www2.isprs.org/commissions/comm2/wg4/benchmark/2d-sem-label-vaihingen/) dataset is for urban semantic segmentation used in the 2D Semantic Labeling Contest - Vaihingen.

The dataset can be requested at the challenge [homepage](https://www2.isprs.org/commissions/comm2/wg4/benchmark/data-request-form/).
The 'ISPRS_semantic_labeling_Vaihingen.zip' and 'ISPRS_semantic_labeling_Vaihingen_ground_truth_eroded_COMPLETE.zip' are required.

-For Vaihingen dataset, please run the following command to download and re-organize the dataset.
+For Vaihingen dataset, please run the following command to re-organize the dataset.

```shell
python tools/dataset_converters/vaihingen.py /path/to/vaihingen
```

-In our default setting (`clip_size` =512, `stride_size`=256), it will generate 344 images for training and 398 images for validation.
+In our default setting (`clip_size`=512, `stride_size`=256), it will generate 344 images for training and 398 images for validation.

-### iSAID
+## iSAID

The data images could be download from [DOTA-v1.0](https://captain-whu.github.io/DOTA/dataset.html) (train/val/test)

The data annotations could be download from [iSAID](https://captain-whu.github.io/iSAID/dataset.html) (train/val)

-The dataset is a Large-scale Dataset for Instance Segmentation (also have segmantic segmentation) in Aerial Images.
+The dataset is a large-scale dataset for instance segmentation (it also provides semantic segmentation annotations) in aerial images.

You may need to follow the following structure for dataset preparation after downloading iSAID dataset.
@@ -392,7 +412,7 @@ You may need to follow the following structure for dataset preparation after dow
python tools/dataset_converters/isaid.py /path/to/iSAID
```

-In our default setting (`patch_width`=896, `patch_height`=896, `overlap_area`=384), it will generate 33978 images for training and 11644 images for validation. 
+In our default setting (`patch_width`=896, `patch_height`=896, `overlap_area`=384), it will generate 33978 images for training and 11644 images for validation.

## LIP(Look Into Person) dataset

@@ -412,7 +432,7 @@ mv val_segmentations ../
cd ..
```

-The contents of LIP datasets include: 
+The contents of LIP datasets include:

```none
├── data
@@ -433,10 +453,9 @@ The contents of LIP datasets include:

## Synapse dataset

-This dataset could be download from [this page](https://www.synapse.org/#!Synapse:syn3193805/wiki/)
+This dataset can be downloaded from [this page](https://www.synapse.org/#!Synapse:syn3193805/wiki/).

-To follow the data preparation setting of [TransUNet](https://arxiv.org/abs/2102.04306), which splits original training set (30 scans)
-into new training (18 scans) and validation set (12 scans). Please run the following command to prepare the dataset.
+We follow the data preparation setting of [TransUNet](https://arxiv.org/abs/2102.04306), which splits the original training set (30 scans) into a new training set (18 scans) and a validation set (12 scans). Please run the following command to prepare the dataset.

```shell
unzip RawData.zip
@@ -509,10 +528,9 @@ Then, use this command to convert synapse dataset.
python tools/dataset_converters/synapse.py --dataset-path /path/to/synapse
```

-Noted that MMSegmentation default evaluation metric (such as mean dice value) is calculated on 2D slice image,
-which is not comparable to results of 3D scan in some paper such as [TransUNet](https://arxiv.org/abs/2102.04306).
+Note that the MMSegmentation default evaluation metric (such as mean Dice value) is calculated on 2D slice images, which is not comparable to the results of 3D scans in some papers such as [TransUNet](https://arxiv.org/abs/2102.04306).

-### REFUGE
+## REFUGE

Register in [REFUGE Challenge](https://refuge.grand-challenge.org) and download [REFUGE dataset](https://refuge.grand-challenge.org/REFUGE2Download).

@@ -551,3 +569,54 @@ The script will make directory structure below:
```

It includes 400 images for training, 400 images for validation and 400 images for testing which is the same as REFUGE 2018 dataset.
+
+## Mapillary Vistas Datasets
+
+- The dataset can be downloaded [here](https://www.mapillary.com/dataset/vistas) after registration.
+
+- The Mapillary Vistas Dataset uses 8-bit palette images to store labels, so no conversion operation is required.
+
+- Assuming you have put the dataset zip file in `mmsegmentation/data/mapillary`,
+
+- please run the following commands to unzip the dataset.
+
+  ```bash
+  cd data/mapillary
+  unzip An-ZjB1Zm61yAZG0ozTymz8I8NqI4x0MrYrh26dq7kPgfu8vf9ImrdaOAVOFYbJ2pNAgUnVGBmbue9lTgdBOb5BbKXIpFs0fpYWqACbrQDChAA2fdX0zS9PcHu7fY8c-FOvyBVxPNYNFQuM.zip
+  ```
+
+- After unzipping, you will get the Mapillary Vistas Dataset in the structure below. Semantic segmentation mask labels are in the `labels` folders.
+
+  ```none
+  mmsegmentation
+  ├── mmseg
+  ├── tools
+  ├── configs
+  ├── data
+  │   ├── mapillary
+  │   │   ├── training
+  │   │   │   ├── images
+  │   │   │   ├── v1.2
+  │   │   │   │   ├── instances
+  │   │   │   │   ├── labels
+  │   │   │   │   └── panoptic
+  │   │   │   ├── v2.0
+  │   │   │   │   ├── instances
+  │   │   │   │   ├── labels
+  │   │   │   │   ├── panoptic
+  │   │   │   │   └── polygons
+  │   │   ├── validation
+  │   │   │   ├── images
+  │   │   │   ├── v1.2
+  │   │   │   │   ├── instances
+  │   │   │   │   ├── labels
+  │   │   │   │   └── panoptic
+  │   │   │   ├── v2.0
+  │   │   │   │   ├── instances
+  │   │   │   │   ├── labels
+  │   │   │   │   ├── panoptic
+  │   │   │   │   └── polygons
+  ```
+
+- You can set the dataset version with `MapillaryDataset_v1` or `MapillaryDataset_v2` in your configs.
+  View the Mapillary Vistas dataset config files here: [V1.2](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/_base_/datasets/mapillary_v1.py) and [V2.0](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/_base_/datasets/mapillary_v2.py).
diff --git a/docs/en/user_guides/3_inference.md b/docs/en/user_guides/3_inference.md
index c9f4e62f10..cacebd2f60 100644
--- a/docs/en/user_guides/3_inference.md
+++ b/docs/en/user_guides/3_inference.md
@@ -19,7 +19,7 @@ MMSegmentation provides several interfaces for users to easily use pre-trained m

## Inferencer

-We provides the most **convenient** way to use the model in MMSegmentation `MMSegInferencer`. You can get segmentation mask for an image with only 3 lines of code.
+We provide the most **convenient** way to use a model in MMSegmentation: `MMSegInferencer`. You can get the segmentation mask of an image with only 3 lines of code (a minimal sketch follows the visualization result below).

### Basic Usage

@@ -36,7 +36,7 @@ The following example shows how to use `MMSegInferencer` to perform inference on

The visualization result should look like:
-https://user-images.githubusercontent.com/76149310/221507927-ae01e3a7-016f-4425-b966-7b19cbbe494e.png +
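For reference, a minimal sketch of the basic usage described above. The model name below is one real metafile name; any other name from the metafiles works the same way, and the weights are fetched automatically (network access assumed):

```python
from mmseg.apis import MMSegInferencer

# Load a pretrained model by its metafile name; weights download automatically.
inferencer = MMSegInferencer(model='deeplabv3plus_r18-d8_4xb2-80k_cityscapes-512x1024')

# Run inference on a single image and show the rendered segmentation mask.
inferencer('demo/demo.png', show=True)
```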
Moreover, you can use `MMSegInferencer` to process a list of images:

```
# Input a list of images
>>> images = [image1, image2, ...] # image1 can be a file path or a np.ndarray
->>> inferencer(images, show=True, wait_time=0.5) # wait_time is delay time, and 0 means forever.
+>>> inferencer(images, show=True, wait_time=0.5) # wait_time is delay time, and 0 means forever

# Or input image directory
>>> images = $IMAGESDIR
>>> inferencer(images, out_dir='outputs', img_out_dir='vis', pred_out_dir='pred')
```

-There is a optional parameter of inferencer, `return_datasamples`, whose default value is False, and
-return value of inferencer is a `dict` type by default, including 2 keys 'visualization' and 'predictions'.
+There is an optional parameter of inferencer, `return_datasamples`, whose default value is False; the return value of inferencer is a `dict` by default, including 2 keys 'visualization' and 'predictions'.
If `return_datasamples=True` inferencer will return [`SegDataSample`](../advanced_guides/structures.md), or list of it.

```
result = inferencer('demo/demo.png')
-# result is a `dict` including 2 keys 'visualization' and 'predictions'.
+# result is a `dict` including 2 keys 'visualization' and 'predictions'

# 'visualization' includes color segmentation map
print(result['visualization'].shape) # (512, 683, 3)
@@ -92,18 +91,12 @@ print(type(results[0]))

### Initialization

`MMSegInferencer` must be initialized from a `model`, which can be a model name or a `Config` even a path of config file.
-The model names can be found in models' metafile, like one model name of maskformer is `maskformer_r50-d32_8xb2-160k_ade20k-512x512`, and if input model name and the weights of the model will be download automatically. Below are other input parameters:
-
-- weights (str, optional) - Path to the checkpoint. If it is not specified and model is a model name of metafile, the weights will be loaded
-  from metafile. Defaults to None.
-- classes (list, optional) - Input classes for result rendering, as the prediction of segmentation
-  model is a segment map with label indices, `classes` is a list which includes
-  items responding to the label indices. If classes is not defined, visualizer will take `cityscapes` classes by default. Defaults to None.
-- palette (list, optional) - Input palette for result rendering, which is a list of color palette
-  responding to the classes. If palette is not defined, visualizer will take `cityscapes` palette by default. Defaults to None.
-- dataset_name (str, optional)[Dataset name or alias](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/utils/class_names.py#L302-L317)
-  visulizer will use the meta information of the dataset i.e. classes and palette,
-  but the `classes` and `palette` have higher priority. Defaults to None.
+The model names can be found in models' metafiles (configs/xxx/metafile.yaml); for example, one model name of MaskFormer is `maskformer_r50-d32_8xb2-160k_ade20k-512x512`, and if a model name is given as input, the weights of the model will be downloaded automatically. Below are other input parameters:
+
+- weights (str, optional) - Path to the checkpoint. If it is not specified and model is a model name of metafile, the weights will be loaded from metafile. Defaults to None.
+- classes (list, optional) - Input classes for result rendering, as the prediction of segmentation model is a segment map with label indices, `classes` is a list which includes items corresponding to the label indices. If classes is not defined, visualizer will take `cityscapes` classes by default. Defaults to None.
+- palette (list, optional) - Input palette for result rendering, which is a list of colors corresponding to the classes. If the palette is not defined, the visualizer will take the palette of `cityscapes` by default. Defaults to None.
+- dataset_name (str, optional) - [Dataset name or alias](https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/utils/class_names.py#L302-L317), visualizer will use the meta information of the dataset i.e. classes and palette, but the `classes` and `palette` have higher priority. Defaults to None.
- device (str, optional) - Device to run inference. If None, the available device will be automatically used. Defaults to None.
- scope (str, optional) - The scope of the model. Defaults to 'mmseg'.

@@ -113,8 +106,7 @@ The model names can be found in models' metafor
- show (bool) - Whether to display the image in a popup window. Defaults to False.
- wait_time (float) - The interval of show (s). Defaults to 0.
-- img_out_dir (str) - Subdirectory of `out_dir`, used to save rendering color segmentation mask, so `out_dir` must be defined
-  if you would like to save predicted mask. Defaults to 'vis'.
+- img_out_dir (str) - Subdirectory of `out_dir`, used to save rendering color segmentation mask, so `out_dir` must be defined if you would like to save predicted mask. Defaults to 'vis'.
- opacity (int, float) - The transparency of segmentation mask. Defaults to 0.8.

The examples of these parameters is in [Basic Usage](#basic-usage)
@@ -245,7 +237,7 @@ vis_image = show_result_pyplot(model, img_path, result)
# save the visualization result, the output image would be found at the path `work_dirs/result.png`
vis_iamge = show_result_pyplot(model, img_path, result, out_file='work_dirs/result.png')

-# Modify the time of displaying images, note that 0 is the special value that means "forever".
+# Modify the time of displaying images, note that 0 is the special value that means "forever"
vis_image = show_result_pyplot(model, img_path, result, wait_time=5)
```
diff --git a/docs/en/user_guides/4_train_test.md b/docs/en/user_guides/4_train_test.md
index 00e074ad39..9b2d17dc46 100644
--- a/docs/en/user_guides/4_train_test.md
+++ b/docs/en/user_guides/4_train_test.md
@@ -70,7 +70,7 @@ This tool accepts several optional arguments, including:
export CUDA_VISIBLE_DEVICES=-1
```

-And then run the script [above](#testing-on-a-single-gpu).
+Then run the script [above](#testing-on-a-single-gpu).

## Training and testing on multiple GPUs and multiple machines

@@ -218,3 +218,98 @@ You can check [the source code](../../../tools/slurm_test.sh) to review full arg
CUDA_VISIBLE_DEVICES=0,1,2,3 GPUS=4 MASTER_PORT=29500 sh tools/slurm_train.sh ${PARTITION} ${JOB_NAME} config1.py ${WORK_DIR}
CUDA_VISIBLE_DEVICES=4,5,6,7 GPUS=4 MASTER_PORT=29501 sh tools/slurm_train.sh ${PARTITION} ${JOB_NAME} config2.py ${WORK_DIR}
```
+
+## Testing and saving segment files
+
+### Basic Usage
+
+When you want to save the results, you can use `--out` to specify the output directory.
+
+```shell
+python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} --out ${OUTPUT_DIR}
+```
+
+Here is an example to save the predicted results from model `fcn_r50-d8_4xb4-80k_ade20k-512x512` on the ADE20K validation dataset.
+
+```shell
+python tools/test.py configs/fcn/fcn_r50-d8_4xb4-80k_ade20k-512x512.py ckpt/fcn_r50-d8_512x512_80k_ade20k_20200614_144016-f8ac5082.pth --out work_dirs/format_results
+```
+
+You can also modify the config file to define `output_dir`. We again take
+`fcn_r50-d8_4xb4-80k_ade20k-512x512` as an example: just add
+`test_evaluator` in `configs/fcn/fcn_r50-d8_4xb4-80k_ade20k-512x512.py`
+
+```python
+test_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'], output_dir='work_dirs/format_results')
+```
+
+Then run the command without `--out`:
+
+```shell
+python tools/test.py configs/fcn/fcn_r50-d8_4xb4-80k_ade20k-512x512.py ckpt/fcn_r50-d8_512x512_80k_ade20k_20200614_144016-f8ac5082.pth
+```
+
+If you would like to only save the predicted results without evaluation (e.g. when the annotations are not released by the official dataset), you can set `format_only=True` and modify `test_dataloader`.
+As there is no annotation in the dataset, we remove `dict(type='LoadAnnotations')` from `test_dataloader`. Here is the example configuration:
+
+```python
+test_evaluator = dict(
+    type='IoUMetric',
+    iou_metrics=['mIoU'],
+    format_only=True,
+    output_dir='work_dirs/format_results')
+test_dataloader = dict(
+    batch_size=1,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type='ADE20KDataset',
+        data_root='data/ade/release_test',
+        data_prefix=dict(img_path='testing'),
+        # we don't load annotations in the test transform pipeline.
+        pipeline=[
+            dict(type='LoadImageFromFile'),
+            dict(type='Resize', scale=(2048, 512), keep_ratio=True),
+            dict(type='PackSegInputs')
+        ]))
+```
+
+Then run the test command:
+
+```shell
+python tools/test.py configs/fcn/fcn_r50-d8_4xb4-80k_ade20k-512x512.py ckpt/fcn_r50-d8_512x512_80k_ade20k_20200614_144016-f8ac5082.pth
+```
+
+### Testing the Cityscapes dataset and saving predicted segment files
+
+We recommend `CityscapesMetric`, which is a wrapper of the Cityscapes SDK, when you want to save the predicted results of the Cityscapes test dataset to submit them to the [Cityscapes test server](https://www.cityscapes-dataset.com/submit/).
Here is the example configuration:
+
+```python
+test_evaluator = dict(
+    type='CityscapesMetric',
+    format_only=True,
+    keep_results=True,
+    output_dir='work_dirs/format_results')
+test_dataloader = dict(
+    batch_size=1,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type='CityscapesDataset',
+        data_root='data/cityscapes/',
+        data_prefix=dict(img_path='leftImg8bit/test'),
+        pipeline=[
+            dict(type='LoadImageFromFile'),
+            dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
+            dict(type='PackSegInputs')
+        ]))
+```
+
+Then run the test command, for example:
+
+```shell
+python tools/test.py configs/fcn/fcn_r18-d8_4xb2-80k_cityscapes-512x1024.py ckpt/fcn_r18-d8_512x1024_80k_cityscapes_20201225_021327-6c50f8b4.pth
+```
diff --git a/docs/en/user_guides/index.rst b/docs/en/user_guides/index.rst
index 9e7d365925..1feb1271ae 100644
--- a/docs/en/user_guides/index.rst
+++ b/docs/en/user_guides/index.rst
@@ -18,3 +18,4 @@ Useful Tools
    visualization.md
    useful_tools.md
    deployment.md
+   visualization_feature_map.md
diff --git a/docs/en/user_guides/visualization_feature_map.md b/docs/en/user_guides/visualization_feature_map.md
new file mode 100644
index 0000000000..d61226f055
--- /dev/null
+++ b/docs/en/user_guides/visualization_feature_map.md
@@ -0,0 +1,201 @@
+# Wandb Feature Map Visualization
+
+MMSegmentation 1.x provides backend support for Weights & Biases to facilitate visualization and management of project code results.
+
+## Wandb Configuration
+
+Install Weights & Biases following the [official instructions](https://docs.wandb.ai/quickstart), e.g.
+
+```shell
+pip install wandb
+wandb login
+```
+
+Add `WandbVisBackend` to the `vis_backends` of the `visualizer` in the `default_runtime.py` config file:
+
+```python
+vis_backends=[dict(type='LocalVisBackend'),
+              dict(type='TensorboardVisBackend'),
+              dict(type='WandbVisBackend')]
+```
+
+## Examining feature map visualization in Wandb
+
+`SegLocalVisualizer` is a child class that inherits from `Visualizer` in MMEngine and works for MMSegmentation visualization. For more details about `Visualizer`, please refer to the [visualization tutorial](https://github.com/open-mmlab/mmengine/blob/main/docs/en/advanced_tutorials/visualization.md) in MMEngine.
+
+Here is an example of `SegLocalVisualizer`. First, you may download the example data below with the following commands:
+
+ +
+
+
+```shell
+wget https://user-images.githubusercontent.com/24582831/189833109-eddad58f-f777-4fc0-b98a-6bd429143b06.png --output-document aachen_000000_000019_leftImg8bit.png
+wget https://user-images.githubusercontent.com/24582831/189833143-15f60f8a-4d1e-4cbb-a6e7-5e2233869fac.png --output-document aachen_000000_000019_gtFine_labelTrainIds.png
+
+wget https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x1024_40k_cityscapes/ann_r50-d8_512x1024_40k_cityscapes_20200605_095211-049fc292.pth
+
+```
+
+```python
+# Copyright (c) OpenMMLab. All rights reserved.
+from argparse import ArgumentParser
+from typing import Type
+
+import mmcv
+import torch
+import torch.nn as nn
+
+from mmengine.model import revert_sync_batchnorm
+from mmengine.structures import PixelData
+from mmseg.apis import inference_model, init_model
+from mmseg.structures import SegDataSample
+from mmseg.utils import register_all_modules
+from mmseg.visualization import SegLocalVisualizer
+
+
+class Recorder:
+    """Record the forward output feature maps and save them to data_buffer."""
+
+    def __init__(self) -> None:
+        self.data_buffer = list()
+
+    def __enter__(self):
+        # reset the buffer so each recording session starts clean
+        self.data_buffer = list()
+        return self
+
+    def record_data_hook(self, model: nn.Module, input: Type, output: Type):
+        self.data_buffer.append(output)
+
+    def __exit__(self, *args, **kwargs):
+        pass
+
+
+def visualize(args, model, recorder, result):
+    seg_visualizer = SegLocalVisualizer(
+        vis_backends=[dict(type='WandbVisBackend')],
+        save_dir='temp_dir',
+        alpha=0.5)
+    seg_visualizer.dataset_meta = dict(
+        classes=model.dataset_meta['classes'],
+        palette=model.dataset_meta['palette'])
+
+    image = mmcv.imread(args.img, 'color')
+
+    seg_visualizer.add_datasample(
+        name='predict',
+        image=image,
+        data_sample=result,
+        draw_gt=False,
+        draw_pred=True,
+        wait_time=0,
+        out_file=None,
+        show=False)
+
+    # add feature map to wandb visualizer
+    for i in range(len(recorder.data_buffer)):
+        feature = recorder.data_buffer[i][0]  # remove the batch
+        drawn_img = seg_visualizer.draw_featmap(
+            feature, image, channel_reduction='select_max')
+        seg_visualizer.add_image(f'feature_map{i}', drawn_img)
+
+    if args.gt_mask:
+        sem_seg = mmcv.imread(args.gt_mask, 'unchanged')
+        sem_seg = torch.from_numpy(sem_seg)
+        gt_mask = dict(data=sem_seg)
+        gt_mask = PixelData(**gt_mask)
+        data_sample = SegDataSample()
+        data_sample.gt_sem_seg = gt_mask
+
+        seg_visualizer.add_datasample(
+            name='gt_mask',
+            image=image,
+            data_sample=data_sample,
+            draw_gt=True,
+            draw_pred=False,
+            wait_time=0,
+            out_file=None,
+            show=False)
+
+    seg_visualizer.add_image('image', image)
+
+
+def main():
+    parser = ArgumentParser(
+        description='Draw the Feature Map During Inference')
+    parser.add_argument('img', help='Image file')
+    parser.add_argument('config', help='Config file')
+    parser.add_argument('checkpoint', help='Checkpoint file')
+    parser.add_argument('--gt_mask', default=None, help='Path of gt mask file')
+    parser.add_argument('--out-file', default=None, help='Path to output file')
+    parser.add_argument(
+        '--device', default='cuda:0', help='Device used for inference')
+    parser.add_argument(
+        '--opacity',
+        type=float,
+        default=0.5,
+        help='Opacity of painted segmentation map. In (0, 1] range.')
+    parser.add_argument(
+        '--title', default='result', help='The image identifier.')
+    args = parser.parse_args()
+
+    register_all_modules()
+
+    # build the model from a config file and a checkpoint file
+    model = init_model(args.config, args.checkpoint, device=args.device)
+    if args.device == 'cpu':
+        model = revert_sync_batchnorm(model)
+
+    # show all named modules in the model and use them in the source list below
+    for name, module in model.named_modules():
+        print(name)
+
+    source = [
+        'decode_head.fusion.stages.0.query_project.activate',
+        'decode_head.context.stages.0.key_project.activate',
+        'decode_head.context.bottleneck.activate'
+    ]
+    source = dict.fromkeys(source)
+
+    count = 0
+    recorder = Recorder()
+    # register the forward hook on each module named in `source`
+    for name, module in model.named_modules():
+        if name in source:
+            count += 1
+            module.register_forward_hook(recorder.record_data_hook)
+            if count == len(source):
+                break
+
+    with recorder:
+        # test a single image, and record feature maps to data_buffer
+        result = inference_model(model, args.img)
+
+    visualize(args, model, recorder, result)
+
+
+if __name__ == '__main__':
+    main()
+
+```
+
+Save the above code as `feature_map_visual.py` and execute the following command in the terminal:
+
+```shell
+python feature_map_visual.py ${image} ${config} ${checkpoint} [optional args]
+```
+
+e.g.
+
+```shell
+python feature_map_visual.py \
+aachen_000000_000019_leftImg8bit.png \
+configs/ann/ann_r50-d8_4xb2-40k_cityscapes-512x1024.py \
+ann_r50-d8_512x1024_40k_cityscapes_20200605_095211-049fc292.pth \
+--gt_mask aachen_000000_000019_gtFine_labelTrainIds.png
+```
+
+The visualized image result and its corresponding feature map will appear in your wandb account.
+
+
+ +
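The script above collapses each recorded feature map with `channel_reduction='select_max'`. MMEngine's `draw_featmap` also supports mean squeezing and a top-k channel grid; here is a small sketch, with a random tensor standing in for a recorded feature:

```python
import torch
from mmseg.visualization import SegLocalVisualizer

visualizer = SegLocalVisualizer(save_dir='temp_dir')
featmap = torch.randn(256, 64, 128)  # (C, H, W), a stand-in for a real feature

# Collapse channels by their per-pixel maximum, as the script above does.
drawn_max = visualizer.draw_featmap(featmap, channel_reduction='select_max')

# Or draw the 6 highest-activation channels arranged in a 2x3 grid instead.
drawn_topk = visualizer.draw_featmap(
    featmap, channel_reduction=None, topk=6, arrangement=(2, 3))
```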
diff --git a/docs/zh_cn/advanced_guides/add_datasets.md b/docs/zh_cn/advanced_guides/add_datasets.md index 4ea14934ed..22fbf3462f 100644 --- a/docs/zh_cn/advanced_guides/add_datasets.md +++ b/docs/zh_cn/advanced_guides/add_datasets.md @@ -1,4 +1,62 @@ -# 新增自定义数据集(待更新) +# 新增自定义数据集 + +## 新增自定义数据集 + +在这里,我们展示如何构建一个新的数据集。 + +1. 创建一个新文件 `mmseg/datasets/example.py` + + ```python + from mmseg.registry import DATASETS + from .basesegdataset import BaseSegDataset + + + @DATASETS.register_module() + class ExampleDataset(BaseSegDataset): + + METAINFO = dict( + classes=('xxx', 'xxx', ...), + palette=[[x, x, x], [x, x, x], ...]) + + def __init__(self, aeg1, arg2): + pass + ``` + +2. 在 `mmseg/datasets/__init__.py` 中导入模块 + + ```python + from .example import ExampleDataset + ``` + +3. 通过创建一个新的数据集配置文件 `configs/_base_/datasets/example_dataset.py` 来使用它 + + ```python + dataset_type = 'ExampleDataset' + data_root = 'data/example/' + ... + ``` + +4. 在 `mmseg/utils/class_names.py` 中补充数据集元信息 + + ```python + def example_classes(): + return [ + 'xxx', 'xxx', + ... + ] + + def example_palette(): + return [ + [x, x, x], [x, x, x], + ... + ] + dataset_aliases ={ + 'example': ['example', ...], + ... + } + ``` + +**注意:** 如果新数据集不满足 mmseg 的要求,则需要在 `tools/dataset_converters/` 中准备一个数据集预处理脚本 ## 通过重新组织数据来定制数据集 @@ -26,30 +84,17 @@ 一个训练对将由 img_dir/ann_dir 里同样首缀的文件组成。 -如果给定 `split` 参数,只有部分在 img_dir/ann_dir 里的文件会被加载。 -我们可以对被包括在 split 文本里的文件指定前缀。 +有些数据集不会发布测试集或测试集的标注,如果没有测试集的标注,我们就无法在本地进行评估模型,因此我们在配置文件中将验证集设置为默认测试集。 -除此以外,一个 split 文本如下所示: - -```none -xxx -zzz -``` +关于如何构建自己的数据集或实现新的数据集类,请参阅[数据集指南](./datasets.md)以获取更多详细信息。 -只有 - -`data/my_dataset/img_dir/train/xxx{img_suffix}`, -`data/my_dataset/img_dir/train/zzz{img_suffix}`, -`data/my_dataset/ann_dir/train/xxx{seg_map_suffix}`, -`data/my_dataset/ann_dir/train/zzz{seg_map_suffix}` 将被加载。 - -注意:标注是跟图像同样的形状 (H, W),其中的像素值的范围是 `[0, num_classes - 1]`。 +**注意:** 标注是跟图像同样的形状 (H, W),其中的像素值的范围是 `[0, num_classes - 1]`。 您也可以使用 [pillow](https://pillow.readthedocs.io/en/stable/handbook/concepts.html#palette) 的 `'P'` 模式去创建包含颜色的标注。 ## 通过混合数据去定制数据集 MMSegmentation 同样支持混合数据集去训练。 -当前它支持拼接 (concat), 重复 (repeat) 和多图混合 (multi-image mix)数据集。 +当前它支持拼接 (concat), 重复 (repeat) 和多图混合 (multi-image mix) 数据集。 ### 重复数据集 @@ -58,79 +103,29 @@ MMSegmentation 同样支持混合数据集去训练。 ```python dataset_A_train = dict( - type='RepeatDataset', - times=N, - dataset=dict( # 这是 Dataset_A 数据集的原始配置 - type='Dataset_A', - ... - pipeline=train_pipeline - ) + type='RepeatDataset', + times=N, + dataset=dict( # 这是 Dataset_A 数据集的原始配置 + type='Dataset_A', + ... + pipeline=train_pipeline ) +) ``` ### 拼接数据集 -有2种方式去拼接数据集。 - -1. 如果您想拼接的数据集是同样的类型,但有不同的标注文件, - 您可以按如下操作去拼接数据集的配置文件: - - 1. 您也许可以拼接两个标注文件夹 `ann_dir` - - ```python - dataset_A_train = dict( - type='Dataset_A', - img_dir = 'img_dir', - ann_dir = ['anno_dir_1', 'anno_dir_2'], - pipeline=train_pipeline - ) - ``` - - 2. 您也可以去拼接两个 `split` 文件列表 - - ```python - dataset_A_train = dict( - type='Dataset_A', - img_dir = 'img_dir', - ann_dir = 'anno_dir', - split = ['split_1.txt', 'split_2.txt'], - pipeline=train_pipeline - ) - ``` +如果要拼接不同的数据集,可以按如下方式连接数据集配置。 - 3. 您也可以同时拼接 `ann_dir` 文件夹和 `split` 文件列表 - - ```python - dataset_A_train = dict( - type='Dataset_A', - img_dir = 'img_dir', - ann_dir = ['anno_dir_1', 'anno_dir_2'], - split = ['split_1.txt', 'split_2.txt'], - pipeline=train_pipeline - ) - ``` - - 在这样的情况下, `ann_dir_1` 和 `ann_dir_2` 分别对应于 `split_1.txt` 和 `split_2.txt` - -2. 
如果您想拼接不同的数据集,您可以如下去拼接数据集的配置文件: - - ```python - dataset_A_train = dict() - dataset_B_train = dict() - - data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train = [ - dataset_A_train, - dataset_B_train - ], - val = dataset_A_val, - test = dataset_A_test - ) - ``` +```python +dataset_A_train = dict() +dataset_B_train = dict() +concatenate_dataset = dict( + type='ConcatDataset', + datasets=[dataset_A_train, dataset_B_train]) +``` -一个更复杂的例子如下:分别重复 `Dataset_A` 和 `Dataset_B` N 次和 M 次,然后再去拼接重复后的数据集 +下面是一个更复杂的示例,它分别重复 `Dataset_A` 和 `Dataset_B` N 次和 M 次,然后连接重复的数据集。 ```python dataset_A_train = dict( @@ -159,41 +154,36 @@ dataset_B_train = dict( pipeline=train_pipeline ) ) -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train = [ - dataset_A_train, - dataset_B_train - ], - val = dataset_A_val, - test = dataset_A_test -) +train_dataloader = dict( + dataset=dict( + type='ConcatDataset', + datasets=[dataset_A_train, dataset_B_train])) + +val_dataloader = dict(dataset=dataset_A_val) +test_dataloader = dict(dataset=dataset_A_test) ``` +您可以参考 mmengine 的基础数据集[教程](https://mmengine.readthedocs.io/zh_CN/latest/advanced_tutorials/basedataset.html)以了解更多详细信息 + ### 多图混合集 -我们使用 `MultiImageMixDataset` 作为包装(wrapper)去混合多个数据集的图片。 -`MultiImageMixDataset`可以被类似mosaic和mixup的多图混合数据増广使用。 +我们使用 `MultiImageMixDataset` 作为包装(wrapper)去混合多个数据集的图片。 +`MultiImageMixDataset`可以被类似 mosaic 和 mixup 的多图混合数据増广使用。 -`MultiImageMixDataset`与`Mosaic`数据増广一起使用的例子: +`MultiImageMixDataset` 与 `Mosaic` 数据増广一起使用的例子: ```python train_pipeline = [ dict(type='RandomMosaic', prob=1), dict(type='Resize', img_scale=(1024, 512), keep_ratio=True), dict(type='RandomFlip', prob=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_semantic_seg']), + dict(type='PackSegInputs') ] train_dataset = dict( type='MultiImageMixDataset', dataset=dict( - classes=classes, - palette=palette, type=dataset_type, reduce_zero_label=False, img_dir=data_root + "images/train", diff --git a/docs/zh_cn/advanced_guides/add_metrics.md b/docs/zh_cn/advanced_guides/add_metrics.md index 3a371e357e..0637b44728 100644 --- a/docs/zh_cn/advanced_guides/add_metrics.md +++ b/docs/zh_cn/advanced_guides/add_metrics.md @@ -1 +1,81 @@ -# 新增评测指标 (待更新) +# 新增评测指标 + +## 使用 MMSegmentation 的源代码进行开发 + +在这里,我们用 `CustomMetric` 作为例子来展示如何开发一个新的评测指标。 + +1. 创建一个新文件 `mmseg/evaluation/metrics/custom_metric.py`。 + + ```python + from typing import List, Sequence + + from mmengine.evaluator import BaseMetric + + from mmseg.registry import METRICS + + + @METRICS.register_module() + class CustomMetric(BaseMetric): + + def __init__(self, arg1, arg2): + """ + The metric first processes each batch of data_samples and predictions, + and appends the processed results to the results list. Then it + collects all results together from all ranks if distributed training + is used. Finally, it computes the metrics of the entire dataset. 
+ """ + + def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None: + pass + + def compute_metrics(self, results: list) -> dict: + pass + + def evaluate(self, size: int) -> dict: + pass + ``` + + 在上面的示例中,`CustomMetric` 是 `BaseMetric` 的子类。它有三个方法:`process`,`compute_metrics` 和 `evaluate`。 + + - `process()` 处理一批数据样本和预测。处理后的结果需要显示地传给 `self.results` ,将在处理所有数据样本后用于计算指标。更多细节请参考 [MMEngine 文档](https://github.com/open-mmlab/mmengine/blob/main/docs/zh_cn/design/evaluation.md) + + - `compute_metrics()` 用于从处理后的结果中计算指标。 + + - `evaluate()` 是一个接口,用于计算指标并返回结果。它将由 `ValLoop` 或 `TestLoop` 在 `Runner` 中调用。在大多数情况下,您不需要重写此方法,但如果您想做一些额外的工作,可以重写它。 + + **注意:** 您可以在[这里](https://github.com/open-mmlab/mmengine/blob/main/mmengine/runner/loops.py#L366) 找到 `Runner` 调用 `evaluate()` 方法的过程。`Runner` 是训练和测试过程的执行器,您可以在[训练引擎文档](./engine.md)中找到有关它的详细信息。 + +2. 在 `mmseg/evaluation/metrics/__init__.py` 中导入新的指标。 + + ```python + from .custom_metric import CustomMetric + __all__ = ['CustomMetric', ...] + ``` + +3. 在配置文件中设置新的评测指标 + + ```python + val_evaluator = dict(type='CustomMetric', arg1=xxx, arg2=xxx) + test_evaluator = dict(type='CustomMetric', arg1=xxx, arg2=xxx) + ``` + +## 使用发布版本的 MMSegmentation 进行开发 + +上面的示例展示了如何使用 MMSegmentation 的源代码开发新指标。如果您想使用 MMSegmentation 的发布版本开发新指标,可以按照以下步骤操作。 + +1. 创建一个新文件 `/Path/to/metrics/custom_metric.py`,实现 `process`,`compute_metrics` 和 `evaluate` 方法,`evaluate` 方法是可选的。 + +2. 在代码或配置文件中导入新的指标。 + + ```python + from path.to.metrics import CustomMetric + ``` + + 或者 + + ```python + custom_imports = dict(imports=['/Path/to/metrics'], allow_failed_imports=False) + + val_evaluator = dict(type='CustomMetric', arg1=xxx, arg2=xxx) + test_evaluator = dict(type='CustomMetric', arg1=xxx, arg2=xxx) + ``` diff --git a/docs/zh_cn/advanced_guides/add_models.md b/docs/zh_cn/advanced_guides/add_models.md index 3f86a0c7c6..e05c07c8ba 100644 --- a/docs/zh_cn/advanced_guides/add_models.md +++ b/docs/zh_cn/advanced_guides/add_models.md @@ -1,3 +1,260 @@ -# 新增模块(待更新) +# 新增模块 -中文版文档支持中,请先阅读[英文版本](../../en/advanced_guides/add_models.md) +## 开发新组件 + +我们可以自定义 [模型文档](./models.md) 中介绍的所有组件,例如**主干网络(backbone)**、**头(head)**、**损失函数(loss function)**和**数据预处理器(data preprocessor)**。 + +### 添加新的主干网络(backbone) + +在这里,我们以 MobileNet 为例展示如何开发新的主干网络。 + +1. 创建一个新文件 `mmseg/models/backbones/mobilenet.py`。 + + ```python + import torch.nn as nn + + from mmseg.registry import MODELS + + + @MODELS.register_module() + class MobileNet(nn.Module): + + def __init__(self, arg1, arg2): + pass + + def forward(self, x): # should return a tuple + pass + + def init_weights(self, pretrained=None): + pass + ``` + +2. 在 `mmseg/models/backbones/__init__.py` 中引入模块。 + + ```python + from .mobilenet import MobileNet + ``` + +3. 在配置文件中使用它。 + + ```python + model = dict( + ... + backbone=dict( + type='MobileNet', + arg1=xxx, + arg2=xxx), + ... 
+    ```
+
+### 添加新的头(head)
+
+在 MMSegmentation 中,我们提供 [BaseDecodeHead](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/models/decode_heads/decode_head.py#L17) 用于开发所有分割头。
+所有新实现的解码头都应该从中派生出来。
+接下来我们以 [PSPNet](https://arxiv.org/abs/1612.01105) 为例说明如何开发新的头。
+
+首先,在 `mmseg/models/decode_heads/psp_head.py` 中添加一个新的解码头。
+PSPNet 实现了用于分割解码的解码头。
+为了实现解码头,我们需要在新模块中实现以下三个函数。
+
+```python
+from mmseg.registry import MODELS
+from .decode_head import BaseDecodeHead
+
+
+@MODELS.register_module()
+class PSPHead(BaseDecodeHead):
+
+    def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs):
+        super(PSPHead, self).__init__(**kwargs)
+
+    def init_weights(self):
+        pass
+
+    def forward(self, inputs):
+        pass
+```
+
+接下来,用户需要在 `mmseg/models/decode_heads/__init__.py` 中添加模块,这样相应的注册器就可以找到并加载它们。
+
+PSPNet 的配置文件如下:
+
+```python
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='EncoderDecoder',
+    pretrained='pretrain_model/resnet50_v1c_trick-2cccc1ad.pth',
+    backbone=dict(
+        type='ResNetV1c',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        dilations=(1, 1, 2, 4),
+        strides=(1, 2, 1, 1),
+        norm_cfg=norm_cfg,
+        norm_eval=False,
+        style='pytorch',
+        contract_dilation=True),
+    decode_head=dict(
+        type='PSPHead',
+        in_channels=2048,
+        in_index=3,
+        channels=512,
+        pool_scales=(1, 2, 3, 6),
+        dropout_ratio=0.1,
+        num_classes=19,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)))
+```
+
+### 添加新的损失函数(loss)
+
+假设您想为分割解码添加一个叫做 `MyLoss` 的新的损失函数。
+要添加新的损失函数,用户需要在 `mmseg/models/losses/my_loss.py` 中实现它。
+修饰器 `weighted_loss` 可以对损失的每个元素进行加权。
+
+```python
+import torch
+import torch.nn as nn
+
+from mmseg.registry import MODELS
+from .utils import weighted_loss
+
+@weighted_loss
+def my_loss(pred, target):
+    assert pred.size() == target.size() and target.numel() > 0
+    loss = torch.abs(pred - target)
+    return loss
+
+@MODELS.register_module()
+class MyLoss(nn.Module):
+
+    def __init__(self, reduction='mean', loss_weight=1.0):
+        super(MyLoss, self).__init__()
+        self.reduction = reduction
+        self.loss_weight = loss_weight
+
+    def forward(self,
+                pred,
+                target,
+                weight=None,
+                avg_factor=None,
+                reduction_override=None):
+        assert reduction_override in (None, 'none', 'mean', 'sum')
+        reduction = (
+            reduction_override if reduction_override else self.reduction)
+        loss = self.loss_weight * my_loss(
+            pred, target, weight, reduction=reduction, avg_factor=avg_factor)
+        return loss
+```
+
+然后,用户需要将其添加到 `mmseg/models/losses/__init__.py` 中。
+
+```python
+from .my_loss import MyLoss, my_loss
+```
+
+要使用它,需要修改头中的 `loss_decode` 字段,
+其中 `loss_weight` 可用于平衡多重损失。
+
+```python
+loss_decode=dict(type='MyLoss', loss_weight=1.0)
+```
+
+### 添加新的数据预处理器(data preprocessor)
+
+在 MMSegmentation 1.x 版本中,我们使用 [SegDataPreProcessor](https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/models/data_preprocessor.py#L13) 将数据复制到目标设备,并将数据预处理为默认的模型输入格式。这里我们将展示如何开发一个新的数据预处理器。
+
+1. 创建一个新文件 `mmseg/models/my_datapreprocessor.py`。
+
+    ```python
+    from typing import Any, Dict
+
+    from mmengine.model import BaseDataPreprocessor
+
+    from mmseg.registry import MODELS
+
+    @MODELS.register_module()
+    class MyDataPreProcessor(BaseDataPreprocessor):
+        def __init__(self, **kwargs):
+            super().__init__(**kwargs)
+
+        def forward(self, data: dict, training: bool = False) -> Dict[str, Any]:
+            # TODO Define the logic for data pre-processing in the forward method
+            pass
+    ```
+
+2. 在 `mmseg/models/__init__.py` 中导入数据预处理器。
+
+    ```python
+    from .my_datapreprocessor import MyDataPreProcessor
+    ```
+
+3. 在配置文件中使用它。
+
+    ```python
+    model = dict(
+        data_preprocessor=dict(type='MyDataPreProcessor'),
+        ...
+    )
+    ```
+
+## 开发新的分割器(segmentor)
+
+分割器是一种算法架构,用户可以通过添加自定义组件和定义算法执行逻辑来自定义自己的算法。请参考[模型文档](./models.md)了解更多详情。
+
+由于 MMSegmentation 中的 [BaseSegmentor](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/models/segmentors/base.py#L15) 统一了前向过程的三种模式,为了开发新的分割器,用户需要重写与 `loss`、`predict` 和 `tensor` 模式相对应的 `loss`、`predict` 和 `_forward` 方法。
+
+这里我们将展示如何开发一个新的分割器。
+
+1. 创建一个新文件 `mmseg/models/segmentors/my_segmentor.py`。
+
+    ```python
+    from typing import Dict, List, Optional, Tuple, Union
+
+    from torch import Tensor
+
+    from mmseg.models import BaseSegmentor
+    from mmseg.registry import MODELS
+    from mmseg.utils import OptSampleList, SampleList
+
+    @MODELS.register_module()
+    class MySegmentor(BaseSegmentor):
+        def __init__(self, **kwargs):
+            super().__init__(**kwargs)
+            # TODO users should build components of the network here
+
+        def loss(self, inputs: Tensor, data_samples: SampleList) -> dict:
+            """Calculate losses from a batch of inputs and data samples."""
+            pass
+
+        def predict(self, inputs: Tensor, data_samples: OptSampleList = None) -> SampleList:
+            """Predict results from a batch of inputs and data samples with post-
+            processing."""
+            pass
+
+        def _forward(self,
+                     inputs: Tensor,
+                     data_samples: OptSampleList = None) -> Tuple[List[Tensor]]:
+            """Network forward process.
+
+            Usually includes backbone, neck and head forward without any post-
+            processing.
+            """
+            pass
+    ```
+
+2. 在 `mmseg/models/segmentors/__init__.py` 中导入分割器。
+
+    ```python
+    from .my_segmentor import MySegmentor
+    ```
+
+3. 在配置文件中使用它。
+
+    ```python
+    model = dict(
+        type='MySegmentor',
+        ...
+    )
+    ```
diff --git a/docs/zh_cn/advanced_guides/add_transforms.md b/docs/zh_cn/advanced_guides/add_transforms.md
index 58a2485e04..d7206680d3 100644
--- a/docs/zh_cn/advanced_guides/add_transforms.md
+++ b/docs/zh_cn/advanced_guides/add_transforms.md
@@ -1,3 +1,51 @@
-# 新增数据增强(待更新)
+# 新增数据增强
 
-中文版文档支持中,请先阅读[英文版本](../../en/advanced_guides/add_transform.md)
+## 自定义数据增强
+
+自定义数据增强必须继承 `BaseTransform` 并实现 `transform` 函数。这里我们使用一个简单的翻转变换作为示例:
+
+```python
+import mmcv
+from mmcv.transforms import BaseTransform, TRANSFORMS
+
+@TRANSFORMS.register_module()
+class MyFlip(BaseTransform):
+    def __init__(self, direction: str):
+        super().__init__()
+        self.direction = direction
+
+    def transform(self, results: dict) -> dict:
+        img = results['img']
+        results['img'] = mmcv.imflip(img, direction=self.direction)
+        return results
+```
+
+此外,新的类需要被导入。
+
+```python
+from .my_pipeline import MyFlip
+```
+
+这样,我们就可以实例化一个 `MyFlip` 对象并使用它来处理数据字典。
+
+```python
+import numpy as np
+
+transform = MyFlip(direction='horizontal')
+data_dict = {'img': np.random.rand(224, 224, 3)}
+data_dict = transform(data_dict)
+processed_img = data_dict['img']
+```
+
+或者,我们可以在配置文件中的数据流程中使用 `MyFlip` 变换。
+
+```python
+pipeline = [
+    ...
+    dict(type='MyFlip', direction='horizontal'),
+    ...
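+    # direction 可取 'horizontal'、'vertical' 或 'diagonal',
+    # 与 mmcv.imflip 支持的翻转方向一致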
+]
+```
+
+需要注意,如果要在配置文件中使用 `MyFlip`,必须确保在运行时导入了包含 `MyFlip` 的文件。
diff --git a/docs/zh_cn/advanced_guides/data_flow.md b/docs/zh_cn/advanced_guides/data_flow.md
index 0716d36d1b..20dbe07e75 100644
--- a/docs/zh_cn/advanced_guides/data_flow.md
+++ b/docs/zh_cn/advanced_guides/data_flow.md
@@ -16,7 +16,7 @@ val_cfg = dict(type='ValLoop')
 test_cfg = dict(type='TestLoop')
 ```
 
-在上图中,红色线表示 [train_step](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/advanced_guides/models.md#train_step) ***([中文链接待更新](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/zh_cn/advanced_guides/models.md#train_step))*** ,在每次训练迭代中,数据加载器(dataloader)从存储中加载图像并传输到数据预处理器(data preprocessor),数据预处理器会将图像放到特定的设备上,并将数据堆叠到批处理中,之后模型接受批处理数据作为输入,最后将模型的输出发送给优化器(optimizer)。蓝色线表示 [val_step](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/advanced_guides/models.md#val_step) 和 [test_step](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/advanced_guides/models.md#test_step) ***([中文链接待更新](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/zh_cn/advanced_guides/models.md#test_step))*** 。这两个过程的数据流除了模型输出与 `train_step` 不同外,其余均和 `train_step` 类似。由于在评估时模型参数会被冻结,因此模型的输出将被传递给 [Evaluator](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/advanced_guides/evaluation.md#ioumetric) ***([中文链接待更新](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/zh_cn/advanced_guides/evaluation.md#ioumetric))***
+在上图中,红色线表示 [train_step](./models.md#train_step),在每次训练迭代中,数据加载器(dataloader)从存储中加载图像并传输到数据预处理器(data preprocessor),数据预处理器会将图像放到特定的设备上,并将数据堆叠到批处理中,之后模型接受批处理数据作为输入,最后将模型的输出发送给优化器(optimizer)。蓝色线表示 [val_step](./models.md#val_step) 和 [test_step](./models.md#test_step)。这两个过程的数据流除了模型输出与 `train_step` 不同外,其余均和 `train_step` 类似。由于在评估时模型参数会被冻结,因此模型的输出将被传递给 [Evaluator](./evaluation.md#ioumetric) 来计算指标。
 
 ## MMSegmentation 中的数据流约定
 
@@ -28,7 +28,7 @@ test_cfg = dict(type='TestLoop')
 数据加载器(DataLoader)是 MMEngine 的训练和测试流程中的一个重要组件。
 从概念上讲,它源于 [PyTorch](https://pytorch.org/) 并保持一致。DataLoader 从文件系统加载数据,原始数据通过数据准备流程后被发送给数据预处理器。
 
-MMSegmentation 在 [PackSegInputs](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/datasets/transforms/formatting.py#L12) 中定义了默认数据格式, 它是 `train_pipeline` 和 `test_pipeline` 的最后一个组件。有关数据转换 `pipeline` 的更多信息,请参阅[数据转换文档](https://mmsegmentation.readthedocs.io/en/dev-1.x/advanced_guides/transforms.html)。 ***([中文链接待更新](https://mmsegmentation.readthedocs.io/zh_CN/dev-1.x/advanced_guides/transforms.html))***
+MMSegmentation 在 [PackSegInputs](https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/datasets/transforms/formatting.py#L12) 中定义了默认数据格式, 它是 `train_pipeline` 和 `test_pipeline` 的最后一个组件。有关数据转换 `pipeline` 的更多信息,请参阅[数据转换文档](./transforms.md)。
 
 在没有任何修改的情况下,PackSegInputs 的返回值通常是一个包含 `inputs` 和 `data_samples` 的 `dict`。以下伪代码展示了 mmseg 中数据加载器输出的数据类型,它是从数据集中获取的一批数据样本,数据加载器将它们打包成一个字典列表。`inputs` 是输入进模型的张量列表,`data_samples` 包含了输入图像的 meta information 和相应的 ground truth。
 
@@ -39,11 +39,11 @@ dict(
 )
 ```
 
-**注意:** [SegDataSample](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/structures/seg_data_sample.py) 是 MMSegmentation 的数据结构接口,用于连接不同组件。`SegDataSample` 实现了抽象数据元素 `mmengine.structures.BaseDataElement`,更多信息请在 [MMEngine](https://github.com/open-mmlab/mmengine) 中参阅 [SegDataSample 文档](https://mmsegmentation.readthedocs.io/zh_CN/1.x/advanced_guides/structures.html)和[数据元素文档](https://mmengine.readthedocs.io/zh_CN/latest/advanced_tutorials/data_element.html)。
+**注意:** 
[SegDataSample](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/structures/seg_data_sample.py) 是 MMSegmentation 的数据结构接口,用于连接不同组件。`SegDataSample` 实现了抽象数据元素 `mmengine.structures.BaseDataElement`,更多信息请在 [MMEngine](https://github.com/open-mmlab/mmengine) 中参阅 [SegDataSample 文档](./structures.md)和[数据元素文档](https://mmengine.readthedocs.io/zh_CN/latest/advanced_tutorials/data_element.html)。
 
 ### 数据预处理器到模型
 
-虽然在[上面的图](##数据流概述)中分开绘制了数据预处理器和模型,但数据预处理器是模型的一部分,因此可以在[模型教程](https://mmsegmentation.readthedocs.io/en/dev-1.x/advanced_guides/models.html)中找到数据预处理器章节。 ***([中文链接待更新](https://mmsegmentation.readthedocs.io/zh_CN/dev-1.x/advanced_guides/models.html))***
+虽然在[上面的图](##数据流概述)中分开绘制了数据预处理器和模型,但数据预处理器是模型的一部分,因此可以在[模型教程](./models.md)中找到数据预处理器章节。
 
 数据预处理器的返回值是一个包含 `inputs` 和 `data_samples` 的字典,其中 `inputs` 是批处理图像的 4D 张量,`data_samples` 中添加了一些用于数据预处理的额外元信息。当传递给网络时,字典将被解包为两个值。 以下伪代码展示了数据预处理器的返回值和模型的输入值。
 
@@ -61,22 +61,22 @@ class Network(BaseSegmentor):
     pass
 ```
 
-**注意:** 模型的前向传播有 3 种模式,由输入参数 mode 控制,更多信息请参阅[模型教程](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/advanced_guides/models.md)。 ***([中文链接待更新](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/zh_cn/advanced_guides/models.md))***
+**注意:** 模型的前向传播有 3 种模式,由输入参数 mode 控制,更多信息请参阅[模型教程](./models.md)。
 
 ### 模型输出
 
-如[模型教程](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/advanced_guides/models.md#forward) ***([中文链接待更新](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/zh_cn/advanced_guides/models.md#forward))*** 所提到的 3 种前向传播具有 3 种输出。
+如[模型教程](./models.md#forward)所提到的 3 种前向传播具有 3 种输出。
 `train_step` 和 `test_step`(或 `val_step`)分别对应于 `'loss'` 和 `'predict'`。
 
-在 `test_step` 或 `val_step` 中,推理结果会被传递给 `Evaluator` 。您可以参阅[评估文档](https://mmsegmentation.readthedocs.io/en/dev-1.x/advanced_guides/evaluation.html) ***([中文链接待更新](https://mmsegmentation.readthedocs.io/zh_CN/dev-1.x/advanced_guides/evaluation.html))*** 来获取更多关于 `Evaluator` 的信息。
+在 `test_step` 或 `val_step` 中,推理结果会被传递给 `Evaluator`。您可以参阅[评估文档](./evaluation.md)来获取更多关于 `Evaluator` 的信息。
 
-在推理后,MMSegmentation 中的 [BaseSegmentor](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/models/segmentors/base.py#L15) 会对推理结果进行简单的后处理以打包推理结果。神经网络生成的分割 logits,经过 `argmax` 操作后的分割 mask 和 ground truth(如果存在)将被打包到类似 `SegDataSample` 的实例。 [postprocess_result](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/models/segmentors/base.py#L132) 的返回值是一个 **`SegDataSample`的`List`**。下图显示了这些 `SegDataSample` 实例的关键属性。
+在推理后,MMSegmentation 中的 [BaseSegmentor](https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/models/segmentors/base.py#L15) 会对推理结果进行简单的后处理以打包推理结果。神经网络生成的分割 logits、经过 `argmax` 操作后的分割 mask 和 ground truth(如果存在)将被打包到类似 `SegDataSample` 的实例。[postprocess_result](https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/models/segmentors/base.py#L132) 的返回值是一个 **`SegDataSample` 的 `List`**。下图显示了这些 `SegDataSample` 实例的关键属性。
 
 ![SegDataSample](https://user-images.githubusercontent.com/15952744/209912225-ab46a8d9-904a-43cb-8bf1-8bec4938ed29.png)
 
-与数据预处理器一致,损失函数也是模型的一部分,它是[解码头](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/models/decode_heads/decode_head.py#L142)的属性之一。
+与数据预处理器一致,损失函数也是模型的一部分,它是[解码头](https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/models/decode_heads/decode_head.py#L142)的属性之一。
 
-在 MMSegmentation 中,`decode_head` 的 [loss_by_feat](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/models/decode_heads/decode_head.py#L291) 方法是用于计算损失的统一接口。
+在 MMSegmentation 
中,`decode_head` 的 [loss_by_feat](https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/models/decode_heads/decode_head.py#L291) 方法是用于计算损失的统一接口。 参数: @@ -87,4 +87,4 @@ class Network(BaseSegmentor): - dict\[str, Tensor\]:一个损失组件的字典 -**注意:** `train_step` 将损失传递进 OptimWrapper 以更新模型中的权重,更多信息请参阅 [train_step](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/advanced_guides/models.md#train_step)。 ***([中文链接待更新](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/zh_cn/advanced_guides/models.md#train_step))*** +**注意:** `train_step` 将损失传递进 OptimWrapper 以更新模型中的权重,更多信息请参阅 [train_step](./models.md#train_step)。 diff --git a/docs/zh_cn/advanced_guides/datasets.md b/docs/zh_cn/advanced_guides/datasets.md index 546e97f70d..80852a04e5 100644 --- a/docs/zh_cn/advanced_guides/datasets.md +++ b/docs/zh_cn/advanced_guides/datasets.md @@ -1,10 +1,10 @@ # 数据集 -在 MMSegmentation 算法库中, 所有 Dataset 类的功能有两个: 加载[预处理](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/user_guides/2_dataset_prepare.md) 之后的数据集的信息, 和将数据送入[数据集变换流水线](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/datasets/basesegdataset.py#L141) 中, 进行[数据变换操作](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/zh_cn/advanced_guides/transforms.md). 加载的数据集信息包括两类: 元信息 (meta information), 数据集本身的信息, 例如数据集总共的类别, 和它们对应调色盘信息: 数据信息 (data information) 是指每组数据中图片和对应标签的路径. 下文中介绍了 MMSegmentation 1.x 中数据集的常用接口, 和 mmseg 数据集基类中数据信息加载与修改数据集类别的逻辑, 以及数据集与数据变换流水线 (pipeline) 的关系. +在 MMSegmentation 算法库中, 所有 Dataset 类的功能有两个: 加载[预处理](../user_guides/2_dataset_prepare.md) 之后的数据集的信息, 和将数据送入[数据集变换流水线](https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/datasets/basesegdataset.py#L141) 中, 进行[数据变换操作](./transforms.md). 加载的数据集信息包括两类: 元信息 (meta information), 数据集本身的信息, 例如数据集总共的类别, 和它们对应调色盘信息: 数据信息 (data information) 是指每组数据中图片和对应标签的路径. 下文中介绍了 MMSegmentation 1.x 中数据集的常用接口, 和 mmseg 数据集基类中数据信息加载与修改数据集类别的逻辑, 以及数据集与数据变换流水线 (pipeline) 的关系. ## 常用接口 -以 Cityscapes 为例, 介绍数据集常用接口. 如需运行以下示例, 请在当前工作目录下的 `data` 目录下载并[预处理](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/user_guides/2_dataset_prepare.md#cityscapes) Cityscapes 数据集. +以 Cityscapes 为例, 介绍数据集常用接口. 如需运行以下示例, 请在当前工作目录下的 `data` 目录下载并[预处理](../user_guides/2_dataset_prepare.md#cityscapes) Cityscapes 数据集. 实例化 Cityscapes 训练数据集: @@ -96,7 +96,7 @@ print(dataset.metainfo) 'reduce_zero_label': False} ``` -数据集 `__getitem__` 方法的返回值, 是经过数据增强的样本数据的输出, 同样也是一个字典, 包括两个字段, `'inputs'` 字段是当前样本经过数据增强操作的图像, 类型为 torch.Tensor, `'data_samples'` 字段存放的数据类型是 MMSegmentation 1.x 新添加的数据结构 [`Segdatasample`](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/zh_cn/advanced_guides/structures.md), 其中`gt_sem_seg` 字段是经过数据增强的标签数据. +数据集 `__getitem__` 方法的返回值, 是经过数据增强的样本数据的输出, 同样也是一个字典, 包括两个字段, `'inputs'` 字段是当前样本经过数据增强操作的图像, 类型为 torch.Tensor, `'data_samples'` 字段存放的数据类型是 MMSegmentation 1.x 新添加的数据结构 [`Segdatasample`](./structures.md), 其中`gt_sem_seg` 字段是经过数据增强的标签数据. ```python print(dataset[0]) @@ -166,13 +166,13 @@ print(dataset[0]) ## BaseSegDataset -由于 MMSegmentation 中的所有数据集的基本功能均包括(1) 加载[数据集预处理](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/zh_cn/user_guides/2_dataset_prepare.md) 之后的数据信息和 (2) 将数据送入数据变换流水线中进行数据变换, 因此在 MMSegmentation 中将其中的共同接口抽象成 [`BaseSegDataset`](https://mmsegmentation.readthedocs.io/en/dev-1.x/api.html?highlight=BaseSegDataset#mmseg.datasets.BaseSegDataset),它继承自 [MMEngine 的 `BaseDataset`](https://github.com/open-mmlab/mmengine/blob/main/docs/en/advanced_tutorials/basedataset.md), 遵循 OpenMMLab 数据集初始化统一流程, 支持高效的内部数据存储格式, 支持数据集拼接、数据集重复采样等功能. 
+由于 MMSegmentation 中的所有数据集的基本功能均包括(1) 加载[数据集预处理](../user_guides/2_dataset_prepare.md) 之后的数据信息和 (2) 将数据送入数据变换流水线中进行数据变换, 因此在 MMSegmentation 中将其中的共同接口抽象成 [`BaseSegDataset`](https://mmsegmentation.readthedocs.io/en/main/api.html?highlight=BaseSegDataset#mmseg.datasets.BaseSegDataset),它继承自 [MMEngine 的 `BaseDataset`](https://github.com/open-mmlab/mmengine/blob/main/docs/en/advanced_tutorials/basedataset.md), 遵循 OpenMMLab 数据集初始化统一流程, 支持高效的内部数据存储格式, 支持数据集拼接、数据集重复采样等功能.
 
 在 MMSegmentation BaseSegDataset 中重新定义了**数据信息加载方法**(`load_data_list`)并新增了 `get_label_map` 方法用来**修改数据集的类别信息**.
 
 ### 数据信息加载
 
-数据信息加载的内容是样本数据的图片路径和标签路径, 具体实现在 MMSegmentation 的 BaseSegDataset 的 [`load_data_list`](https://github.com/open-mmlab/mmsegmentation/blob/163277bfe0fa8fefb63ee5137917fafada1b301c/mmseg/datasets/basesegdataset.py#L231) 中.
-主要有两种获取图片和标签的路径方法, 如果当数据集目录按以下目录结构组织, [`load_data_list`](https://github.com/open-mmlab/mmsegmentation/blob/163277bfe0fa8fefb63ee5137917fafada1b301c/mmseg/datasets/basesegdataset.py#L231)) 会根据数据路径和后缀来解析.
+数据信息加载的内容是样本数据的图片路径和标签路径, 具体实现在 MMSegmentation 的 BaseSegDataset 的 [`load_data_list`](https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/datasets/basesegdataset.py#L231) 中.
+主要有两种获取图片和标签的路径方法, 当数据集目录按以下目录结构组织时, [`load_data_list`](https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/datasets/basesegdataset.py#L231) 会根据数据路径和后缀来解析.
 
 ```
 ├── data
@@ -322,7 +322,7 @@ print(dataset.metainfo)
     'reduce_zero_label': False}
 ```
 
-可以看到, 数据集元信息的类别和默认 Cityscapes 不同. 并且, 定义了标签重映射的字段 `label_map` 用来修改每个分割掩膜上的像素的类别索引, 分割标签类别会根据 `label_map`, 将类别重映射, [具体实现](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/datasets/basesegdataset.py#L151):
+可以看到, 数据集元信息的类别和默认 Cityscapes 不同. 并且, 定义了标签重映射的字段 `label_map` 用来修改每个分割掩膜上的像素的类别索引, 分割标签类别会根据 `label_map`, 将类别重映射, [具体实现](https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/datasets/basesegdataset.py#L151):
 
 ```python
 gt_semantic_seg_copy = gt_semantic_seg.copy()
diff --git a/docs/zh_cn/advanced_guides/engine.md b/docs/zh_cn/advanced_guides/engine.md
index a5746fcec8..79b4c8d229 100644
--- a/docs/zh_cn/advanced_guides/engine.md
+++ b/docs/zh_cn/advanced_guides/engine.md
@@ -61,21 +61,21 @@ OpenMMLab 将模型训练和测试过程抽象为 `Runner`, 插入钩子可以
 
 - 默认钩子 (default hooks)
 
-它们实现了训练时所必需的功能, 在配置文件中用 `default_hooks` 定义传给 `Runner`, `Runner` 通过 [`register_default_hooks`](https://github.com/open-mmlab/mmengine/blob/090104df21acd05a8aadae5a0d743a7da3314f6f/mmengine/runner/runner.py#L1780) 方法注册.
+它们实现了训练时所必需的功能, 在配置文件中用 `default_hooks` 定义传给 `Runner`, `Runner` 通过 [`register_default_hooks`](https://github.com/open-mmlab/mmengine/blob/main/mmengine/runner/runner.py#L1780) 方法注册.
 
 钩子有对应的优先级, 优先级越高, 越早被执行器调用. 如果优先级一样, 被调用的顺序和钩子注册的顺序一致. 不建议用户修改默认钩子的优先级, 可以参考 [mmengine hooks 文档](https://github.com/open-mmlab/mmengine/blob/main/docs/zh_cn/tutorials/hook.md) 了解钩子优先级的定义.
 
 下面是 MMSegmentation 中所用到的默认钩子:
 
-| 钩子 | 功能 | 优先级 |
-| :-----------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------: | :---------------: |
-| [IterTimerHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/iter_timer_hook.py) | 记录 iteration 花费的时间. | NORMAL (50) |
-| [LoggerHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/logger_hook.py) | 从 `Runner` 里不同的组件中收集日志记录, 并将其输出到终端, JSON 文件, tensorboard, wandb 等下游. | BELOW_NORMAL (60) |
-| [ParamSchedulerHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/param_scheduler_hook.py) | 更新优化器里面的一些超参数, 例如学习率的动量. | LOW (70) |
-| [CheckpointHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/checkpoint_hook.py) | 规律性地保存 checkpoint 文件. | VERY_LOW (90) |
-| [DistSamplerSeedHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/sampler_seed_hook.py) | 确保分布式采样器 shuffle 是打开的. | NORMAL (50) |
-| [SegVisualizationHook](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/visualization/local_visualizer.py) | 可视化验证和测试过程里的预测结果. | NORMAL (50) |
+| 钩子 | 功能 | 优先级 |
+| :--------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------: | :---------------: |
+| [IterTimerHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/iter_timer_hook.py) | 记录 iteration 花费的时间. | NORMAL (50) |
+| [LoggerHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/logger_hook.py) | 从 `Runner` 里不同的组件中收集日志记录, 并将其输出到终端, JSON 文件, tensorboard, wandb 等下游. | BELOW_NORMAL (60) |
+| [ParamSchedulerHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/param_scheduler_hook.py) | 更新优化器里面的一些超参数, 例如学习率的动量. | LOW (70) |
+| [CheckpointHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/checkpoint_hook.py) | 规律性地保存 checkpoint 文件. | VERY_LOW (90) |
+| [DistSamplerSeedHook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/sampler_seed_hook.py) | 确保分布式采样器 shuffle 是打开的. | NORMAL (50) |
+| [SegVisualizationHook](https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/visualization/local_visualizer.py) | 可视化验证和测试过程里的预测结果. | NORMAL (50) |
 
-MMSegmentation 会在 [`defualt_hooks`](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/_base_/schedules/schedule_160k.py#L19-L25) 里面注册一些训练所必需功能的钩子::
+MMSegmentation 会在 [`default_hooks`](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/_base_/schedules/schedule_160k.py#L19-L25) 里面注册一些训练所必需功能的钩子:
 
 ```python
 default_hooks = dict(
@@ -94,6 +94,7 @@ default_hooks = dict(
 以 `default_hooks` 里面的 `logger` 和 `checkpoint` 为例, 我们来介绍如何修改 `default_hooks` 中默认的钩子.
 
 (1) 模型保存配置
+
 `default_hooks` 使用 `checkpoint` 字段来初始化[模型保存钩子 (CheckpointHook)](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/checkpoint_hook.py#L19).
 
 ```python
@@ -104,6 +105,7 @@ checkpoint = dict(type='CheckpointHook', interval=1)
 更多相关参数的细节可以参考[这里](https://mmengine.readthedocs.io/zh_CN/latest/api/generated/mmengine.hooks.CheckpointHook.html#checkpointhook).
 
 (2) 日志配置
+
 `日志钩子 (LoggerHook)` 被用来收集 `执行器 (Runner)` 里面不同组件的日志信息然后写入终端, JSON 文件, tensorboard 和 wandb 等地方.
 
 ```python
@@ -126,7 +128,7 @@ visualizer = dict(
 
 - 自定义钩子 (custom hooks)
 
-自定义钩子在配置通过 `custom_hooks` 定义, `Runner` 通过 [`register_custom_hooks`](https://github.com/open-mmlab/mmengine/blob/090104df21acd05a8aadae5a0d743a7da3314f6f/mmengine/runner/runner.py#L1852) 方法注册.
+自定义钩子在配置文件中通过 `custom_hooks` 定义, `Runner` 通过 [`register_custom_hooks`](https://github.com/open-mmlab/mmengine/blob/main/mmengine/runner/runner.py#L1820) 方法注册.
 自定义钩子优先级需要在配置文件里设置, 如果没有设置, 则会被默认设置为 `NORMAL`. 下面是部分 MMEngine 中实现的自定义钩子:
 
 | 钩子 | 用法 |
@@ -145,7 +147,7 @@ custom_hooks = [
 
 ### SegVisualizationHook
 
-MMSegmentation 实现了 [`SegVisualizationHook`](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/engine/hooks/visualization_hook.py#L17), 用来在验证和测试时可视化预测结果. 
+MMSegmentation 实现了 [`SegVisualizationHook`](https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/engine/hooks/visualization_hook.py#L17), 用来在验证和测试时可视化预测结果. `SegVisualizationHook` 重写了基类 `Hook` 中的 `_after_iter` 方法, 在验证或测试时, 根据指定的迭代次数间隔调用 `visualizer` 的 `add_datasample` 方法绘制语义分割结果, 具体实现如下: ```python @@ -181,7 +183,7 @@ class SegVisualizationHook(Hook): ``` -关于可视化更多的细节可以查看[这里](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/user_guides/visualization.md). +关于可视化更多的细节可以查看[这里](../user_guides/visualization.md). ## 优化器 @@ -234,7 +236,7 @@ optim_wrapper = dict(type='AmpOptimWrapper', optimizer=optimizer) 在模型训练中, 如果想在优化器里为不同参数分别设置优化策略, 例如设置不同的学习率、权重衰减等超参数, 可以通过设置配置文件里 `optim_wrapper` 中的 `paramwise_cfg` 来实现. -下面的配置文件以 [ViT `optim_wrapper`](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/vit/vit_vit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py#L15-L27) 为例介绍 `paramwise_cfg` 参数使用. +下面的配置文件以 [ViT `optim_wrapper`](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/vit/vit_vit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py#L15-L27) 为例介绍 `paramwise_cfg` 参数使用. 训练时将 `pos_embed`, `mask_token`, `norm` 模块的 weight decay 参数的系数设置成 0. 即: 在训练时, 这些模块的 weight decay 将被变为 `weight_decay * decay_mult`=0. @@ -257,9 +259,9 @@ optim_wrapper = dict( ### 优化器封装构造器 -默认的优化器封装构造器 [`DefaultOptimWrapperConstructor`](https://github.com/open-mmlab/mmengine/blob/376251961da47ea8254ab808ae5c51e1430f18dc/mmengine/optim/optimizer/default_constructor.py#L19) 根据输入的 `optim_wrapper` 和 `optim_wrapper` 中定义的 `paramwise_cfg` 来构建训练中使用的优化器. 当 [`DefaultOptimWrapperConstructor`](https://github.com/open-mmlab/mmengine/blob/376251961da47ea8254ab808ae5c51e1430f18dc/mmengine/optim/optimizer/default_constructor.py#L19) 功能不能满足需求时, 可以自定义优化器封装构造器来实现超参数的配置. +默认的优化器封装构造器 [`DefaultOptimWrapperConstructor`](https://github.com/open-mmlab/mmengine/blob/main/mmengine/optim/optimizer/default_constructor.py#L19) 根据输入的 `optim_wrapper` 和 `optim_wrapper` 中定义的 `paramwise_cfg` 来构建训练中使用的优化器. 当 [`DefaultOptimWrapperConstructor`](https://github.com/open-mmlab/mmengine/blob/main/mmengine/optim/optimizer/default_constructor.py#L19) 功能不能满足需求时, 可以自定义优化器封装构造器来实现超参数的配置. 
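+如果需要自定义, 可以参考下面的最小示意 (其中 `MyOptimWrapperConstructor` 是为演示而假设的名字, 具体的参数分组逻辑需按需求实现):
+
+```python
+from mmengine.optim import DefaultOptimWrapperConstructor
+
+from mmseg.registry import OPTIM_WRAPPER_CONSTRUCTORS
+
+
+@OPTIM_WRAPPER_CONSTRUCTORS.register_module()
+class MyOptimWrapperConstructor(DefaultOptimWrapperConstructor):
+
+    def __call__(self, model):
+        # 在这里实现自定义的参数分组或超参数设置逻辑,
+        # 然后复用父类逻辑完成优化器封装的构建
+        return super().__call__(model)
+```
+
+在配置文件中通过 `optim_wrapper = dict(constructor='MyOptimWrapperConstructor', ...)` 即可启用该构造器.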

-MMSegmentation 中的实现了 [`LearningRateDecayOptimizerConstructor`](https://github.com/open-mmlab/mmsegmentation/blob/b21df463d47447f33c28d9a4f46136ad64d34a40/mmseg/engine/optimizers/layer_decay_optimizer_constructor.py#L104), 可以对以 ConvNeXt, BEiT 和 MAE 为骨干网络的模型训练时, 骨干网络的模型参数的学习率按照定义的衰减比例(`decay_rate`)逐层递减, 在配置文件中的配置如下:
+MMSegmentation 中实现了 [`LearningRateDecayOptimizerConstructor`](https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/engine/optimizers/layer_decay_optimizer_constructor.py#L104), 在以 ConvNeXt, BEiT 和 MAE 为骨干网络的模型训练时, 可以让骨干网络的模型参数的学习率按照定义的衰减比例(`decay_rate`)逐层递减, 在配置文件中的配置如下:
 
 ```python
 optim_wrapper = dict(
diff --git a/docs/zh_cn/advanced_guides/evaluation.md b/docs/zh_cn/advanced_guides/evaluation.md
index a82311ccc7..dc93a46e13 100644
--- a/docs/zh_cn/advanced_guides/evaluation.md
+++ b/docs/zh_cn/advanced_guides/evaluation.md
@@ -1,3 +1,158 @@
 # 模型评测
 
-中文版文档支持中,请先阅读[英文版本](../../en/advanced_guides/evaluation.md)
+模型评测过程会分别在 [ValLoop](https://github.com/open-mmlab/mmengine/blob/main/mmengine/runner/loops.py#L300) 和 [TestLoop](https://github.com/open-mmlab/mmengine/blob/main/mmengine/runner/loops.py#L373) 中被执行:用户既可以在训练期间评估模型性能,也可以在配置文件中进行简单设置后,使用测试脚本评估模型性能。`ValLoop` 和 `TestLoop` 属于 [Runner](https://github.com/open-mmlab/mmengine/blob/main/mmengine/runner/runner.py#L59),它们会在第一次被调用时构建。由于 `dataloader` 与 `evaluator` 是必需的参数,所以要成功构建 `ValLoop`,在构建 `Runner` 时必须设置 `val_dataloader` 和 `val_evaluator`,`TestLoop` 亦然。有关 Runner 设计的更多信息,请参阅 [MMEngine](https://github.com/open-mmlab/mmengine) 的[文档](https://github.com/open-mmlab/mmengine/blob/main/docs/zh_cn/design/runner.md)。
+
+<center>测试/验证 数据流</center>
+
+在 MMSegmentation 中,默认情况下,我们将 dataloader 和 metrics 的设置写在数据集配置文件中,并将 evaluation loop 的配置写在 `schedule_x` 配置文件中。
+
+例如,在 ADE20K 配置文件 `configs/_base_/datasets/ade20k.py` 中,我们在第 37 到 48 行配置了 `val_dataloader`,在第 51 行选择 `IoUMetric` 作为 evaluator,并设置 `mIoU` 作为指标:
+
+```python
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            img_path='images/validation',
+            seg_map_path='annotations/validation'),
+        pipeline=test_pipeline))
+
+val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+```
+
+为了能够在训练期间评估模型,我们将评估配置添加到了 `configs/_base_/schedules/schedule_40k.py` 文件的第 15 至 16 行:
+
+```python
+train_cfg = dict(type='IterBasedTrainLoop', max_iters=40000, val_interval=4000)
+val_cfg = dict(type='ValLoop')
+```
+
+使用以上两种设置,MMSegmentation 在 40K 迭代训练期间,每 4000 次迭代进行一次模型 **mIoU** 指标的评估。
+
+如果我们希望在训练后测试模型,则需要将 `test_dataloader`、`test_evaluator` 和 `test_cfg` 配置添加到配置文件中。
+
+```python
+test_dataloader = dict(
+    batch_size=1,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            img_path='images/validation',
+            seg_map_path='annotations/validation'),
+        pipeline=test_pipeline))
+
+test_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
+test_cfg = dict(type='TestLoop')
+```
+
+在 MMSegmentation 中,默认情况下,`test_dataloader` 和 `test_evaluator` 的设置与 `ValLoop` 的 dataloader 和 evaluator 相同,我们可以修改这些设置以满足我们的需要。
+
+## IoUMetric
+
+MMSegmentation 基于 [MMEngine](https://github.com/open-mmlab/mmengine) 提供的 [BaseMetric](https://github.com/open-mmlab/mmengine/blob/main/mmengine/evaluator/metric.py) 实现 [IoUMetric](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/evaluation/metrics/iou_metric.py) 和 [CityscapesMetric](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/evaluation/metrics/citys_metric.py),以评估模型的性能。有关统一评估接口的更多详细信息,请参阅[文档](https://mmengine.readthedocs.io/zh_CN/latest/tutorials/evaluation.html)。
+
+这里我们简要介绍 `IoUMetric` 的参数和两种主要方法。
+
+除了 `collect_device` 和 `prefix` 之外,`IoUMetric` 的构建还包含一些其他参数。
+
+构造函数的参数:
+
+- ignore_index(int)- 将在评估中忽略的类别索引。默认值:255。
+- iou_metrics(list\[str\] | str)- 需要计算的指标,可选项包括 'mIoU'、'mDice' 和 'mFscore'。
+- nan_to_num(int,可选)- 如果指定,NaN 值将被用户定义的数字替换。默认值:None。
+- beta(int)- 决定综合评分中 recall 的权重。默认值:1。
+- collect_device(str)- 用于在分布式训练期间从不同进程收集结果的设备名称。必须是 'cpu' 或 'gpu'。默认为 'cpu'。
+- prefix(str,可选)- 将添加到指标名称中的前缀,以消除不同 evaluator 的同名指标的歧义。如果参数中未提供前缀,则将使用 self.default_prefix 进行替代。默认为 None。
+
+`IoUMetric` 实现 IoU 指标的计算,`IoUMetric` 的两个核心方法是 `process` 和 `compute_metrics`。
+
+- `process` 方法处理一批 data 和 data_samples。
+- `compute_metrics` 方法根据处理的结果计算指标。
+
+### IoUMetric.process
+
+参数:
+
+- data_batch(Any)- 来自 dataloader 的一批数据。
+- data_samples(Sequence\[dict\])- 模型的一批输出。
+
+返回值:
+
+此方法没有返回值,因为处理的结果将存储在 `self.results` 中,以在处理完所有批次后进行指标的计算。
+
+### IoUMetric.compute_metrics
+
+参数:
+
+- results(list)- 每个批次的处理结果。
+
+返回值:
+
+- Dict\[str,float\] - 计算的指标。指标的名称为 key,值是相应的结果。key 主要包括 **aAcc**、**mIoU**、**mAcc**、**mDice**、**mFscore**、**mPrecision**、**mRecall**。
+
+## CityscapesMetric
+
+`CityscapesMetric` 使用由 Cityscapes 官方提供的 [CityscapesScripts](https://github.com/mcordts/cityscapesScripts) 进行模型性能的评估。
+
+### 使用方法
+
+在使用之前,请先安装 `cityscapesscripts` 包:
+
+```shell
+pip install cityscapesscripts
+```
+
+由于 `IoUMetric` 在 MMSegmentation 中作为默认的 evaluator 使用,如果您想使用 `CityscapesMetric`,则需要自定义配置文件。在自定义配置文件中,应按如下方式替换默认 evaluator。

+```python
+val_evaluator = dict(type='CityscapesMetric', output_dir='tmp')
+test_evaluator = val_evaluator
+```
+
+### 接口
+
+构造函数的参数:
+
+- output_dir (str) - 预测结果输出的路径。
+- ignore_index (int) - 将在评估中忽略的类别索引。默认值:255。
+- format_only (bool) - 只为提交进行结果格式化而不进行评估。当您希望将结果格式化为特定格式并将其提交给测试服务器时有用。默认为 False。
+- keep_results (bool) - 是否保留结果。当 `format_only` 为 True 时,`keep_results` 必须为 True。默认为 False。
+- collect_device (str) - 用于在分布式训练期间从不同进程收集结果的设备名称。必须是 'cpu' 或 'gpu'。默认为 'cpu'。
+- prefix (str,可选) - 将添加到指标名称中的前缀,以消除不同 evaluator 的同名指标的歧义。如果参数中未提供前缀,则将使用 self.default_prefix 进行替代。默认为 None。
+
+#### CityscapesMetric.process
+
+该方法将在图像上绘制 mask,并将绘制的图像保存到 `work_dir` 中。
+
+参数:
+
+- data_batch(dict)- 来自 dataloader 的一批数据。
+- data_samples(Sequence\[dict\])- 模型的一批输出。
+
+返回值:
+
+此方法没有返回值,因为处理的结果将存储在 `self.results` 中,以在处理完所有批次后进行指标的计算。
+
+#### CityscapesMetric.compute_metrics
+
+此方法将调用 `cityscapesscripts.evaluation.evalPixelLevelSemanticLabeling` 工具来计算指标。
+
+参数:
+
+- results(list)- 数据集的测试结果。
+
+返回值:
+
+- dict\[str:float\] - Cityscapes 评测结果。
diff --git a/docs/zh_cn/advanced_guides/models.md b/docs/zh_cn/advanced_guides/models.md
index 62dbea38c4..408a57863c 100644
--- a/docs/zh_cn/advanced_guides/models.md
+++ b/docs/zh_cn/advanced_guides/models.md
@@ -1,3 +1,177 @@
 # 模型
 
-中文版文档支持中,请先阅读[英文版本](../../en/advanced_guides/models.md)
+我们通常将深度学习任务中的神经网络定义为模型,这个模型即是算法的核心。[MMEngine](https://github.com/open-mmlab/mmengine) 抽象出了一个统一模型 [BaseModel](https://github.com/open-mmlab/mmengine/blob/main/mmengine/model/base_model/base_model.py#L16) 以标准化训练、测试和其他过程。MMSegmentation 实现的所有模型都继承自 `BaseModel`,并且在 MMSegmentation 中,我们实现了前向传播并为语义分割算法添加了一些功能。
+
+## 常用组件
+
+### 分割器(Segmentor)
+
+在 MMSegmentation 中,我们将网络架构抽象为**分割器**,它是一个包含网络所有组件的模型。我们已经实现了**编码器解码器(EncoderDecoder)**和**级联编码器解码器(CascadeEncoderDecoder)**,它们通常由**数据预处理器**、**骨干网络**、**解码头**和**辅助头**组成。
+
+### 数据预处理器(Data preprocessor)
+
+**数据预处理器**是将数据复制到目标设备并将数据预处理为模型输入格式的部分。
+
+### 主干网络(Backbone)
+
+**主干网络**是将图像转换为特征图的部分,例如没有最后全连接层的 **ResNet-50**。
+
+### 颈部(Neck)
+
+**颈部**是连接主干网络和头的部分。它对主干网络生成的原始特征图进行一些改进或重新配置。例如 **Feature Pyramid Network(FPN)**。
+
+### 解码头(Decode head)
+
+**解码头**是将特征图转换为分割掩膜的部分,例如 **PSPNet**。
+
+### 辅助头(Auxiliary head)
+
+**辅助头**是一个可选组件,它将特征图转换为仅用于计算辅助损失的分割掩膜。
+
+## 基本接口
+
+MMSegmentation 封装 `BaseModel` 并实现了 [BaseSegmentor](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/models/segmentors/base.py#L15) 类,主要提供 `forward`、`train_step`、`val_step` 和 `test_step` 接口。接下来将详细介绍这些接口。
+
+### forward
+
+<center>编码器解码器数据流</center>
+
+<center>级联编码器解码器数据流</center>
+
+`forward` 方法返回训练、验证、测试和简单推理过程的损失或预测。
+
+该方法应接受三种模式:“tensor”、“predict” 和 “loss”:
+
+- “tensor”:前向推理整个网络并返回张量或张量数组,无需任何后处理,与常见的 `nn.Module` 相同。
+- “predict”:前向推理并返回预测值,这些预测值将被完全处理到 `SegDataSample` 列表中。
+- “loss”:前向推理并根据给定的输入和数据样本返回损失的`字典`。
+
+**注:**[SegDataSample](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/structures/seg_data_sample.py) 是 MMSegmentation 的数据结构接口,用作不同组件之间的接口。`SegDataSample` 实现了抽象数据元素 `mmengine.structures.BaseDataElement`,请参阅 [MMEngine](https://github.com/open-mmlab/mmengine) 中的 [SegDataSample 文档](https://mmsegmentation.readthedocs.io/zh_CN/1.x/advanced_guides/structures.html)和[数据元素文档](https://mmengine.readthedocs.io/zh_CN/latest/advanced_tutorials/data_element.html)了解更多信息。
+
+注意,此方法不处理在 `train_step` 方法中完成的反向传播或优化器更新。
+
+参数:
+
+- inputs(torch.Tensor)- 通常为形状是(N, C, ...) 的输入张量。
+- data_sample(list\[[SegDataSample](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/structures/seg_data_sample.py)\]) - 分割数据样本。它通常包括 `metainfo` 和 `gt_sem_seg` 等信息。默认值为 None。
+- mode (str) - 返回什么类型的值。默认为 'tensor'。
+
+返回值:
+
+- `dict` 或 `list`:
+  - 如果 `mode == "loss"`,则返回用于反向过程和日志记录的损失张量`字典`。
+  - 如果 `mode == "predict"`,则返回 `SegDataSample` 的`列表`,推理结果将被递增地添加到传递给 forward 方法的 `data_sample` 参数中,每个 `SegDataSample` 包含以下关键词:
+    - pred_sem_seg (`PixelData`):语义分割的预测。
+    - seg_logits (`PixelData`):归一化前的语义分割预测值(logits)。
+  - 如果 `mode == "tensor"`,则返回`张量`或`张量数组`的`字典`以供自定义使用。
+
+### 预测模式
+
+我们在[配置文档](../user_guides/1_config.md)中简要描述了模型配置的字段,这里我们详细介绍 `model.test_cfg` 字段。`model.test_cfg` 用于控制前向行为,`"predict"` 模式下的 `forward` 方法可以在两种模式下运行:
+
+- `whole_inference`:如果 `cfg.model.test_cfg.mode == 'whole'`,则模型将使用完整图像进行推理。
+
+  `whole_inference` 模式的一个示例配置:
+
+  ```python
+  model = dict(
+      type='EncoderDecoder',
+      ...
+      test_cfg=dict(mode='whole')
+  )
+  ```
+
+- `slide_inference`:如果 `cfg.model.test_cfg.mode == 'slide'`,则模型将通过滑动窗口进行推理。**注意:** 如果选择 `slide` 模式,还应指定 `cfg.model.test_cfg.stride` 和 `cfg.model.test_cfg.crop_size`。
+
+  `slide_inference` 模式的一个示例配置:
+
+  ```python
+  model = dict(
+      type='EncoderDecoder',
+      ...
+      test_cfg=dict(mode='slide', crop_size=256, stride=170)
+  )
+  ```
+
+### train_step
+
+`train_step` 方法调用 `loss` 模式的前向接口以获得损失`字典`。`BaseModel` 类实现默认的模型训练过程,包括预处理、模型前向传播、损失计算、优化和反向传播。
+
+参数:
+
+- data (dict or tuple or list) - 从数据集采样的数据。在 MMSegmentation 中,数据字典包含 `inputs` 和 `data_samples` 两个字段。
+- optim_wrapper (OptimWrapper) - 用于更新模型参数的 OptimWrapper 实例。
+
+**注:**[OptimWrapper](https://github.com/open-mmlab/mmengine/blob/main/mmengine/optim/optimizer/optimizer_wrapper.py#L17) 提供了一个用于更新参数的通用接口,请参阅 [MMEngine](https://github.com/open-mmlab/mmengine) 中的优化器封装[文档](https://mmengine.readthedocs.io/zh_CN/latest/tutorials/optim_wrapper.html)了解更多信息。
+
+返回值:
+
+- Dict\[str, `torch.Tensor`\]:用于记录日志的张量的`字典`。
+
+<center>train_step 数据流</center>
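+
+下面用一段伪代码示意 `Runner` 内部对 `train_step` 的调用方式(仅作说明,省略了钩子与日志等细节,变量名均为假设):
+
+```python
+# 训练循环中的一次迭代(示意)
+for data in train_dataloader:
+    # train_step 内部完成数据预处理、前向传播、损失计算与参数更新
+    log_vars = model.train_step(data, optim_wrapper)
+```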
+
+### val_step
+
+`val_step` 方法调用 `predict` 模式的前向接口并返回预测结果,预测结果将进一步被传递给评测器的 `process` 接口和钩子的 `after_val_iter` 接口。
+
+参数:
+
+- data (`dict` or `tuple` or `list`) - 从数据集中采样的数据。在 MMSegmentation 中,数据字典包含 `inputs` 和 `data_samples` 两个字段。
+
+返回值:
+
+- `list` - 给定数据的预测结果。
+
+<center>test_step/val_step 数据流</center>
+ +### test_step + +`BaseModel` 中 `test_step` 与 `val_step` 的实现相同。 + +## 数据预处理器(Data Preprocessor) + +MMSegmentation 实现的 [SegDataPreProcessor](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/models/data_preprocessor.py#L13) 继承自由 [MMEngine](https://github.com/open-mmlab/mmengine) 实现的 [BaseDataPreprocessor](https://github.com/open-mmlab/mmengine/blob/main/mmengine/model/base_model/data_preprocessor.py#L18),提供数据预处理和将数据复制到目标设备的功能。 + +Runner 在构建阶段将模型传送到指定的设备,而 [SegDataPreProcessor](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/models/data_preprocessor.py#L13) 在 `train_step`、`val_step` 和 `test_step` 中将数据传送到指定设备,之后处理后的数据将被进一步传递给模型。 + +`SegDataPreProcessor` 构造函数的参数: + +- mean (Sequence\[Number\], 可选) - R、G、B 通道的像素平均值。默认为 None。 +- std (Sequence\[Number\], 可选) - R、G、B 通道的像素标准差。默认为 None。 +- size (tuple, 可选) - 固定的填充大小。 +- size_divisor (int, 可选) - 填充大小的除法因子。 +- pad_val (float, 可选) - 填充值。默认值:0。 +- seg_pad_val (float, 可选) - 分割图的填充值。默认值:255。 +- bgr_to_rgb (bool) - 是否将图像从 BGR 转换为 RGB。默认为 False。 +- rgb_to_bgr (bool) - 是否将图像从 RGB 转换为 BGR。默认为 False。 +- batch_augments (list\[dict\], 可选) - 批量化的数据增强。默认值为 None。 + +数据将按如下方式处理: + +- 收集数据并将其移动到目标设备。 +- 用定义的 `pad_val` 将输入填充到输入大小,并用定义的 `seg_Pad_val` 填充分割图。 +- 将输入堆栈到 batch_input。 +- 如果输入的形状为 (3, H, W),则将输入从 BGR 转换为 RGB。 +- 使用定义的标准差和平均值标准化图像。 +- 在训练期间进行如 Mixup 和 Cutmix 的批量化数据增强。 + +`forward` 方法的参数: + +- data (dict) - 从数据加载器采样的数据。 +- training (bool) - 是否启用训练时数据增强。 + +`forward` 方法的返回值: + +- Dict:与模型输入格式相同的数据。 diff --git a/docs/zh_cn/advanced_guides/training_tricks.md b/docs/zh_cn/advanced_guides/training_tricks.md index a33c0ea9cf..e5b8e4dae1 100644 --- a/docs/zh_cn/advanced_guides/training_tricks.md +++ b/docs/zh_cn/advanced_guides/training_tricks.md @@ -1,4 +1,4 @@ -# 训练技巧(待更新) +# 训练技巧 MMSegmentation 支持如下训练技巧: @@ -9,17 +9,17 @@ MMSegmentation 支持如下训练技巧: 在 MMSegmentation 里面,您也可以在配置文件里添加如下行来让解码头组件的学习率是主干组件的10倍。 ```python -optimizer=dict( +optim_wrapper=dict( paramwise_cfg = dict( custom_keys={ 'head': dict(lr_mult=10.)})) ``` -通过这种修改,任何被分组到 `'head'` 的参数的学习率都将乘以10。您也可以参照 [MMCV 文档](https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.DefaultOptimizerConstructor) 获取更详细的信息。 +通过这种修改,任何被分组到 `'head'` 的参数的学习率都将乘以10。您也可以参照 [MMEngine 文档](https://mmengine.readthedocs.io/zh_CN/latest/tutorials/optim_wrapper.html#id6) 获取更详细的信息。 ## 在线难样本挖掘 (Online Hard Example Mining, OHEM) -对于训练时采样,我们在 [这里](https://github.com/open-mmlab/mmsegmentation/tree/master/mmseg/core/seg/sampler) 做了像素采样器。 +MMSegmentation 中实现了像素采样器,训练时可以对特定像素进行采样,例如 OHEM(Online Hard Example Mining),可以解决样本不平衡问题, 如下例子是使用 PSPNet 训练并采用 OHEM 策略的配置: ```python @@ -58,38 +58,17 @@ model=dict( ```python _base_ = './fcn_unet_s5-d16_64x64_40k_drive.py' model = dict( - decode_head=dict(loss_decode=[dict(type='CrossEntropyLoss', loss_name='loss_ce', loss_weight=1.0), - dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0)]), - auxiliary_head=dict(loss_decode=[dict(type='CrossEntropyLoss', loss_name='loss_ce',loss_weight=1.0), - dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0)]), - ) + decode_head=dict(loss_decode=[ + dict(type='CrossEntropyLoss', loss_name='loss_ce', loss_weight=1.0), + dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0) + ]), + auxiliary_head=dict(loss_decode=[ + dict(type='CrossEntropyLoss', loss_name='loss_ce', loss_weight=1.0), + dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0) + ]), +) ``` 通过这种方式,确定训练过程中损失函数的权重 `loss_weight` 和在训练日志里的名字 `loss_name`。 -注意: `loss_name` 的名字必须带有 `loss_` 前缀,这样它才能被包括在反传的图里。 - -## 在损失函数中忽略特定的 label 类别 - -默认设置 
`avg_non_ignore=False`, 即每个像素都用来计算损失函数。尽管其中的一些像素属于需要被忽略的类别。
-
-对于训练时损失函数的计算,我们目前支持使用 `avg_non_ignore` 和 `ignore_index` 来忽略 label 特定的类别。 这样损失函数将只在非忽略类别像素中求平均值,会获得更好的表现。这里是[相关 PR](https://github.com/open-mmlab/mmsegmentation/pull/1409)。以 `unet` 使用 `Cityscapes` 数据集训练为例,
-在计算损失函数时,忽略 label 为0的背景,并且仅在不被忽略的像素上计算均值。配置文件写为:
-
-```python
-_base_ = './fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py'
-model = dict(
-    decode_head=dict(
-        ignore_index=0,
-        loss_decode=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0, avg_non_ignore=True),
-    auxiliary_head=dict(
-        ignore_index=0,
-        loss_decode=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0, avg_non_ignore=True)),
-    ))
-```
-
-通过这种方式,确定训练过程中损失函数的权重 `loss_weight` 和在训练日志里的名字 `loss_name`。
-
-注意: `loss_name` 的名字必须带有 `loss_` 前缀,这样它才能被包括在反传的图里。
+注意: `loss_name` 的名字必须带有 `loss_` 前缀,这样它才能被包括在计算图里。
diff --git a/docs/zh_cn/advanced_guides/transforms.md b/docs/zh_cn/advanced_guides/transforms.md
index 1cbe79ba49..e5f3bebf6d 100644
--- a/docs/zh_cn/advanced_guides/transforms.md
+++ b/docs/zh_cn/advanced_guides/transforms.md
@@ -1,3 +1,119 @@
-# 数据增强变化(待更新)
+# 数据增强变化
 
-中文版文档支持中,请先阅读[英文版本](../../en/advanced_guides/transforms.md)
+在本教程中,我们将介绍 MMSegmentation 中数据增强变化流程的设计。
+
+本指南的结构如下:
+
+- [数据增强变化](#数据增强变化)
+  - [数据增强变化流程设计](#数据增强变化流程设计)
+    - [数据加载](#数据加载)
+    - [预处理](#预处理)
+    - [格式修改](#格式修改)
+
+## 数据增强变化流程设计
+
+按照惯例,我们使用 `Dataset` 和 `DataLoader` 多进程地加载数据。`Dataset` 返回与模型 forward 方法的参数相对应的数据项的字典。由于语义分割中的数据可能大小不同,我们在 MMCV 中引入了一种新的 `DataContainer` 类型,以帮助收集和分发不同大小的数据。参见[此处](https://github.com/open-mmlab/mmcv/blob/master/mmcv/parallel/data_container.py)了解更多详情。
+
+在 MMSegmentation 的 1.x 版本中,所有数据转换都继承自 [`BaseTransform`](https://github.com/open-mmlab/mmcv/blob/2.x/mmcv/transforms/base.py#L6).
+
+转换的输入和输出类型都是字典。一个简单的示例如下:
+
+```python
+>>> from mmseg.datasets.transforms import LoadAnnotations
+>>> transforms = LoadAnnotations()
+>>> img_path = './data/cityscapes/leftImg8bit/train/aachen/aachen_000000_000019_leftImg8bit.png'
+>>> gt_path = './data/cityscapes/gtFine/train/aachen/aachen_000015_000019_gtFine_instanceTrainIds.png'
+>>> results = dict(
+>>>     img_path=img_path,
+>>>     seg_map_path=gt_path,
+>>>     reduce_zero_label=False,
+>>>     seg_fields=[])
+>>> data_dict = transforms(results)
+>>> print(data_dict.keys())
+dict_keys(['img_path', 'seg_map_path', 'reduce_zero_label', 'seg_fields', 'gt_seg_map'])
+```
+
+数据准备流程和数据集是解耦的。通常,数据集定义如何处理标注,数据流程定义准备数据字典的所有步骤。流程由一系列操作组成。每个操作都将字典作为输入,并为接下来的转换输出字典。
+
+操作分为数据加载、预处理、格式修改和测试数据增强。
+
+这里是 PSPNet 的流程示例:
+
+```python
+crop_size = (512, 1024)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations'),
+    dict(
+        type='RandomResize',
+        scale=(2048, 1024),
+        ratio_range=(0.5, 2.0),
+        keep_ratio=True),
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='RandomFlip', prob=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='PackSegInputs')
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
+    # add loading annotation after ``Resize`` because ground truth
+    # does not need to resize data transform
+    dict(type='LoadAnnotations'),
+    dict(type='PackSegInputs')
+]
+```
+
+对于每个操作,我们列出了 `添加`/`更新`/`删除` 相关的字典字段。在流程前,我们可以从数据集直接获得的信息是 `img_path` 和 `seg_map_path`。
+
+### 数据加载
+
+`LoadImageFromFile`:从文件加载图像。
+
+- 添加:`img`,`img_shape`,`ori_shape`
+
+`LoadAnnotations`:加载数据集提供的语义分割图。
+
+- 添加:`seg_fields`,`gt_seg_map`
+
+### 预处理
+
+`RandomResize`:随机调整图像和分割图大小。
+
+- 添加:`scale`,`scale_factor`,`keep_ratio`
+- 更新:`img`,`img_shape`,`gt_seg_map`
+
+`Resize`:调整图像和分割图的大小。
+
+- 添加:`scale`,`scale_factor`,`keep_ratio`
+- 更新:`img`,`gt_seg_map`,`img_shape`
+
+`RandomCrop`:随机裁剪图像和分割图。
+
+- 更新:`img`,`gt_seg_map`,`img_shape`
+
+`RandomFlip`:翻转图像和分割图。
+
+- 添加:`flip`,`flip_direction`
+- 更新:`img`,`gt_seg_map`
+
+`PhotoMetricDistortion`:按顺序对图像应用光度失真,每个变换的应用概率为 0.5。随机对比度的位置是第二或倒数第二(分别为下面的模式 0 或 1)。
+
+```
+1. 随机亮度
+2. 随机对比度(模式 0)
+3. 将颜色从 BGR 转换为 HSV
+4. 随机饱和度
+5. 随机色调
+6. 将颜色从 HSV 转换为 BGR
+7. 随机对比度(模式 1)
+```
+
+- 更新:`img`
+
+### 格式修改
+
+`PackSegInputs`:为语义分割打包输入数据。
+
+- 添加:`inputs`,`data_sample`
+- 删除:由 `meta_keys` 指定的 keys(合并到 data_sample 的 metainfo 中),所有其他 keys
diff --git a/docs/zh_cn/get_started.md b/docs/zh_cn/get_started.md
index da6d728a15..55360aab8f 100644
--- a/docs/zh_cn/get_started.md
+++ b/docs/zh_cn/get_started.md
@@ -51,7 +51,7 @@ mim install "mmcv>=2.0.0rc1"
 情况 a: 如果您想立刻开发和运行 mmsegmentation,您可通过源码安装:
 
 ```shell
-git clone -b dev-1.x https://github.com/open-mmlab/mmsegmentation.git
+git clone -b main https://github.com/open-mmlab/mmsegmentation.git
 cd mmsegmentation
 pip install -v -e .
 # '-v' 表示详细模式,更多的输出
@@ -164,7 +164,7 @@ MMSegmentation 可以在仅有 CPU 的版本上运行。在 CPU 模式,您可
 ```shell
 !git clone https://github.com/open-mmlab/mmsegmentation.git
 %cd mmsegmentation
-!git checkout dev-1.x
+!git checkout main
 !pip install -e .
 ```
 
@@ -197,4 +197,4 @@ docker run --gpus all --shm-size=8g -it -v {DATA_DIR}:/mmsegmentation/data mmseg
 ## 问题解答
 
-如果您在安装过程中遇到了其他问题,请第一时间查阅 [FAQ](faq.md) 文件。如果没有找到答案,您也可以在 GitHub 上提出 [issue](https://github.com/open-mmlab/mmsegmentation/issues/new/choose)
+如果您在安装过程中遇到了其他问题,请第一时间查阅 [FAQ](notes/faq.md) 文件。如果没有找到答案,您也可以在 GitHub 上提出 [issue](https://github.com/open-mmlab/mmsegmentation/issues/new/choose)
diff --git a/docs/zh_cn/migration.md b/docs/zh_cn/migration.md
deleted file mode 100644
index 3f19b26714..0000000000
--- a/docs/zh_cn/migration.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# 迁移文档
-
-中文迁移文档在支持中,请先阅读[英文版迁移文档](../en/migration/)
diff --git a/docs/zh_cn/migration/index.rst b/docs/zh_cn/migration/index.rst
new file mode 100644
index 0000000000..854b9e61d0
--- /dev/null
+++ b/docs/zh_cn/migration/index.rst
@@ -0,0 +1,8 @@
+迁移
+***************
+
+.. toctree::
+   :maxdepth: 1
+
+   interface.md
+   package.md
diff --git a/docs/zh_cn/migration/interface.md b/docs/zh_cn/migration/interface.md
new file mode 100644
index 0000000000..cd16d2cbc6
--- /dev/null
+++ b/docs/zh_cn/migration/interface.md
@@ -0,0 +1,523 @@
+# 从 MMSegmentation 0.x 迁移
+
+## 引言
+
+本指南介绍了 MMSegmentation 0.x 和 MMSegmentation 1.x 在行为和 API 方面的基本区别,以及它们与您的迁移过程的关系。
+
+## 新的依赖
+
+MMSegmentation 1.x 依赖于一些新的软件包,您可以准备一个新的干净环境,然后根据[安装教程](../get_started.md)重新安装。
+
+或手动安装以下软件包。
+
+1. [MMEngine](https://github.com/open-mmlab/mmengine):MMEngine 是 OpenMMLab 2.0 架构的核心,我们将许多与计算机视觉无关的内容从 MMCV 拆分到 MMEngine 中。
+
+2. [MMCV](https://github.com/open-mmlab/mmcv):OpenMMLab 的计算机视觉包。这不是一个新的依赖,但您需要将其升级到 **2.0.0rc1** 以上的版本。
+
+3. [MMClassification](https://github.com/open-mmlab/mmclassification)(可选):OpenMMLab 的图像分类工具箱和基准。这不是一个新的依赖,但您需要将其升级到 **1.0.0rc0** 以上的版本。
+
+4. [MMDetection](https://github.com/open-mmlab/mmdetection)(可选):OpenMMLab 的目标检测工具箱和基准。这不是一个新的依赖,但您需要将其升级到 **3.0.0rc0** 以上的版本。
+
+## 启动训练
+
+OpenMMLab 2.0 的主要改进是发布了 MMEngine,它提供了通用且强大的执行器(Runner),作为启动训练任务的统一接口。
+
+与 MMSeg 0.x 相比,MMSeg 1.x 在 `tools/train.py` 中提供的命令行参数更少。

| 功能 | 原版 | 新版 |
| :--- | :--- | :--- |
| 加载预训练模型 | `--load_from=$CHECKPOINT` | `--cfg-options load_from=$CHECKPOINT` |
| 从特定检查点恢复训练 | `--resume-from=$CHECKPOINT` | `--resume=$CHECKPOINT` |
| 从最新的检查点恢复训练 | `--auto-resume` | `--resume='auto'` |
| 训练期间是否不评估检查点 | `--no-validate` | `--cfg-options val_cfg=None val_dataloader=None val_evaluator=None` |
| 指定训练设备 | `--gpu-id=$DEVICE_ID` | - |
| 是否为不同进程设置不同的种子 | `--diff-seed` | `--cfg-options randomness.diff_rank_seed=True` |
| 是否为 CUDNN 后端设置确定性选项 | `--deterministic` | `--cfg-options randomness.deterministic=True` |
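
下面以"从特定检查点恢复训练"为例对比两种用法(示意命令,配置文件与检查点路径均为假设):

```shell
# MMSeg 0.x
python tools/train.py configs/xxx.py --resume-from work_dirs/xxx/iter_8000.pth

# MMSeg 1.x
python tools/train.py configs/xxx.py --resume=work_dirs/xxx/iter_8000.pth
```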

## 测试启动

与训练启动类似,MMSegmentation 1.x 的测试启动脚本在 `tools/test.py` 中仅提供关键命令行参数。以下是测试启动脚本的区别,更多关于测试启动的细节请参考[这里](../user_guides/4_train_test.md)。
| 功能 | 0.x | 1.x |
| :--- | :--- | :--- |
| 指定评测指标 | `--eval mIoU` | `--cfg-options test_evaluator.type=IoUMetric` |
| 测试时数据增强 | `--aug-test` | `--tta` |
| 测试时是否只保存预测结果不计算评测指标 | `--format-only` | `--cfg-options test_evaluator.format_only=True` |
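
例如,只格式化预测结果用于提交、而不在本地计算指标时,两种写法分别为(示意命令,配置文件与权重路径均为假设):

```shell
# MMSeg 0.x
python tools/test.py configs/xxx.py checkpoint.pth --format-only

# MMSeg 1.x
python tools/test.py configs/xxx.py checkpoint.pth --cfg-options test_evaluator.format_only=True
```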

## 配置文件

### 模型设置

`model.backbone`、`model.neck`、`model.decode_head` 和 `model.loss` 字段没有更改。

添加 `model.data_preprocessor` 字段以配置 `DataPreProcessor`,包括:

- `mean`(Sequence,可选):R、G、B 通道的像素平均值。默认为 None。

- `std`(Sequence,可选):R、G、B 通道的像素标准差。默认为 None。

- `size`(Sequence,可选):固定的填充大小。

- `size_divisor`(int,可选):填充大小的除法因子。

- `seg_pad_val`(float,可选):分割图的填充值。默认值:255。

- `padding_mode`(str):填充类型。默认值:'constant'。

  - constant:常量值填充,值由 pad_val 指定。

- `bgr_to_rgb`(bool):是否将图像从 BGR 转换为 RGB。默认为 False。

- `rgb_to_bgr`(bool):是否将图像从 RGB 转换为 BGR。默认为 False。

**注:**
有关详细信息,请参阅[模型文档](../advanced_guides/models.md)。

### 数据集设置

**data** 的更改:

原版 `data` 字段被拆分为 `train_dataloader`、`val_dataloader` 和 `test_dataloader`,允许我们以细粒度配置它们。例如,您可以在训练和测试期间指定不同的采样器和批次大小。
`samples_per_gpu` 重命名为 `batch_size`。
`workers_per_gpu` 重命名为 `num_workers`。
原版:

```python
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(...),
    val=dict(...),
    test=dict(...),
)
```

新版:

```python
train_dataloader = dict(
    batch_size=4,
    num_workers=4,
    dataset=dict(...),
    sampler=dict(type='DefaultSampler', shuffle=True)  # necessary
)

val_dataloader = dict(
    batch_size=4,
    num_workers=4,
    dataset=dict(...),
    sampler=dict(type='DefaultSampler', shuffle=False)  # necessary
)

test_dataloader = val_dataloader
```

**数据增强变换流程**的变更:

- 原始格式转换 **`ToTensor`**、**`ImageToTensor`**、**`Collect`** 组合为 [`PackSegInputs`](mmseg.datasets.transforms.PackSegInputs)。
- 我们不建议在数据集流程中执行 **`Normalize`** 和 **`Pad`**。请将其从流程中删除,并将其设置在 `data_preprocessor` 字段中。
- MMSeg 1.x 中原始的 **`Resize`** 已更改为 **`RandomResize`**,输入参数 `img_scale` 重命名为 `scale`,`keep_ratio` 的默认值修改为 False。
- 原始的 `test_pipeline` 将单尺度和多尺度测试结合在一起,在 MMSeg 1.x 中,我们将其分为 `test_pipeline` 和 `tta_pipeline`。

**注:**
我们将一些数据转换工作转移到数据预处理器中,如归一化,请参阅[文档](package.md)了解更多详细信息。

训练流程:
原版:

```python
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(type='Resize', img_scale=(2560, 640), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
```

新版:

```python
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(
        type='RandomResize',
        scale=(2560, 640),
        ratio_range=(0.5, 2.0),
        keep_ratio=True),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='PackSegInputs')
]
```
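
原先流程中 `Normalize` 与 `Pad` 的相关参数,在 1.x 中改为在 `data_preprocessor` 字段中配置,下面是一个示意(均值/方差取常用的 ImageNet 数值,仅作示例):

```python
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255)
```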

测试流程:
原版:

```python
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2560, 640),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
```

新版:

```python
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=(2560, 640), keep_ratio=True),
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
    dict(type='LoadImageFromFile', backend_args=None),
    dict(
        type='TestTimeAug',
        transforms=[
            [
                dict(type='Resize', scale_factor=r, keep_ratio=True)
                for r in img_ratios
            ],
            [
                dict(type='RandomFlip', prob=0., direction='horizontal'),
                dict(type='RandomFlip', prob=1., direction='horizontal')
            ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
        ])
]
```
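
定义好 `tta_pipeline` 后,测试时配合启动脚本的 `--tta` 参数即可启用测试时数据增强(示意命令,路径为假设):

```shell
python tools/test.py configs/xxx.py checkpoint.pth --tta
```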

**`evaluation`** 中的更改:

- **`evaluation`** 字段被拆分为 `val_evaluator` 和 `test_evaluator`,而且不再支持 `interval` 和 `save_best` 参数。
  `interval` 已移动到 `train_cfg.val_interval`,`save_best` 已移动到 `default_hooks.checkpoint.save_best`,`pre_eval` 已删除。
- `IoU` 已更改为 `IoUMetric`。
原版:

```python
evaluation = dict(interval=2000, metric='mIoU', pre_eval=True)
```

新版:

```python
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator
```
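
如前所述,原先 `evaluation` 中的 `interval` 和 `save_best` 迁移后的位置如下(示意配置):

```python
train_cfg = dict(type='IterBasedTrainLoop', max_iters=20000, val_interval=2000)
default_hooks = dict(
    checkpoint=dict(
        type='CheckpointHook', by_epoch=False, interval=2000, save_best='mIoU'))
```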

### Optimizer 和 Schedule 设置

**`optimizer`** 和 **`optimizer_config`** 中的更改:

- 现在我们使用 `optim_wrapper` 字段来指定优化过程的所有配置,`optimizer` 是 `optim_wrapper` 的一个子字段。
- `paramwise_cfg` 也是 `optim_wrapper` 的一个子字段,以替代 `optimizer`。
- `optimizer_config` 现在被删除,它的所有配置都被移动到 `optim_wrapper` 中。
- `grad_clip` 重命名为 `clip_grad`。
原版:

```python
optimizer = dict(type='AdamW', lr=0.0001, weight_decay=0.0005)
optimizer_config = dict(grad_clip=dict(max_norm=1, norm_type=2))
```

新版:

```python
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=0.0001, weight_decay=0.0005),
    clip_grad=dict(max_norm=1, norm_type=2))
```
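
此外,若需要混合精度训练,1.x 中只需将封装类型换成 `AmpOptimWrapper`(示意):

```python
optim_wrapper = dict(
    type='AmpOptimWrapper',
    optimizer=dict(type='AdamW', lr=0.0001, weight_decay=0.0005),
    clip_grad=dict(max_norm=1, norm_type=2))
```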

**`lr_config`** 中的更改:

- 我们将 `lr_config` 字段删除,并使用新的 `param_scheduler` 替代。
- 我们删除了与 `warmup` 相关的参数,因为我们使用 scheduler 组合来实现该功能。

新的 scheduler 组合机制非常灵活,您可以使用它来设计多种学习率/动量曲线。有关详细信息,请参见[教程](TODO)。
原版:

```python
lr_config = dict(
    policy='poly',
    warmup='linear',
    warmup_iters=1500,
    warmup_ratio=1e-6,
    power=1.0,
    min_lr=0.0,
    by_epoch=False)
```

新版:

```python
param_scheduler = [
    dict(
        type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500),
    dict(
        type='PolyLR',
        power=1.0,
        begin=1500,
        end=160000,
        eta_min=0.0,
        by_epoch=False,
    )
]
```

**`runner`** 中的更改:

原版 `runner` 字段中的大多数配置被移动到 `train_cfg`、`val_cfg` 和 `test_cfg` 中,以在训练、验证和测试中配置 loop。
原版:

```python
runner = dict(type='IterBasedRunner', max_iters=20000)
```

新版:

```python
# The `val_interval` is the original `evaluation.interval`.
train_cfg = dict(type='IterBasedTrainLoop', max_iters=20000, val_interval=2000)
val_cfg = dict(type='ValLoop') # Use the default validation loop.
test_cfg = dict(type='TestLoop') # Use the default test loop.
```

事实上,在 OpenMMLab 2.0 中,我们引入了 `Loop` 来控制训练、验证和测试中的行为。`Runner` 的功能也发生了变化。您可以在 [MMEngine](https://github.com/open-mmlab/mmengine/) 的[执行器教程](https://github.com/open-mmlab/mmengine/blob/main/docs/zh_cn/design/runner.md)中找到更多的详细信息。

### 运行时设置

**`checkpoint_config`** 和 **`log_config`** 中的更改:

`checkpoint_config` 被移动到 `default_hooks.checkpoint` 中,`log_config` 被移动到 `default_hooks.logger` 中。
并且我们将许多钩子设置从脚本代码移动到运行时配置的 `default_hooks` 字段中。

```python
default_hooks = dict(
    # record the time of every iterations.
    timer=dict(type='IterTimerHook'),

    # print log every 50 iterations.
    logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),

    # enable the parameter scheduler.
    param_scheduler=dict(type='ParamSchedulerHook'),

    # save checkpoint every 2000 iterations.
    checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=2000),

    # set sampler seed in distributed environment.
    sampler_seed=dict(type='DistSamplerSeedHook'),

    # validation results visualization.
    visualization=dict(type='SegVisualizationHook'))
```

此外,我们将原版 logger 拆分为 logger 和 visualizer。logger 用于记录信息,visualizer 用于在不同的后端展示这些信息,如终端和 TensorBoard。
原版:

```python
log_config = dict(
    interval=100,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook'),
    ])
```

新版:

```python
default_hooks = dict(
    ...
    logger=dict(type='LoggerHook', interval=100),
)
vis_backends = [dict(type='LocalVisBackend'),
                dict(type='TensorboardVisBackend')]
visualizer = dict(
    type='SegLocalVisualizer', vis_backends=vis_backends, name='visualizer')
```

**`load_from`** 和 **`resume_from`** 中的更改:

- 删除 `resume_from`。我们使用 `resume` 和 `load_from` 来替换它。
  - 如果 `resume=True` 且 `load_from` 为 **not None**,则从 `load_from` 中的检查点恢复训练。
  - 如果 `resume=True` 且 `load_from` 为 **None**,则尝试从工作目录中的最新检查点恢复。
  - 如果 `resume=False` 且 `load_from` 为 **not None**,则只加载检查点,而不继续训练。
  - 如果 `resume=False` 且 `load_from` 为 **None**,则不加载或恢复。

**`dist_params`** 中的更改:`dist_params` 字段现在是 `env_cfg` 的子字段,并且 `env_cfg` 中还有一些新的配置。

```python
env_cfg = dict(
    # whether to enable cudnn benchmark
    cudnn_benchmark=False,

    # set multi process parameters
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),

    # set distributed parameters
    dist_cfg=dict(backend='nccl'),
)
```

**`workflow`** 的改动:`workflow` 相关功能被删除。

新字段 **`visualizer`**:visualizer 是 OpenMMLab 2.0 体系结构中的新设计。我们在 runner 中使用 visualizer 实例来处理结果和日志可视化,并保存到不同的后端。更多详细信息,请参阅[可视化教程](../user_guides/visualization.md)。

新字段 **`default_scope`**:搜索所有注册模块的起点。MMSegmentation 中的 `default_scope` 为 `mmseg`。请参见[注册器教程](https://github.com/open-mmlab/mmengine/blob/main/docs/zh_cn/advanced_tutorials/registry.md)了解更多详情。
diff --git a/docs/zh_cn/migration/package.md b/docs/zh_cn/migration/package.md
new file mode 100644
index 0000000000..d8d2245bed
--- /dev/null
+++ b/docs/zh_cn/migration/package.md
@@ -0,0 +1,113 @@
# 包结构更改

本节介绍 MMSeg 0.x 和 1.x 之间包结构的变化。
MMSegmentation 0.xMMSegmentation 1.x
mmseg.apimmseg.api
- mmseg.core+ mmseg.engine
mmseg.datasetsmmseg.datasets
mmseg.modelsmmseg.models
- mmseg.ops+ mmseg.structure
mmseg.utilsmmseg.utils
+ mmseg.evaluation
+ mmseg.registry
+ +## 已删除的包 + +### `mmseg.core` + +在 OpenMMLab 2.0 中,`core` 包已被删除。`core` 的 `hooks` 和 `optimizers` 被移动到了 `mmseg.engine` 中,而 `core` 中的 `evaluation` 目前是 mmseg.evaluation。 + +## `mmseg.ops` + +`ops` 包包含 `encoding` 和 `wrappers`,它们被移到了 `mmseg.models.utils` 中。 + +## 增加的包 + +### `mmseg.engine` + +OpenMMLab 2.0 增加了一个新的深度学习训练基础库 MMEngine。它是所有 OpenMMLab 代码库的训练引擎。 +mmseg 的 `engine` 包是一些用于语义分割任务的定制模块,如 `SegVisualizationHook` 用于可视化分割掩膜。 + +### `mmseg.structure` + +在 OpenMMLab 2.0 中,我们为计算机视觉任务设计了数据结构,在 mmseg 中,我们在 `structure` 包中实现了 `SegDataSample`。 + +### `mmseg.evaluation` + +我们将所有评估指标都移动到了 `mmseg.evaluation` 中。 + +### `mmseg.registry` + +我们将 MMSegmentation 中所有类型模块的注册实现移动到 `mmseg.registry` 中。 + +## 修改的包 + +### `mmseg.apis` + +OpenMMLab 2.0 尝试支持计算机视觉的多任务统一接口,并发布了更强的 [`Runner`](https://github.com/open-mmlab/mmengine/blob/main/docs/zh_cn/design/runner.md),因此 MMSeg 1.x 删除了 `train.py` 和 `test.py` 中的模块,并将 `init_segmentor` 重命名为 `init_model`,将 `inference_segmentor` 重命名为 `inference_model`。 + +以下是 `mmseg.apis` 的更改: + +| 函数 | 变化 | +| :-------------------: | :--------------------------------------------- | +| `init_segmentor` | 重命名为 `init_model` | +| `inference_segmentor` | 重命名为 `inference_model` | +| `show_result_pyplot` | 基于 `SegLocalVisualizer` 实现 | +| `train_model` | 删除,使用 `runner.train` 训练。 | +| `multi_gpu_test` | 删除,使用 `runner.test` 测试。 | +| `single_gpu_test` | 删除,使用 `runner.test` 测试。 | +| `set_random_seed` | 删除,使用 `mmengine.runner.set_random_seed`。 | +| `init_random_seed` | 删除,使用 `mmengine.dist.sync_random_seed`。 | + +### `mmseg.datasets` + +OpenMMLab 2.0 将 `BaseDataset` 定义为数据集的函数和接口,MMSegmentation 1.x 也遵循此协议,并定义了从 `BaseDataset` 继承的 `BaseSegDataset`。MMCV 2.x 收集多种任务的通用数据转换,例如分类、检测、分割,因此 MMSegmentation 1.x 使用这些数据转换并将其从 mmseg.dataset 中删除。 + +| 包/模块 | 更改 | +| :-------------------: | :----------------------------------------------------------------------------------- | +| `mmseg.pipelines` | 移动到 `mmcv.transforms` 中 | +| `mmseg.sampler` | 移动到 `mmengine.dataset.sampler` 中 | +| `CustomDataset` | 重命名为 `BaseSegDataset` 并从 MMEngine 中的 `BaseDataset` 继承 | +| `DefaultFormatBundle` | 替换为 `PackSegInputs` | +| `LoadImageFromFile` | 移动到 `mmcv.transforms.LoadImageFromFile` 中 | +| `LoadAnnotations` | 移动到 `mmcv.transforms.LoadAnnotations` 中 | +| `Resize` | 移动到 `mmcv.transforms` 中并拆分为 `Resize`,`RandomResize` 和 `RandomChoiceResize` | +| `RandomFlip` | 移动到 `mmcv.transforms.RandomFlip` 中 | +| `Pad` | 移动到 `mmcv.transforms.Pad` 中 | +| `Normalize` | 移动到 `mmcv.transforms.Normalize` 中 | +| `Compose` | 移动到 `mmcv.transforms.Compose` 中 | +| `ImageToTensor` | 移动到 `mmcv.transforms.ImageToTensor` 中 | + +### `mmseg.models` + +`models` 没有太大变化,只是从以前的 `mmseg.ops` 中添加了 `encoding` 和 `wrappers` diff --git a/docs/zh_cn/notes/faq.md b/docs/zh_cn/notes/faq.md index 09fde025fd..6c5264c378 100644 --- a/docs/zh_cn/notes/faq.md +++ b/docs/zh_cn/notes/faq.md @@ -1,8 +1,118 @@ -# 常见问题解答(FAQ)(待更新) +# 常见问题解答(FAQ) -我们在这里列出了使用时的一些常见问题及其相应的解决方案。 如果您发现有一些问题被遗漏,请随时提 PR 丰富这个列表。 如果您无法在此获得帮助,请使用 [issue模板](https://github.com/open-mmlab/mmsegmentation/blob/master/.github/ISSUE_TEMPLATE/error-report.md/)创建问题,但是请在模板中填写所有必填信息,这有助于我们更快定位问题。 +我们在这里列出了使用时的一些常见问题及其相应的解决方案。 如果您发现有一些问题被遗漏,请随时提 PR 丰富这个列表。 如果您无法在此获得帮助,请使用 [issue 模板](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/.github/ISSUE_TEMPLATE/error-report.md/)创建问题,但是请在模板中填写所有必填信息,这有助于我们更快定位问题。 + +## 安装 + +兼容的 MMSegmentation 和 MMCV 版本如下。请安装正确版本的 MMCV 以避免安装问题。 + +| MMSegmentation version | MMCV version | MMEngine version | MMClassification (optional) version | MMDetection (optional) version | +| 
:--------------------: | :----------------------------: | :---------------: | :---------------------------------: | :----------------------------: |
+| dev-1.x branch         | mmcv >= 2.0.0rc4               | MMEngine >= 0.5.0 | mmcls>=1.0.0rc0                     | mmdet >= 3.0.0rc6              |
+| 1.x branch             | mmcv >= 2.0.0rc4               | MMEngine >= 0.5.0 | mmcls>=1.0.0rc0                     | mmdet >= 3.0.0rc6              |
+| 1.0.0rc6               | mmcv >= 2.0.0rc4               | MMEngine >= 0.5.0 | mmcls>=1.0.0rc0                     | mmdet >= 3.0.0rc6              |
+| 1.0.0rc5               | mmcv >= 2.0.0rc4               | MMEngine >= 0.2.0 | mmcls>=1.0.0rc0                     | mmdet>=3.0.0rc6                |
+| 1.0.0rc4               | mmcv == 2.0.0rc3               | MMEngine >= 0.1.0 | mmcls>=1.0.0rc0                     | mmdet>=3.0.0rc4, \<=3.0.0rc5   |
+| 1.0.0rc3               | mmcv == 2.0.0rc3               | MMEngine >= 0.1.0 | mmcls>=1.0.0rc0                     | mmdet>=3.0.0rc4, \<=3.0.0rc5   |
+| 1.0.0rc2               | mmcv == 2.0.0rc3               | MMEngine >= 0.1.0 | mmcls>=1.0.0rc0                     | mmdet>=3.0.0rc4, \<=3.0.0rc5   |
+| 1.0.0rc1               | mmcv >= 2.0.0rc1, \<=2.0.0rc3  | MMEngine >= 0.1.0 | mmcls>=1.0.0rc0                     | Not required                   |
+| 1.0.0rc0               | mmcv >= 2.0.0rc1, \<=2.0.0rc3  | MMEngine >= 0.1.0 | mmcls>=1.0.0rc0                     | Not required                   |
+
+如果您已经安装了版本不合适的 mmcv,请先运行`pip uninstall mmcv`卸载已安装的 mmcv;如您先前安装的为 mmcv-full(存在于 OpenMMLab 1.x),请运行`pip uninstall mmcv-full`进行卸载。
+
+- 如出现 "No module named 'mmcv'"
+  1. 使用`pip uninstall mmcv`卸载环境中现有的 mmcv
+  2. 按照[安装说明](../get_started.md)安装对应的 mmcv
 
 ## 如何获知模型训练时需要的显卡数量
 
-- 看模型的config文件的命名。可以参考[学习配置文件](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/zh_cn/tutorials/config.md)中的`配置文件命名风格`部分。比如,对于名字为`segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py`的config文件,`8x1`代表训练其对应的模型需要的卡数为8,每张卡中的batch size为1。
-- 看模型的log文件。点开该模型的log文件,并在其中搜索`nGPU`,在`nGPU`后的数字个数即训练时所需的卡数。比如,在log文件中搜索`nGPU`得到`nGPU 0,1,2,3,4,5,6,7`的记录,则说明训练该模型需要使用八张卡。
+- 看模型的 config 文件命名。可以参考[了解配置文件](../user_guides/1_config.md)中的`配置文件命名风格`部分。比如,对于名字为`segformer_mit-b0_8xb1-160k_cityscapes-1024x1024.py`的 config 文件,`8xb1`代表训练其对应的模型需要的卡数为 8,每张卡中的 batch size 为 1。
+- 看模型的 log 文件。点开该模型的 log 文件,并在其中搜索`nGPU`,在`nGPU`后的数字个数即训练时所需的卡数。比如,在 log 文件中搜索`nGPU`得到`nGPU 0,1,2,3,4,5,6,7`的记录,则说明训练该模型需要使用八张卡。
+
+## auxiliary head 是什么
+
+简单来说,这是一种用于提高准确率的深度监督技术。在训练阶段,`decode_head` 用于输出语义分割的结果,`auxiliary_head` 只是增加了一个辅助损失,其产生的分割结果对模型最终的预测结果没有影响,仅在训练中起作用。您可以阅读这篇[论文](https://arxiv.org/pdf/1612.01105.pdf)了解更多信息。
+
+## 运行测试脚本时如何输出绘制分割掩膜的图像
+
+在测试脚本中,我们提供了`--out`参数,用于控制是否输出保存预测的分割掩膜图像。您可以运行以下命令输出测试结果:
+
+```shell
+python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} --out ${OUTPUT_DIR}
+```
+
+更多用例细节可查阅[文档](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/zh_cn/user_guides/4_train_test.md#%E6%B5%8B%E8%AF%95%E5%B9%B6%E4%BF%9D%E5%AD%98%E5%88%86%E5%89%B2%E7%BB%93%E6%9E%9C)、[PR #2712](https://github.com/open-mmlab/mmsegmentation/pull/2712) 以及[迁移文档](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/zh_cn/migration/interface.md#%E6%B5%8B%E8%AF%95%E5%90%AF%E5%8A%A8)了解相关说明。
+
+## 如何处理二值分割任务?
+
+MMSegmentation 使用 `num_classes` 和 `out_channels` 来控制模型最后一层 `self.conv_seg` 的输出。更多细节可以参考[这里](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/models/decode_heads/decode_head.py)。
+
+`num_classes` 应该和数据集本身的类别个数一致。当是二值分割时,数据集只有前景和背景两类,所以 `num_classes` 为 2。
`out_channels` 控制模型最后一层的输出的通道数,通常和 `num_classes` 相等,但当二值分割时候,可以有两种处理方法, 分别是: + +- 设置 `out_channels=2`,在训练时以 Cross Entropy Loss 作为损失函数,在推理时使用 `F.softmax()` 归一化 logits 值,然后通过 `argmax()` 得到每个像素的预测结果。 + +- 设置 `out_channels=1`,在训练时以 Binary Cross Entropy Loss 作为损失函数,在推理时使用 `F.sigmoid()` 和 `threshold` 得到预测结果,`threshold` 默认为 0.3。 + +对于实现上述两种计算二值分割的方法,需要在 `decode_head` 和 `auxiliary_head` 的配置里修改。下面是对样例 [pspnet_unet_s5-d16.py](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/_base_/models/pspnet_unet_s5-d16.py) 做出的对应修改。 + +- (1) `num_classes=2`, `out_channels=2` 并在 `CrossEntropyLoss` 里面设置 `use_sigmoid=False`。 + +```python +decode_head=dict( + type='PSPHead', + in_channels=64, + in_index=4, + num_classes=2, + out_channels=2, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), +auxiliary_head=dict( + type='FCNHead', + in_channels=128, + in_index=3, + num_classes=2, + out_channels=2, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), +``` + +- (2) `num_classes=2`, `out_channels=1` 并在 `CrossEntropyLoss` 里面设置 `use_sigmoid=True`. + +```python +decode_head=dict( + type='PSPHead', + in_channels=64, + in_index=4, + num_classes=2, + out_channels=1, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), +auxiliary_head=dict( + type='FCNHead', + in_channels=128, + in_index=3, + num_classes=2, + out_channels=1, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)), +``` + +## `reduce_zero_label` 的作用 + +数据集中 `reduce_zero_label` 参数类型为布尔类型,默认为 False,它的功能是为了忽略数据集 label 0。具体做法是将 label 0 改为 255,其余 label 相应编号减 1,同时 decode head 里将 255 设为 ignore index,即不参与 loss 计算。 +以下是 `reduce_zero_label` 具体实现逻辑: + +```python +if self.reduce_zero_label: + # avoid using underflow conversion + gt_semantic_seg[gt_semantic_seg == 0] = 255 + gt_semantic_seg = gt_semantic_seg - 1 + gt_semantic_seg[gt_semantic_seg == 254] = 255 +``` + +关于您的数据集是否需要使用 reduce_zero_label,有以下两类情况: + +- 例如在 [Potsdam](https://github.com/open-mmlab/mmsegmentation/blob/1.x/docs/en/user_guides/2_dataset_prepare.md#isprs-potsdam) 数据集上,有 0-不透水面、1-建筑、2-低矮植被、3-树、4-汽车、5-杂乱,六类。但该数据集提供了两种RGB标签,一种为图像边缘处有黑色像素的标签,另一种是没有黑色边缘的标签。对于有黑色边缘的标签,在 [dataset_converters.py](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/tools/dataset_converters/potsdam.py)中,其将黑色边缘转换为 label 0,其余标签分别为 1-不透水面、2-建筑、3-低矮植被、4-树、5-汽车、6-杂乱,那么此时,就应该在数据集 [potsdam.py](https://github.com/open-mmlab/mmsegmentation/blob/ff95416c3b5ce8d62b9289f743531398efce534f/mmseg/datasets/potsdam.py#L23) 中将`reduce_zero_label=True`。如果使用的是没有黑色边缘的标签,那么 mask label 中只有 0-5,此时就应该使`reduce_zero_label=False`。需要结合您的实际情况来使用。 +- 例如在第 0 类为background类别的数据集上,如果您最终是需要将背景和您的其余类别分开时,是不需要使用`reduce_zero_label`的,此时在数据集中应该将其设置为`reduce_zero_label=False` + +**注意:** 使用 `reduce_zero_label` 请确认数据集原始类别个数,如果只有两类,需要关闭 `reduce_zero_label` 即设置 `reduce_zero_label=False`。 diff --git a/docs/zh_cn/user_guides/2_dataset_prepare.md b/docs/zh_cn/user_guides/2_dataset_prepare.md index c9c3606977..e5e6c76c47 100644 --- a/docs/zh_cn/user_guides/2_dataset_prepare.md +++ b/docs/zh_cn/user_guides/2_dataset_prepare.md @@ -1,3 +1,618 @@ -## 准备数据集(待更新) +# 教程2:准备数据集 -中文版文档支持中,请先阅读[英文版本](../../en/user_guides/2_dataset_prepare.md) +我们建议将数据集根目录符号链接到 `$MMSEGMENTATION/data`。 +如果您的目录结构不同,您可能需要更改配置文件中相应的路径。 + +```none +mmsegmentation +├── mmseg +├── tools +├── configs +├── data +│ ├── cityscapes +│ │ ├── leftImg8bit +│ │ │ ├── train +│ │ │ ├── val +│ │ ├── gtFine +│ │ │ ├── train +│ │ │ ├── val +│ ├── VOCdevkit +│ │ ├── VOC2012 +│ │ │ ├── 
JPEGImages +│ │ │ ├── SegmentationClass +│ │ │ ├── ImageSets +│ │ │ │ ├── Segmentation +│ │ ├── VOC2010 +│ │ │ ├── JPEGImages +│ │ │ ├── SegmentationClassContext +│ │ │ ├── ImageSets +│ │ │ │ ├── SegmentationContext +│ │ │ │ │ ├── train.txt +│ │ │ │ │ ├── val.txt +│ │ │ ├── trainval_merged.json +│ │ ├── VOCaug +│ │ │ ├── dataset +│ │ │ │ ├── cls +│ ├── ade +│ │ ├── ADEChallengeData2016 +│ │ │ ├── annotations +│ │ │ │ ├── training +│ │ │ │ ├── validation +│ │ │ ├── images +│ │ │ │ ├── training +│ │ │ │ ├── validation +│ ├── coco_stuff10k +│ │ ├── images +│ │ │ ├── train2014 +│ │ │ ├── test2014 +│ │ ├── annotations +│ │ │ ├── train2014 +│ │ │ ├── test2014 +│ │ ├── imagesLists +│ │ │ ├── train.txt +│ │ │ ├── test.txt +│ │ │ ├── all.txt +│ ├── coco_stuff164k +│ │ ├── images +│ │ │ ├── train2017 +│ │ │ ├── val2017 +│ │ ├── annotations +│ │ │ ├── train2017 +│ │ │ ├── val2017 +│ ├── CHASE_DB1 +│ │ ├── images +│ │ │ ├── training +│ │ │ ├── validation +│ │ ├── annotations +│ │ │ ├── training +│ │ │ ├── validation +│ ├── DRIVE +│ │ ├── images +│ │ │ ├── training +│ │ │ ├── validation +│ │ ├── annotations +│ │ │ ├── training +│ │ │ ├── validation +│ ├── HRF +│ │ ├── images +│ │ │ ├── training +│ │ │ ├── validation +│ │ ├── annotations +│ │ │ ├── training +│ │ │ ├── validation +│ ├── STARE +│ │ ├── images +│ │ │ ├── training +│ │ │ ├── validation +│ │ ├── annotations +│ │ │ ├── training +│ │ │ ├── validation +| ├── dark_zurich +| │   ├── gps +| │   │   ├── val +| │   │   └── val_ref +| │   ├── gt +| │   │   └── val +| │   ├── LICENSE.txt +| │   ├── lists_file_names +| │   │   ├── val_filenames.txt +| │   │   └── val_ref_filenames.txt +| │   ├── README.md +| │   └── rgb_anon +| │   | ├── val +| │   | └── val_ref +| ├── NighttimeDrivingTest +| | ├── gtCoarse_daytime_trainvaltest +| | │   └── test +| | │   └── night +| | └── leftImg8bit +| | | └── test +| | | └── night +│ ├── loveDA +│ │ ├── img_dir +│ │ │ ├── train +│ │ │ ├── val +│ │ │ ├── test +│ │ ├── ann_dir +│ │ │ ├── train +│ │ │ ├── val +│ ├── potsdam +│ │ ├── img_dir +│ │ │ ├── train +│ │ │ ├── val +│ │ ├── ann_dir +│ │ │ ├── train +│ │ │ ├── val +│ ├── vaihingen +│ │ ├── img_dir +│ │ │ ├── train +│ │ │ ├── val +│ │ ├── ann_dir +│ │ │ ├── train +│ │ │ ├── val +│ ├── iSAID +│ │ ├── img_dir +│ │ │ ├── train +│ │ │ ├── val +│ │ │ ├── test +│ │ ├── ann_dir +│ │ │ ├── train +│ │ │ ├── val +│ ├── synapse +│ │ ├── img_dir +│ │ │ ├── train +│ │ │ ├── val +│ │ ├── ann_dir +│ │ │ ├── train +│ │ │ ├── val +│ ├── REFUGE +│ │ ├── images +│ │ │ ├── training +│ │ │ ├── validation +│ │ │ ├── test +│ │ ├── annotations +│ │ │ ├── training +│ │ │ ├── validation +│ │ │ ├── test +│ ├── mapillary +│ │ ├── training +│ │ │ ├── images +│ │ │ ├── v1.2 +| │ │ │ ├── instances +| │ │ │ ├── labels +| │   │   │ └── panoptic +│ │ │ ├── v2.0 +| │ │ │ ├── instances +| │ │ │ ├── labels +| │ │ │ ├── panoptic +| │   │   │ └── polygons +│ │ ├── validation +│ │ │ ├── images +| │ │ ├── v1.2 +| │ │ │ ├── instances +| │ │ │ ├── labels +| │   │   │ └── panoptic +│ │ │ ├── v2.0 +| │ │ │ ├── instances +| │ │ │ ├── labels +| │ │ │ ├── panoptic +| │   │   │ └── polygons +``` + +## Cityscapes + +Cityscapes [官方网站](https://www.cityscapes-dataset.com/)可以下载 Cityscapes 数据集,按照官网要求注册并登陆后,数据可以在[这里](https://www.cityscapes-dataset.com/downloads/)找到。 + +按照惯例,`**labelTrainIds.png` 用于 cityscapes 训练。 +我们提供了一个基于 [cityscapesscripts](https://github.com/mcordts/cityscapesScripts) 的[脚本](https://github.com/open-mmlab/mmsegmentation/blob/1.x/tools/dataset_converters/cityscapes.py)用于生成 `**labelTrainIds.png`。 + 
+```shell +# --nproc 表示 8 个转换进程,也可以省略。 +python tools/dataset_converters/cityscapes.py data/cityscapes --nproc 8 +``` + +## Pascal VOC + +Pascal VOC 2012 可从[此处](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar)下载。 +此外,Pascal VOC 数据集的最新工作通常利用额外的增强数据,可以在[这里](http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz)找到。 + +如果您想使用增强的 VOC 数据集,请运行以下命令将增强数据的标注转换为正确的格式。 + +```shell +# --nproc 表示 8 个转换进程,也可以省略。 +python tools/dataset_converters/voc_aug.py data/VOCdevkit data/VOCdevkit/VOCaug --nproc 8 +``` + +请参考[拼接数据集文档](../advanced_guides/add_datasets.md#拼接数据集)及 [voc_aug 配置示例](../../../configs/_base_/datasets/pascal_voc12_aug.py)以详细了解如何将它们拼接并合并训练。 + +## ADE20K + +ADE20K 的训练和验证集可以从这个[链接](http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip)下载。 +如果需要下载测试数据集,可以在[官网](http://host.robots.ox.ac.uk/)注册后,下载[测试集](http://host.robots.ox.ac.uk:8080/eval/downloads/VOC2010test.tar)。 + +## Pascal Context + +Pascal Context 的训练和验证集可以从[此处](http://host.robots.ox.ac.uk/pascal/VOC/voc2010/VOCtrainval_03-May-2010.tar)下载。注册后,您也可以从[此处](http://host.robots.ox.ac.uk:8080/eval/downloads/VOC2010test.tar)下载测试集。 + +从原始数据集中抽出部分数据作为验证集,您可以从[此处](https://codalabuser.blob.core.windows.net/public/trainval_merged.json)下载 trainval_merged.json 文件。 + +请先安装 [Detail](https://github.com/zhanghang1989/detail-api) 工具然后运行以下命令将标注转换为正确的格式。 + +```shell +python tools/dataset_converters/pascal_context.py data/VOCdevkit data/VOCdevkit/VOC2010/trainval_merged.json +``` + +## COCO Stuff 10k + +数据可以通过 wget 在[这里](http://calvin.inf.ed.ac.uk/wp-content/uploads/data/cocostuffdataset/cocostuff-10k-v1.1.zip)下载。 + +对于 COCO Stuff 10k 数据集,请运行以下命令下载并转换数据集。 + +```shell +# 下载 +mkdir coco_stuff10k && cd coco_stuff10k +wget http://calvin.inf.ed.ac.uk/wp-content/uploads/data/cocostuffdataset/cocostuff-10k-v1.1.zip + +# 解压 +unzip cocostuff-10k-v1.1.zip + +# --nproc 表示 8 个转换进程,也可以省略。 +python tools/dataset_converters/coco_stuff10k.py /path/to/coco_stuff10k --nproc 8 +``` + +按照惯例,`/path/to/coco_stuff164k/annotations/*2014/*_labelTrainIds.png` 中的 mask 标注用于 COCO Stuff 10k 的训练和测试。 + +## COCO Stuff 164k + +对于 COCO Stuff 164k 数据集,请运行以下命令下载并转换增强的数据集。 + +```shell +# 下载 +mkdir coco_stuff164k && cd coco_stuff164k +wget http://images.cocodataset.org/zips/train2017.zip +wget http://images.cocodataset.org/zips/val2017.zip +wget http://calvin.inf.ed.ac.uk/wp-content/uploads/data/cocostuffdataset/stuffthingmaps_trainval2017.zip + +# 解压 +unzip train2017.zip -d images/ +unzip val2017.zip -d images/ +unzip stuffthingmaps_trainval2017.zip -d annotations/ + +# --nproc 表示 8 个转换进程,也可以省略。 +python tools/dataset_converters/coco_stuff164k.py /path/to/coco_stuff164k --nproc 8 +``` + +按照惯例,`/path/to/coco_stuff164k/annotations/*2017/*_labelTrainIds.png` 中的 mask 标注用于 COCO Stuff 164k 的训练和测试。 + +此数据集的详细信息可在[此处](https://github.com/nightrome/cocostuff#downloads)找到。 + +## CHASE DB1 + +CHASE DB1 的训练和验证集可以从[此处](https://staffnet.kingston.ac.uk/~ku15565/CHASE_DB1/assets/CHASEDB1.zip)下载。 + +请运行以下命令,准备 CHASE DB1 数据集: + +```shell +python tools/dataset_converters/chase_db1.py /path/to/CHASEDB1.zip +``` + +该脚本将自动调整数据集目录结构,使其满足 MMSegmentation 数据集加载要求。 + +## DRIVE + +按照[官网](https://drive.grand-challenge.org/)要求,注册并登陆后,便可以下载 DRIVE 的训练和验证数据集。 + +要将 DRIVE 数据集转换为 MMSegmentation 的格式,请运行以下命令: + +```shell +python tools/dataset_converters/drive.py /path/to/training.zip /path/to/test.zip +``` + +该脚本将自动调整数据集目录结构,使其满足 MMSegmentation 数据集加载要求。 + +## HRF + +请下载 
[healthy.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/healthy.zip)、[glaucoma.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/glaucoma.zip)、[diabetic_retinopathy.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/diabetic_retinopathy.zip)、[healthy_manualsegm.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/healthy_manualsegm.zip)、[glaucoma_manualsegm.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/glaucoma_manualsegm.zip) 和 [diabetic_retinopathy_manualsegm.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/diabetic_retinopathy_manualsegm.zip),无需解压,可以直接运行以下命令,准备 HRF 数据集:
+
+```shell
+python tools/dataset_converters/hrf.py /path/to/healthy.zip /path/to/healthy_manualsegm.zip /path/to/glaucoma.zip /path/to/glaucoma_manualsegm.zip /path/to/diabetic_retinopathy.zip /path/to/diabetic_retinopathy_manualsegm.zip
+```
+
+该脚本将自动调整数据集目录结构,使其满足 MMSegmentation 数据集加载要求。
+
+## STARE
+
+请下载 [stare-images.tar](http://cecas.clemson.edu/~ahoover/stare/probing/stare-images.tar)、[labels-ah.tar](http://cecas.clemson.edu/~ahoover/stare/probing/labels-ah.tar) 和 [labels-vk.tar](http://cecas.clemson.edu/~ahoover/stare/probing/labels-vk.tar),无需解压,可以直接运行以下命令,准备 STARE 数据集:
+
+```shell
+python tools/dataset_converters/stare.py /path/to/stare-images.tar /path/to/labels-ah.tar /path/to/labels-vk.tar
+```
+
+该脚本将自动调整数据集目录结构,使其满足 MMSegmentation 数据集加载要求。
+
+## Dark Zurich
+
+由于我们只支持在此数据集上的模型测试,因此您只需要下载并解压[验证数据集](https://data.vision.ee.ethz.ch/csakarid/shared/GCMA_UIoU/Dark_Zurich_val_anon.zip)。
+
+## Nighttime Driving
+
+由于我们只支持在此数据集上的模型测试,因此您只需要下载并解压[验证数据集](http://data.vision.ee.ethz.ch/daid/NighttimeDriving/NighttimeDrivingTest.zip)。
+
+## LoveDA
+
+LoveDA 数据集可以从[此处](https://drive.google.com/drive/folders/1ibYV0qwn4yuuh068Rnc-w4tPi0U0c-ti?usp=sharing)下载。
+
+或者可以从 [zenodo](https://zenodo.org/record/5706578#.YZvN7SYRXdF) 下载。下载后,无需解压,直接运行以下命令:
+
+```shell
+# 下载 Train.zip
+wget https://zenodo.org/record/5706578/files/Train.zip
+# 下载 Val.zip
+wget https://zenodo.org/record/5706578/files/Val.zip
+# 下载 Test.zip
+wget https://zenodo.org/record/5706578/files/Test.zip
+```
+
+对于 LoveDA 数据集,请运行以下命令调整数据集目录。
+
+```shell
+python tools/dataset_converters/loveda.py /path/to/loveDA
+```
+
+可将模型在 LoveDA 测试集上的预测结果上传至数据集[测试服务器](https://codalab.lisn.upsaclay.fr/competitions/421),查看评测结果。
+
+有关 LoveDA 的更多详细信息,可查看[此处](https://github.com/Junjue-Wang/LoveDA)。
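
+
+数据准备完成后,即可在配置文件中将 `data_root` 指向上述目录进行训练。下面给出一个示意性的数据加载配置片段(假设使用 MMSegmentation 内置的 `LoveDADataset`,各字段含义请参考『了解配置文件』教程):
+
+```python
+# 示意:训练数据加载配置,指向准备好的 data/loveDA 目录
+train_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    sampler=dict(type='InfiniteSampler', shuffle=True),
+    dataset=dict(
+        type='LoveDADataset',
+        data_root='data/loveDA',
+        data_prefix=dict(
+            img_path='img_dir/train', seg_map_path='ann_dir/train'),
+        pipeline=[
+            dict(type='LoadImageFromFile'),
+            dict(type='LoadAnnotations', reduce_zero_label=True),
+            dict(type='PackSegInputs')
+        ]))
+```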
+ +## ISPRS Potsdam + +[Potsdam](https://www2.isprs.org/commissions/comm2/wg4/benchmark/2d-sem-label-potsdam/) 城市语义分割数据集用于 2D 语义分割竞赛 —— Potsdam。 + +数据集可以在竞赛[主页](https://www2.isprs.org/commissions/comm2/wg4/benchmark/data-request-form/)上请求获得。 +实验中需要下载 '2_Ortho_RGB.zip' 和 '5_Labels_all_noBoundary.zip'。 + +对于 Potsdam 数据集,请运行以下命令调整数据集目录。 + +```shell +python tools/dataset_converters/potsdam.py /path/to/potsdam +``` + +在我们的默认设置中,将生成 3456 张图像用于训练和 2016 张图像用于验证。 + +## ISPRS Vaihingen + +[Vaihingen](https://www2.isprs.org/commissions/comm2/wg4/benchmark/2d-sem-label-vaihingen/) 城市语义分割数据集用于 2D 语义分割竞赛 —— Vaihingen。 + +数据集可以在竞赛[主页](https://www2.isprs.org/commissions/comm2/wg4/benchmark/data-request-form/)上请求获得。 +实验中需要下载 'ISPRS_semantic_labeling_Vaihingen.zip' 和 'ISPRS_semantic_labeling_Vaihingen_ground_truth_eroded_COMPLETE.zip'。 + +对于 Vaihingen 数据集,请运行以下命令调整数据集目录。 + +```shell +python tools/dataset_converters/vaihingen.py /path/to/vaihingen +``` + +在我们的默认设置(`clip_size`=512, `stride_size`=256)中,将生成 344 张图像用于训练和 398 张图像用于验证。 + +## iSAID + +iSAID 数据集可从 [DOTA-v1.0](https://captain-whu.github.io/DOTA/dataset.html) 下载训练/验证/测试数据集的图像数据, + +并从 [iSAID](https://captain-whu.github.io/iSAID/dataset.html)下载训练/验证数据集的标注数据。 + +该数据集是航空图像实例分割和语义分割任务的大规模数据集。 + +下载 iSAID 数据集后,您可能需要按照以下结构进行数据集准备。 + +```none +├── data +│ ├── iSAID +│ │ ├── train +│ │ │ ├── images +│ │ │ │ ├── part1.zip +│ │ │ │ ├── part2.zip +│ │ │ │ ├── part3.zip +│ │ │ ├── Semantic_masks +│ │ │ │ ├── images.zip +│ │ ├── val +│ │ │ ├── images +│ │ │ │ ├── part1.zip +│ │ │ ├── Semantic_masks +│ │ │ │ ├── images.zip +│ │ ├── test +│ │ │ ├── images +│ │ │ │ ├── part1.zip +│ │ │ │ ├── part2.zip +``` + +```shell +python tools/dataset_converters/isaid.py /path/to/iSAID +``` + +在我们的默认设置(`patch_width`=896, `patch_height`=896, `overlap_area`=384)中,将生成 33978 张图像用于训练和 11644 张图像用于验证。 + +## LIP(Look Into Person) dataset + +该数据集可以从[此页面](https://lip.sysuhcp.com/overview.php)下载。 + +请运行以下命令来解压数据集。 + +```shell +unzip LIP.zip +cd LIP +unzip TrainVal_images.zip +unzip TrainVal_parsing_annotations.zip +cd TrainVal_parsing_annotations +unzip TrainVal_parsing_annotations.zip +mv train_segmentations ../ +mv val_segmentations ../ +cd .. +``` + +LIP 数据集的内容包括: + +```none +├── data +│ ├── LIP +│ │ ├── train_images +│   │ │ ├── 1000_1234574.jpg +│   │ │ ├── ... +│ │ ├── train_segmentations +│   │ │ ├── 1000_1234574.png +│   │ │ ├── ... +│ │ ├── val_images +│   │ │ ├── 100034_483681.jpg +│   │ │ ├── ... +│ │ ├── val_segmentations +│   │ │ ├── 100034_483681.png +│   │ │ ├── ... +``` + +## Synapse dataset + +此数据集可以从[此页面](https://www.synapse.org/#!Synapse:syn3193805/wiki/)下载。 + +遵循 [TransUNet](https://arxiv.org/abs/2102.04306) 的数据准备设定,将原始训练集(30 次扫描)拆分为新的训练集(18 次扫描)和验证集(12 次扫描)。请运行以下命令来准备数据集。 + +```shell +unzip RawData.zip +cd ./RawData/Training +``` + +然后创建 `train.txt` 和 `val.txt` 以拆分数据集。 + +根据 TransUnet,以下是数据集的划分。 + +train.txt + +```none +img0005.nii.gz +img0006.nii.gz +img0007.nii.gz +img0009.nii.gz +img0010.nii.gz +img0021.nii.gz +img0023.nii.gz +img0024.nii.gz +img0026.nii.gz +img0027.nii.gz +img0028.nii.gz +img0030.nii.gz +img0031.nii.gz +img0033.nii.gz +img0034.nii.gz +img0037.nii.gz +img0039.nii.gz +img0040.nii.gz +``` + +val.txt + +```none +img0008.nii.gz +img0022.nii.gz +img0038.nii.gz +img0036.nii.gz +img0032.nii.gz +img0002.nii.gz +img0029.nii.gz +img0003.nii.gz +img0001.nii.gz +img0004.nii.gz +img0025.nii.gz +img0035.nii.gz +``` + +synapse 数据集的内容包括: + +```none +├── Training +│ ├── img +│ │ ├── img0001.nii.gz +│ │ ├── img0002.nii.gz +│ │ ├── ... 
+│ ├── label +│ │ ├── label0001.nii.gz +│ │ ├── label0002.nii.gz +│ │ ├── ... +│ ├── train.txt +│ ├── val.txt +``` + +然后,使用此命令转换 synapse 数据集。 + +```shell +python tools/dataset_converters/synapse.py --dataset-path /path/to/synapse +``` + +注意,MMSegmentation 的默认评估指标(例如 mean dice value)是在 2D 切片图像上计算的,这与 [TransUNet](https://arxiv.org/abs/2102.04306) 等一些论文中的 3D 扫描结果是不同的。 + +## REFUGE + +在 [REFUGE Challenge](https://refuge.grand-challenge.org) 官网上注册并下载 [REFUGE 数据集](https://refuge.grand-challenge.org/REFUGE2Download)。 + +然后,解压 `REFUGE2.zip`,原始数据集的内容包括: + +```none +├── REFUGE2 +│ ├── REFUGE2 +│ │ ├── Annotation-Training400.zip +│ │ ├── REFUGE-Test400.zip +│ │ ├── REFUGE-Test-GT.zip +│ │ ├── REFUGE-Training400.zip +│ │ ├── REFUGE-Validation400.zip +│ │ ├── REFUGE-Validation400-GT.zip +│ ├── __MACOSX +``` + +请运行以下命令转换 REFUGE 数据集: + +```shell +python tools/convert_datasets/refuge.py --raw_data_root=/path/to/refuge/REFUGE2/REFUGE2 +``` + +脚本会将目录结构转换如下: + +```none +│ ├── REFUGE +│ │ ├── images +│ │ │ ├── training +│ │ │ ├── validation +│ │ │ ├── test +│ │ ├── annotations +│ │ │ ├── training +│ │ │ ├── validation +│ │ │ ├── test +``` + +包含 400 张用于训练的图像、400 张用于验证的图像和 400 张用于测试的图像,这与 REFUGE 2018 数据集相同。 + +## Mapillary Vistas Datasets + +- Mapillary Vistas [官方网站](https://www.mapillary.com/dataset/vistas) 可以下载 Mapillary Vistas 数据集,按照官网要求注册并登陆后,数据可以在[这里](https://www.mapillary.com/dataset/vistas)找到。 + +- Mapillary Vistas 数据集使用 8-bit with color-palette 来存储标签。不需要进行转换操作。 + +- 假设您已将数据集 zip 文件放在 `mmsegmentation/data/mapillary` 中 + +- 请运行以下命令来解压数据集。 + + ```bash + cd data/mapillary + unzip An-ZjB1Zm61yAZG0ozTymz8I8NqI4x0MrYrh26dq7kPgfu8vf9ImrdaOAVOFYbJ2pNAgUnVGBmbue9lTgdBOb5BbKXIpFs0fpYWqACbrQDChAA2fdX0zS9PcHu7fY8c-FOvyBVxPNYNFQuM.zip + ``` + +- 解压后,您将获得类似于此结构的 Mapillary Vistas 数据集。语义分割 mask 标签在 `labels` 文件夹中。 + + ```none + mmsegmentation + ├── mmseg + ├── tools + ├── configs + ├── data + │ ├── mapillary + │ │ ├── training + │ │ │ ├── images + │ │ │ ├── v1.2 + | │ │ │ ├── instances + | │ │ │ ├── labels + | │   │   │ └── panoptic + │ │ │ ├── v2.0 + | │ │ │ ├── instances + | │ │ │ ├── labels + | │ │ │ ├── panoptic + | │   │   │ └── polygons + │ │ ├── validation + │ │ │ ├── images + | │ │ ├── v1.2 + | │ │ │ ├── instances + | │ │ │ ├── labels + | │   │   │ └── panoptic + │ │ │ ├── v2.0 + | │ │ │ ├── instances + | │ │ │ ├── labels + | │ │ │ ├── panoptic + | │   │   │ └── polygons + ``` + +- 您可以在配置中使用 `MapillaryDataset_v1` 和 `Mapillary Dataset_v2` 设置数据集版本。 + 在此处 [V1.2](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/_base_/datasets/mapillary_v1.py) 和 [V2.0](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/_base_/datasets/mapillary_v2.py) 查看 Mapillary Vistas 数据集配置文件 diff --git a/docs/zh_cn/user_guides/3_inference.md b/docs/zh_cn/user_guides/3_inference.md index d2fe60076f..0afcb4b05d 100644 --- a/docs/zh_cn/user_guides/3_inference.md +++ b/docs/zh_cn/user_guides/3_inference.md @@ -1,3 +1,244 @@ -## 使用预训练模型推理(待更新) +# 教程3:使用预训练模型推理 -中文版文档支持中,请先阅读[英文版本](../../en/user_guides/3_inference.md) +MMSegmentation 在 [Model Zoo](../Model_Zoo.md) 中为语义分割提供了预训练的模型,并支持多个标准数据集,包括 Cityscapes、ADE20K 等。 +本说明将展示如何使用现有模型对给定图像进行推理。 +关于如何在标准数据集上测试现有模型,请参阅本[指南](./4_train_test.md) + +MMSegmentation 为用户提供了数个接口,以便轻松使用预训练的模型进行推理。 + +- [教程3:使用预训练模型推理](#教程3使用预训练模型推理) + - [推理器](#推理器) + - [基本使用](#基本使用) + - [初始化](#初始化) + - [可视化预测结果](#可视化预测结果) + - [模型列表](#模型列表) + - [推理 API](#推理-api) + - [mmseg.apis.init_model](#mmsegapisinit_model) + - [mmseg.apis.inference_model](#mmsegapisinference_model) + - 
[mmseg.apis.show_result_pyplot](#mmsegapisshow_result_pyplot) + +## 推理器 + +在 MMSegmentation 中,我们提供了最**方便的**方式 `MMSegInferencer` 来使用模型。您只需 3 行代码就可以获得图像的分割掩膜。 + +### 基本使用 + +以下示例展示了如何使用 `MMSegInferencer` 对单个图像执行推理。 + +``` +>>> from mmseg.apis import MMSegInferencer +>>> # 将模型加载到内存中 +>>> inferencer = MMSegInferencer(model='deeplabv3plus_r18-d8_4xb2-80k_cityscapes-512x1024') +>>> # 推理 +>>> inferencer('demo/demo.png', show=True) +``` + +可视化结果应如下所示: + +
+ +
+ +此外,您可以使用 `MMSegInferencer` 来处理一个包含多张图片的 `list`: + +``` +# 输入一个图片 list +>>> images = [image1, image2, ...] # image1 可以是文件路径或 np.ndarray +>>> inferencer(images, show=True, wait_time=0.5) # wait_time 是延迟时间,0 表示无限 + +# 或输入图像目录 +>>> images = $IMAGESDIR +>>> inferencer(images, show=True, wait_time=0.5) + +# 保存可视化渲染彩色分割图和预测结果 +# out_dir 是保存输出结果的目录,img_out_dir 和 pred_out_dir 为 out_dir 的子目录 +# 以保存可视化渲染彩色分割图和预测结果 +>>> inferencer(images, out_dir='outputs', img_out_dir='vis', pred_out_dir='pred') +``` + +推理器有一个可选参数 `return_datasamples`,其默认值为 False,推理器的返回值默认为 `dict` 类型,包括 'visualization' 和 'predictions' 两个 key。 +如果 `return_datasamples=True` 推理器将返回 [`SegDataSample`](../advanced_guides/structures.md) 或其列表。 + +``` +result = inferencer('demo/demo.png') +# 结果是一个包含 'visualization' 和 'predictions' 两个 key 的 `dict` +# 'visualization' 包含彩色分割图 +print(result['visualization'].shape) +# (512, 683, 3) + +# 'predictions' 包含带有标签索引的分割掩膜 +print(result['predictions'].shape) +# (512, 683) + +result = inferencer('demo/demo.png', return_datasamples=True) +print(type(result)) +# + +# 输入一个图片 list +results = inferencer(images) +# 输出为列表 +print(type(results['visualization']), results['visualization'][0].shape) +# (512, 683, 3) +print(type(results['predictions']), results['predictions'][0].shape) +# (512, 683) + +results = inferencer(images, return_datasamples=True) +# +print(type(results[0])) +# +``` + +### 初始化 + +`MMSegInferencer` 必须使用 `model` 初始化,该 `model` 可以是模型名称或一个 `Config`,甚至可以是配置文件的路径。 +模型名称可以在模型的元文件(configs/xxx/metafile.yaml)中找到,比如 maskformer 的一个模型名称是 `maskformer_r50-d32_8xb2-160k_ade20k-512x512`,如果输入模型名称,模型的权重将自动下载。以下是其他输入参数: + +- weights(str,可选)- 权重的路径。如果未指定,并且模型是元文件中的模型名称,则权重将从元文件加载。默认为 None。 +- classes(list,可选)- 输入类别用于结果渲染,由于分割模型的预测结构是标签索引的分割图,`classes` 是一个相应的标签索引的类别列表。若 classes 没有定义,可视化工具将默认使用 `cityscapes` 的类别。默认为 None。 +- palette(list,可选)- 输入调色盘用于结果渲染,它是对应分类的配色列表。若 palette 没有定义,可视化工具将默认使用 `cityscapes` 的调色盘。默认为 None。 +- dataset_name(str,可选)- [数据集名称或别名](https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/utils/class_names.py#L302-L317),可视化工具将使用数据集的元信息,如类别和配色,但 `classes` 和 `palette` 具有更高的优先级。默认为 None。 +- device(str,可选)- 运行推理的设备。如果无,则会自动使用可用的设备。默认为 None。 +- scope(str,可选)- 模型的作用域。默认为 'mmseg'。 + +### 可视化预测结果 + +`MMSegInferencer` 有4个用于可视化预测的参数,您可以在初始化推理器时使用它们: + +- show(bool)- 是否弹出窗口显示图像。默认为 False。 +- wait_time(float)- 显示的间隔。默认值为 0。 +- img_out_dir(str)- `out_dir` 的子目录,用于保存渲染有色分割掩膜,因此如果要保存预测掩膜,则必须定义 `out_dir`。默认为 `vis`。 +- opacity(int,float)- 分割掩膜的透明度。默认值为 0.8。 + +这些参数的示例请参考[基本使用](#基本使用) + +### 模型列表 + +在 MMSegmentation 中有一个非常容易列出所有模型名称的方法 + +``` +>>> from mmseg.apis import MMSegInferencer +# models 是一个模型名称列表,它们将自动打印 +>>> models = MMSegInferencer.list_models('mmseg') +``` + +## 推理 API + +### mmseg.apis.init_model + +从配置文件初始化一个分割器。 + +参数: + +- config(str,`Path` 或 `mmengine.Config`)- 配置文件路径或配置对象。 +- checkpoint(str,可选)- 权重路径。如果为 None,则模型将不会加载任何权重。 +- device(str,可选)- CPU/CUDA 设备选项。默认为 'cuda:0'。 +- cfg_options(dict,可选)- 用于覆盖所用配置中的某些设置的选项。 + +返回值: + +- nn.Module:构建好的分割器。 + +示例: + +```python +from mmseg.apis import init_model + +config_path = 'configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py' +checkpoint_path = 'checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth' + +# 初始化不带权重的模型 +model = init_model(config_path) + +# 初始化模型并加载权重 +model = init_model(config_path, checkpoint_path) + +# 在 CPU 上的初始化模型并加载权重 +model = init_model(config_path, checkpoint_path, 'cpu') +``` + +### mmseg.apis.inference_model + +使用分割器推理图像。 + +参数: + +- model(nn.Module)- 加载的分割器 +- imgs(str,np.ndarray 或 
list\[str/np.ndarray\])- 图像文件或加载的图像
+
+返回值:
+
+- `SegDataSample` 或 list\[`SegDataSample`\]:如果 imgs 是列表或元组,则返回相同长度的列表类型结果,否则直接返回分割结果。
+
+**注意:** [SegDataSample](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/structures/seg_data_sample.py) 是 MMSegmentation 的数据结构,用作不同组件之间的数据接口。`SegDataSample` 实现抽象数据元素 `mmengine.structures.BaseDataElement`,请参阅 [MMEngine](https://github.com/open-mmlab/mmengine) 中的数据元素[文档](https://mmengine.readthedocs.io/zh_CN/latest/advanced_tutorials/data_element.html)了解更多信息。
+
+`SegDataSample` 中的参数分为几个部分:
+
+- `gt_sem_seg`(`PixelData`)- 语义分割的标注。
+- `pred_sem_seg`(`PixelData`)- 语义分割的预测。
+- `seg_logits`(`PixelData`)- 模型最后一层的输出结果。
+
+**注意:** [PixelData](https://github.com/open-mmlab/mmengine/blob/main/mmengine/structures/pixel_data.py) 是像素级标注或预测的数据结构,请参阅 [MMEngine](https://github.com/open-mmlab/mmengine) 中的 PixelData [文档](https://mmengine.readthedocs.io/en/latest/advanced_tutorials/data_element.html)了解更多信息。
+
+示例:
+
+```python
+from mmseg.apis import init_model, inference_model
+
+config_path = 'configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py'
+checkpoint_path = 'checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'
+img_path = 'demo/demo.png'
+
+
+model = init_model(config_path, checkpoint_path)
+result = inference_model(model, img_path)
+```
+
+### mmseg.apis.show_result_pyplot
+
+在图像上可视化分割结果。
+
+参数:
+
+- model(nn.Module)- 加载的分割器。
+- img(str 或 np.ndarray)- 图像文件名或加载的图像。
+- result(`SegDataSample`)- `SegDataSample` 类型的预测结果。
+- opacity(float)- 绘制分割图的不透明度。默认值为 `0.5`,必须在 `(0,1]` 范围内。
+- title(str)- pyplot 图的标题。默认值为 ''。
+- draw_gt(bool)- 是否绘制 GT SegDataSample。默认为 `True`。
+- draw_pred(bool)- 是否绘制预测 SegDataSample。默认为 `True`。
+- wait_time(float)- 显示的间隔,0 是表示“无限”的特殊值。默认为 `0`。
+- show(bool)- 是否展示绘制的图像。默认为 `True`。
+- save_dir(str,可选)- 保存文件的路径,用于所有存储后端。如果为 `None`,则后端存储将不会保存任何数据。
+- out_file(str,可选)- 输出文件的路径。默认为 `None`。
+
+返回值:
+
+- np.ndarray:通道为 RGB 的绘制图像。
+
+示例:
+
+```python
+from mmseg.apis import init_model, inference_model, show_result_pyplot
+
+config_path = 'configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py'
+checkpoint_path = 'checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'
+img_path = 'demo/demo.png'
+
+
+# 从配置文件和权重文件构建模型
+model = init_model(config_path, checkpoint_path, device='cuda:0')
+
+# 推理给定图像
+result = inference_model(model, img_path)
+
+# 展示分割结果
+vis_image = show_result_pyplot(model, img_path, result)
+
+# 保存可视化结果,输出图像将在 `work_dirs/result.png` 路径下找到
+vis_image = show_result_pyplot(model, img_path, result, out_file='work_dirs/result.png')
+
+# 修改展示图像的时间,注意 0 是表示“无限”的特殊值
+vis_image = show_result_pyplot(model, img_path, result, wait_time=5)
+```
+
+**注意:** 如果当前设备没有图形用户界面,建议将 `show` 设置为 `False`,并指定 `out_file` 或 `save_dir` 来保存结果。如果您想在窗口上显示结果,则不需要特殊设置。
diff --git a/docs/zh_cn/user_guides/4_train_test.md b/docs/zh_cn/user_guides/4_train_test.md
index 309e046b2d..f821acaf52 100644
--- a/docs/zh_cn/user_guides/4_train_test.md
+++ b/docs/zh_cn/user_guides/4_train_test.md
@@ -223,3 +223,95 @@ GPUS=4 sh tools/slurm_train.sh dev pspnet configs/pspnet/pspnet_r50-d8_512x1024_
 CUDA_VISIBLE_DEVICES=0,1,2,3 GPUS=4 MASTER_PORT=29500 sh tools/slurm_train.sh ${分区} ${任务名} config1.py ${工作路径}
 CUDA_VISIBLE_DEVICES=4,5,6,7 GPUS=4 MASTER_PORT=29501 sh tools/slurm_train.sh ${分区} ${任务名} config2.py ${工作路径}
 ```
+
+## 测试并保存分割结果
+
+### 基础使用
+
+当需要保存测试输出的分割结果时,可用 `--out` 指定分割结果的输出路径:
+
+```shell
+python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} --out ${OUTPUT_DIR}
+```
+
+以保存模型 `fcn_r50-d8_4xb4-80k_ade20k-512x512`
在 ADE20K 验证数据集上的结果为例: + +```shell +python tools/test.py configs/fcn/fcn_r50-d8_4xb4-80k_ade20k-512x512.py ckpt/fcn_r50-d8_512x512_80k_ade20k_20200614_144016-f8ac5082.pth --out work_dirs/format_results +``` + +或者通过配置文件定义 `output_dir`。例如在 `configs/fcn/fcn_r50-d8_4xb4-80k_ade20k-512x512.py` 添加 `test_evaluator` 定义: + +```python +test_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'], output_dir='work_dirs/format_results') +``` + +然后执行相同功能的命令不需要再使用 `--out`: + +```shell +python tools/test.py configs/fcn/fcn_r50-d8_4xb4-80k_ade20k-512x512.py ckpt/fcn_r50-d8_512x512_80k_ade20k_20200614_144016-f8ac5082.pth +``` + +当测试的数据集没有提供标注,评测时没有真值可以参与计算,因此需要设置 `format_only=True`, +同时需要修改 `test_dataloader`,由于没有标注,我们需要在数据增强变换中删掉 `dict(type='LoadAnnotations')`,以下是一个配置示例: + +```python +test_evaluator = dict( + type='IoUMetric', + iou_metrics=['mIoU'], + format_only=True, + output_dir='work_dirs/format_results') +test_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type = 'ADE20KDataset' + data_root='data/ade/release_test', + data_prefix=dict(img_path='testing'), + # 测试数据变换中没有加载标注 + pipeline=[ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(2048, 512), keep_ratio=True), + dict(type='PackSegInputs') + ])) +``` + +然后执行测试命令: + +```shell +python tools/test.py configs/fcn/fcn_r50-d8_4xb4-80k_ade20k-512x512.py ckpt/fcn_r50-d8_512x512_80k_ade20k_20200614_144016-f8ac5082.pth +``` + +### 测试 Cityscapes 数据集并保存输出分割结果 + +推荐使用 `CityscapesMetric` 来保存模型在 Cityscapes 数据集上的测试结果,以下是一个配置示例: + +```python +test_evaluator = dict( + type='CityscapesMetric', + format_only=True, + keep_results=True, + output_dir='work_dirs/format_results') +test_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type='CityscapesDataset', + data_root='data/cityscapes/', + data_prefix=dict(img_path='leftImg8bit/test'), + pipeline=[ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(2048, 1024), keep_ratio=True), + dict(type='PackSegInputs') + ])) +``` + +然后执行相同的命令,例如: + +```shell +python tools/test.py configs/fcn/fcn_r18-d8_4xb2-80k_cityscapes-512x1024.py ckpt/fcn_r18-d8_512x1024_80k_cityscapes_20201225_021327-6c50f8b4.pth +``` diff --git a/mmseg/apis/mmseg_inferencer.py b/mmseg/apis/mmseg_inferencer.py index cb387b10b3..1c72285c56 100644 --- a/mmseg/apis/mmseg_inferencer.py +++ b/mmseg/apis/mmseg_inferencer.py @@ -30,7 +30,7 @@ class MMSegInferencer(BaseInferencer): Args: model (str, optional): Path to the config file or the model name - defined in metafile. Take the `mmseg metafile `_ + defined in metafile. Take the `mmseg metafile `_ as an example the `model` could be "fcn_r50-d8_4xb2-40k_cityscapes-512x1024", and the weights of model will be download automatically. If use config file, like @@ -48,7 +48,7 @@ class MMSegInferencer(BaseInferencer): a list of color palette responding to the classes. If palette is not defined, visualizer will take `cityscapes` palette by default. Defaults to None. - dataset_name (str, optional): `Dataset name or alias `_ + dataset_name (str, optional): `Dataset name or alias `_ visulizer will use the meta information of the dataset i.e. classes and palette, but the `classes` and `palette` have higher priority. Defaults to None. 
diff --git a/mmseg/datasets/__init__.py b/mmseg/datasets/__init__.py index 0dd19ee312..a90d53c88e 100644 --- a/mmseg/datasets/__init__.py +++ b/mmseg/datasets/__init__.py @@ -14,6 +14,7 @@ from .isprs import ISPRSDataset from .lip import LIPDataset from .loveda import LoveDADataset +from .mapillary import MapillaryDataset_v1, MapillaryDataset_v2 from .night_driving import NightDrivingDataset from .pascal_context import PascalContextDataset, PascalContextDataset59 from .potsdam import PotsdamDataset @@ -49,5 +50,6 @@ 'DecathlonDataset', 'LIPDataset', 'ResizeShortestEdge', 'BioMedicalGaussianNoise', 'BioMedicalGaussianBlur', 'BioMedicalRandomGamma', 'BioMedical3DPad', 'RandomRotFlip', - 'SynapseDataset', 'REFUGEDataset' + 'SynapseDataset', 'REFUGEDataset', 'MapillaryDataset_v1', + 'MapillaryDataset_v2' ] diff --git a/mmseg/datasets/dataset_wrappers.py b/mmseg/datasets/dataset_wrappers.py index 933eb50d99..082c116ff4 100644 --- a/mmseg/datasets/dataset_wrappers.py +++ b/mmseg/datasets/dataset_wrappers.py @@ -106,11 +106,11 @@ def __getitem__(self, idx): continue if hasattr(transform, 'get_indices'): - indexes = transform.get_indices(self.dataset) - if not isinstance(indexes, collections.abc.Sequence): - indexes = [indexes] + indices = transform.get_indices(self.dataset) + if not isinstance(indices, collections.abc.Sequence): + indices = [indices] mix_results = [ - copy.deepcopy(self.dataset[index]) for index in indexes + copy.deepcopy(self.dataset[index]) for index in indices ] results['mix_results'] = mix_results diff --git a/projects/mapillary_dataset/mmseg/datasets/mapillary_v2_0.py b/mmseg/datasets/mapillary.py similarity index 66% rename from projects/mapillary_dataset/mmseg/datasets/mapillary_v2_0.py rename to mmseg/datasets/mapillary.py index 9c67a8b212..6c2947338e 100644 --- a/projects/mapillary_dataset/mmseg/datasets/mapillary_v2_0.py +++ b/mmseg/datasets/mapillary.py @@ -1,10 +1,72 @@ # Copyright (c) OpenMMLab. All rights reserved. -from mmseg.datasets.basesegdataset import BaseSegDataset from mmseg.registry import DATASETS +from .basesegdataset import BaseSegDataset @DATASETS.register_module() -class MapillaryDataset_v2_0(BaseSegDataset): +class MapillaryDataset_v1(BaseSegDataset): + """Mapillary Vistas Dataset. + + Dataset paper link: + http://ieeexplore.ieee.org/document/8237796/ + + v1.2 contain 66 object classes. + (37 instance-specific) + + v2.0 contain 124 object classes. + (70 instance-specific, 46 stuff, 8 void or crowd). + + The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is + fixed to '.png' for Mapillary Vistas Dataset. 
+ """ + METAINFO = dict( + classes=('Bird', 'Ground Animal', 'Curb', 'Fence', 'Guard Rail', + 'Barrier', 'Wall', 'Bike Lane', 'Crosswalk - Plain', + 'Curb Cut', 'Parking', 'Pedestrian Area', 'Rail Track', + 'Road', 'Service Lane', 'Sidewalk', 'Bridge', 'Building', + 'Tunnel', 'Person', 'Bicyclist', 'Motorcyclist', + 'Other Rider', 'Lane Marking - Crosswalk', + 'Lane Marking - General', 'Mountain', 'Sand', 'Sky', 'Snow', + 'Terrain', 'Vegetation', 'Water', 'Banner', 'Bench', + 'Bike Rack', 'Billboard', 'Catch Basin', 'CCTV Camera', + 'Fire Hydrant', 'Junction Box', 'Mailbox', 'Manhole', + 'Phone Booth', 'Pothole', 'Street Light', 'Pole', + 'Traffic Sign Frame', 'Utility Pole', 'Traffic Light', + 'Traffic Sign (Back)', 'Traffic Sign (Front)', 'Trash Can', + 'Bicycle', 'Boat', 'Bus', 'Car', 'Caravan', 'Motorcycle', + 'On Rails', 'Other Vehicle', 'Trailer', 'Truck', + 'Wheeled Slow', 'Car Mount', 'Ego Vehicle', 'Unlabeled'), + palette=[[165, 42, 42], [0, 192, 0], [196, 196, 196], [190, 153, 153], + [180, 165, 180], [90, 120, 150], [102, 102, 156], + [128, 64, 255], [140, 140, 200], [170, 170, 170], + [250, 170, 160], [96, 96, 96], + [230, 150, 140], [128, 64, 128], [110, 110, 110], + [244, 35, 232], [150, 100, 100], [70, 70, 70], [150, 120, 90], + [220, 20, 60], [255, 0, 0], [255, 0, 100], [255, 0, 200], + [200, 128, 128], [255, 255, 255], [64, 170, + 64], [230, 160, 50], + [70, 130, 180], [190, 255, 255], [152, 251, 152], + [107, 142, 35], [0, 170, 30], [255, 255, 128], [250, 0, 30], + [100, 140, 180], [220, 220, 220], [220, 128, 128], + [222, 40, 40], [100, 170, 30], [40, 40, 40], [33, 33, 33], + [100, 128, 160], [142, 0, 0], [70, 100, 150], [210, 170, 100], + [153, 153, 153], [128, 128, 128], [0, 0, 80], [250, 170, 30], + [192, 192, 192], [220, 220, 0], [140, 140, 20], [119, 11, 32], + [150, 0, 255], [0, 60, 100], [0, 0, 142], [0, 0, 90], + [0, 0, 230], [0, 80, 100], [128, 64, 64], [0, 0, 110], + [0, 0, 70], [0, 0, 192], [32, 32, 32], [120, 10, + 10], [0, 0, 0]]) + + def __init__(self, + img_suffix='.jpg', + seg_map_suffix='.png', + **kwargs) -> None: + super().__init__( + img_suffix=img_suffix, seg_map_suffix=seg_map_suffix, **kwargs) + + +@DATASETS.register_module() +class MapillaryDataset_v2(BaseSegDataset): """Mapillary Vistas Dataset. Dataset paper link: diff --git a/mmseg/datasets/transforms/formatting.py b/mmseg/datasets/transforms/formatting.py index 4391161dfd..89fd883791 100644 --- a/mmseg/datasets/transforms/formatting.py +++ b/mmseg/datasets/transforms/formatting.py @@ -1,4 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. 
+import warnings + import numpy as np from mmcv.transforms import to_tensor from mmcv.transforms.base import BaseTransform @@ -42,7 +44,7 @@ class PackSegInputs(BaseTransform): def __init__(self, meta_keys=('img_path', 'seg_map_path', 'ori_shape', 'img_shape', 'pad_shape', 'scale_factor', 'flip', - 'flip_direction')): + 'flip_direction', 'reduce_zero_label')): self.meta_keys = meta_keys def transform(self, results: dict) -> dict: @@ -72,9 +74,16 @@ def transform(self, results: dict) -> dict: data_sample = SegDataSample() if 'gt_seg_map' in results: - gt_sem_seg_data = dict( - data=to_tensor(results['gt_seg_map'][None, - ...].astype(np.int64))) + if len(results['gt_seg_map'].shape) == 2: + data = to_tensor(results['gt_seg_map'][None, + ...].astype(np.int64)) + else: + warnings.warn('Please pay attention your ground truth ' + 'segmentation map, usually the segmentation ' + 'map is 2D, but got ' + f'{results["gt_seg_map"].shape}') + data = to_tensor(results['gt_seg_map'].astype(np.int64)) + gt_sem_seg_data = dict(data=data) data_sample.gt_sem_seg = PixelData(**gt_sem_seg_data) if 'gt_edge_map' in results: diff --git a/mmseg/datasets/transforms/transforms.py b/mmseg/datasets/transforms/transforms.py index ef4e78dd8c..fb7e2a0e66 100644 --- a/mmseg/datasets/transforms/transforms.py +++ b/mmseg/datasets/transforms/transforms.py @@ -1029,17 +1029,17 @@ def transform(self, results: dict) -> dict: return results def get_indices(self, dataset: MultiImageMixDataset) -> list: - """Call function to collect indexes. + """Call function to collect indices. Args: dataset (:obj:`MultiImageMixDataset`): The dataset. Returns: - list: indexes. + list: indices. """ - indexes = [random.randint(0, len(dataset)) for _ in range(3)] - return indexes + indices = [random.randint(0, len(dataset)) for _ in range(3)] + return indices @cache_randomness def generate_mosaic_center(self): @@ -1062,8 +1062,9 @@ def _mosaic_transform_img(self, results: dict) -> dict: assert 'mix_results' in results if len(results['img'].shape) == 3: + c = results['img'].shape[2] mosaic_img = np.full( - (int(self.img_scale[0] * 2), int(self.img_scale[1] * 2), 3), + (int(self.img_scale[0] * 2), int(self.img_scale[1] * 2), c), self.pad_val, dtype=results['img'].dtype) else: diff --git a/mmseg/evaluation/__init__.py b/mmseg/evaluation/__init__.py index c28bb75cb4..a82008f3ad 100644 --- a/mmseg/evaluation/__init__.py +++ b/mmseg/evaluation/__init__.py @@ -1,4 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. -from .metrics import CitysMetric, IoUMetric +from .metrics import CityscapesMetric, IoUMetric -__all__ = ['IoUMetric', 'CitysMetric'] +__all__ = ['IoUMetric', 'CityscapesMetric'] diff --git a/mmseg/evaluation/metrics/__init__.py b/mmseg/evaluation/metrics/__init__.py index aec08bb071..0aa39e480c 100644 --- a/mmseg/evaluation/metrics/__init__.py +++ b/mmseg/evaluation/metrics/__init__.py @@ -1,5 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. -from .citys_metric import CitysMetric +from .citys_metric import CityscapesMetric from .iou_metric import IoUMetric -__all__ = ['IoUMetric', 'CitysMetric'] +__all__ = ['IoUMetric', 'CityscapesMetric'] diff --git a/mmseg/evaluation/metrics/citys_metric.py b/mmseg/evaluation/metrics/citys_metric.py index 50e9ea68a0..32984653c3 100644 --- a/mmseg/evaluation/metrics/citys_metric.py +++ b/mmseg/evaluation/metrics/citys_metric.py @@ -1,30 +1,41 @@ # Copyright (c) OpenMMLab. All rights reserved. 
import os.path as osp -from typing import Dict, List, Optional, Sequence +import shutil +from collections import OrderedDict +from typing import Dict, Optional, Sequence + +try: + + import cityscapesscripts.evaluation.evalPixelLevelSemanticLabeling as CSEval # noqa + import cityscapesscripts.helpers.labels as CSLabels +except ImportError: + CSLabels = None + CSEval = None import numpy as np +from mmengine.dist import is_main_process, master_only from mmengine.evaluator import BaseMetric from mmengine.logging import MMLogger, print_log -from mmengine.utils import mkdir_or_exist, scandir +from mmengine.utils import mkdir_or_exist from PIL import Image from mmseg.registry import METRICS @METRICS.register_module() -class CitysMetric(BaseMetric): +class CityscapesMetric(BaseMetric): """Cityscapes evaluation metric. Args: + output_dir (str): The directory for output prediction ignore_index (int): Index that will be ignored in evaluation. Default: 255. - citys_metrics (list[str] | str): Metrics to be evaluated, - Default: ['cityscapes']. - to_label_id (bool): whether convert output to label_id for - submission. Default: True. - suffix (str): The filename prefix of the png files. - If the prefix is "somepath/xxx", the png files will be - named "somepath/xxx.png". Default: '.format_cityscapes'. + format_only (bool): Only format result for results commit without + perform evaluation. It is useful when you want to format the result + to a specific format and submit it to the test server. + Defaults to False. + keep_results (bool): Whether to keep the results. When ``format_only`` + is True, ``keep_results`` must be True. Defaults to False. collect_device (str): Device name used for collecting results from different ranks during distributed training. Must be 'cpu' or 'gpu'. Defaults to 'cpu'. @@ -35,19 +46,35 @@ class CitysMetric(BaseMetric): """ def __init__(self, + output_dir: str, ignore_index: int = 255, - citys_metrics: List[str] = ['cityscapes'], - to_label_id: bool = True, - suffix: str = '.format_cityscapes', + format_only: bool = False, + keep_results: bool = False, collect_device: str = 'cpu', - prefix: Optional[str] = None) -> None: + prefix: Optional[str] = None, + **kwargs) -> None: super().__init__(collect_device=collect_device, prefix=prefix) - + if CSEval is None: + raise ImportError('Please run "pip install cityscapesscripts" to ' + 'install cityscapesscripts first.') + self.output_dir = output_dir self.ignore_index = ignore_index - self.metrics = citys_metrics - assert self.metrics[0] == 'cityscapes' - self.to_label_id = to_label_id - self.suffix = suffix + + self.format_only = format_only + if format_only: + assert keep_results, ( + 'When format_only is True, the results must be keep, please ' + f'set keep_results as True, but got {keep_results}') + self.keep_results = keep_results + self.prefix = prefix + if is_main_process(): + mkdir_or_exist(self.output_dir) + + @master_only + def __del__(self) -> None: + """Clean up.""" + if not self.keep_results: + shutil.rmtree(self.output_dir) def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None: """Process one batch of data and data_samples. @@ -59,26 +86,27 @@ def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None: data_batch (dict): A batch of data from the dataloader. data_samples (Sequence[dict]): A batch of outputs from the model. 
""" - mkdir_or_exist(self.suffix) + mkdir_or_exist(self.output_dir) for data_sample in data_samples: pred_label = data_sample['pred_sem_seg']['data'][0].cpu().numpy() - # results2img - if self.to_label_id: - pred_label = self._convert_to_label_id(pred_label) + # when evaluating with official cityscapesscripts, + # labelIds should be used + pred_label = self._convert_to_label_id(pred_label) basename = osp.splitext(osp.basename(data_sample['img_path']))[0] - png_filename = osp.join(self.suffix, f'{basename}.png') + png_filename = osp.abspath( + osp.join(self.output_dir, f'{basename}.png')) output = Image.fromarray(pred_label.astype(np.uint8)).convert('P') - import cityscapesscripts.helpers.labels as CSLabels - palette = np.zeros((len(CSLabels.id2label), 3), dtype=np.uint8) - for label_id, label in CSLabels.id2label.items(): - palette[label_id] = label.color - output.putpalette(palette) output.save(png_filename) - - ann_dir = osp.join(data_samples[0]['seg_map_path'].split('val')[0], - 'val') - self.results.append(ann_dir) + if self.format_only: + # format_only always for test dataset without ground truth + gt_filename = '' + else: + # when evaluating with official cityscapesscripts, + # **_gtFine_labelIds.png is used + gt_filename = data_sample['seg_map_path'].replace( + 'labelTrainIds.png', 'labelIds.png') + self.results.append((png_filename, gt_filename)) def compute_metrics(self, results: list) -> Dict[str, float]: """Compute the metrics from processed results. @@ -90,38 +118,28 @@ def compute_metrics(self, results: list) -> Dict[str, float]: dict[str: float]: Cityscapes evaluation results. """ logger: MMLogger = MMLogger.get_current_instance() - try: - import cityscapesscripts.evaluation.evalPixelLevelSemanticLabeling as CSEval # noqa - except ImportError: - raise ImportError('Please run "pip install cityscapesscripts" to ' - 'install cityscapesscripts first.') - msg = 'Evaluating in Cityscapes style' + if self.format_only: + logger.info(f'results are saved to {osp.dirname(self.output_dir)}') + return OrderedDict() + msg = 'Evaluating in Cityscapes style' if logger is None: msg = '\n' + msg print_log(msg, logger=logger) - result_dir = self.suffix - eval_results = dict() - print_log(f'Evaluating results under {result_dir} ...', logger=logger) + print_log( + f'Evaluating results under {self.output_dir} ...', logger=logger) CSEval.args.evalInstLevelScore = True - CSEval.args.predictionPath = osp.abspath(result_dir) + CSEval.args.predictionPath = osp.abspath(self.output_dir) CSEval.args.evalPixelAccuracy = True CSEval.args.JSONOutput = False - seg_map_list = [] - pred_list = [] - ann_dir = results[0] - # when evaluating with official cityscapesscripts, - # **_gtFine_labelIds.png is used - for seg_map in scandir(ann_dir, 'gtFine_labelIds.png', recursive=True): - seg_map_list.append(osp.join(ann_dir, seg_map)) - pred_list.append(CSEval.getPrediction(CSEval.args, seg_map)) + pred_list, gt_list = zip(*results) metric = dict() eval_results.update( - CSEval.evaluateImgLists(pred_list, seg_map_list, CSEval.args)) + CSEval.evaluateImgLists(pred_list, gt_list, CSEval.args)) metric['averageScoreCategories'] = eval_results[ 'averageScoreCategories'] metric['averageScoreInstCategories'] = eval_results[ @@ -133,7 +151,6 @@ def _convert_to_label_id(result): """Convert trainId to id for cityscapes.""" if isinstance(result, str): result = np.load(result) - import cityscapesscripts.helpers.labels as CSLabels result_copy = result.copy() for trainId, label in CSLabels.trainId2label.items(): result_copy[result 
== trainId] = label.id diff --git a/mmseg/evaluation/metrics/iou_metric.py b/mmseg/evaluation/metrics/iou_metric.py index a152ef9dd6..16014c7400 100644 --- a/mmseg/evaluation/metrics/iou_metric.py +++ b/mmseg/evaluation/metrics/iou_metric.py @@ -1,11 +1,15 @@ # Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp from collections import OrderedDict from typing import Dict, List, Optional, Sequence import numpy as np import torch +from mmengine.dist import is_main_process from mmengine.evaluator import BaseMetric from mmengine.logging import MMLogger, print_log +from mmengine.utils import mkdir_or_exist +from PIL import Image from prettytable import PrettyTable from mmseg.registry import METRICS @@ -27,6 +31,12 @@ class IoUMetric(BaseMetric): collect_device (str): Device name used for collecting results from different ranks during distributed training. Must be 'cpu' or 'gpu'. Defaults to 'cpu'. + output_dir (str): The directory for output prediction. Defaults to + None. + format_only (bool): Only format result for results commit without + perform evaluation. It is useful when you want to save the result + to a specific format and submit it to the test server. + Defaults to False. prefix (str, optional): The prefix that will be added in the metric names to disambiguate homonymous metrics of different evaluators. If prefix is not provided in the argument, self.default_prefix @@ -39,13 +49,20 @@ def __init__(self, nan_to_num: Optional[int] = None, beta: int = 1, collect_device: str = 'cpu', - prefix: Optional[str] = None) -> None: + output_dir: Optional[str] = None, + format_only: bool = False, + prefix: Optional[str] = None, + **kwargs) -> None: super().__init__(collect_device=collect_device, prefix=prefix) self.ignore_index = ignore_index self.metrics = iou_metrics self.nan_to_num = nan_to_num self.beta = beta + self.output_dir = output_dir + if self.output_dir and is_main_process(): + mkdir_or_exist(self.output_dir) + self.format_only = format_only def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None: """Process one batch of data and data_samples. @@ -60,10 +77,27 @@ def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None: num_classes = len(self.dataset_meta['classes']) for data_sample in data_samples: pred_label = data_sample['pred_sem_seg']['data'].squeeze() - label = data_sample['gt_sem_seg']['data'].squeeze().to(pred_label) - self.results.append( - self.intersect_and_union(pred_label, label, num_classes, - self.ignore_index)) + # format_only always for test dataset without ground truth + if not self.format_only: + label = data_sample['gt_sem_seg']['data'].squeeze().to( + pred_label) + self.results.append( + self.intersect_and_union(pred_label, label, num_classes, + self.ignore_index)) + # format_result + if self.output_dir is not None: + basename = osp.splitext(osp.basename( + data_sample['img_path']))[0] + png_filename = osp.abspath( + osp.join(self.output_dir, f'{basename}.png')) + output_mask = pred_label.cpu().numpy() + # The index range of official ADE20k dataset is from 0 to 150. + # But the index range of output is from 0 to 149. + # That is because we set reduce_zero_label=True. + if data_sample.get('reduce_zero_label', False): + output_mask = output_mask + 1 + output = Image.fromarray(output_mask.astype(np.uint8)) + output.save(png_filename) def compute_metrics(self, results: list) -> Dict[str, float]: """Compute the metrics from processed results. 
@@ -78,7 +112,9 @@ def compute_metrics(self, results: list) -> Dict[str, float]: mRecall. """ logger: MMLogger = MMLogger.get_current_instance() - + if self.format_only: + logger.info(f'results are saved to {osp.dirname(self.output_dir)}') + return OrderedDict() # convert list of tuples to tuple of lists, e.g. # [(A_1, B_1, C_1, D_1), ..., (A_n, B_n, C_n, D_n)] to # ([A_1, ..., A_n], ..., [D_1, ..., D_n]) diff --git a/mmseg/models/backbones/__init__.py b/mmseg/models/backbones/__init__.py index bda42bb692..e3107306ea 100644 --- a/mmseg/models/backbones/__init__.py +++ b/mmseg/models/backbones/__init__.py @@ -11,6 +11,8 @@ from .mit import MixVisionTransformer from .mobilenet_v2 import MobileNetV2 from .mobilenet_v3 import MobileNetV3 +from .mscan import MSCAN +from .pidnet import PIDNet from .resnest import ResNeSt from .resnet import ResNet, ResNetV1c, ResNetV1d from .resnext import ResNeXt @@ -26,5 +28,5 @@ 'ResNeSt', 'MobileNetV2', 'UNet', 'CGNet', 'MobileNetV3', 'VisionTransformer', 'SwinTransformer', 'MixVisionTransformer', 'BiSeNetV1', 'BiSeNetV2', 'ICNet', 'TIMMBackbone', 'ERFNet', 'PCPVT', - 'SVT', 'STDCNet', 'STDCContextPathNet', 'BEiT', 'MAE' + 'SVT', 'STDCNet', 'STDCContextPathNet', 'BEiT', 'MAE', 'PIDNet', 'MSCAN' ] diff --git a/mmseg/models/backbones/mscan.py b/mmseg/models/backbones/mscan.py new file mode 100644 index 0000000000..7150cb7a1c --- /dev/null +++ b/mmseg/models/backbones/mscan.py @@ -0,0 +1,467 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# Originally from https://github.com/visual-attention-network/segnext +# Licensed under the Apache License, Version 2.0 (the "License") +import math +import warnings + +import torch +import torch.nn as nn +from mmcv.cnn import build_activation_layer, build_norm_layer +from mmcv.cnn.bricks import DropPath +from mmengine.model import BaseModule +from mmengine.model.weight_init import (constant_init, normal_init, + trunc_normal_init) + +from mmseg.registry import MODELS + + +class Mlp(BaseModule): + """Multi Layer Perceptron (MLP) Module. + + Args: + in_features (int): The dimension of input features. + hidden_features (int): The dimension of hidden features. + Defaults: None. + out_features (int): The dimension of output features. + Defaults: None. + act_cfg (dict): Config dict for activation layer in block. + Default: dict(type='GELU'). + drop (float): The number of dropout rate in MLP block. + Defaults: 0.0. + """ + + def __init__(self, + in_features, + hidden_features=None, + out_features=None, + act_cfg=dict(type='GELU'), + drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Conv2d(in_features, hidden_features, 1) + self.dwconv = nn.Conv2d( + hidden_features, + hidden_features, + 3, + 1, + 1, + bias=True, + groups=hidden_features) + self.act = build_activation_layer(act_cfg) + self.fc2 = nn.Conv2d(hidden_features, out_features, 1) + self.drop = nn.Dropout(drop) + + def forward(self, x): + """Forward function.""" + + x = self.fc1(x) + + x = self.dwconv(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + + return x + + +class StemConv(BaseModule): + """Stem Block at the beginning of Semantic Branch. + + Args: + in_channels (int): The dimension of input channels. + out_channels (int): The dimension of output channels. + act_cfg (dict): Config dict for activation layer in block. + Default: dict(type='GELU'). + norm_cfg (dict): Config dict for normalization layer. 
+ Defaults: dict(type='SyncBN', requires_grad=True). + """ + + def __init__(self, + in_channels, + out_channels, + act_cfg=dict(type='GELU'), + norm_cfg=dict(type='SyncBN', requires_grad=True)): + super().__init__() + + self.proj = nn.Sequential( + nn.Conv2d( + in_channels, + out_channels // 2, + kernel_size=(3, 3), + stride=(2, 2), + padding=(1, 1)), + build_norm_layer(norm_cfg, out_channels // 2)[1], + build_activation_layer(act_cfg), + nn.Conv2d( + out_channels // 2, + out_channels, + kernel_size=(3, 3), + stride=(2, 2), + padding=(1, 1)), + build_norm_layer(norm_cfg, out_channels)[1], + ) + + def forward(self, x): + """Forward function.""" + + x = self.proj(x) + _, _, H, W = x.size() + x = x.flatten(2).transpose(1, 2) + return x, H, W + + +class MSCAAttention(BaseModule): + """Attention Module in Multi-Scale Convolutional Attention Module (MSCA). + + Args: + channels (int): The dimension of channels. + kernel_sizes (list): The size of attention + kernel. Defaults: [5, [1, 7], [1, 11], [1, 21]]. + paddings (list): The number of + corresponding padding value in attention module. + Defaults: [2, [0, 3], [0, 5], [0, 10]]. + """ + + def __init__(self, + channels, + kernel_sizes=[5, [1, 7], [1, 11], [1, 21]], + paddings=[2, [0, 3], [0, 5], [0, 10]]): + super().__init__() + self.conv0 = nn.Conv2d( + channels, + channels, + kernel_size=kernel_sizes[0], + padding=paddings[0], + groups=channels) + for i, (kernel_size, + padding) in enumerate(zip(kernel_sizes[1:], paddings[1:])): + kernel_size_ = [kernel_size, kernel_size[::-1]] + padding_ = [padding, padding[::-1]] + conv_name = [f'conv{i}_1', f'conv{i}_2'] + for i_kernel, i_pad, i_conv in zip(kernel_size_, padding_, + conv_name): + self.add_module( + i_conv, + nn.Conv2d( + channels, + channels, + tuple(i_kernel), + padding=i_pad, + groups=channels)) + self.conv3 = nn.Conv2d(channels, channels, 1) + + def forward(self, x): + """Forward function.""" + + u = x.clone() + + attn = self.conv0(x) + + # Multi-Scale Feature extraction + attn_0 = self.conv0_1(attn) + attn_0 = self.conv0_2(attn_0) + + attn_1 = self.conv1_1(attn) + attn_1 = self.conv1_2(attn_1) + + attn_2 = self.conv2_1(attn) + attn_2 = self.conv2_2(attn_2) + + attn = attn + attn_0 + attn_1 + attn_2 + # Channel Mixing + attn = self.conv3(attn) + + # Convolutional Attention + x = attn * u + + return x + + +class MSCASpatialAttention(BaseModule): + """Spatial Attention Module in Multi-Scale Convolutional Attention Module + (MSCA). + + Args: + in_channels (int): The dimension of channels. + attention_kernel_sizes (list): The size of attention + kernel. Defaults: [5, [1, 7], [1, 11], [1, 21]]. + attention_kernel_paddings (list): The number of + corresponding padding value in attention module. + Defaults: [2, [0, 3], [0, 5], [0, 10]]. + act_cfg (dict): Config dict for activation layer in block. + Default: dict(type='GELU'). 
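+
+    Example:
+        A minimal shape-check sketch; the sizes are illustrative and the
+        module keeps the spatial resolution unchanged::
+
+            >>> import torch
+            >>> attn = MSCASpatialAttention(in_channels=64)
+            >>> x = torch.randn(2, 64, 32, 32)
+            >>> attn(x).shape
+            torch.Size([2, 64, 32, 32])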
+ """ + + def __init__(self, + in_channels, + attention_kernel_sizes=[5, [1, 7], [1, 11], [1, 21]], + attention_kernel_paddings=[2, [0, 3], [0, 5], [0, 10]], + act_cfg=dict(type='GELU')): + super().__init__() + self.proj_1 = nn.Conv2d(in_channels, in_channels, 1) + self.activation = build_activation_layer(act_cfg) + self.spatial_gating_unit = MSCAAttention(in_channels, + attention_kernel_sizes, + attention_kernel_paddings) + self.proj_2 = nn.Conv2d(in_channels, in_channels, 1) + + def forward(self, x): + """Forward function.""" + + shorcut = x.clone() + x = self.proj_1(x) + x = self.activation(x) + x = self.spatial_gating_unit(x) + x = self.proj_2(x) + x = x + shorcut + return x + + +class MSCABlock(BaseModule): + """Basic Multi-Scale Convolutional Attention Block. It leverage the large- + kernel attention (LKA) mechanism to build both channel and spatial + attention. In each branch, it uses two depth-wise strip convolutions to + approximate standard depth-wise convolutions with large kernels. The kernel + size for each branch is set to 7, 11, and 21, respectively. + + Args: + channels (int): The dimension of channels. + attention_kernel_sizes (list): The size of attention + kernel. Defaults: [5, [1, 7], [1, 11], [1, 21]]. + attention_kernel_paddings (list): The number of + corresponding padding value in attention module. + Defaults: [2, [0, 3], [0, 5], [0, 10]]. + mlp_ratio (float): The ratio of multiple input dimension to + calculate hidden feature in MLP layer. Defaults: 4.0. + drop (float): The number of dropout rate in MLP block. + Defaults: 0.0. + drop_path (float): The ratio of drop paths. + Defaults: 0.0. + act_cfg (dict): Config dict for activation layer in block. + Default: dict(type='GELU'). + norm_cfg (dict): Config dict for normalization layer. + Defaults: dict(type='SyncBN', requires_grad=True). + """ + + def __init__(self, + channels, + attention_kernel_sizes=[5, [1, 7], [1, 11], [1, 21]], + attention_kernel_paddings=[2, [0, 3], [0, 5], [0, 10]], + mlp_ratio=4., + drop=0., + drop_path=0., + act_cfg=dict(type='GELU'), + norm_cfg=dict(type='SyncBN', requires_grad=True)): + super().__init__() + self.norm1 = build_norm_layer(norm_cfg, channels)[1] + self.attn = MSCASpatialAttention(channels, attention_kernel_sizes, + attention_kernel_paddings, act_cfg) + self.drop_path = DropPath( + drop_path) if drop_path > 0. else nn.Identity() + self.norm2 = build_norm_layer(norm_cfg, channels)[1] + mlp_hidden_channels = int(channels * mlp_ratio) + self.mlp = Mlp( + in_features=channels, + hidden_features=mlp_hidden_channels, + act_cfg=act_cfg, + drop=drop) + layer_scale_init_value = 1e-2 + self.layer_scale_1 = nn.Parameter( + layer_scale_init_value * torch.ones(channels), requires_grad=True) + self.layer_scale_2 = nn.Parameter( + layer_scale_init_value * torch.ones(channels), requires_grad=True) + + def forward(self, x, H, W): + """Forward function.""" + + B, N, C = x.shape + x = x.permute(0, 2, 1).view(B, C, H, W) + x = x + self.drop_path( + self.layer_scale_1.unsqueeze(-1).unsqueeze(-1) * + self.attn(self.norm1(x))) + x = x + self.drop_path( + self.layer_scale_2.unsqueeze(-1).unsqueeze(-1) * + self.mlp(self.norm2(x))) + x = x.view(B, C, N).permute(0, 2, 1) + return x + + +class OverlapPatchEmbed(BaseModule): + """Image to Patch Embedding. + + Args: + patch_size (int): The patch size. + Defaults: 7. + stride (int): Stride of the convolutional layer. + Default: 4. + in_channels (int): The number of input channels. + Defaults: 3. + embed_dims (int): The dimensions of embedding. 
+ Defaults: 768. + norm_cfg (dict): Config dict for normalization layer. + Defaults: dict(type='SyncBN', requires_grad=True). + """ + + def __init__(self, + patch_size=7, + stride=4, + in_channels=3, + embed_dim=768, + norm_cfg=dict(type='SyncBN', requires_grad=True)): + super().__init__() + + self.proj = nn.Conv2d( + in_channels, + embed_dim, + kernel_size=patch_size, + stride=stride, + padding=patch_size // 2) + self.norm = build_norm_layer(norm_cfg, embed_dim)[1] + + def forward(self, x): + """Forward function.""" + + x = self.proj(x) + _, _, H, W = x.shape + x = self.norm(x) + + x = x.flatten(2).transpose(1, 2) + + return x, H, W + + +@MODELS.register_module() +class MSCAN(BaseModule): + """SegNeXt Multi-Scale Convolutional Attention Network (MCSAN) backbone. + + This backbone is the implementation of `SegNeXt: Rethinking + Convolutional Attention Design for Semantic + Segmentation `_. + Inspiration from https://github.com/visual-attention-network/segnext. + + Args: + in_channels (int): The number of input channels. Defaults: 3. + embed_dims (list[int]): Embedding dimension. + Defaults: [64, 128, 256, 512]. + mlp_ratios (list[int]): Ratio of mlp hidden dim to embedding dim. + Defaults: [4, 4, 4, 4]. + drop_rate (float): Dropout rate. Defaults: 0. + drop_path_rate (float): Stochastic depth rate. Defaults: 0. + depths (list[int]): Depths of each Swin Transformer stage. + Default: [3, 4, 6, 3]. + num_stages (int): MSCAN stages. Default: 4. + attention_kernel_sizes (list): Size of attention kernel in + Attention Module (Figure 2(b) of original paper). + Defaults: [5, [1, 7], [1, 11], [1, 21]]. + attention_kernel_paddings (list): Size of attention paddings + in Attention Module (Figure 2(b) of original paper). + Defaults: [2, [0, 3], [0, 5], [0, 10]]. + norm_cfg (dict): Config of norm layers. + Defaults: dict(type='SyncBN', requires_grad=True). + pretrained (str, optional): model pretrained path. + Default: None. + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. 
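+
+    Example:
+        A hedged usage sketch; ``norm_cfg`` is switched to plain BN here
+        only so the snippet runs without a distributed process group::
+
+            >>> import torch
+            >>> model = MSCAN(norm_cfg=dict(type='BN', requires_grad=True))
+            >>> outs = model(torch.randn(1, 3, 64, 64))
+            >>> [tuple(o.shape) for o in outs]
+            [(1, 64, 16, 16), (1, 128, 8, 8), (1, 256, 4, 4), (1, 512, 2, 2)]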
+ """ + + def __init__(self, + in_channels=3, + embed_dims=[64, 128, 256, 512], + mlp_ratios=[4, 4, 4, 4], + drop_rate=0., + drop_path_rate=0., + depths=[3, 4, 6, 3], + num_stages=4, + attention_kernel_sizes=[5, [1, 7], [1, 11], [1, 21]], + attention_kernel_paddings=[2, [0, 3], [0, 5], [0, 10]], + act_cfg=dict(type='GELU'), + norm_cfg=dict(type='SyncBN', requires_grad=True), + pretrained=None, + init_cfg=None): + super().__init__(init_cfg=init_cfg) + + assert not (init_cfg and pretrained), \ + 'init_cfg and pretrained cannot be set at the same time' + if isinstance(pretrained, str): + warnings.warn('DeprecationWarning: pretrained is deprecated, ' + 'please use "init_cfg" instead') + self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) + elif pretrained is not None: + raise TypeError('pretrained must be a str or None') + + self.depths = depths + self.num_stages = num_stages + + dpr = [ + x.item() for x in torch.linspace(0, drop_path_rate, sum(depths)) + ] # stochastic depth decay rule + cur = 0 + + for i in range(num_stages): + if i == 0: + patch_embed = StemConv(3, embed_dims[0], norm_cfg=norm_cfg) + else: + patch_embed = OverlapPatchEmbed( + patch_size=7 if i == 0 else 3, + stride=4 if i == 0 else 2, + in_channels=in_channels if i == 0 else embed_dims[i - 1], + embed_dim=embed_dims[i], + norm_cfg=norm_cfg) + + block = nn.ModuleList([ + MSCABlock( + channels=embed_dims[i], + attention_kernel_sizes=attention_kernel_sizes, + attention_kernel_paddings=attention_kernel_paddings, + mlp_ratio=mlp_ratios[i], + drop=drop_rate, + drop_path=dpr[cur + j], + act_cfg=act_cfg, + norm_cfg=norm_cfg) for j in range(depths[i]) + ]) + norm = nn.LayerNorm(embed_dims[i]) + cur += depths[i] + + setattr(self, f'patch_embed{i + 1}', patch_embed) + setattr(self, f'block{i + 1}', block) + setattr(self, f'norm{i + 1}', norm) + + def init_weights(self): + """Initialize modules of MSCAN.""" + + print('init cfg', self.init_cfg) + if self.init_cfg is None: + for m in self.modules(): + if isinstance(m, nn.Linear): + trunc_normal_init(m, std=.02, bias=0.) + elif isinstance(m, nn.LayerNorm): + constant_init(m, val=1.0, bias=0.) + elif isinstance(m, nn.Conv2d): + fan_out = m.kernel_size[0] * m.kernel_size[ + 1] * m.out_channels + fan_out //= m.groups + normal_init( + m, mean=0, std=math.sqrt(2.0 / fan_out), bias=0) + else: + super().init_weights() + + def forward(self, x): + """Forward function.""" + + B = x.shape[0] + outs = [] + + for i in range(self.num_stages): + patch_embed = getattr(self, f'patch_embed{i + 1}') + block = getattr(self, f'block{i + 1}') + norm = getattr(self, f'norm{i + 1}') + x, H, W = patch_embed(x) + for blk in block: + x = blk(x, H, W) + x = norm(x) + x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous() + outs.append(x) + + return outs diff --git a/mmseg/models/backbones/pidnet.py b/mmseg/models/backbones/pidnet.py new file mode 100644 index 0000000000..0b711a3737 --- /dev/null +++ b/mmseg/models/backbones/pidnet.py @@ -0,0 +1,522 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import Tuple, Union + +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import ConvModule +from mmengine.model import BaseModule +from mmengine.runner import CheckpointLoader +from torch import Tensor + +from mmseg.registry import MODELS +from mmseg.utils import OptConfigType +from ..utils import DAPPM, PAPPM, BasicBlock, Bottleneck + + +class PagFM(BaseModule): + """Pixel-attention-guided fusion module. 
+
+    Args:
+        in_channels (int): The number of input channels.
+        channels (int): The number of channels.
+        after_relu (bool): Whether to use ReLU before attention.
+            Default: False.
+        with_channel (bool): Whether to use channel attention.
+            Default: False.
+        upsample_mode (str): The mode of upsample. Default: 'bilinear'.
+        norm_cfg (dict): Config dict for normalization layer.
+            Default: dict(type='BN').
+        act_cfg (dict): Config dict for activation layer.
+            Default: dict(type='ReLU', inplace=True).
+        init_cfg (dict): Config dict for initialization. Default: None.
+    """
+
+    def __init__(self,
+                 in_channels: int,
+                 channels: int,
+                 after_relu: bool = False,
+                 with_channel: bool = False,
+                 upsample_mode: str = 'bilinear',
+                 norm_cfg: OptConfigType = dict(type='BN'),
+                 act_cfg: OptConfigType = dict(type='ReLU', inplace=True),
+                 init_cfg: OptConfigType = None):
+        super().__init__(init_cfg)
+        self.after_relu = after_relu
+        self.with_channel = with_channel
+        self.upsample_mode = upsample_mode
+        self.f_i = ConvModule(
+            in_channels, channels, 1, norm_cfg=norm_cfg, act_cfg=None)
+        self.f_p = ConvModule(
+            in_channels, channels, 1, norm_cfg=norm_cfg, act_cfg=None)
+        if with_channel:
+            self.up = ConvModule(
+                channels, in_channels, 1, norm_cfg=norm_cfg, act_cfg=None)
+        if after_relu:
+            self.relu = MODELS.build(act_cfg)
+
+    def forward(self, x_p: Tensor, x_i: Tensor) -> Tensor:
+        """Forward function.
+
+        Args:
+            x_p (Tensor): The feature map from P branch.
+            x_i (Tensor): The feature map from I branch.
+
+        Returns:
+            Tensor: The feature map with pixel-attention-guided fusion.
+        """
+        if self.after_relu:
+            x_p = self.relu(x_p)
+            x_i = self.relu(x_i)
+
+        f_i = self.f_i(x_i)
+        f_i = F.interpolate(
+            f_i,
+            size=x_p.shape[2:],
+            mode=self.upsample_mode,
+            align_corners=False)
+
+        f_p = self.f_p(x_p)
+
+        if self.with_channel:
+            sigma = torch.sigmoid(self.up(f_p * f_i))
+        else:
+            sigma = torch.sigmoid(torch.sum(f_p * f_i, dim=1).unsqueeze(1))
+
+        x_i = F.interpolate(
+            x_i,
+            size=x_p.shape[2:],
+            mode=self.upsample_mode,
+            align_corners=False)
+
+        out = sigma * x_i + (1 - sigma) * x_p
+        return out
+
+
+class Bag(BaseModule):
+    """Boundary-attention-guided fusion module.
+
+    Args:
+        in_channels (int): The number of input channels.
+        out_channels (int): The number of output channels.
+        kernel_size (int): The kernel size of the convolution. Default: 3.
+        padding (int): The padding of the convolution. Default: 1.
+        norm_cfg (dict): Config dict for normalization layer.
+            Default: dict(type='BN').
+        act_cfg (dict): Config dict for activation layer.
+            Default: dict(type='ReLU', inplace=True).
+        conv_cfg (dict): Config dict for convolution layer.
+            Default: dict(order=('norm', 'act', 'conv')).
+        init_cfg (dict): Config dict for initialization. Default: None.
+    """
+
+    def __init__(self,
+                 in_channels: int,
+                 out_channels: int,
+                 kernel_size: int = 3,
+                 padding: int = 1,
+                 norm_cfg: OptConfigType = dict(type='BN'),
+                 act_cfg: OptConfigType = dict(type='ReLU', inplace=True),
+                 conv_cfg: OptConfigType = dict(order=('norm', 'act', 'conv')),
+                 init_cfg: OptConfigType = None):
+        super().__init__(init_cfg)
+
+        self.conv = ConvModule(
+            in_channels,
+            out_channels,
+            kernel_size,
+            padding=padding,
+            norm_cfg=norm_cfg,
+            act_cfg=act_cfg,
+            **conv_cfg)
+
+    def forward(self, x_p: Tensor, x_i: Tensor, x_d: Tensor) -> Tensor:
+        """Forward function.
+
+        Args:
+            x_p (Tensor): The feature map from P branch.
+            x_i (Tensor): The feature map from I branch.
+            x_d (Tensor): The feature map from D branch.
+ + Returns: + Tensor: The feature map with boundary-attention-guided fusion. + """ + sigma = torch.sigmoid(x_d) + return self.conv(sigma * x_p + (1 - sigma) * x_i) + + +class LightBag(BaseModule): + """Light Boundary-attention-guided fusion module. + + Args: + in_channels (int): The number of input channels. + out_channels (int): The number of output channels. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict): Config dict for activation layer. Default: None. + init_cfg (dict): Config dict for initialization. Default: None. + """ + + def __init__(self, + in_channels: int, + out_channels: int, + norm_cfg: OptConfigType = dict(type='BN'), + act_cfg: OptConfigType = None, + init_cfg: OptConfigType = None): + super().__init__(init_cfg) + self.f_p = ConvModule( + in_channels, + out_channels, + kernel_size=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.f_i = ConvModule( + in_channels, + out_channels, + kernel_size=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + def forward(self, x_p: Tensor, x_i: Tensor, x_d: Tensor) -> Tensor: + """Forward function. + Args: + x_p (Tensor): The featrue map from P branch. + x_i (Tensor): The featrue map from I branch. + x_d (Tensor): The featrue map from D branch. + + Returns: + Tensor: The feature map with light boundary-attention-guided + fusion. + """ + sigma = torch.sigmoid(x_d) + + f_p = self.f_p((1 - sigma) * x_i + x_p) + f_i = self.f_i(x_i + sigma * x_p) + + return f_p + f_i + + +@MODELS.register_module() +class PIDNet(BaseModule): + """PIDNet backbone. + + This backbone is the implementation of `PIDNet: A Real-time Semantic + Segmentation Network Inspired from PID Controller + `_. + Modified from https://github.com/XuJiacong/PIDNet. + + Licensed under the MIT License. + + Args: + in_channels (int): The number of input channels. Default: 3. + channels (int): The number of channels in the stem layer. Default: 64. + ppm_channels (int): The number of channels in the PPM layer. + Default: 96. + num_stem_blocks (int): The number of blocks in the stem layer. + Default: 2. + num_branch_blocks (int): The number of blocks in the branch layer. + Default: 3. + align_corners (bool): The align_corners argument of F.interpolate. + Default: False. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='ReLU', inplace=True). + init_cfg (dict): Config dict for initialization. Default: None. 
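+
+    Example:
+        A hedged eval-mode sketch (a PIDNet-S-like width is assumed; in
+        eval mode only the fused 1/8-resolution feature map is returned)::
+
+            >>> import torch
+            >>> model = PIDNet(channels=32).eval()
+            >>> with torch.no_grad():
+            ...     out = model(torch.randn(1, 3, 64, 64))
+            >>> tuple(out.shape)
+            (1, 128, 8, 8)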
+ """ + + def __init__(self, + in_channels: int = 3, + channels: int = 64, + ppm_channels: int = 96, + num_stem_blocks: int = 2, + num_branch_blocks: int = 3, + align_corners: bool = False, + norm_cfg: OptConfigType = dict(type='BN'), + act_cfg: OptConfigType = dict(type='ReLU', inplace=True), + init_cfg: OptConfigType = None, + **kwargs): + super().__init__(init_cfg) + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.align_corners = align_corners + + # stem layer + self.stem = self._make_stem_layer(in_channels, channels, + num_stem_blocks) + self.relu = nn.ReLU() + + # I Branch + self.i_branch_layers = nn.ModuleList() + for i in range(3): + self.i_branch_layers.append( + self._make_layer( + block=BasicBlock if i < 2 else Bottleneck, + in_channels=channels * 2**(i + 1), + channels=channels * 8 if i > 0 else channels * 4, + num_blocks=num_branch_blocks if i < 2 else 2, + stride=2)) + + # P Branch + self.p_branch_layers = nn.ModuleList() + for i in range(3): + self.p_branch_layers.append( + self._make_layer( + block=BasicBlock if i < 2 else Bottleneck, + in_channels=channels * 2, + channels=channels * 2, + num_blocks=num_stem_blocks if i < 2 else 1)) + self.compression_1 = ConvModule( + channels * 4, + channels * 2, + kernel_size=1, + bias=False, + norm_cfg=norm_cfg, + act_cfg=None) + self.compression_2 = ConvModule( + channels * 8, + channels * 2, + kernel_size=1, + bias=False, + norm_cfg=norm_cfg, + act_cfg=None) + self.pag_1 = PagFM(channels * 2, channels) + self.pag_2 = PagFM(channels * 2, channels) + + # D Branch + if num_stem_blocks == 2: + self.d_branch_layers = nn.ModuleList([ + self._make_single_layer(BasicBlock, channels * 2, channels), + self._make_layer(Bottleneck, channels, channels, 1) + ]) + channel_expand = 1 + spp_module = PAPPM + dfm_module = LightBag + act_cfg_dfm = None + else: + self.d_branch_layers = nn.ModuleList([ + self._make_single_layer(BasicBlock, channels * 2, + channels * 2), + self._make_single_layer(BasicBlock, channels * 2, channels * 2) + ]) + channel_expand = 2 + spp_module = DAPPM + dfm_module = Bag + act_cfg_dfm = act_cfg + + self.diff_1 = ConvModule( + channels * 4, + channels * channel_expand, + kernel_size=3, + padding=1, + bias=False, + norm_cfg=norm_cfg, + act_cfg=None) + self.diff_2 = ConvModule( + channels * 8, + channels * 2, + kernel_size=3, + padding=1, + bias=False, + norm_cfg=norm_cfg, + act_cfg=None) + + self.spp = spp_module( + channels * 16, ppm_channels, channels * 4, num_scales=5) + self.dfm = dfm_module( + channels * 4, channels * 4, norm_cfg=norm_cfg, act_cfg=act_cfg_dfm) + + self.d_branch_layers.append( + self._make_layer(Bottleneck, channels * 2, channels * 2, 1)) + + def _make_stem_layer(self, in_channels: int, channels: int, + num_blocks: int) -> nn.Sequential: + """Make stem layer. + + Args: + in_channels (int): Number of input channels. + channels (int): Number of output channels. + num_blocks (int): Number of blocks. + + Returns: + nn.Sequential: The stem layer. 
+ """ + + layers = [ + ConvModule( + in_channels, + channels, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + ConvModule( + channels, + channels, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + ] + + layers.append( + self._make_layer(BasicBlock, channels, channels, num_blocks)) + layers.append(nn.ReLU()) + layers.append( + self._make_layer( + BasicBlock, channels, channels * 2, num_blocks, stride=2)) + layers.append(nn.ReLU()) + + return nn.Sequential(*layers) + + def _make_layer(self, + block: BasicBlock, + in_channels: int, + channels: int, + num_blocks: int, + stride: int = 1) -> nn.Sequential: + """Make layer for PIDNet backbone. + Args: + block (BasicBlock): Basic block. + in_channels (int): Number of input channels. + channels (int): Number of output channels. + num_blocks (int): Number of blocks. + stride (int): Stride of the first block. Default: 1. + + Returns: + nn.Sequential: The Branch Layer. + """ + downsample = None + if stride != 1 or in_channels != channels * block.expansion: + downsample = ConvModule( + in_channels, + channels * block.expansion, + kernel_size=1, + stride=stride, + norm_cfg=self.norm_cfg, + act_cfg=None) + + layers = [block(in_channels, channels, stride, downsample)] + in_channels = channels * block.expansion + for i in range(1, num_blocks): + layers.append( + block( + in_channels, + channels, + stride=1, + act_cfg_out=None if i == num_blocks - 1 else self.act_cfg)) + return nn.Sequential(*layers) + + def _make_single_layer(self, + block: Union[BasicBlock, Bottleneck], + in_channels: int, + channels: int, + stride: int = 1) -> nn.Module: + """Make single layer for PIDNet backbone. + Args: + block (BasicBlock or Bottleneck): Basic block or Bottleneck. + in_channels (int): Number of input channels. + channels (int): Number of output channels. + stride (int): Stride of the first block. Default: 1. + + Returns: + nn.Module + """ + + downsample = None + if stride != 1 or in_channels != channels * block.expansion: + downsample = ConvModule( + in_channels, + channels * block.expansion, + kernel_size=1, + stride=stride, + norm_cfg=self.norm_cfg, + act_cfg=None) + return block( + in_channels, channels, stride, downsample, act_cfg_out=None) + + def init_weights(self): + """Initialize the weights in backbone. + + Since the D branch is not initialized by the pre-trained model, we + initialize it with the same method as the ResNet. + """ + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_( + m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + if self.init_cfg is not None: + assert 'checkpoint' in self.init_cfg, f'Only support ' \ + f'specify `Pretrained` in ' \ + f'`init_cfg` in ' \ + f'{self.__class__.__name__} ' + ckpt = CheckpointLoader.load_checkpoint( + self.init_cfg['checkpoint'], map_location='cpu') + self.load_state_dict(ckpt, strict=False) + + def forward(self, x: Tensor) -> Union[Tensor, Tuple[Tensor]]: + """Forward function. + + Args: + x (Tensor): Input tensor with shape (B, C, H, W). + + Returns: + Tensor or tuple[Tensor]: If self.training is True, return + tuple[Tensor], else return Tensor. 
+ """ + w_out = x.shape[-1] // 8 + h_out = x.shape[-2] // 8 + + # stage 0-2 + x = self.stem(x) + + # stage 3 + x_i = self.relu(self.i_branch_layers[0](x)) + x_p = self.p_branch_layers[0](x) + x_d = self.d_branch_layers[0](x) + + comp_i = self.compression_1(x_i) + x_p = self.pag_1(x_p, comp_i) + diff_i = self.diff_1(x_i) + x_d += F.interpolate( + diff_i, + size=[h_out, w_out], + mode='bilinear', + align_corners=self.align_corners) + if self.training: + temp_p = x_p.clone() + + # stage 4 + x_i = self.relu(self.i_branch_layers[1](x_i)) + x_p = self.p_branch_layers[1](self.relu(x_p)) + x_d = self.d_branch_layers[1](self.relu(x_d)) + + comp_i = self.compression_2(x_i) + x_p = self.pag_2(x_p, comp_i) + diff_i = self.diff_2(x_i) + x_d += F.interpolate( + diff_i, + size=[h_out, w_out], + mode='bilinear', + align_corners=self.align_corners) + if self.training: + temp_d = x_d.clone() + + # stage 5 + x_i = self.i_branch_layers[2](x_i) + x_p = self.p_branch_layers[2](self.relu(x_p)) + x_d = self.d_branch_layers[2](self.relu(x_d)) + + x_i = self.spp(x_i) + x_i = F.interpolate( + x_i, + size=[h_out, w_out], + mode='bilinear', + align_corners=self.align_corners) + out = self.dfm(x_p, x_i, x_d) + return (temp_p, out, temp_d) if self.training else out diff --git a/mmseg/models/decode_heads/__init__.py b/mmseg/models/decode_heads/__init__.py index b18152d7d9..18235456bc 100644 --- a/mmseg/models/decode_heads/__init__.py +++ b/mmseg/models/decode_heads/__init__.py @@ -12,6 +12,7 @@ from .fcn_head import FCNHead from .fpn_head import FPNHead from .gc_head import GCHead +from .ham_head import LightHamHead from .isa_head import ISAHead from .knet_head import IterativeDecodeHead, KernelUpdateHead, KernelUpdator from .lraspp_head import LRASPPHead @@ -19,6 +20,7 @@ from .maskformer_head import MaskFormerHead from .nl_head import NLHead from .ocr_head import OCRHead +from .pid_head import PIDHead from .point_head import PointHead from .psa_head import PSAHead from .psp_head import PSPHead @@ -38,5 +40,6 @@ 'PointHead', 'APCHead', 'DMHead', 'LRASPPHead', 'SETRUPHead', 'SETRMLAHead', 'DPTHead', 'SETRMLAHead', 'SegmenterMaskTransformerHead', 'SegformerHead', 'ISAHead', 'STDCHead', 'IterativeDecodeHead', - 'KernelUpdateHead', 'KernelUpdator', 'MaskFormerHead', 'Mask2FormerHead' + 'KernelUpdateHead', 'KernelUpdator', 'MaskFormerHead', 'Mask2FormerHead', + 'LightHamHead', 'PIDHead' ] diff --git a/mmseg/models/decode_heads/ham_head.py b/mmseg/models/decode_heads/ham_head.py new file mode 100644 index 0000000000..d80025f77d --- /dev/null +++ b/mmseg/models/decode_heads/ham_head.py @@ -0,0 +1,257 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# Originally from https://github.com/visual-attention-network/segnext +# Licensed under the Apache License, Version 2.0 (the "License") +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import ConvModule + +from mmseg.registry import MODELS +from ..utils import resize +from .decode_head import BaseDecodeHead + + +class Matrix_Decomposition_2D_Base(nn.Module): + """Base class of 2D Matrix Decomposition. + + Args: + MD_S (int): The number of spatial coefficient in + Matrix Decomposition, it may be used for calculation + of the number of latent dimension D in Matrix + Decomposition. Defaults: 1. + MD_R (int): The number of latent dimension R in + Matrix Decomposition. Defaults: 64. + train_steps (int): The number of iteration steps in + Multiplicative Update (MU) rule to solve Non-negative + Matrix Factorization (NMF) in training. Defaults: 6. 
+ eval_steps (int): The number of iteration steps in + Multiplicative Update (MU) rule to solve Non-negative + Matrix Factorization (NMF) in evaluation. Defaults: 7. + inv_t (int): Inverted multiple number to make coefficient + smaller in softmax. Defaults: 100. + rand_init (bool): Whether to initialize randomly. + Defaults: True. + """ + + def __init__(self, + MD_S=1, + MD_R=64, + train_steps=6, + eval_steps=7, + inv_t=100, + rand_init=True): + super().__init__() + + self.S = MD_S + self.R = MD_R + + self.train_steps = train_steps + self.eval_steps = eval_steps + + self.inv_t = inv_t + + self.rand_init = rand_init + + def _build_bases(self, B, S, D, R, cuda=False): + raise NotImplementedError + + def local_step(self, x, bases, coef): + raise NotImplementedError + + def local_inference(self, x, bases): + # (B * S, D, N)^T @ (B * S, D, R) -> (B * S, N, R) + coef = torch.bmm(x.transpose(1, 2), bases) + coef = F.softmax(self.inv_t * coef, dim=-1) + + steps = self.train_steps if self.training else self.eval_steps + for _ in range(steps): + bases, coef = self.local_step(x, bases, coef) + + return bases, coef + + def compute_coef(self, x, bases, coef): + raise NotImplementedError + + def forward(self, x, return_bases=False): + """Forward Function.""" + B, C, H, W = x.shape + + # (B, C, H, W) -> (B * S, D, N) + D = C // self.S + N = H * W + x = x.view(B * self.S, D, N) + cuda = 'cuda' in str(x.device) + if not self.rand_init and not hasattr(self, 'bases'): + bases = self._build_bases(1, self.S, D, self.R, cuda=cuda) + self.register_buffer('bases', bases) + + # (S, D, R) -> (B * S, D, R) + if self.rand_init: + bases = self._build_bases(B, self.S, D, self.R, cuda=cuda) + else: + bases = self.bases.repeat(B, 1, 1) + + bases, coef = self.local_inference(x, bases) + + # (B * S, N, R) + coef = self.compute_coef(x, bases, coef) + + # (B * S, D, R) @ (B * S, N, R)^T -> (B * S, D, N) + x = torch.bmm(bases, coef.transpose(1, 2)) + + # (B * S, D, N) -> (B, C, H, W) + x = x.view(B, C, H, W) + + return x + + +class NMF2D(Matrix_Decomposition_2D_Base): + """Non-negative Matrix Factorization (NMF) module. + + It is inherited from ``Matrix_Decomposition_2D_Base`` module. 
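+
+    Example:
+        A minimal sketch; the keyword names follow the base-class signature
+        and the sizes are illustrative::
+
+            >>> import torch
+            >>> nmf = NMF2D(dict(MD_R=16))
+            >>> x = torch.rand(2, 64, 16, 16)  # non-negative, as NMF expects
+            >>> nmf(x).shape
+            torch.Size([2, 64, 16, 16])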
+ """ + + def __init__(self, args=dict()): + super().__init__(**args) + + self.inv_t = 1 + + def _build_bases(self, B, S, D, R, cuda=False): + """Build bases in initialization.""" + if cuda: + bases = torch.rand((B * S, D, R)).cuda() + else: + bases = torch.rand((B * S, D, R)) + + bases = F.normalize(bases, dim=1) + + return bases + + def local_step(self, x, bases, coef): + """Local step in iteration to renew bases and coefficient.""" + # (B * S, D, N)^T @ (B * S, D, R) -> (B * S, N, R) + numerator = torch.bmm(x.transpose(1, 2), bases) + # (B * S, N, R) @ [(B * S, D, R)^T @ (B * S, D, R)] -> (B * S, N, R) + denominator = coef.bmm(bases.transpose(1, 2).bmm(bases)) + # Multiplicative Update + coef = coef * numerator / (denominator + 1e-6) + + # (B * S, D, N) @ (B * S, N, R) -> (B * S, D, R) + numerator = torch.bmm(x, coef) + # (B * S, D, R) @ [(B * S, N, R)^T @ (B * S, N, R)] -> (B * S, D, R) + denominator = bases.bmm(coef.transpose(1, 2).bmm(coef)) + # Multiplicative Update + bases = bases * numerator / (denominator + 1e-6) + + return bases, coef + + def compute_coef(self, x, bases, coef): + """Compute coefficient.""" + # (B * S, D, N)^T @ (B * S, D, R) -> (B * S, N, R) + numerator = torch.bmm(x.transpose(1, 2), bases) + # (B * S, N, R) @ (B * S, D, R)^T @ (B * S, D, R) -> (B * S, N, R) + denominator = coef.bmm(bases.transpose(1, 2).bmm(bases)) + # multiplication update + coef = coef * numerator / (denominator + 1e-6) + + return coef + + +class Hamburger(nn.Module): + """Hamburger Module. It consists of one slice of "ham" (matrix + decomposition) and two slices of "bread" (linear transformation). + + Args: + ham_channels (int): Input and output channels of feature. + ham_kwargs (dict): Config of matrix decomposition module. + norm_cfg (dict | None): Config of norm layers. + """ + + def __init__(self, + ham_channels=512, + ham_kwargs=dict(), + norm_cfg=None, + **kwargs): + super().__init__() + + self.ham_in = ConvModule( + ham_channels, ham_channels, 1, norm_cfg=None, act_cfg=None) + + self.ham = NMF2D(ham_kwargs) + + self.ham_out = ConvModule( + ham_channels, ham_channels, 1, norm_cfg=norm_cfg, act_cfg=None) + + def forward(self, x): + enjoy = self.ham_in(x) + enjoy = F.relu(enjoy, inplace=True) + enjoy = self.ham(enjoy) + enjoy = self.ham_out(enjoy) + ham = F.relu(x + enjoy, inplace=True) + + return ham + + +@MODELS.register_module() +class LightHamHead(BaseDecodeHead): + """SegNeXt decode head. + + This decode head is the implementation of `SegNeXt: Rethinking + Convolutional Attention Design for Semantic + Segmentation `_. + Inspiration from https://github.com/visual-attention-network/segnext. + + Specifically, LightHamHead is inspired by HamNet from + `Is Attention Better Than Matrix Decomposition? + `. + + Args: + ham_channels (int): input channels for Hamburger. + Defaults: 512. + ham_kwargs (int): kwagrs for Ham. Defaults: dict(). 
+ """ + + def __init__(self, ham_channels=512, ham_kwargs=dict(), **kwargs): + super().__init__(input_transform='multiple_select', **kwargs) + self.ham_channels = ham_channels + + self.squeeze = ConvModule( + sum(self.in_channels), + self.ham_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + self.hamburger = Hamburger(ham_channels, ham_kwargs, **kwargs) + + self.align = ConvModule( + self.ham_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + inputs = self._transform_inputs(inputs) + + inputs = [ + resize( + level, + size=inputs[0].shape[2:], + mode='bilinear', + align_corners=self.align_corners) for level in inputs + ] + + inputs = torch.cat(inputs, dim=1) + # apply a conv block to squeeze feature map + x = self.squeeze(inputs) + # apply hamburger module + x = self.hamburger(x) + + # apply a conv block to align feature map + output = self.align(x) + output = self.cls_seg(output) + return output diff --git a/mmseg/models/decode_heads/pid_head.py b/mmseg/models/decode_heads/pid_head.py new file mode 100644 index 0000000000..c092cb32d0 --- /dev/null +++ b/mmseg/models/decode_heads/pid_head.py @@ -0,0 +1,183 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import Optional, Tuple, Union + +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule, build_activation_layer, build_norm_layer +from mmengine.model import BaseModule +from torch import Tensor + +from mmseg.models.decode_heads.decode_head import BaseDecodeHead +from mmseg.models.losses import accuracy +from mmseg.models.utils import resize +from mmseg.registry import MODELS +from mmseg.utils import OptConfigType, SampleList + + +class BasePIDHead(BaseModule): + """Base class for PID head. + + Args: + in_channels (int): Number of input channels. + channels (int): Number of output channels. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='ReLU', inplace=True). + init_cfg (dict or list[dict], optional): Init config dict. + Default: None. + """ + + def __init__(self, + in_channels: int, + channels: int, + norm_cfg: OptConfigType = dict(type='BN'), + act_cfg: OptConfigType = dict(type='ReLU', inplace=True), + init_cfg: OptConfigType = None): + super().__init__(init_cfg) + self.conv = ConvModule( + in_channels, + channels, + kernel_size=3, + padding=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + order=('norm', 'act', 'conv')) + _, self.norm = build_norm_layer(norm_cfg, num_features=channels) + self.act = build_activation_layer(act_cfg) + + def forward(self, x: Tensor, cls_seg: Optional[nn.Module]) -> Tensor: + """Forward function. + Args: + x (Tensor): Input tensor. + cls_seg (nn.Module, optional): The classification head. + + Returns: + Tensor: Output tensor. + """ + x = self.conv(x) + x = self.norm(x) + x = self.act(x) + if cls_seg is not None: + x = cls_seg(x) + return x + + +@MODELS.register_module() +class PIDHead(BaseDecodeHead): + """Decode head for PIDNet. + + Args: + in_channels (int): Number of input channels. + channels (int): Number of output channels. + num_classes (int): Number of classes. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='ReLU', inplace=True). 
+ """ + + def __init__(self, + in_channels: int, + channels: int, + num_classes: int, + norm_cfg: OptConfigType = dict(type='BN'), + act_cfg: OptConfigType = dict(type='ReLU', inplace=True), + **kwargs): + super().__init__( + in_channels, + channels, + num_classes=num_classes, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **kwargs) + self.i_head = BasePIDHead(in_channels, channels, norm_cfg, act_cfg) + self.p_head = BasePIDHead(in_channels // 2, channels, norm_cfg, + act_cfg) + self.d_head = BasePIDHead( + in_channels // 2, + in_channels // 4, + norm_cfg, + ) + self.p_cls_seg = nn.Conv2d(channels, self.out_channels, kernel_size=1) + self.d_cls_seg = nn.Conv2d(in_channels // 4, 1, kernel_size=1) + + def init_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_( + m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + def forward( + self, + inputs: Union[Tensor, + Tuple[Tensor]]) -> Union[Tensor, Tuple[Tensor]]: + """Forward function. + Args: + inputs (Tensor | tuple[Tensor]): Input tensor or tuple of + Tensor. When training, the input is a tuple of three tensors, + (p_feat, i_feat, d_feat), and the output is a tuple of three + tensors, (p_seg_logit, i_seg_logit, d_seg_logit). + When inference, only the head of integral branch is used, and + input is a tensor of integral feature map, and the output is + the segmentation logit. + + Returns: + Tensor | tuple[Tensor]: Output tensor or tuple of tensors. + """ + if self.training: + x_p, x_i, x_d = inputs + x_p = self.p_head(x_p, self.p_cls_seg) + x_i = self.i_head(x_i, self.cls_seg) + x_d = self.d_head(x_d, self.d_cls_seg) + return x_p, x_i, x_d + else: + return self.i_head(inputs, self.cls_seg) + + def _stack_batch_gt(self, batch_data_samples: SampleList) -> Tuple[Tensor]: + gt_semantic_segs = [ + data_sample.gt_sem_seg.data for data_sample in batch_data_samples + ] + gt_edge_segs = [ + data_sample.gt_edge_map.data for data_sample in batch_data_samples + ] + gt_sem_segs = torch.stack(gt_semantic_segs, dim=0) + gt_edge_segs = torch.stack(gt_edge_segs, dim=0) + return gt_sem_segs, gt_edge_segs + + def loss_by_feat(self, seg_logits: Tuple[Tensor], + batch_data_samples: SampleList) -> dict: + loss = dict() + p_logit, i_logit, d_logit = seg_logits + sem_label, bd_label = self._stack_batch_gt(batch_data_samples) + p_logit = resize( + input=p_logit, + size=sem_label.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + i_logit = resize( + input=i_logit, + size=sem_label.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + d_logit = resize( + input=d_logit, + size=bd_label.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + sem_label = sem_label.squeeze(1) + bd_label = bd_label.squeeze(1) + loss['loss_sem_p'] = self.loss_decode[0]( + p_logit, sem_label, ignore_index=self.ignore_index) + loss['loss_sem_i'] = self.loss_decode[1](i_logit, sem_label) + loss['loss_bd'] = self.loss_decode[2](d_logit, bd_label) + filler = torch.ones_like(sem_label) * self.ignore_index + sem_bd_label = torch.where( + torch.sigmoid(d_logit[:, 0, :, :]) > 0.8, sem_label, filler) + loss['loss_sem_bd'] = self.loss_decode[3](i_logit, sem_bd_label) + loss['acc_seg'] = accuracy( + i_logit, sem_label, ignore_index=self.ignore_index) + return loss diff --git a/mmseg/models/losses/__init__.py b/mmseg/models/losses/__init__.py index d7e019747d..2f7e39cb28 100644 --- a/mmseg/models/losses/__init__.py +++ 
b/mmseg/models/losses/__init__.py
@@ -1,10 +1,12 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from .accuracy import Accuracy, accuracy
+from .boundary_loss import BoundaryLoss
 from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy,
                                  cross_entropy, mask_cross_entropy)
 from .dice_loss import DiceLoss
 from .focal_loss import FocalLoss
 from .lovasz_loss import LovaszLoss
+from .ohem_cross_entropy_loss import OhemCrossEntropy
 from .tversky_loss import TverskyLoss
 from .utils import reduce_loss, weight_reduce_loss, weighted_loss
@@ -12,5 +14,5 @@
     'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy',
     'mask_cross_entropy', 'CrossEntropyLoss', 'reduce_loss',
     'weight_reduce_loss', 'weighted_loss', 'LovaszLoss', 'DiceLoss',
-    'FocalLoss', 'TverskyLoss'
+    'FocalLoss', 'TverskyLoss', 'OhemCrossEntropy', 'BoundaryLoss'
 ]
diff --git a/mmseg/models/losses/boundary_loss.py b/mmseg/models/losses/boundary_loss.py
new file mode 100644
index 0000000000..e86b850d87
--- /dev/null
+++ b/mmseg/models/losses/boundary_loss.py
@@ -0,0 +1,62 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch import Tensor
+
+from mmseg.registry import MODELS
+
+
+@MODELS.register_module()
+class BoundaryLoss(nn.Module):
+    """Boundary loss.
+
+    This function is modified from
+    `PIDNet <https://github.com/XuJiacong/PIDNet>`_.  # noqa
+    Licensed under the MIT License.
+
+    Args:
+        loss_weight (float): Weight of the loss. Defaults to 1.0.
+        loss_name (str): Name of the loss item. If you want this loss
+            item to be included into the backward graph, `loss_` must be the
+            prefix of the name. Defaults to 'loss_boundary'.
+    """
+
+    def __init__(self,
+                 loss_weight: float = 1.0,
+                 loss_name: str = 'loss_boundary'):
+        super().__init__()
+        self.loss_weight = loss_weight
+        self.loss_name_ = loss_name
+
+    def forward(self, bd_pre: Tensor, bd_gt: Tensor) -> Tensor:
+        """Forward function.
+
+        Args:
+            bd_pre (Tensor): Predictions of the boundary head.
+            bd_gt (Tensor): Ground truth of the boundary.
+
+        Returns:
+            Tensor: Loss tensor.
+        """
+        log_p = bd_pre.permute(0, 2, 3, 1).contiguous().view(1, -1)
+        target_t = bd_gt.view(1, -1).float()
+
+        pos_index = (target_t == 1)
+        neg_index = (target_t == 0)
+
+        weight = torch.zeros_like(log_p)
+        pos_num = pos_index.sum()
+        neg_num = neg_index.sum()
+        sum_num = pos_num + neg_num
+        weight[pos_index] = neg_num * 1.0 / sum_num
+        weight[neg_index] = pos_num * 1.0 / sum_num
+
+        loss = F.binary_cross_entropy_with_logits(
+            log_p, target_t, weight, reduction='mean')
+
+        return self.loss_weight * loss
+
+    @property
+    def loss_name(self):
+        return self.loss_name_
diff --git a/mmseg/models/losses/ohem_cross_entropy_loss.py b/mmseg/models/losses/ohem_cross_entropy_loss.py
new file mode 100644
index 0000000000..a519b4d84e
--- /dev/null
+++ b/mmseg/models/losses/ohem_cross_entropy_loss.py
@@ -0,0 +1,94 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from typing import List, Optional, Union
+
+import torch.nn as nn
+import torch.nn.functional as F
+from torch import Tensor
+
+from mmseg.registry import MODELS
+
+
+@MODELS.register_module()
+class OhemCrossEntropy(nn.Module):
+    """OhemCrossEntropy loss.
+
+    This function is modified from
+    `PIDNet <https://github.com/XuJiacong/PIDNet>`_.  # noqa
+
+    Licensed under the MIT License.
+
+    Args:
+        ignore_label (int): Labels to ignore when computing the loss.
+            Default: 255.
+        thres (float, optional): The threshold for hard example selection;
+            predictions below it are regarded as hard examples with low
+            confidence.
+            If not specified, the hard examples will be pixels of top
+            ``min_kept`` loss. Default: 0.7.
+        min_kept (int, optional): The minimum number of predictions to keep.
+            Default: 100000.
+        loss_weight (float): Weight of the loss. Defaults to 1.0.
+        class_weight (list[float] | str, optional): Weight of each class. If in
+            str format, read them from a file. Defaults to None.
+        loss_name (str): Name of the loss item. If you want this loss
+            item to be included into the backward graph, `loss_` must be the
+            prefix of the name. Defaults to 'loss_ohem'.
+    """
+
+    def __init__(self,
+                 ignore_label: int = 255,
+                 thres: float = 0.7,
+                 min_kept: int = 100000,
+                 loss_weight: float = 1.0,
+                 class_weight: Optional[Union[List[float], str]] = None,
+                 loss_name: str = 'loss_ohem'):
+        super().__init__()
+        self.thresh = thres
+        self.min_kept = max(1, min_kept)
+        self.ignore_label = ignore_label
+        self.loss_weight = loss_weight
+        self.loss_name_ = loss_name
+        self.class_weight = class_weight
+
+    def forward(self, score: Tensor, target: Tensor) -> Tensor:
+        """Forward function.
+
+        Args:
+            score (Tensor): Predictions of the segmentation head.
+            target (Tensor): Ground truth of the image.
+
+        Returns:
+            Tensor: Loss tensor.
+        """
+        # score: (N, C, H, W)
+        pred = F.softmax(score, dim=1)
+        if self.class_weight is not None:
+            class_weight = score.new_tensor(self.class_weight)
+        else:
+            class_weight = None
+
+        pixel_losses = F.cross_entropy(
+            score,
+            target,
+            weight=class_weight,
+            ignore_index=self.ignore_label,
+            reduction='none').contiguous().view(-1)  # (N*H*W)
+        mask = target.contiguous().view(-1) != self.ignore_label  # (N*H*W)
+
+        tmp_target = target.clone()  # (N, H, W)
+        tmp_target[tmp_target == self.ignore_label] = 0
+        # pred: (N, C, H, W) -> (N, 1, H, W), confidence of the target class
+        pred = pred.gather(1, tmp_target.unsqueeze(1))
+        # pred: (N, 1, H, W) -> (N*H*W), masked and sorted ascending
+        pred, ind = pred.contiguous().view(-1, )[mask].contiguous().sort()
+        if pred.numel() > 0:
+            min_value = pred[min(self.min_kept, pred.numel() - 1)]
+        else:
+            return score.new_tensor(0.0)
+        threshold = max(min_value, self.thresh)
+
+        pixel_losses = pixel_losses[mask][ind]
+        pixel_losses = pixel_losses[pred < threshold]
+        return self.loss_weight * pixel_losses.mean()
+
+    @property
+    def loss_name(self):
+        return self.loss_name_
diff --git a/mmseg/models/utils/__init__.py b/mmseg/models/utils/__init__.py
index 7aaa600c2d..fc142f16fc
--- a/mmseg/models/utils/__init__.py
+++ b/mmseg/models/utils/__init__.py
@@ -1,8 +1,10 @@
 # Copyright (c) OpenMMLab. All rights reserved.
+from .basic_block import BasicBlock, Bottleneck
 from .embed import PatchEmbed
 from .encoding import Encoding
 from .inverted_residual import InvertedResidual, InvertedResidualV3
 from .make_divisible import make_divisible
+from .ppm import DAPPM, PAPPM
 from .res_layer import ResLayer
 from .se_layer import SELayer
 from .self_attention_block import SelfAttentionBlock
@@ -15,5 +17,5 @@
     'ResLayer', 'SelfAttentionBlock', 'make_divisible', 'InvertedResidual',
     'UpConvBlock', 'InvertedResidualV3', 'SELayer', 'PatchEmbed',
     'nchw_to_nlc', 'nlc_to_nchw', 'nchw2nlc2nchw', 'nlc2nchw2nlc', 'Encoding',
-    'Upsample', 'resize'
+    'Upsample', 'resize', 'DAPPM', 'PAPPM', 'BasicBlock', 'Bottleneck'
 ]
diff --git a/mmseg/models/utils/basic_block.py b/mmseg/models/utils/basic_block.py
new file mode 100644
index 0000000000..4e1ad8146d
--- /dev/null
+++ b/mmseg/models/utils/basic_block.py
@@ -0,0 +1,143 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from typing import Optional + +import torch.nn as nn +from mmcv.cnn import ConvModule +from mmengine.model import BaseModule +from torch import Tensor + +from mmseg.registry import MODELS +from mmseg.utils import OptConfigType + + +class BasicBlock(BaseModule): + """Basic block from `ResNet `_. + + Args: + in_channels (int): Input channels. + channels (int): Output channels. + stride (int): Stride of the first block. Default: 1. + downsample (nn.Module, optional): Downsample operation on identity. + Default: None. + norm_cfg (dict, optional): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict, optional): Config dict for activation layer in + ConvModule. Default: dict(type='ReLU', inplace=True). + act_cfg_out (dict, optional): Config dict for activation layer at the + last of the block. Default: None. + init_cfg (dict, optional): Initialization config dict. Default: None. + """ + + expansion = 1 + + def __init__(self, + in_channels: int, + channels: int, + stride: int = 1, + downsample: nn.Module = None, + norm_cfg: OptConfigType = dict(type='BN'), + act_cfg: OptConfigType = dict(type='ReLU', inplace=True), + act_cfg_out: OptConfigType = dict(type='ReLU', inplace=True), + init_cfg: OptConfigType = None): + super().__init__(init_cfg) + self.conv1 = ConvModule( + in_channels, + channels, + kernel_size=3, + stride=stride, + padding=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.conv2 = ConvModule( + channels, + channels, + kernel_size=3, + padding=1, + norm_cfg=norm_cfg, + act_cfg=None) + self.downsample = downsample + if act_cfg_out: + self.act = MODELS.build(act_cfg_out) + + def forward(self, x: Tensor) -> Tensor: + residual = x + out = self.conv1(x) + out = self.conv2(out) + + if self.downsample: + residual = self.downsample(x) + + out += residual + + if hasattr(self, 'act'): + out = self.act(out) + + return out + + +class Bottleneck(BaseModule): + """Bottleneck block from `ResNet `_. + + Args: + in_channels (int): Input channels. + channels (int): Output channels. + stride (int): Stride of the first block. Default: 1. + downsample (nn.Module, optional): Downsample operation on identity. + Default: None. + norm_cfg (dict, optional): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict, optional): Config dict for activation layer in + ConvModule. Default: dict(type='ReLU', inplace=True). + act_cfg_out (dict, optional): Config dict for activation layer at + the last of the block. Default: None. + init_cfg (dict, optional): Initialization config dict. Default: None. 
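+
+    Example:
+        A minimal shape-check sketch; with ``in_channels`` equal to
+        ``channels * expansion`` no downsample branch is needed::
+
+            >>> import torch
+            >>> block = Bottleneck(in_channels=64, channels=32)
+            >>> block(torch.rand(1, 64, 8, 8)).shape
+            torch.Size([1, 64, 8, 8])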
+ """ + + expansion = 2 + + def __init__(self, + in_channels: int, + channels: int, + stride: int = 1, + downsample: Optional[nn.Module] = None, + norm_cfg: OptConfigType = dict(type='BN'), + act_cfg: OptConfigType = dict(type='ReLU', inplace=True), + act_cfg_out: OptConfigType = None, + init_cfg: OptConfigType = None): + super().__init__(init_cfg) + self.conv1 = ConvModule( + in_channels, channels, 1, norm_cfg=norm_cfg, act_cfg=act_cfg) + self.conv2 = ConvModule( + channels, + channels, + 3, + stride, + 1, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.conv3 = ConvModule( + channels, + channels * self.expansion, + 1, + norm_cfg=norm_cfg, + act_cfg=None) + if act_cfg_out: + self.act = MODELS.build(act_cfg_out) + self.downsample = downsample + + def forward(self, x: Tensor) -> Tensor: + residual = x + + out = self.conv1(x) + out = self.conv2(out) + out = self.conv3(out) + + if self.downsample: + residual = self.downsample(x) + + out += residual + + if hasattr(self, 'act'): + out = self.act(out) + + return out diff --git a/mmseg/models/utils/ppm.py b/mmseg/models/utils/ppm.py new file mode 100644 index 0000000000..5fe6ff26fa --- /dev/null +++ b/mmseg/models/utils/ppm.py @@ -0,0 +1,193 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import Dict, List + +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import ConvModule +from mmengine.model import BaseModule, ModuleList, Sequential +from torch import Tensor + + +class DAPPM(BaseModule): + """DAPPM module in `DDRNet `_. + + Args: + in_channels (int): Input channels. + branch_channels (int): Branch channels. + out_channels (int): Output channels. + num_scales (int): Number of scales. + kernel_sizes (list[int]): Kernel sizes of each scale. + strides (list[int]): Strides of each scale. + paddings (list[int]): Paddings of each scale. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU', inplace=True). + conv_cfg (dict): Config dict for convolution layer in ConvModule. + Default: dict(order=('norm', 'act', 'conv'), bias=False). + upsample_mode (str): Upsample mode. Default: 'bilinear'. 
+ """ + + def __init__(self, + in_channels: int, + branch_channels: int, + out_channels: int, + num_scales: int, + kernel_sizes: List[int] = [5, 9, 17], + strides: List[int] = [2, 4, 8], + paddings: List[int] = [2, 4, 8], + norm_cfg: Dict = dict(type='BN', momentum=0.1), + act_cfg: Dict = dict(type='ReLU', inplace=True), + conv_cfg: Dict = dict( + order=('norm', 'act', 'conv'), bias=False), + upsample_mode: str = 'bilinear'): + super().__init__() + + self.num_scales = num_scales + self.unsample_mode = upsample_mode + self.in_channels = in_channels + self.branch_channels = branch_channels + self.out_channels = out_channels + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.conv_cfg = conv_cfg + + self.scales = ModuleList([ + ConvModule( + in_channels, + branch_channels, + kernel_size=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **conv_cfg) + ]) + for i in range(1, num_scales - 1): + self.scales.append( + Sequential(*[ + nn.AvgPool2d( + kernel_size=kernel_sizes[i - 1], + stride=strides[i - 1], + padding=paddings[i - 1]), + ConvModule( + in_channels, + branch_channels, + kernel_size=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **conv_cfg) + ])) + self.scales.append( + Sequential(*[ + nn.AdaptiveAvgPool2d((1, 1)), + ConvModule( + in_channels, + branch_channels, + kernel_size=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **conv_cfg) + ])) + self.processes = ModuleList() + for i in range(num_scales - 1): + self.processes.append( + ConvModule( + branch_channels, + branch_channels, + kernel_size=3, + padding=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **conv_cfg)) + + self.compression = ConvModule( + branch_channels * num_scales, + out_channels, + kernel_size=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **conv_cfg) + + self.shortcut = ConvModule( + in_channels, + out_channels, + kernel_size=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **conv_cfg) + + def forward(self, inputs: Tensor): + feats = [] + feats.append(self.scales[0](inputs)) + + for i in range(1, self.num_scales): + feat_up = F.interpolate( + self.scales[i](inputs), + size=inputs.shape[2:], + mode=self.unsample_mode) + feats.append(self.processes[i - 1](feat_up + feats[i - 1])) + + return self.compression(torch.cat(feats, + dim=1)) + self.shortcut(inputs) + + +class PAPPM(DAPPM): + """PAPPM module in `PIDNet `_. + + Args: + in_channels (int): Input channels. + branch_channels (int): Branch channels. + out_channels (int): Output channels. + num_scales (int): Number of scales. + kernel_sizes (list[int]): Kernel sizes of each scale. + strides (list[int]): Strides of each scale. + paddings (list[int]): Paddings of each scale. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN', momentum=0.1). + act_cfg (dict): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU', inplace=True). + conv_cfg (dict): Config dict for convolution layer in ConvModule. + Default: dict(order=('norm', 'act', 'conv'), bias=False). + upsample_mode (str): Upsample mode. Default: 'bilinear'. 
+ """ + + def __init__(self, + in_channels: int, + branch_channels: int, + out_channels: int, + num_scales: int, + kernel_sizes: List[int] = [5, 9, 17], + strides: List[int] = [2, 4, 8], + paddings: List[int] = [2, 4, 8], + norm_cfg: Dict = dict(type='BN', momentum=0.1), + act_cfg: Dict = dict(type='ReLU', inplace=True), + conv_cfg: Dict = dict( + order=('norm', 'act', 'conv'), bias=False), + upsample_mode: str = 'bilinear'): + super().__init__(in_channels, branch_channels, out_channels, + num_scales, kernel_sizes, strides, paddings, norm_cfg, + act_cfg, conv_cfg, upsample_mode) + + self.processes = ConvModule( + self.branch_channels * (self.num_scales - 1), + self.branch_channels * (self.num_scales - 1), + kernel_size=3, + padding=1, + groups=self.num_scales - 1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + **self.conv_cfg) + + def forward(self, inputs: Tensor): + x_ = self.scales[0](inputs) + feats = [] + for i in range(1, self.num_scales): + feat_up = F.interpolate( + self.scales[i](inputs), + size=inputs.shape[2:], + mode=self.unsample_mode, + align_corners=False) + feats.append(feat_up + x_) + scale_out = self.processes(torch.cat(feats, dim=1)) + return self.compression(torch.cat([x_, scale_out], + dim=1)) + self.shortcut(inputs) diff --git a/mmseg/utils/class_names.py b/mmseg/utils/class_names.py index 662199f21e..961a08520d 100644 --- a/mmseg/utils/class_names.py +++ b/mmseg/utils/class_names.py @@ -126,6 +126,126 @@ def stare_classes(): return ['background', 'vessel'] +def mapillary_v1_classes(): + """mapillary_v1 class names for external use.""" + return [ + 'Bird', 'Ground Animal', 'Curb', 'Fence', 'Guard Rail', 'Barrier', + 'Wall', 'Bike Lane', 'Crosswalk - Plain', 'Curb Cut', 'Parking', + 'Pedestrian Area', 'Rail Track', 'Road', 'Service Lane', 'Sidewalk', + 'Bridge', 'Building', 'Tunnel', 'Person', 'Bicyclist', 'Motorcyclist', + 'Other Rider', 'Lane Marking - Crosswalk', 'Lane Marking - General', + 'Mountain', 'Sand', 'Sky', 'Snow', 'Terrain', 'Vegetation', 'Water', + 'Banner', 'Bench', 'Bike Rack', 'Billboard', 'Catch Basin', + 'CCTV Camera', 'Fire Hydrant', 'Junction Box', 'Mailbox', 'Manhole', + 'Phone Booth', 'Pothole', 'Street Light', 'Pole', 'Traffic Sign Frame', + 'Utility Pole', 'Traffic Light', 'Traffic Sign (Back)', + 'Traffic Sign (Front)', 'Trash Can', 'Bicycle', 'Boat', 'Bus', 'Car', + 'Caravan', 'Motorcycle', 'On Rails', 'Other Vehicle', 'Trailer', + 'Truck', 'Wheeled Slow', 'Car Mount', 'Ego Vehicle', 'Unlabeled' + ] + + +def mapillary_v1_palette(): + """mapillary_v1_ palette for external use.""" + return [[165, 42, 42], [0, 192, 0], [196, 196, 196], [190, 153, 153], + [180, 165, 180], [90, 120, 150], [102, 102, 156], [128, 64, 255], + [140, 140, 200], [170, 170, 170], [250, 170, 160], [96, 96, 96], + [230, 150, 140], [128, 64, 128], [110, 110, 110], [244, 35, 232], + [150, 100, 100], [70, 70, 70], [150, 120, 90], [220, 20, 60], + [255, 0, 0], [255, 0, 100], [255, 0, 200], [200, 128, 128], + [255, 255, 255], [64, 170, 64], [230, 160, 50], [70, 130, 180], + [190, 255, 255], [152, 251, 152], [107, 142, 35], [0, 170, 30], + [255, 255, 128], [250, 0, 30], [100, 140, 180], [220, 220, 220], + [220, 128, 128], [222, 40, 40], [100, 170, 30], [40, 40, 40], + [33, 33, 33], [100, 128, 160], [142, 0, 0], [70, 100, 150], + [210, 170, 100], [153, 153, 153], [128, 128, 128], [0, 0, 80], + [250, 170, 30], [192, 192, 192], [220, 220, 0], [140, 140, 20], + [119, 11, 32], [150, 0, 255], [0, 60, 100], [0, 0, 142], + [0, 0, 90], [0, 0, 230], [0, 80, 100], [128, 64, 64], [0, 
0, 110], + [0, 0, 70], [0, 0, 192], [32, 32, 32], [120, 10, 10], [0, 0, 0]] + + +def mapillary_v2_classes(): + """mapillary_v2 class names for external use.""" + return [ + 'Bird', 'Ground Animal', 'Ambiguous Barrier', 'Concrete Block', 'Curb', + 'Fence', 'Guard Rail', 'Barrier', 'Road Median', 'Road Side', + 'Lane Separator', 'Temporary Barrier', 'Wall', 'Bike Lane', + 'Crosswalk - Plain', 'Curb Cut', 'Driveway', 'Parking', + 'Parking Aisle', 'Pedestrian Area', 'Rail Track', 'Road', + 'Road Shoulder', 'Service Lane', 'Sidewalk', 'Traffic Island', + 'Bridge', 'Building', 'Garage', 'Tunnel', 'Person', 'Person Group', + 'Bicyclist', 'Motorcyclist', 'Other Rider', + 'Lane Marking - Dashed Line', 'Lane Marking - Straight Line', + 'Lane Marking - Zigzag Line', 'Lane Marking - Ambiguous', + 'Lane Marking - Arrow (Left)', 'Lane Marking - Arrow (Other)', + 'Lane Marking - Arrow (Right)', + 'Lane Marking - Arrow (Split Left or Straight)', + 'Lane Marking - Arrow (Split Right or Straight)', + 'Lane Marking - Arrow (Straight)', 'Lane Marking - Crosswalk', + 'Lane Marking - Give Way (Row)', 'Lane Marking - Give Way (Single)', + 'Lane Marking - Hatched (Chevron)', + 'Lane Marking - Hatched (Diagonal)', 'Lane Marking - Other', + 'Lane Marking - Stop Line', 'Lane Marking - Symbol (Bicycle)', + 'Lane Marking - Symbol (Other)', 'Lane Marking - Text', + 'Lane Marking (only) - Dashed Line', 'Lane Marking (only) - Crosswalk', + 'Lane Marking (only) - Other', 'Lane Marking (only) - Test', + 'Mountain', 'Sand', 'Sky', 'Snow', 'Terrain', 'Vegetation', 'Water', + 'Banner', 'Bench', 'Bike Rack', 'Catch Basin', 'CCTV Camera', + 'Fire Hydrant', 'Junction Box', 'Mailbox', 'Manhole', 'Parking Meter', + 'Phone Booth', 'Pothole', 'Signage - Advertisement', + 'Signage - Ambiguous', 'Signage - Back', 'Signage - Information', + 'Signage - Other', 'Signage - Store', 'Street Light', 'Pole', + 'Pole Group', 'Traffic Sign Frame', 'Utility Pole', 'Traffic Cone', + 'Traffic Light - General (Single)', 'Traffic Light - Pedestrians', + 'Traffic Light - General (Upright)', + 'Traffic Light - General (Horizontal)', 'Traffic Light - Cyclists', + 'Traffic Light - Other', 'Traffic Sign - Ambiguous', + 'Traffic Sign (Back)', 'Traffic Sign - Direction (Back)', + 'Traffic Sign - Direction (Front)', 'Traffic Sign (Front)', + 'Traffic Sign - Parking', 'Traffic Sign - Temporary (Back)', + 'Traffic Sign - Temporary (Front)', 'Trash Can', 'Bicycle', 'Boat', + 'Bus', 'Car', 'Caravan', 'Motorcycle', 'On Rails', 'Other Vehicle', + 'Trailer', 'Truck', 'Vehicle Group', 'Wheeled Slow', 'Water Valve', + 'Car Mount', 'Dynamic', 'Ego Vehicle', 'Ground', 'Static', 'Unlabeled' + ] + + +def mapillary_v2_palette(): + """mapillary_v2_ palette for external use.""" + return [[165, 42, 42], [0, 192, 0], [250, 170, 31], [250, 170, 32], + [196, 196, 196], [190, 153, 153], [180, 165, 180], [90, 120, 150], + [250, 170, 33], [250, 170, 34], [128, 128, 128], [250, 170, 35], + [102, 102, 156], [128, 64, 255], [140, 140, 200], [170, 170, 170], + [250, 170, 36], [250, 170, 160], [250, 170, 37], [96, 96, 96], + [230, 150, 140], [128, 64, 128], [110, 110, 110], [110, 110, 110], + [244, 35, 232], [128, 196, 128], [150, 100, 100], [70, 70, 70], + [150, 150, 150], [150, 120, 90], [220, 20, 60], [220, 20, 60], + [255, 0, 0], [255, 0, 100], [255, 0, 200], [255, 255, 255], + [255, 255, 255], [250, 170, 29], [250, 170, 28], [250, 170, 26], + [250, 170, 25], [250, 170, 24], [250, 170, 22], [250, 170, 21], + [250, 170, 20], [255, 255, 255], [250, 170, 19], [250, 170, 18], + [250, 
170, 12], [250, 170, 11], [255, 255, 255], [255, 255, 255], + [250, 170, 16], [250, 170, 15], [250, 170, 15], [255, 255, 255], + [255, 255, 255], [255, 255, 255], [255, 255, 255], [64, 170, 64], + [230, 160, 50], [70, 130, 180], [190, 255, 255], [152, 251, 152], + [107, 142, 35], [0, 170, 30], [255, 255, 128], [250, 0, 30], + [100, 140, 180], [220, 128, 128], [222, 40, 40], [100, 170, 30], + [40, 40, 40], [33, 33, 33], [100, 128, 160], [20, 20, 255], + [142, 0, 0], [70, 100, 150], [250, 171, 30], [250, 172, 30], + [250, 173, 30], [250, 174, 30], [250, 175, 30], [250, 176, 30], + [210, 170, 100], [153, 153, 153], [153, 153, 153], [128, 128, 128], + [0, 0, 80], [210, 60, 60], [250, 170, 30], [250, 170, 30], + [250, 170, 30], [250, 170, 30], [250, 170, 30], [250, 170, 30], + [192, 192, 192], [192, 192, 192], [192, 192, 192], [220, 220, 0], + [220, 220, 0], [0, 0, 196], [192, 192, 192], [220, 220, 0], + [140, 140, 20], [119, 11, 32], [150, 0, 255], [0, 60, 100], + [0, 0, 142], [0, 0, 90], [0, 0, 230], [0, 80, 100], [128, 64, 64], + [0, 0, 110], [0, 0, 70], [0, 0, 142], [0, 0, 192], [170, 170, 170], + [32, 32, 32], [111, 74, 0], [120, 10, 10], [81, 0, 81], + [111, 111, 0], [0, 0, 0]] + + def cityscapes_palette(): """Cityscapes palette for external use.""" return [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156], @@ -313,7 +433,9 @@ def lip_palette(): ], 'isaid': ['isaid', 'iSAID'], 'stare': ['stare', 'STARE'], - 'lip': ['LIP', 'lip'] + 'lip': ['LIP', 'lip'], + 'mapillary_v1': ['mapillary_v1'], + 'mapillary_v2': ['mapillary_v2'] } diff --git a/mmseg/visualization/local_visualizer.py b/mmseg/visualization/local_visualizer.py index d11ad79c81..504004dfcb 100644 --- a/mmseg/visualization/local_visualizer.py +++ b/mmseg/visualization/local_visualizer.py @@ -31,7 +31,7 @@ class SegLocalVisualizer(Visualizer): `cityscapes` classes by default. Defaults to None. palette (list, optional): Input palette for result rendering, which is a list of color palette responding to the classes. Defaults to None. - dataset_name (str, optional): `Dataset name or alias `_ + dataset_name (str, optional): `Dataset name or alias `_ visulizer will use the meta information of the dataset i.e. classes and palette, but the `classes` and `palette` have higher priority. Defaults to None. @@ -133,7 +133,7 @@ def set_dataset_meta(self, palette (list, optional): Input palette for result rendering, which is a list of color palette responding to the classes. Defaults to None. - dataset_name (str, optional): `Dataset name or alias `_ + dataset_name (str, optional): `Dataset name or alias `_ visulizer will use the meta information of the dataset i.e. classes and palette, but the `classes` and `palette` have higher priority. Defaults to None. 
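A quick way to exercise the new `DAPPM`/`PAPPM` modules from the `mmseg/models/utils/ppm.py` hunk above is a forward-pass smoke test. The sketch below is not part of the diff: it assumes this branch is installed from source and imports straight from the new module file, and the channel sizes and input shape are made up for illustration.

```python
# Smoke-test sketch for the new DAPPM/PAPPM modules (illustrative shapes).
import torch

from mmseg.models.utils.ppm import DAPPM, PAPPM

# With the default kernel_sizes/strides/paddings (three entries each),
# num_scales=5 lines up with the construction loops: one 1x1 branch,
# three AvgPool branches, and one global-average-pool branch.
x = torch.randn(2, 128, 16, 32)
dappm = DAPPM(in_channels=128, branch_channels=96, out_channels=256,
              num_scales=5)
pappm = PAPPM(in_channels=128, branch_channels=96, out_channels=256,
              num_scales=5)

# Both modules keep the spatial size and map 128 -> 256 channels.
assert dappm(x).shape == (2, 256, 16, 32)
assert pappm(x).shape == (2, 256, 16, 32)
```

Note that `PAPPM` fuses its upsampled branches with a single grouped 3x3 convolution (`groups=num_scales - 1`), so `branch_channels * (num_scales - 1)` must be divisible by `num_scales - 1`, which holds for any integer `branch_channels`.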
diff --git a/model-index.yml b/model-index.yml index ae96bd30f7..5e87c386dd 100644 --- a/model-index.yml +++ b/model-index.yml @@ -1,48 +1,50 @@ Import: -- configs/ann/ann.yml -- configs/apcnet/apcnet.yml -- configs/beit/beit.yml -- configs/bisenetv1/bisenetv1.yml -- configs/bisenetv2/bisenetv2.yml -- configs/ccnet/ccnet.yml -- configs/cgnet/cgnet.yml -- configs/convnext/convnext.yml -- configs/danet/danet.yml -- configs/deeplabv3/deeplabv3.yml -- configs/deeplabv3plus/deeplabv3plus.yml -- configs/dmnet/dmnet.yml -- configs/dnlnet/dnlnet.yml -- configs/dpt/dpt.yml -- configs/emanet/emanet.yml -- configs/encnet/encnet.yml -- configs/erfnet/erfnet.yml -- configs/fastfcn/fastfcn.yml -- configs/fastscnn/fastscnn.yml -- configs/fcn/fcn.yml -- configs/gcnet/gcnet.yml -- configs/hrnet/hrnet.yml -- configs/icnet/icnet.yml -- configs/isanet/isanet.yml -- configs/knet/knet.yml -- configs/mae/mae.yml -- configs/mask2former/mask2former.yml -- configs/maskformer/maskformer.yml -- configs/mobilenet_v2/mobilenet_v2.yml -- configs/mobilenet_v3/mobilenet_v3.yml -- configs/nonlocal_net/nonlocal_net.yml -- configs/ocrnet/ocrnet.yml -- configs/point_rend/point_rend.yml -- configs/poolformer/poolformer.yml -- configs/psanet/psanet.yml -- configs/pspnet/pspnet.yml -- configs/resnest/resnest.yml -- configs/segformer/segformer.yml -- configs/segmenter/segmenter.yml -- configs/sem_fpn/sem_fpn.yml -- configs/setr/setr.yml -- configs/stdc/stdc.yml -- configs/swin/swin.yml -- configs/twins/twins.yml -- configs/unet/unet.yml -- configs/upernet/upernet.yml -- configs/vit/vit.yml +- configs/ann/metafile.yaml +- configs/apcnet/metafile.yaml +- configs/beit/metafile.yaml +- configs/bisenetv1/metafile.yaml +- configs/bisenetv2/metafile.yaml +- configs/ccnet/metafile.yaml +- configs/cgnet/metafile.yaml +- configs/convnext/metafile.yaml +- configs/danet/metafile.yaml +- configs/deeplabv3/metafile.yaml +- configs/deeplabv3plus/metafile.yaml +- configs/dmnet/metafile.yaml +- configs/dnlnet/metafile.yaml +- configs/dpt/metafile.yaml +- configs/emanet/metafile.yaml +- configs/encnet/metafile.yaml +- configs/erfnet/metafile.yaml +- configs/fastfcn/metafile.yaml +- configs/fastscnn/metafile.yaml +- configs/fcn/metafile.yaml +- configs/gcnet/metafile.yaml +- configs/hrnet/metafile.yaml +- configs/icnet/metafile.yaml +- configs/isanet/metafile.yaml +- configs/knet/metafile.yaml +- configs/mae/metafile.yaml +- configs/mask2former/metafile.yaml +- configs/maskformer/metafile.yaml +- configs/mobilenet_v2/metafile.yaml +- configs/mobilenet_v3/metafile.yaml +- configs/nonlocal_net/metafile.yaml +- configs/ocrnet/metafile.yaml +- configs/pidnet/metafile.yaml +- configs/point_rend/metafile.yaml +- configs/poolformer/metafile.yaml +- configs/psanet/metafile.yaml +- configs/pspnet/metafile.yaml +- configs/resnest/metafile.yaml +- configs/segformer/metafile.yaml +- configs/segmenter/metafile.yaml +- configs/segnext/metafile.yaml +- configs/sem_fpn/metafile.yaml +- configs/setr/metafile.yaml +- configs/stdc/metafile.yaml +- configs/swin/metafile.yaml +- configs/twins/metafile.yaml +- configs/unet/metafile.yaml +- configs/upernet/metafile.yaml +- configs/vit/metafile.yaml diff --git a/projects/example_project/README.md b/projects/example_project/README.md index 4338b8acac..e4fd03cf4a 100644 --- a/projects/example_project/README.md +++ b/projects/example_project/README.md @@ -53,7 +53,7 @@ mim train mmsegmentation configs/fcn_dummy-r50-d8_4xb2-40k_cityscapes-512x1024.p mim test mmsegmentation 
configs/fcn_dummy-r50-d8_4xb2-40k_cityscapes-512x1024.py --work-dir work_dirs/dummy_resnet --checkpoint ${CHECKPOINT_PATH} ``` -> List the results as usually done in other model's README. \[Example\](https://github.com/open-mmlab/mmsegmentation/tree/dev-1.x/configs/fcn#results-and-models +> List the results as usually done in other models' READMEs. [Example](https://github.com/open-mmlab/mmsegmentation/tree/main/configs/fcn#results-and-models) > You should claim whether this is based on the pre-trained weights, which are converted from the official release; or it's a reproduced result obtained from retraining the model in this project | Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | @@ -113,11 +113,11 @@ Here is a checklist illustrating a usual development workflow of a successful pr - [ ] Type hints and docstrings -> Ideally *all* the methods should have [type hints](https://www.pythontutorial.net/python-basics/python-type-hints/) and [docstrings](https://google.github.io/styleguide/pyguide.html#381-docstrings). [Example](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/utils/io.py#L9) +> Ideally *all* the methods should have [type hints](https://www.pythontutorial.net/python-basics/python-type-hints/) and [docstrings](https://google.github.io/styleguide/pyguide.html#381-docstrings). [Example](https://github.com/open-mmlab/mmsegmentation/blob/main/mmseg/utils/io.py#L9) - [ ] Unit tests -> Unit tests for each module are required. [Example](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/tests/test_utils/test_io.py#L14) +> Unit tests for each module are required. [Example](https://github.com/open-mmlab/mmsegmentation/blob/main/tests/test_utils/test_io.py#L14) - [ ] Code polishing @@ -125,10 +125,10 @@ Here is a checklist illustrating a usual development workflow of a successful pr - [ ] Metafile.yml -> It will be parsed by MIM and Inferencer. [Example](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn.yml) +> It will be parsed by MIM and Inferencer. [Example](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/fcn.yml) - [ ] Move your modules into the core package following the codebase's file hierarchy structure. -> In particular, you may have to refactor this README into a standard one. [Example](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/README.md) +> In particular, you may have to refactor this README into a standard one. [Example](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/fcn/README.md) - [ ] Refactor your modules into the core package following the codebase's file hierarchy structure. diff --git a/projects/faq.md b/projects/faq.md index 724c1cf6a5..dfda48ed09 100644 --- a/projects/faq.md +++ b/projects/faq.md @@ -1,6 +1,6 @@ Q1: Why set up `projects/` folder? -Implementing new models and features into OpenMMLab's algorithm libraries could be troublesome due to the rigorous requirements on code quality, which could hinder the fast iteration of SOTA models and might discourage our members from sharing their latest outcomes here. And that's why we have this `projects/` folder now, where some experimental features, frameworks and models are placed, only needed to satisfy the minimum requirement on the code quality, and can be used as standalone libraries. Users are welcome to use them if they [use MMSegmentation from source](https://mmsegmentation.readthedocs.io/en/dev-1.x/get_started.html#best-practices). 
+Implementing new models and features into OpenMMLab's algorithm libraries could be troublesome due to the rigorous requirements on code quality, which could hinder the fast iteration of SOTA models and might discourage our members from sharing their latest outcomes here. And that's why we have this `projects/` folder now, where some experimental features, frameworks and models are placed, only needed to satisfy the minimum requirement on the code quality, and can be used as standalone libraries. Users are welcome to use them if they [use MMSegmentation from source](https://mmsegmentation.readthedocs.io/en/main/get_started.html#best-practices). Q2: Why should there be a checklist for a project? diff --git a/projects/hssn/README.md b/projects/hssn/README.md index c2a74c69f9..9dcbf37de0 100644 --- a/projects/hssn/README.md +++ b/projects/hssn/README.md @@ -41,9 +41,9 @@ bash tools/dist_test.sh projects/hssn/configs/hssn/hieraseg_deeplabv3plus_r101-d ### Cityscapes -| Method | Backbone | Crop Size | mIoU | mIoU (ms+flip) | config | model | -| :--------: | :------: | :-------: | :---: | :------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------: | -| DeeplabV3+ | R-101-D8 | 512x1024 | 81.61 | 82.71 | [config](https://github.com/open-mmlab/mmsegmentation/tree/dev-1.x/projects/HieraSeg/configs/hieraseg/hieraseg_deeplabv3plus_r101-d8_4xb2-80l_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hieraseg/hieraseg_deeplabv3plus_r101-d8_4xb2-80k_cityscapes-512x1024_20230112_125023-bc59a3d1.pth) | +| Method | Backbone | Crop Size | mIoU | mIoU (ms+flip) | config | model | +| :--------: | :------: | :-------: | :---: | :------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------: | +| DeeplabV3+ | R-101-D8 | 512x1024 | 81.61 | 82.71 | [config](https://github.com/open-mmlab/mmsegmentation/tree/main/projects/HieraSeg/configs/hieraseg/hieraseg_deeplabv3plus_r101-d8_4xb2-80l_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hieraseg/hieraseg_deeplabv3plus_r101-d8_4xb2-80k_cityscapes-512x1024_20230112_125023-bc59a3d1.pth) | diff --git a/projects/isnet/README.md b/projects/isnet/README.md index 3a3172a9d9..0a79ad6a4f 100644 --- a/projects/isnet/README.md +++ b/projects/isnet/README.md @@ -96,11 +96,11 @@ A project does not necessarily have to be finished in a single PR, but it's esse - [ ] Type hints and docstrings - + - [ ] Unit tests - + - [ ] Code polishing @@ -108,10 +108,10 @@ A project does not necessarily have to be finished in a single PR, but it's esse - [ ] Metafile.yml - + - [ ] Move your modules into the core package following the codebase's file hierarchy structure. - + - [ ] Refactor your modules into the core package following the codebase's file hierarchy structure. 
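The `class_names.py` hunk earlier in this diff also registers `mapillary_v1`/`mapillary_v2` in `dataset_aliases`, so the new metadata should be reachable by alias. A minimal check, assuming `get_classes`/`get_palette` are exported from `mmseg.utils` the same way they are for the existing aliases in that file:

```python
# Sketch: resolve the newly added Mapillary aliases (assumes
# get_classes/get_palette are exported from mmseg.utils).
from mmseg.utils import get_classes, get_palette

classes = get_classes('mapillary_v1')
palette = get_palette('mapillary_v1')
assert len(classes) == len(palette) == 66  # 65 classes plus 'Unlabeled'
assert len(get_classes('mapillary_v2')) == 124
```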
diff --git a/projects/mapillary_dataset/README.md b/projects/mapillary_dataset/README.md index 2b3099522e..44a1e33ef9 100644 --- a/projects/mapillary_dataset/README.md +++ b/projects/mapillary_dataset/README.md @@ -10,7 +10,7 @@ This project implements **`Mapillary Vistas Dataset`** ### Dataset preparing -Preparing `Mapillary Vistas Dataset` dataset following [Mapillary Vistas Dataset Preparing Guide](https://github.com/open-mmlab/mmsegmentation/tree/dev-1.x/projects/mapillary_dataset/docs/en/user_guides/2_dataset_prepare.md) +Prepare the `Mapillary Vistas Dataset` following the [Mapillary Vistas Dataset Preparing Guide](https://github.com/open-mmlab/mmsegmentation/tree/main/projects/mapillary_dataset/docs/en/user_guides/2_dataset_prepare.md) ```none mmsegmentation ├── mmseg ├── tools ├── configs ├── data │ ├── mapillary @@ -34,6 +34,7 @@ Preparing `Mapillary Vistas Dataset` dataset following [Mapillary Vistas Dataset | │   │   │ └── polygons │ │ ├── validation │ │ │ ├── images + │ │ │ ├── v1.2 | │ │ │ ├── instances | │ │ │ ├── labels | │ │ │ ├── labels_mask @@ -46,12 +47,12 @@ Preparing `Mapillary Vistas Dataset` dataset following [Mapillary Vistas Dataset | │   │   │ └── polygons ``` -### Training commands with `deeplabv3plus_r101-d8_4xb2-240k_mapillay-512x1024.py` +### Training commands ```bash # Dataset train commands # at `mmsegmentation` folder -bash tools/dist_train.sh projects/mapillary_dataset/configs/deeplabv3plus_r101-d8_4xb2-240k_mapillay-512x1024.py 4 +bash tools/dist_train.sh projects/mapillary_dataset/configs/deeplabv3plus_r101-d8_4xb2-240k_mapillay_v1-512x1024.py 4 ``` ## Checklist @@ -66,20 +67,20 @@ bash tools/dist_train.sh projects/mapillary_dataset/configs/deeplabv3plus_r101-d - [x] A full README -- [ ] Milestone 2: Indicates a successful model implementation. +- [x] Milestone 2: Indicates a successful model implementation. - - [ ] Training-time correctness + - [x] Training-time correctness -- [ ] Milestone 3: Good to be a part of our core package! +- [x] Milestone 3: Good to be a part of our core package! - - [ ] Type hints and docstrings + - [x] Type hints and docstrings - - [ ] Unit tests + - [x] Unit tests - - [ ] Code polishing + - [x] Code polishing - - [ ] Metafile.yml + - [x] Metafile.yml -- [ ] Move your modules into the core package following the codebase's file hierarchy structure. +- [x] Move your modules into the core package following the codebase's file hierarchy structure. -- [ ] Refactor your modules into the core package following the codebase's file hierarchy structure. 
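After training with the command in the README above, one way to sanity-check a checkpoint is the mmseg 1.x high-level inference API. This is a sketch only: the checkpoint path and test image below are hypothetical placeholders.

```python
# Sketch: single-image inference with a trained Mapillary v1.2 model.
from mmseg.apis import inference_model, init_model

cfg = ('projects/mapillary_dataset/configs/'
       'deeplabv3plus_r101-d8_4xb2-240k_mapillay_v1-512x1024.py')
ckpt = 'work_dirs/mapillary_v1/iter_240000.pth'  # hypothetical path

model = init_model(cfg, ckpt, device='cuda:0')
result = inference_model(model, 'demo/demo.png')  # returns a SegDataSample
print(result.pred_sem_seg.data.shape)  # (1, H, W) label map over 66 classes
```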
diff --git a/projects/mapillary_dataset/configs/_base_/datasets/mapillary_v1.py b/projects/mapillary_dataset/configs/_base_/datasets/mapillary_v1.py new file mode 100644 index 0000000000..611aa4741b --- /dev/null +++ b/projects/mapillary_dataset/configs/_base_/datasets/mapillary_v1.py @@ -0,0 +1,68 @@ +# dataset settings +dataset_type = 'MapillaryDataset_v1' +data_root = 'data/mapillary/' +crop_size = (512, 1024) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict( + type='RandomResize', + scale=(2048, 1024), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(2048, 1024), keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] +img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] +tta_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='Resize', scale_factor=r, keep_ratio=True) + for r in img_ratios + ], + [ + dict(type='RandomFlip', prob=0., direction='horizontal'), + dict(type='RandomFlip', prob=1., direction='horizontal') + ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] + ]) +] +train_dataloader = dict( + batch_size=2, + num_workers=4, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='training/images', seg_map_path='training/v1.2/labels'), + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='validation/images', + seg_map_path='validation/v1.2/labels'), + pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) +test_evaluator = val_evaluator diff --git a/projects/mapillary_dataset/configs/_base_/datasets/mapillary_v1_65.py b/projects/mapillary_dataset/configs/_base_/datasets/mapillary_v1_65.py new file mode 100644 index 0000000000..f594f37333 --- /dev/null +++ b/projects/mapillary_dataset/configs/_base_/datasets/mapillary_v1_65.py @@ -0,0 +1,37 @@ +# dataset settings +_base_ = './mapillary_v1.py' +metainfo = dict( + classes=('Bird', 'Ground Animal', 'Curb', 'Fence', 'Guard Rail', 'Barrier', + 'Wall', 'Bike Lane', 'Crosswalk - Plain', 'Curb Cut', 'Parking', + 'Pedestrian Area', 'Rail Track', 'Road', 'Service Lane', + 'Sidewalk', 'Bridge', 'Building', 'Tunnel', 'Person', 'Bicyclist', + 'Motorcyclist', 'Other Rider', 'Lane Marking - Crosswalk', + 'Lane Marking - General', 'Mountain', 'Sand', 'Sky', 'Snow', + 'Terrain', 'Vegetation', 'Water', 'Banner', 'Bench', 'Bike Rack', + 'Billboard', 'Catch Basin', 'CCTV Camera', 'Fire Hydrant', + 'Junction Box', 'Mailbox', 'Manhole', 'Phone Booth', 'Pothole', + 'Street Light', 'Pole', 'Traffic Sign Frame', 'Utility Pole', + 'Traffic Light', 'Traffic Sign (Back)', 'Traffic Sign (Front)', + 'Trash Can', 'Bicycle', 'Boat', 'Bus', 'Car', 'Caravan', + 'Motorcycle', 'On Rails', 'Other Vehicle', 'Trailer', 'Truck', + 'Wheeled Slow', 'Car Mount', 
'Ego Vehicle'), + palette=[[165, 42, 42], [0, 192, 0], [196, 196, 196], [190, 153, 153], + [180, 165, 180], [90, 120, 150], [102, 102, 156], [128, 64, 255], + [140, 140, 200], [170, 170, 170], [250, 170, 160], [96, 96, 96], + [230, 150, 140], [128, 64, 128], [110, 110, 110], [244, 35, 232], + [150, 100, 100], [70, 70, 70], [150, 120, 90], [220, 20, 60], + [255, 0, 0], [255, 0, 100], [255, 0, 200], [200, 128, 128], + [255, 255, 255], [64, 170, 64], [230, 160, 50], [70, 130, 180], + [190, 255, 255], [152, 251, 152], [107, 142, 35], [0, 170, 30], + [255, 255, 128], [250, 0, 30], [100, 140, 180], [220, 220, 220], + [220, 128, 128], [222, 40, 40], [100, 170, 30], [40, 40, 40], + [33, 33, 33], [100, 128, 160], [142, 0, 0], [70, 100, 150], + [210, 170, 100], [153, 153, 153], [128, 128, 128], [0, 0, 80], + [250, 170, 30], [192, 192, 192], [220, 220, 0], [140, 140, 20], + [119, 11, 32], [150, 0, 255], [0, 60, 100], [0, 0, 142], + [0, 0, 90], [0, 0, 230], [0, 80, 100], [128, 64, 64], [0, 0, 110], + [0, 0, 70], [0, 0, 192], [32, 32, 32], [120, 10, 10]]) + +train_dataloader = dict(dataset=dict(metainfo=metainfo)) +val_dataloader = dict(dataset=dict(metainfo=metainfo)) +test_dataloader = val_dataloader diff --git a/projects/mapillary_dataset/configs/_base_/datasets/mapillary_v2.py b/projects/mapillary_dataset/configs/_base_/datasets/mapillary_v2.py new file mode 100644 index 0000000000..7cb7a958e5 --- /dev/null +++ b/projects/mapillary_dataset/configs/_base_/datasets/mapillary_v2.py @@ -0,0 +1,68 @@ +# dataset settings +dataset_type = 'MapillaryDataset_v2' +data_root = 'data/mapillary/' +crop_size = (512, 1024) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict( + type='RandomResize', + scale=(2048, 1024), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(2048, 1024), keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] +img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] +tta_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='Resize', scale_factor=r, keep_ratio=True) + for r in img_ratios + ], + [ + dict(type='RandomFlip', prob=0., direction='horizontal'), + dict(type='RandomFlip', prob=1., direction='horizontal') + ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] + ]) +] +train_dataloader = dict( + batch_size=2, + num_workers=4, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='training/images', seg_map_path='training/v2.0/labels'), + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='validation/images', + seg_map_path='validation/v2.0/labels'), + pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU']) +test_evaluator = val_evaluator diff --git 
a/projects/mapillary_dataset/configs/deeplabv3plus_r101-d8_4xb2-240k_mapillay-512x1024.py b/projects/mapillary_dataset/configs/deeplabv3plus_r101-d8_4xb2-240k_mapillay-512x1024.py deleted file mode 100644 index 6f7ad65ed8..0000000000 --- a/projects/mapillary_dataset/configs/deeplabv3plus_r101-d8_4xb2-240k_mapillay-512x1024.py +++ /dev/null @@ -1,103 +0,0 @@ -_base_ = ['./_base_/datasets/mapillary_v1_2.py'] # v 1.2 labels -# _base_ = ['./_base_/datasets/mapillary_v2_0.py'] # v2.0 labels -custom_imports = dict(imports=[ - 'projects.mapillary_dataset.mmseg.datasets.mapillary_v1_2', - 'projects.mapillary_dataset.mmseg.datasets.mapillary_v2_0', -]) - -norm_cfg = dict(type='SyncBN', requires_grad=True) -data_preprocessor = dict( - type='SegDataPreProcessor', - mean=[123.675, 116.28, 103.53], - std=[58.395, 57.12, 57.375], - bgr_to_rgb=True, - pad_val=0, - seg_pad_val=255, - size=(512, 1024)) - -model = dict( - type='EncoderDecoder', - data_preprocessor=data_preprocessor, - pretrained=None, - backbone=dict( - type='ResNet', - depth=101, - num_stages=4, - out_indices=(0, 1, 2, 3), - dilations=(1, 1, 2, 4), - strides=(1, 2, 1, 1), - norm_cfg=norm_cfg, - norm_eval=False, - style='pytorch', - contract_dilation=True), - decode_head=dict( - type='DepthwiseSeparableASPPHead', - in_channels=2048, - in_index=3, - channels=512, - dilations=(1, 12, 24, 36), - c1_in_channels=256, - c1_channels=48, - dropout_ratio=0.1, - num_classes=66, # v1.2 - # num_classes=124, # v2.0 - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), - auxiliary_head=dict( - type='FCNHead', - in_channels=1024, - in_index=2, - channels=256, - num_convs=1, - concat_input=False, - dropout_ratio=0.1, - num_classes=66, # v1.2 - # num_classes=124, # v2.0 - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), - train_cfg=dict(), - test_cfg=dict(mode='whole')) -default_scope = 'mmseg' -env_cfg = dict( - cudnn_benchmark=True, - mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), - dist_cfg=dict(backend='nccl')) -vis_backends = [dict(type='LocalVisBackend')] -visualizer = dict( - type='SegLocalVisualizer', - vis_backends=[dict(type='LocalVisBackend')], - name='visualizer') -log_processor = dict(by_epoch=False) -log_level = 'INFO' -load_from = None -resume = False -tta_model = dict(type='SegTTAModel') -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optim_wrapper = dict( - type='OptimWrapper', - optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001), - clip_grad=None) -param_scheduler = [ - dict( - type='PolyLR', - eta_min=0.0001, - power=0.9, - begin=0, - end=240000, - by_epoch=False) -] -train_cfg = dict( - type='IterBasedTrainLoop', max_iters=240000, val_interval=24000) -val_cfg = dict(type='ValLoop') -test_cfg = dict(type='TestLoop') -default_hooks = dict( - timer=dict(type='IterTimerHook'), - logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), - param_scheduler=dict(type='ParamSchedulerHook'), - checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=24000), - sampler_seed=dict(type='DistSamplerSeedHook'), - visualization=dict(type='SegVisualizationHook')) diff --git a/projects/mapillary_dataset/configs/deeplabv3plus_r101-d8_4xb2-240k_mapillay_v1-512x1024.py b/projects/mapillary_dataset/configs/deeplabv3plus_r101-d8_4xb2-240k_mapillay_v1-512x1024.py new file mode 100644 index 0000000000..b559e0d6aa --- /dev/null +++ 
b/projects/mapillary_dataset/configs/deeplabv3plus_r101-d8_4xb2-240k_mapillay_v1-512x1024.py @@ -0,0 +1,17 @@ +_base_ = [ + '../../../configs/_base_/models/deeplabv3plus_r50-d8.py', + './_base_/datasets/mapillary_v1.py', + '../../../configs/_base_/default_runtime.py', + '../../../configs/_base_/schedules/schedule_240k.py' +] +custom_imports = dict( + imports=['projects.mapillary_dataset.mmseg.datasets.mapillary']) + +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet101_v1c', + backbone=dict(depth=101), + decode_head=dict(num_classes=66), + auxiliary_head=dict(num_classes=66)) diff --git a/projects/mapillary_dataset/configs/deeplabv3plus_r101-d8_4xb2-240k_mapillay_v2-512x1024.py b/projects/mapillary_dataset/configs/deeplabv3plus_r101-d8_4xb2-240k_mapillay_v2-512x1024.py new file mode 100644 index 0000000000..cfe31a2c12 --- /dev/null +++ b/projects/mapillary_dataset/configs/deeplabv3plus_r101-d8_4xb2-240k_mapillay_v2-512x1024.py @@ -0,0 +1,16 @@ +_base_ = [ + '../../../configs/_base_/models/deeplabv3plus_r50-d8.py', + './_base_/datasets/mapillary_v2.py', + '../../../configs/_base_/default_runtime.py', + '../../../configs/_base_/schedules/schedule_240k.py' +] +custom_imports = dict( + imports=['projects.mapillary_dataset.mmseg.datasets.mapillary']) +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet101_v1c', + backbone=dict(depth=101), + decode_head=dict(num_classes=124), + auxiliary_head=dict(num_classes=124)) diff --git a/projects/mapillary_dataset/configs/pspnet_r101-d8_4xb2-240k_mapillay_v1-512x1024.py b/projects/mapillary_dataset/configs/pspnet_r101-d8_4xb2-240k_mapillay_v1-512x1024.py new file mode 100644 index 0000000000..1ca2b57f73 --- /dev/null +++ b/projects/mapillary_dataset/configs/pspnet_r101-d8_4xb2-240k_mapillay_v1-512x1024.py @@ -0,0 +1,16 @@ +_base_ = [ + '../../../configs/_base_/models/pspnet_r50-d8.py', + './_base_/datasets/mapillary_v1.py', + '../../../configs/_base_/default_runtime.py', + '../../../configs/_base_/schedules/schedule_240k.py' +] +custom_imports = dict( + imports=['projects.mapillary_dataset.mmseg.datasets.mapillary']) +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet101_v1c', + backbone=dict(depth=101), + decode_head=dict(num_classes=66), + auxiliary_head=dict(num_classes=66)) diff --git a/projects/mapillary_dataset/configs/pspnet_r101-d8_4xb2-240k_mapillay_v2-512x1024.py b/projects/mapillary_dataset/configs/pspnet_r101-d8_4xb2-240k_mapillay_v2-512x1024.py new file mode 100644 index 0000000000..c04746a3dc --- /dev/null +++ b/projects/mapillary_dataset/configs/pspnet_r101-d8_4xb2-240k_mapillay_v2-512x1024.py @@ -0,0 +1,16 @@ +_base_ = [ + '../../../configs/_base_/models/pspnet_r50-d8.py', + './_base_/datasets/mapillary_v2.py', + '../../../configs/_base_/default_runtime.py', + '../../../configs/_base_/schedules/schedule_240k.py' +] +custom_imports = dict( + imports=['projects.mapillary_dataset.mmseg.datasets.mapillary']) +crop_size = (512, 1024) +data_preprocessor = dict(size=crop_size) +model = dict( + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet101_v1c', + backbone=dict(depth=101), + decode_head=dict(num_classes=124), + auxiliary_head=dict(num_classes=124)) diff --git 
a/projects/mapillary_dataset/docs/en/user_guides/2_dataset_prepare.md b/projects/mapillary_dataset/docs/en/user_guides/2_dataset_prepare.md index 405e533156..c5cbc0f9b8 100644 --- a/projects/mapillary_dataset/docs/en/user_guides/2_dataset_prepare.md +++ b/projects/mapillary_dataset/docs/en/user_guides/2_dataset_prepare.md @@ -1,87 +1,20 @@ -## Prepare datasets +## Mapillary Vistas Datasets -It is recommended to symlink the dataset root to `$MMSEGMENTATION/data`. -If your folder structure is different, you may need to change the corresponding paths in config files. +- The dataset can be downloaded [here](https://www.mapillary.com/dataset/vistas) after registration. -```none -mmsegmentation -├── mmseg -├── tools -├── configs -├── data -│ ├── mapillary -│ │ ├── training -│ │ │ ├── images -│ │ │ ├── v1.2 -| │ │ │ ├── instances -| │ │ │ ├── labels -| │ │ │ ├── labels_mask -| │   │   │ └── panoptic -│ │ │ ├── v2.0 -| │ │ │ ├── instances -| │ │ │ ├── labels -| │ │ │ ├── labels_mask -| │ │ │ ├── panoptic -| │   │   │ └── polygons -│ │ ├── validation -│ │ │ ├── images -| │ │ │ ├── instances -| │ │ │ ├── labels -| │ │ │ ├── labels_mask -| │   │   │ └── panoptic -│ │ │ ├── v2.0 -| │ │ │ ├── instances -| │ │ │ ├── labels -| │ │ │ ├── labels_mask -| │ │ │ ├── panoptic -| │   │   │ └── polygons -``` +- The Mapillary Vistas Dataset uses 8-bit color-palette labels, so no conversion operation is required. -## Mapillary Vistas Datasets +- Assuming you have put the dataset zip file in `mmsegmentation/data/mapillary` -- The dataset could be download [here](https://www.mapillary.com/dataset/vistas) after registration. -- Assumption you have put the dataset zip file in `mmsegmentation/data` - Please run the following commands to unzip dataset. + ```bash - cd data - mkdir mapillary - unzip -d mapillary An-ZjB1Zm61yAZG0ozTymz8I8NqI4x0MrYrh26dq7kPgfu8vf9ImrdaOAVOFYbJ2pNAgUnVGBmbue9lTgdBOb5BbKXIpFs0fpYWqACbrQDChAA2fdX0zS9PcHu7fY8c-FOvyBVxPNYNFQuM.zip - ``` -- After unzip, you will get Mapillary Vistas Dataset like this structure. - ```none - ├── data - │ ├── mapillary - │ │ ├── training - │ │ │ ├── images - │ │ │ ├── v1.2 - | │ │ │ ├── instances - | │ │ │ ├── labels - | │   │   │ └── panoptic - │ │ │ ├── v2.0 - | │ │ │ ├── instances - | │ │ │ ├── labels - | │ │ │ ├── panoptic - | │   │   │ └── polygons - │ │ ├── validation - │ │ │ ├── images - | │ │ │ ├── instances - | │ │ │ ├── labels - | │   │   │ └── panoptic - │ │ │ ├── v2.0 - | │ │ │ ├── instances - | │ │ │ ├── labels - | │ │ │ ├── panoptic - | │   │   │ └── polygons - ``` -- run following commands to convert RGB labels to mask labels - ```bash - # --nproc optional, default 1, whether use multi-progress - # --version optional, 'v1.2', 'v2.0','all', default 'all', choose convert which version labels - # run this command at 'mmsegmentation/projects/Mapillary_dataset' folder - cd mmsegmentation/projects/mapillary_dataset - python tools/dataset_converters/mapillary.py ../../data/mapillary --nproc 8 --version all + cd data/mapillary + unzip An-ZjB1Zm61yAZG0ozTymz8I8NqI4x0MrYrh26dq7kPgfu8vf9ImrdaOAVOFYbJ2pNAgUnVGBmbue9lTgdBOb5BbKXIpFs0fpYWqACbrQDChAA2fdX0zS9PcHu7fY8c-FOvyBVxPNYNFQuM.zip ``` - After then, you will get this structure + +- After unzipping, you will get the Mapillary Vistas Dataset in the following structure. The semantic segmentation mask labels are in the `labels` folder. 
+ ```none mmsegmentation ├── mmseg ├── tools ├── configs ├── data │ ├── mapillary │ │ ├── training │ │ │ ├── images │ │ │ ├── v1.2 | │ │ │ ├── instances | │ │ │ ├── labels - | │ │ │ ├── labels_mask | │   │   │ └── panoptic │ │ │ ├── v2.0 | │ │ │ ├── instances | │ │ │ ├── labels - | │ │ │ ├── labels_mask | │ │ │ ├── panoptic | │   │   │ └── polygons │ │ ├── validation │ │ │ ├── images + | │ │ ├── v1.2 | │ │ │ ├── instances | │ │ │ ├── labels - | │ │ │ ├── labels_mask | │   │   │ └── panoptic │ │ │ ├── v2.0 | │ │ │ ├── instances | │ │ │ ├── labels - | │ │ │ ├── labels_mask | │ │ │ ├── panoptic | │   │   │ └── polygons ``` + +- You can select the dataset version with `MapillaryDataset_v1` or `MapillaryDataset_v2` in your configs. + View the Mapillary Vistas dataset config files here: [V1.2](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/_base_/datasets/mapillary_v1.py) and [V2.0](https://github.com/open-mmlab/mmsegmentation/blob/main/configs/_base_/datasets/mapillary_v2.py) + +- **View dataset label indices and palettes** + +- **Mapillary Vistas Dataset label information** + **v1.2 information** + + ```none + There are 66 label classes in v1.2 + 0--Bird--[165, 42, 42], + 1--Ground Animal--[0, 192, 0], + 2--Curb--[196, 196, 196], + 3--Fence--[190, 153, 153], + 4--Guard Rail--[180, 165, 180], + 5--Barrier--[90, 120, 150], + 6--Wall--[102, 102, 156], + 7--Bike Lane--[128, 64, 255], + 8--Crosswalk - Plain--[140, 140, 200], + 9--Curb Cut--[170, 170, 170], + 10--Parking--[250, 170, 160], + 11--Pedestrian Area--[96, 96, 96], + 12--Rail Track--[230, 150, 140], + 13--Road--[128, 64, 128], + 14--Service Lane--[110, 110, 110], + 15--Sidewalk--[244, 35, 232], + 16--Bridge--[150, 100, 100], + 17--Building--[70, 70, 70], + 18--Tunnel--[150, 120, 90], + 19--Person--[220, 20, 60], + 20--Bicyclist--[255, 0, 0], + 21--Motorcyclist--[255, 0, 100], + 22--Other Rider--[255, 0, 200], + 23--Lane Marking - Crosswalk--[200, 128, 128], + 24--Lane Marking - General--[255, 255, 255], + 25--Mountain--[64, 170, 64], + 26--Sand--[230, 160, 50], + 27--Sky--[70, 130, 180], + 28--Snow--[190, 255, 255], + 29--Terrain--[152, 251, 152], + 30--Vegetation--[107, 142, 35], + 31--Water--[0, 170, 30], + 32--Banner--[255, 255, 128], + 33--Bench--[250, 0, 30], + 34--Bike Rack--[100, 140, 180], + 35--Billboard--[220, 220, 220], + 36--Catch Basin--[220, 128, 128], + 37--CCTV Camera--[222, 40, 40], + 38--Fire Hydrant--[100, 170, 30], + 39--Junction Box--[40, 40, 40], + 40--Mailbox--[33, 33, 33], + 41--Manhole--[100, 128, 160], + 42--Phone Booth--[142, 0, 0], + 43--Pothole--[70, 100, 150], + 44--Street Light--[210, 170, 100], + 45--Pole--[153, 153, 153], + 46--Traffic Sign Frame--[128, 128, 128], + 47--Utility Pole--[0, 0, 80], + 48--Traffic Light--[250, 170, 30], + 49--Traffic Sign (Back)--[192, 192, 192], + 50--Traffic Sign (Front)--[220, 220, 0], + 51--Trash Can--[140, 140, 20], + 52--Bicycle--[119, 11, 32], + 53--Boat--[150, 0, 255], + 54--Bus--[0, 60, 100], + 55--Car--[0, 0, 142], + 56--Caravan--[0, 0, 90], + 57--Motorcycle--[0, 0, 230], + 58--On Rails--[0, 80, 100], + 59--Other Vehicle--[128, 64, 64], + 60--Trailer--[0, 0, 110], + 61--Truck--[0, 0, 70], + 62--Wheeled Slow--[0, 0, 192], + 63--Car Mount--[32, 32, 32], + 64--Ego Vehicle--[120, 10, 10], + 65--Unlabeled--[0, 0, 0] + ``` + + **v2.0 information** + + ```none + There are 124 label classes in v2.0 + 0--Bird--[165, 42, 42], + 1--Ground Animal--[0, 192, 0], + 2--Ambiguous Barrier--[250, 170, 31], + 3--Concrete Block--[250, 170, 32], + 4--Curb--[196, 196, 196], + 5--Fence--[190, 153, 153], + 6--Guard 
Rail--[180, 165, 180], + 7--Barrier--[90, 120, 150], + 8--Road Median--[250, 170, 33], + 9--Road Side--[250, 170, 34], + 10--Lane Separator--[128, 128, 128], + 11--Temporary Barrier--[250, 170, 35], + 12--Wall--[102, 102, 156], + 13--Bike Lane--[128, 64, 255], + 14--Crosswalk - Plain--[140, 140, 200], + 15--Curb Cut--[170, 170, 170], + 16--Driveway--[250, 170, 36], + 17--Parking--[250, 170, 160], + 18--Parking Aisle--[250, 170, 37], + 19--Pedestrian Area--[96, 96, 96], + 20--Rail Track--[230, 150, 140], + 21--Road--[128, 64, 128], + 22--Road Shoulder--[110, 110, 110], + 23--Service Lane--[110, 110, 110], + 24--Sidewalk--[244, 35, 232], + 25--Traffic Island--[128, 196, 128], + 26--Bridge--[150, 100, 100], + 27--Building--[70, 70, 70], + 28--Garage--[150, 150, 150], + 29--Tunnel--[150, 120, 90], + 30--Person--[220, 20, 60], + 31--Person Group--[220, 20, 60], + 32--Bicyclist--[255, 0, 0], + 33--Motorcyclist--[255, 0, 100], + 34--Other Rider--[255, 0, 200], + 35--Lane Marking - Dashed Line--[255, 255, 255], + 36--Lane Marking - Straight Line--[255, 255, 255], + 37--Lane Marking - Zigzag Line--[250, 170, 29], + 38--Lane Marking - Ambiguous--[250, 170, 28], + 39--Lane Marking - Arrow (Left)--[250, 170, 26], + 40--Lane Marking - Arrow (Other)--[250, 170, 25], + 41--Lane Marking - Arrow (Right)--[250, 170, 24], + 42--Lane Marking - Arrow (Split Left or Straight)--[250, 170, 22], + 43--Lane Marking - Arrow (Split Right or Straight)--[250, 170, 21], + 44--Lane Marking - Arrow (Straight)--[250, 170, 20], + 45--Lane Marking - Crosswalk--[255, 255, 255], + 46--Lane Marking - Give Way (Row)--[250, 170, 19], + 47--Lane Marking - Give Way (Single)--[250, 170, 18], + 48--Lane Marking - Hatched (Chevron)--[250, 170, 12], + 49--Lane Marking - Hatched (Diagonal)--[250, 170, 11], + 50--Lane Marking - Other--[255, 255, 255], + 51--Lane Marking - Stop Line--[255, 255, 255], + 52--Lane Marking - Symbol (Bicycle)--[250, 170, 16], + 53--Lane Marking - Symbol (Other)--[250, 170, 15], + 54--Lane Marking - Text--[250, 170, 15], + 55--Lane Marking (only) - Dashed Line--[255, 255, 255], + 56--Lane Marking (only) - Crosswalk--[255, 255, 255], + 57--Lane Marking (only) - Other--[255, 255, 255], + 58--Lane Marking (only) - Test--[255, 255, 255], + 59--Mountain--[64, 170, 64], + 60--Sand--[230, 160, 50], + 61--Sky--[70, 130, 180], + 62--Snow--[190, 255, 255], + 63--Terrain--[152, 251, 152], + 64--Vegetation--[107, 142, 35], + 65--Water--[0, 170, 30], + 66--Banner--[255, 255, 128], + 67--Bench--[250, 0, 30], + 68--Bike Rack--[100, 140, 180], + 69--Catch Basin--[220, 128, 128], + 70--CCTV Camera--[222, 40, 40], + 71--Fire Hydrant--[100, 170, 30], + 72--Junction Box--[40, 40, 40], + 73--Mailbox--[33, 33, 33], + 74--Manhole--[100, 128, 160], + 75--Parking Meter--[20, 20, 255], + 76--Phone Booth--[142, 0, 0], + 77--Pothole--[70, 100, 150], + 78--Signage - Advertisement--[250, 171, 30], + 79--Signage - Ambiguous--[250, 172, 30], + 80--Signage - Back--[250, 173, 30], + 81--Signage - Information--[250, 174, 30], + 82--Signage - Other--[250, 175, 30], + 83--Signage - Store--[250, 176, 30], + 84--Street Light--[210, 170, 100], + 85--Pole--[153, 153, 153], + 86--Pole Group--[153, 153, 153], + 87--Traffic Sign Frame--[128, 128, 128], + 88--Utility Pole--[0, 0, 80], + 89--Traffic Cone--[210, 60, 60], + 90--Traffic Light - General (Single)--[250, 170, 30], + 91--Traffic Light - Pedestrians--[250, 170, 30], + 92--Traffic Light - General (Upright)--[250, 170, 30], + 93--Traffic Light - General (Horizontal)--[250, 170, 30], + 94--Traffic 
Light - Cyclists--[250, 170, 30], + 95--Traffic Light - Other--[250, 170, 30], + 96--Traffic Sign - Ambiguous--[192, 192, 192], + 97--Traffic Sign (Back)--[192, 192, 192], + 98--Traffic Sign - Direction (Back)--[192, 192, 192], + 99--Traffic Sign - Direction (Front)--[220, 220, 0], + 100--Traffic Sign (Front)--[220, 220, 0], + 101--Traffic Sign - Parking--[0, 0, 196], + 102--Traffic Sign - Temporary (Back)--[192, 192, 192], + 103--Traffic Sign - Temporary (Front)--[220, 220, 0], + 104--Trash Can--[140, 140, 20], + 105--Bicycle--[119, 11, 32], + 106--Boat--[150, 0, 255], + 107--Bus--[0, 60, 100], + 108--Car--[0, 0, 142], + 109--Caravan--[0, 0, 90], + 110--Motorcycle--[0, 0, 230], + 111--On Rails--[0, 80, 100], + 112--Other Vehicle--[128, 64, 64], + 113--Trailer--[0, 0, 110], + 114--Truck--[0, 0, 70], + 115--Vehicle Group--[0, 0, 142], + 116--Wheeled Slow--[0, 0, 192], + 117--Water Valve--[170, 170, 170], + 118--Car Mount--[32, 32, 32], + 119--Dynamic--[111, 74, 0], + 120--Ego Vehicle--[120, 10, 10], + 121--Ground--[81, 0, 81], + 122--Static--[111, 111, 0], + 123--Unlabeled--[0, 0, 0] + ``` diff --git a/projects/mapillary_dataset/mmseg/datasets/mapillary.py b/projects/mapillary_dataset/mmseg/datasets/mapillary.py new file mode 100644 index 0000000000..f49bd54451 --- /dev/null +++ b/projects/mapillary_dataset/mmseg/datasets/mapillary.py @@ -0,0 +1,177 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from mmseg.datasets.basesegdataset import BaseSegDataset + +# from mmseg.registry import DATASETS + + +# @DATASETS.register_module() +class MapillaryDataset_v1(BaseSegDataset): + """Mapillary Vistas Dataset. + + Dataset paper link: + http://ieeexplore.ieee.org/document/8237796/ + + v1.2 contain 66 object classes. + (37 instance-specific) + + v2.0 contain 124 object classes. + (70 instance-specific, 46 stuff, 8 void or crowd). + + The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is + fixed to '.png' for Mapillary Vistas Dataset. 
+ """ + METAINFO = dict( + classes=('Bird', 'Ground Animal', 'Curb', 'Fence', 'Guard Rail', + 'Barrier', 'Wall', 'Bike Lane', 'Crosswalk - Plain', + 'Curb Cut', 'Parking', 'Pedestrian Area', 'Rail Track', + 'Road', 'Service Lane', 'Sidewalk', 'Bridge', 'Building', + 'Tunnel', 'Person', 'Bicyclist', 'Motorcyclist', + 'Other Rider', 'Lane Marking - Crosswalk', + 'Lane Marking - General', 'Mountain', 'Sand', 'Sky', 'Snow', + 'Terrain', 'Vegetation', 'Water', 'Banner', 'Bench', + 'Bike Rack', 'Billboard', 'Catch Basin', 'CCTV Camera', + 'Fire Hydrant', 'Junction Box', 'Mailbox', 'Manhole', + 'Phone Booth', 'Pothole', 'Street Light', 'Pole', + 'Traffic Sign Frame', 'Utility Pole', 'Traffic Light', + 'Traffic Sign (Back)', 'Traffic Sign (Front)', 'Trash Can', + 'Bicycle', 'Boat', 'Bus', 'Car', 'Caravan', 'Motorcycle', + 'On Rails', 'Other Vehicle', 'Trailer', 'Truck', + 'Wheeled Slow', 'Car Mount', 'Ego Vehicle', 'Unlabeled'), + palette=[[165, 42, 42], [0, 192, 0], [196, 196, 196], [190, 153, 153], + [180, 165, 180], [90, 120, 150], [102, 102, 156], + [128, 64, 255], [140, 140, 200], [170, 170, 170], + [250, 170, 160], [96, 96, 96], + [230, 150, 140], [128, 64, 128], [110, 110, 110], + [244, 35, 232], [150, 100, 100], [70, 70, 70], [150, 120, 90], + [220, 20, 60], [255, 0, 0], [255, 0, 100], [255, 0, 200], + [200, 128, 128], [255, 255, 255], [64, 170, + 64], [230, 160, 50], + [70, 130, 180], [190, 255, 255], [152, 251, 152], + [107, 142, 35], [0, 170, 30], [255, 255, 128], [250, 0, 30], + [100, 140, 180], [220, 220, 220], [220, 128, 128], + [222, 40, 40], [100, 170, 30], [40, 40, 40], [33, 33, 33], + [100, 128, 160], [142, 0, 0], [70, 100, 150], [210, 170, 100], + [153, 153, 153], [128, 128, 128], [0, 0, 80], [250, 170, 30], + [192, 192, 192], [220, 220, 0], [140, 140, 20], [119, 11, 32], + [150, 0, 255], [0, 60, 100], [0, 0, 142], [0, 0, 90], + [0, 0, 230], [0, 80, 100], [128, 64, 64], [0, 0, 110], + [0, 0, 70], [0, 0, 192], [32, 32, 32], [120, 10, + 10], [0, 0, 0]]) + + def __init__(self, + img_suffix='.jpg', + seg_map_suffix='.png', + **kwargs) -> None: + super().__init__( + img_suffix=img_suffix, seg_map_suffix=seg_map_suffix, **kwargs) + + +# @DATASETS.register_module() +class MapillaryDataset_v2(BaseSegDataset): + """Mapillary Vistas Dataset. + + Dataset paper link: + http://ieeexplore.ieee.org/document/8237796/ + + v1.2 contain 66 object classes. + (37 instance-specific) + + v2.0 contain 124 object classes. + (70 instance-specific, 46 stuff, 8 void or crowd). + + The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is + fixed to '.png' for Mapillary Vistas Dataset. 
+ """ + METAINFO = dict( + classes=( + 'Bird', 'Ground Animal', 'Ambiguous Barrier', 'Concrete Block', + 'Curb', 'Fence', 'Guard Rail', 'Barrier', 'Road Median', + 'Road Side', 'Lane Separator', 'Temporary Barrier', 'Wall', + 'Bike Lane', 'Crosswalk - Plain', 'Curb Cut', 'Driveway', + 'Parking', 'Parking Aisle', 'Pedestrian Area', 'Rail Track', + 'Road', 'Road Shoulder', 'Service Lane', 'Sidewalk', + 'Traffic Island', 'Bridge', 'Building', 'Garage', 'Tunnel', + 'Person', 'Person Group', 'Bicyclist', 'Motorcyclist', + 'Other Rider', 'Lane Marking - Dashed Line', + 'Lane Marking - Straight Line', 'Lane Marking - Zigzag Line', + 'Lane Marking - Ambiguous', 'Lane Marking - Arrow (Left)', + 'Lane Marking - Arrow (Other)', 'Lane Marking - Arrow (Right)', + 'Lane Marking - Arrow (Split Left or Straight)', + 'Lane Marking - Arrow (Split Right or Straight)', + 'Lane Marking - Arrow (Straight)', 'Lane Marking - Crosswalk', + 'Lane Marking - Give Way (Row)', + 'Lane Marking - Give Way (Single)', + 'Lane Marking - Hatched (Chevron)', + 'Lane Marking - Hatched (Diagonal)', 'Lane Marking - Other', + 'Lane Marking - Stop Line', 'Lane Marking - Symbol (Bicycle)', + 'Lane Marking - Symbol (Other)', 'Lane Marking - Text', + 'Lane Marking (only) - Dashed Line', + 'Lane Marking (only) - Crosswalk', 'Lane Marking (only) - Other', + 'Lane Marking (only) - Test', 'Mountain', 'Sand', 'Sky', 'Snow', + 'Terrain', 'Vegetation', 'Water', 'Banner', 'Bench', 'Bike Rack', + 'Catch Basin', 'CCTV Camera', 'Fire Hydrant', 'Junction Box', + 'Mailbox', 'Manhole', 'Parking Meter', 'Phone Booth', 'Pothole', + 'Signage - Advertisement', 'Signage - Ambiguous', 'Signage - Back', + 'Signage - Information', 'Signage - Other', 'Signage - Store', + 'Street Light', 'Pole', 'Pole Group', 'Traffic Sign Frame', + 'Utility Pole', 'Traffic Cone', 'Traffic Light - General (Single)', + 'Traffic Light - Pedestrians', 'Traffic Light - General (Upright)', + 'Traffic Light - General (Horizontal)', 'Traffic Light - Cyclists', + 'Traffic Light - Other', 'Traffic Sign - Ambiguous', + 'Traffic Sign (Back)', 'Traffic Sign - Direction (Back)', + 'Traffic Sign - Direction (Front)', 'Traffic Sign (Front)', + 'Traffic Sign - Parking', 'Traffic Sign - Temporary (Back)', + 'Traffic Sign - Temporary (Front)', 'Trash Can', 'Bicycle', 'Boat', + 'Bus', 'Car', 'Caravan', 'Motorcycle', 'On Rails', 'Other Vehicle', + 'Trailer', 'Truck', 'Vehicle Group', 'Wheeled Slow', 'Water Valve', + 'Car Mount', 'Dynamic', 'Ego Vehicle', 'Ground', 'Static', + 'Unlabeled'), + palette=[[165, 42, 42], [0, 192, 0], [250, 170, 31], [250, 170, 32], + [196, 196, 196], [190, 153, 153], [180, 165, 180], + [90, 120, 150], [250, 170, 33], [250, 170, 34], + [128, 128, 128], [250, 170, 35], [102, 102, 156], + [128, 64, 255], [140, 140, 200], [170, 170, 170], + [250, 170, 36], [250, 170, 160], [250, 170, 37], [96, 96, 96], + [230, 150, 140], [128, 64, 128], [110, 110, 110], + [110, 110, 110], [244, 35, 232], [128, 196, + 128], [150, 100, 100], + [70, 70, 70], [150, 150, 150], [150, 120, 90], [220, 20, 60], + [220, 20, 60], [255, 0, 0], [255, 0, 100], [255, 0, 200], + [255, 255, 255], [255, 255, 255], [250, 170, 29], + [250, 170, 28], [250, 170, 26], [250, 170, + 25], [250, 170, 24], + [250, 170, 22], [250, 170, 21], [250, 170, + 20], [255, 255, 255], + [250, 170, 19], [250, 170, 18], [250, 170, + 12], [250, 170, 11], + [255, 255, 255], [255, 255, 255], [250, 170, 16], + [250, 170, 15], [250, 170, 15], [255, 255, 255], + [255, 255, 255], [255, 255, 255], [255, 255, 255], + [64, 170, 64], 
[230, 160, 50], + [70, 130, 180], [190, 255, 255], [152, 251, 152], + [107, 142, 35], [0, 170, 30], [255, 255, 128], [250, 0, 30], + [100, 140, 180], [220, 128, 128], [222, 40, + 40], [100, 170, 30], + [40, 40, 40], [33, 33, 33], [100, 128, 160], [20, 20, 255], + [142, 0, 0], [70, 100, 150], [250, 171, 30], [250, 172, 30], + [250, 173, 30], [250, 174, 30], [250, 175, + 30], [250, 176, 30], + [210, 170, 100], [153, 153, 153], [153, 153, 153], + [128, 128, 128], [0, 0, 80], [210, 60, 60], [250, 170, 30], + [250, 170, 30], [250, 170, 30], [250, 170, + 30], [250, 170, 30], + [250, 170, 30], [192, 192, 192], [192, 192, 192], + [192, 192, 192], [220, 220, 0], [220, 220, 0], [0, 0, 196], + [192, 192, 192], [220, 220, 0], [140, 140, 20], [119, 11, 32], + [150, 0, 255], [0, 60, 100], [0, 0, 142], [0, 0, 90], + [0, 0, 230], [0, 80, 100], [128, 64, 64], [0, 0, 110], + [0, 0, 70], [0, 0, 142], [0, 0, 192], [170, 170, 170], + [32, 32, 32], [111, 74, 0], [120, 10, 10], [81, 0, 81], + [111, 111, 0], [0, 0, 0]]) + + def __init__(self, + img_suffix='.jpg', + seg_map_suffix='.png', + **kwargs) -> None: + super().__init__( + img_suffix=img_suffix, seg_map_suffix=seg_map_suffix, **kwargs) diff --git a/projects/mapillary_dataset/mmseg/datasets/mapillary_v1_2.py b/projects/mapillary_dataset/mmseg/datasets/mapillary_v1_2.py deleted file mode 100644 index 975d07b24e..0000000000 --- a/projects/mapillary_dataset/mmseg/datasets/mapillary_v1_2.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from mmseg.datasets.basesegdataset import BaseSegDataset -from mmseg.registry import DATASETS - - -@DATASETS.register_module() -class MapillaryDataset_v1_2(BaseSegDataset): - """Mapillary Vistas Dataset. - - Dataset paper link: - http://ieeexplore.ieee.org/document/8237796/ - - v1.2 contain 66 object classes. - (37 instance-specific) - - v2.0 contain 124 object classes. - (70 instance-specific, 46 stuff, 8 void or crowd). - - The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is - fixed to '.png' for Mapillary Vistas Dataset. 
- """ - METAINFO = dict( - classes=('Bird', 'Ground Animal', 'Curb', 'Fence', 'Guard Rail', - 'Barrier', 'Wall', 'Bike Lane', 'Crosswalk - Plain', - 'Curb Cut', 'Parking', 'Pedestrian Area', 'Rail Track', - 'Road', 'Service Lane', 'Sidewalk', 'Bridge', 'Building', - 'Tunnel', 'Person', 'Bicyclist', 'Motorcyclist', - 'Other Rider', 'Lane Marking - Crosswalk', - 'Lane Marking - General', 'Mountain', 'Sand', 'Sky', 'Snow', - 'Terrain', 'Vegetation', 'Water', 'Banner', 'Bench', - 'Bike Rack', 'Billboard', 'Catch Basin', 'CCTV Camera', - 'Fire Hydrant', 'Junction Box', 'Mailbox', 'Manhole', - 'Phone Booth', 'Pothole', 'Street Light', 'Pole', - 'Traffic Sign Frame', 'Utility Pole', 'Traffic Light', - 'Traffic Sign (Back)', 'Traffic Sign (Front)', 'Trash Can', - 'Bicycle', 'Boat', 'Bus', 'Car', 'Caravan', 'Motorcycle', - 'On Rails', 'Other Vehicle', 'Trailer', 'Truck', - 'Wheeled Slow', 'Car Mount', 'Ego Vehicle', 'Unlabeled'), - palette=[[165, 42, 42], [0, 192, 0], [196, 196, 196], [190, 153, 153], - [180, 165, 180], [90, 120, 150], [102, 102, 156], - [128, 64, 255], [140, 140, 200], [170, 170, 170], - [250, 170, 160], [96, 96, 96], - [230, 150, 140], [128, 64, 128], [110, 110, 110], - [244, 35, 232], [150, 100, 100], [70, 70, 70], [150, 120, 90], - [220, 20, 60], [255, 0, 0], [255, 0, 100], [255, 0, 200], - [200, 128, 128], [255, 255, 255], [64, 170, - 64], [230, 160, 50], - [70, 130, 180], [190, 255, 255], [152, 251, 152], - [107, 142, 35], [0, 170, 30], [255, 255, 128], [250, 0, 30], - [100, 140, 180], [220, 220, 220], [220, 128, 128], - [222, 40, 40], [100, 170, 30], [40, 40, 40], [33, 33, 33], - [100, 128, 160], [142, 0, 0], [70, 100, 150], [210, 170, 100], - [153, 153, 153], [128, 128, 128], [0, 0, 80], [250, 170, 30], - [192, 192, 192], [220, 220, 0], [140, 140, 20], [119, 11, 32], - [150, 0, 255], [0, 60, 100], [0, 0, 142], [0, 0, 90], - [0, 0, 230], [0, 80, 100], [128, 64, 64], [0, 0, 110], - [0, 0, 70], [0, 0, 192], [32, 32, 32], [120, 10, - 10], [0, 0, 0]]) - - def __init__(self, - img_suffix='.jpg', - seg_map_suffix='.png', - **kwargs) -> None: - super().__init__( - img_suffix=img_suffix, seg_map_suffix=seg_map_suffix, **kwargs) diff --git a/projects/mapillary_dataset/tools/dataset_converters/mapillary.py b/projects/mapillary_dataset/tools/dataset_converters/mapillary.py deleted file mode 100644 index 3ccb2d67b3..0000000000 --- a/projects/mapillary_dataset/tools/dataset_converters/mapillary.py +++ /dev/null @@ -1,245 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. 
-import argparse -import os.path as osp -from functools import partial - -import mmcv -import numpy as np -from mmengine.utils import (mkdir_or_exist, scandir, track_parallel_progress, - track_progress) - -colormap_v1_2 = np.array([[165, 42, 42], [0, 192, 0], [196, 196, 196], - [190, 153, 153], [180, 165, 180], [90, 120, 150], - [102, 102, 156], [128, 64, 255], [140, 140, 200], - [170, 170, 170], [250, 170, 160], [96, 96, 96], - [230, 150, 140], [128, 64, 128], [110, 110, 110], - [244, 35, 232], [150, 100, 100], [70, 70, 70], - [150, 120, 90], [220, 20, 60], [255, 0, 0], - [255, 0, 100], [255, 0, 200], [200, 128, 128], - [255, 255, 255], [64, 170, 64], [230, 160, 50], - [70, 130, 180], [190, 255, 255], [152, 251, 152], - [107, 142, 35], [0, 170, 30], [255, 255, 128], - [250, 0, 30], [100, 140, 180], [220, 220, 220], - [220, 128, 128], [222, 40, 40], [100, 170, 30], - [40, 40, 40], [33, 33, 33], [100, 128, 160], - [142, 0, 0], [70, 100, 150], [210, 170, 100], - [153, 153, 153], [128, 128, 128], [0, 0, 80], - [250, 170, 30], [192, 192, 192], [220, 220, 0], - [140, 140, 20], [119, 11, 32], [150, 0, 255], - [0, 60, 100], [0, 0, 142], [0, 0, 90], [0, 0, 230], - [0, 80, 100], [128, 64, 64], [0, 0, 110], [0, 0, 70], - [0, 0, 192], [32, 32, 32], [120, 10, 10], [0, 0, 0]]) - -colormap_v2_0 = np.array([[165, 42, 42], [0, 192, 0], [250, 170, 31], - [250, 170, 32], [196, 196, 196], [190, 153, 153], - [180, 165, 180], [90, 120, 150], [250, 170, 33], - [250, 170, 34], [128, 128, 128], [250, 170, 35], - [102, 102, 156], [128, 64, 255], [140, 140, 200], - [170, 170, 170], [250, 170, 36], [250, 170, 160], - [250, 170, 37], [96, 96, 96], [230, 150, 140], - [128, 64, 128], [110, 110, 110], [110, 110, 110], - [244, 35, 232], [128, 196, 128], [150, 100, 100], - [70, 70, 70], [150, 150, 150], [150, 120, 90], - [220, 20, 60], [220, 20, 60], [255, 0, 0], - [255, 0, 100], [255, 0, 200], [255, 255, 255], - [255, 255, 255], [250, 170, 29], [250, 170, 28], - [250, 170, 26], [250, 170, 25], [250, 170, 24], - [250, 170, 22], [250, 170, 21], [250, 170, 20], - [255, 255, 255], [250, 170, 19], [250, 170, 18], - [250, 170, 12], [250, 170, 11], [255, 255, 255], - [255, 255, 255], [250, 170, 16], [250, 170, 15], - [250, 170, 15], [255, 255, 255], [255, 255, 255], - [255, 255, 255], [255, 255, 255], [64, 170, 64], - [230, 160, 50], [70, 130, 180], [190, 255, 255], - [152, 251, 152], [107, 142, 35], [0, 170, 30], - [255, 255, 128], [250, 0, 30], [100, 140, 180], - [220, 128, 128], [222, 40, 40], [100, 170, 30], - [40, 40, 40], [33, 33, 33], [100, 128, 160], - [20, 20, 255], [142, 0, 0], [70, 100, 150], - [250, 171, 30], [250, 172, 30], [250, 173, 30], - [250, 174, 30], [250, 175, 30], [250, 176, 30], - [210, 170, 100], [153, 153, 153], [153, 153, 153], - [128, 128, 128], [0, 0, 80], [210, 60, 60], - [250, 170, 30], [250, 170, 30], [250, 170, 30], - [250, 170, 30], [250, 170, 30], [250, 170, 30], - [192, 192, 192], [192, 192, 192], [192, 192, 192], - [220, 220, 0], [220, 220, 0], [0, 0, 196], - [192, 192, 192], [220, 220, 0], [140, 140, 20], - [119, 11, 32], [150, 0, 255], [0, 60, 100], - [0, 0, 142], [0, 0, 90], [0, 0, 230], [0, 80, 100], - [128, 64, 64], [0, 0, 110], [0, 0, 70], [0, 0, 142], - [0, 0, 192], [170, 170, 170], [32, 32, 32], - [111, 74, 0], [120, 10, 10], [81, 0, 81], - [111, 111, 0], [0, 0, 0]]) - - -def parse_args(): - parser = argparse.ArgumentParser( - description='Convert Mapillary dataset to mmsegmentation format') - parser.add_argument('dataset_path', help='Mapillary folder path') - parser.add_argument( - 
'--version', - default='all', - help="Mapillary labels version, 'v1.2','v2.0','all'") - parser.add_argument('-o', '--out_dir', help='output path') - parser.add_argument( - '--nproc', default=1, type=int, help='number of process') - args = parser.parse_args() - return args - - -def mapillary_colormap2label(colormap: np.ndarray) -> list: - """Create a `list` shaped (256^3, 1), convert each color palette to a - number, which can use to find the correct label value. - - For example labels 0--Bird--[165, 42, 42] - (165*256 + 42) * 256 + 42 = 10824234 (This is list's index]) - `colormap2label[10824234] = 0` - - In converting, if a RGB pixel value is [165, 42, 42], - through colormap2label[10824234]-->can quickly find - this labels value is 0. - Through matrix multiply to compute a img is very fast. - - Args: - colormap (np.ndarray): Mapillary Vistas Dataset palette - - Returns: - list: values are mask labels, - indexes are palette's convert results.、 - """ - colormap2label = np.zeros(256**3, dtype=np.longlong) - for i, colormap_ in enumerate(colormap): - colormap2label[(colormap_[0] * 256 + colormap_[1]) * 256 + - colormap_[2]] = i - return colormap2label - - -def mapillary_masklabel(rgb_label: np.ndarray, - colormap2label: list) -> np.ndarray: - """Computing a img mask label through `colormap2label` get in - `mapillary_colormap2label(COLORMAP: np.ndarray)` - - Args: - rgb_label (np.array): a RGB labels img. - colormap2label (list): get in mapillary_colormap2label(colormap) - - Returns: - np.ndarray: mask labels array. - """ - colormap_ = rgb_label.astype('uint32') - idx = np.array((colormap_[:, :, 0] * 256 + colormap_[:, :, 1]) * 256 + - colormap_[:, :, 2]).astype('uint32') - return colormap2label[idx] - - -def RGB2Mask(rgb_label_path: str, colormap2label: list) -> None: - """Mapillary Vistas Dataset provide 8-bit with color-palette class-specific - labels for semantic segmentation. However, semantic segmentation needs - single channel mask labels. - - This code is about converting mapillary RGB labels - {traing,validation/v1.2,v2.0/labels} to mask labels - {{traing,validation/v1.2,v2.0/labels_mask} - - Args: - rgb_label_path (str): image absolute path. - dataset_version (str): v1.2 or v2.0 to choose color_map . 
- """ - rgb_label = mmcv.imread(rgb_label_path, channel_order='rgb') - - masks_label = mapillary_masklabel(rgb_label, colormap2label) - - mmcv.imwrite( - masks_label.astype(np.uint8), - rgb_label_path.replace('labels', 'labels_mask')) - - -def main(): - colormap2label_v1_2 = mapillary_colormap2label(colormap_v1_2) - colormap2label_v2_0 = mapillary_colormap2label(colormap_v2_0) - - dataset_path = args.dataset_path - if args.out_dir is None: - out_dir = dataset_path - else: - out_dir = args.out_dir - - RGB_labels_path = [] - RGB_labels_v1_2_path = [] - RGB_labels_v2_0_path = [] - print('Scanning labels path....') - for label_path in scandir(dataset_path, suffix='.png', recursive=True): - if 'labels' in label_path: - rgb_label_path = osp.join(dataset_path, label_path) - RGB_labels_path.append(rgb_label_path) - if 'v1.2' in label_path: - RGB_labels_v1_2_path.append(rgb_label_path) - elif 'v2.0' in label_path: - RGB_labels_v2_0_path.append(rgb_label_path) - - if args.version == 'all': - print(f'Totaly found {len(RGB_labels_path)} {args.version} RGB labels') - elif args.version == 'v1.2': - print(f'Found {len(RGB_labels_v1_2_path)} {args.version} RGB labels') - elif args.version == 'v2.0': - print(f'Found {len(RGB_labels_v2_0_path)} {args.version} RGB labels') - print('Making directories...') - mkdir_or_exist(osp.join(out_dir, 'training', 'v1.2', 'labels_mask')) - mkdir_or_exist(osp.join(out_dir, 'validation', 'v1.2', 'labels_mask')) - mkdir_or_exist(osp.join(out_dir, 'training', 'v2.0', 'labels_mask')) - mkdir_or_exist(osp.join(out_dir, 'validation', 'v2.0', 'labels_mask')) - print('Directories Have Made...') - - if args.nproc > 1: - if args.version == 'all': - print('Converting v1.2 ....') - track_parallel_progress( - partial(RGB2Mask, colormap2label=colormap2label_v1_2), - RGB_labels_v1_2_path, - nproc=args.nproc) - print('Converting v2.0 ....') - track_parallel_progress( - partial(RGB2Mask, colormap2label=colormap2label_v2_0), - RGB_labels_v2_0_path, - nproc=args.nproc) - elif args.version == 'v1.2': - print('Converting v1.2 ....') - track_parallel_progress( - partial(RGB2Mask, colormap2label=colormap2label_v1_2), - RGB_labels_v1_2_path, - nproc=args.nproc) - elif args.version == 'v2.0': - print('Converting v2.0 ....') - track_parallel_progress( - partial(RGB2Mask, colormap2label=colormap2label_v2_0), - RGB_labels_v2_0_path, - nproc=args.nproc) - - else: - if args.version == 'all': - print('Converting v1.2 ....') - track_progress( - partial(RGB2Mask, colormap2label=colormap2label_v1_2), - RGB_labels_v1_2_path) - print('Converting v2.0 ....') - track_progress( - partial(RGB2Mask, colormap2label=colormap2label_v2_0), - RGB_labels_v2_0_path) - elif args.version == 'v1.2': - print('Converting v1.2 ....') - track_progress( - partial(RGB2Mask, colormap2label=colormap2label_v1_2), - RGB_labels_v1_2_path) - elif args.version == 'v2.0': - print('Converting v2.0 ....') - track_progress( - partial(RGB2Mask, colormap2label=colormap2label_v2_0), - RGB_labels_v2_0_path) - - print('Have convert Mapillary Vistas Datasets RGB labels to Mask labels!') - - -if __name__ == '__main__': - args = parse_args() - main() diff --git a/tests/data/pseudo_mapillary_dataset/images/__CRyFzoDOXn6unQ6a3DnQ.jpg b/tests/data/pseudo_mapillary_dataset/images/__CRyFzoDOXn6unQ6a3DnQ.jpg new file mode 100644 index 0000000000..c3cf31a170 Binary files /dev/null and b/tests/data/pseudo_mapillary_dataset/images/__CRyFzoDOXn6unQ6a3DnQ.jpg differ diff --git a/tests/data/pseudo_mapillary_dataset/v1.2/__CRyFzoDOXn6unQ6a3DnQ.png 
b/tests/data/pseudo_mapillary_dataset/v1.2/__CRyFzoDOXn6unQ6a3DnQ.png new file mode 100644 index 0000000000..2c648b7ef8 Binary files /dev/null and b/tests/data/pseudo_mapillary_dataset/v1.2/__CRyFzoDOXn6unQ6a3DnQ.png differ diff --git a/tests/data/pseudo_mapillary_dataset/v2.0/__CRyFzoDOXn6unQ6a3DnQ.png b/tests/data/pseudo_mapillary_dataset/v2.0/__CRyFzoDOXn6unQ6a3DnQ.png new file mode 100644 index 0000000000..809256d931 Binary files /dev/null and b/tests/data/pseudo_mapillary_dataset/v2.0/__CRyFzoDOXn6unQ6a3DnQ.png differ diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py index b97cbae3a4..db4a779906 100644 --- a/tests/test_datasets/test_dataset.py +++ b/tests/test_datasets/test_dataset.py @@ -7,7 +7,8 @@ from mmseg.datasets import (ADE20KDataset, BaseSegDataset, CityscapesDataset, COCOStuffDataset, DecathlonDataset, ISPRSDataset, - LIPDataset, LoveDADataset, PascalVOCDataset, + LIPDataset, LoveDADataset, MapillaryDataset_v1, + MapillaryDataset_v2, PascalVOCDataset, PotsdamDataset, REFUGEDataset, SynapseDataset, iSAIDDataset) from mmseg.registry import DATASETS @@ -27,6 +28,10 @@ def test_classes(): assert list(PotsdamDataset.METAINFO['classes']) == get_classes('potsdam') assert list(ISPRSDataset.METAINFO['classes']) == get_classes('vaihingen') assert list(iSAIDDataset.METAINFO['classes']) == get_classes('isaid') + assert list( + MapillaryDataset_v1.METAINFO['classes']) == get_classes('mapillary_v1') + assert list( + MapillaryDataset_v2.METAINFO['classes']) == get_classes('mapillary_v2') with pytest.raises(ValueError): get_classes('unsupported') @@ -80,6 +85,10 @@ def test_palette(): assert PotsdamDataset.METAINFO['palette'] == get_palette('potsdam') assert COCOStuffDataset.METAINFO['palette'] == get_palette('cocostuff') assert iSAIDDataset.METAINFO['palette'] == get_palette('isaid') + assert list( + MapillaryDataset_v1.METAINFO['palette']) == get_palette('mapillary_v1') + assert list( + MapillaryDataset_v2.METAINFO['palette']) == get_palette('mapillary_v2') with pytest.raises(ValueError): get_palette('unsupported') @@ -304,6 +313,19 @@ def test_lip(): assert len(test_dataset) == 1 +def test_mapillary(): + test_dataset = MapillaryDataset_v1( + pipeline=[], + data_prefix=dict( + img_path=osp.join( + osp.dirname(__file__), + '../data/pseudo_mapillary_dataset/images'), + seg_map_path=osp.join( + osp.dirname(__file__), + '../data/pseudo_mapillary_dataset/v1.2'))) + assert len(test_dataset) == 1 + + @pytest.mark.parametrize('dataset, classes', [ ('ADE20KDataset', ('wall', 'building')), ('CityscapesDataset', ('road', 'sidewalk')), diff --git a/tests/test_datasets/test_formatting.py b/tests/test_datasets/test_formatting.py index 4babaad269..d0e5820ec7 100644 --- a/tests/test_datasets/test_formatting.py +++ b/tests/test_datasets/test_formatting.py @@ -4,6 +4,7 @@ import unittest import numpy as np +import pytest from mmengine.structures import BaseDataElement from mmseg.datasets.transforms import PackSegInputs @@ -45,6 +46,14 @@ def test_transform(self): BaseDataElement) self.assertEqual(results['data_samples'].ori_shape, results['data_samples'].gt_sem_seg.shape) + results = copy.deepcopy(self.results) + # test dataset shape is not 2D + results['gt_seg_map'] = np.random.rand(3, 300, 400) + msg = 'the segmentation map is 2D' + with pytest.warns(UserWarning, match=msg): + results = transform(results) + self.assertEqual(results['data_samples'].ori_shape, + results['data_samples'].gt_sem_seg.shape) def test_repr(self): transform = 
PackSegInputs(meta_keys=self.meta_keys) diff --git a/tests/test_datasets/test_transform.py b/tests/test_datasets/test_transform.py index a9136bebc8..92d6c6106d 100644 --- a/tests/test_datasets/test_transform.py +++ b/tests/test_datasets/test_transform.py @@ -639,6 +639,17 @@ def test_mosaic(): results = mosaic_module(results) assert results['img'].shape[:2] == (20, 24) + results = dict() + results['img'] = np.concatenate((img, img), axis=2) + results['gt_semantic_seg'] = seg + results['seg_fields'] = ['gt_semantic_seg'] + + transform = dict(type='RandomMosaic', prob=1, img_scale=(10, 12)) + mosaic_module = TRANSFORMS.build(transform) + results['mix_results'] = [copy.deepcopy(results)] * 3 + results = mosaic_module(results) + assert results['img'].shape[2] == 6 + def test_cutout(): # test prob diff --git a/tests/test_evaluation/test_metrics/test_citys_metric.py b/tests/test_evaluation/test_metrics/test_citys_metric.py index a6d6db5caa..06f956f54a 100644 --- a/tests/test_evaluation/test_metrics/test_citys_metric.py +++ b/tests/test_evaluation/test_metrics/test_citys_metric.py @@ -1,15 +1,18 @@ # Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +import shutil from unittest import TestCase import numpy as np +import pytest import torch -from mmengine.structures import BaseDataElement, PixelData +from mmengine.structures import PixelData -from mmseg.evaluation import CitysMetric +from mmseg.evaluation import CityscapesMetric from mmseg.structures import SegDataSample -class TestCitysMetric(TestCase): +class TestCityscapesMetric(TestCase): def _demo_mm_inputs(self, batch_size=1, @@ -34,21 +37,16 @@ def _demo_mm_inputs(self, image_shape = image_shapes[idx] _, h, w = image_shape - mm_inputs = dict() data_sample = SegDataSample() gt_semantic_seg = np.random.randint( 0, num_classes, (1, h, w), dtype=np.uint8) gt_semantic_seg = torch.LongTensor(gt_semantic_seg) gt_sem_seg_data = dict(data=gt_semantic_seg) data_sample.gt_sem_seg = PixelData(**gt_sem_seg_data) - mm_inputs['data_sample'] = data_sample.to_dict() - mm_inputs['data_sample']['seg_map_path'] = \ - 'tests/data/pseudo_cityscapes_dataset/gtFine/val/\ - frankfurt/frankfurt_000000_000294_gtFine_labelTrainIds.png' - - mm_inputs['seg_map_path'] = mm_inputs['data_sample'][ - 'seg_map_path'] - packed_inputs.append(mm_inputs) + data_sample = data_sample.to_dict() + data_sample[ + 'seg_map_path'] = 'tests/data/pseudo_cityscapes_dataset/gtFine/val/frankfurt/frankfurt_000000_000294_gtFine_labelTrainIds.png' # noqa + packed_inputs.append(data_sample) return packed_inputs @@ -84,15 +82,11 @@ def _demo_mm_model_output(self, _predictions = [] for pred in batch_datasampes: - if isinstance(pred, BaseDataElement): - test_data = pred.to_dict() - test_data['img_path'] = \ - 'tests/data/pseudo_cityscapes_dataset/leftImg8bit/val/\ - frankfurt/frankfurt_000000_000294_leftImg8bit.png' - - _predictions.append(test_data) - else: - _predictions.append(pred) + test_data = pred.to_dict() + test_data[ + 'img_path'] = 'tests/data/pseudo_cityscapes_dataset/leftImg8bit/val/frankfurt/frankfurt_000000_000294_leftImg8bit.png' # noqa + _predictions.append(test_data) + return _predictions def test_evaluate(self): @@ -104,15 +98,22 @@ def test_evaluate(self): dict(**data, **result) for data, result in zip(data_batch, predictions) ] - iou_metric = CitysMetric(citys_metrics=['cityscapes']) - iou_metric.process(data_batch, data_samples) - res = iou_metric.evaluate(6) + # test keep_results should be True when format_only is True + with pytest.raises(AssertionError): 
+ CityscapesMetric( + output_dir='tmp', format_only=True, keep_results=False) + + # test evaluate with cityscape metric + metric = CityscapesMetric(output_dir='tmp') + metric.process(data_batch, data_samples) + res = metric.evaluate(2) self.assertIsInstance(res, dict) - # test to_label_id = True - iou_metric = CitysMetric( - citys_metrics=['cityscapes'], to_label_id=True) - iou_metric.process(data_batch, data_samples) - res = iou_metric.evaluate(6) - self.assertIsInstance(res, dict) - import shutil - shutil.rmtree('.format_cityscapes') + + # test format_only + metric = CityscapesMetric( + output_dir='tmp', format_only=True, keep_results=True) + metric.process(data_batch, data_samples) + metric.evaluate(2) + assert osp.exists('tmp') + assert osp.isfile('tmp/frankfurt_000000_000294_leftImg8bit.png') + shutil.rmtree('tmp') diff --git a/tests/test_evaluation/test_metrics/test_iou_metric.py b/tests/test_evaluation/test_metrics/test_iou_metric.py index a0bc922c31..7a0e9d53e3 100644 --- a/tests/test_evaluation/test_metrics/test_iou_metric.py +++ b/tests/test_evaluation/test_metrics/test_iou_metric.py @@ -1,4 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +import shutil from unittest import TestCase import numpy as np @@ -58,6 +60,8 @@ def _demo_mm_model_output(self, data=torch.randn(num_classes, h, w)) data_sample['pred_sem_seg'] = dict( data=torch.randint(0, num_classes, (1, h, w))) + data_sample[ + 'img_path'] = 'tests/data/pseudo_dataset/imgs/00000_img.jpg' return data_samples def test_evaluate(self): @@ -72,5 +76,29 @@ def test_evaluate(self): label_map=dict(), reduce_zero_label=False) iou_metric.process([0] * len(data_samples), data_samples) - res = iou_metric.evaluate(6) + res = iou_metric.evaluate(2) self.assertIsInstance(res, dict) + + # test save segment file in output_dir + iou_metric = IoUMetric(iou_metrics=['mIoU'], output_dir='tmp') + iou_metric.dataset_meta = dict( + classes=['wall', 'building', 'sky', 'floor', 'tree'], + label_map=dict(), + reduce_zero_label=False) + iou_metric.process([0] * len(data_samples), data_samples) + assert osp.exists('tmp') + assert osp.isfile('tmp/00000_img.png') + shutil.rmtree('tmp') + + # test format_only + iou_metric = IoUMetric( + iou_metrics=['mIoU'], output_dir='tmp', format_only=True) + iou_metric.dataset_meta = dict( + classes=['wall', 'building', 'sky', 'floor', 'tree'], + label_map=dict(), + reduce_zero_label=False) + iou_metric.process([0] * len(data_samples), data_samples) + assert iou_metric.results == [] + assert osp.exists('tmp') + assert osp.isfile('tmp/00000_img.png') + shutil.rmtree('tmp') diff --git a/tests/test_models/test_backbones/test_mscan.py b/tests/test_models/test_backbones/test_mscan.py new file mode 100644 index 0000000000..84dfb8e450 --- /dev/null +++ b/tests/test_models/test_backbones/test_mscan.py @@ -0,0 +1,69 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
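+# The tests below exercise the MSCAN backbone used by SegNeXt and its
+# building blocks. The shape assertions follow from MSCAN's fixed
+# downsampling schedule: the four stages output feature maps at 1/4, 1/8,
+# 1/16 and 1/32 of the input resolution, with the configured embed_dims
+# as channel counts.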
+import torch + +from mmseg.models.backbones import MSCAN +from mmseg.models.backbones.mscan import (MSCAAttention, MSCASpatialAttention, + OverlapPatchEmbed, StemConv) + + +def test_mscan_backbone(): + # Test MSCAN Standard Forward + model = MSCAN( + embed_dims=[8, 16, 32, 64], + norm_cfg=dict(type='BN', requires_grad=True)) + model.init_weights() + model.train() + batch_size = 2 + imgs = torch.randn(batch_size, 3, 64, 128) + feat = model(imgs) + + assert len(feat) == 4 + # output for segment Head + assert feat[0].shape == torch.Size([batch_size, 8, 16, 32]) + assert feat[1].shape == torch.Size([batch_size, 16, 8, 16]) + assert feat[2].shape == torch.Size([batch_size, 32, 4, 8]) + assert feat[3].shape == torch.Size([batch_size, 64, 2, 4]) + + # Test input with rare shape + batch_size = 2 + imgs = torch.randn(batch_size, 3, 95, 27) + feat = model(imgs) + assert len(feat) == 4 + + +def test_mscan_overlap_patch_embed_module(): + x_overlap_patch_embed = OverlapPatchEmbed( + norm_cfg=dict(type='BN', requires_grad=True)) + assert x_overlap_patch_embed.proj.in_channels == 3 + assert x_overlap_patch_embed.norm.weight.shape == torch.Size([768]) + x = torch.randn(2, 3, 16, 32) + x_out, H, W = x_overlap_patch_embed(x) + assert x_out.shape == torch.Size([2, 32, 768]) + + +def test_mscan_spatial_attention_module(): + x_spatial_attention = MSCASpatialAttention(8) + assert x_spatial_attention.proj_1.kernel_size == (1, 1) + assert x_spatial_attention.proj_2.stride == (1, 1) + x = torch.randn(2, 8, 16, 32) + x_out = x_spatial_attention(x) + assert x_out.shape == torch.Size([2, 8, 16, 32]) + + +def test_mscan_attention_module(): + x_attention = MSCAAttention(8) + assert x_attention.conv0.weight.shape[0] == 8 + assert x_attention.conv3.kernel_size == (1, 1) + x = torch.randn(2, 8, 16, 32) + x_out = x_attention(x) + assert x_out.shape == torch.Size([2, 8, 16, 32]) + + +def test_mscan_stem_module(): + x_stem = StemConv(8, 8, norm_cfg=dict(type='BN', requires_grad=True)) + assert x_stem.proj[0].weight.shape[0] == 4 + assert x_stem.proj[-1].weight.shape[0] == 8 + x = torch.randn(2, 8, 16, 32) + x_out, H, W = x_stem(x) + assert x_out.shape == torch.Size([2, 32, 8]) + assert (H, W) == (4, 8) diff --git a/tests/test_models/test_backbones/test_pidnet.py b/tests/test_models/test_backbones/test_pidnet.py new file mode 100644 index 0000000000..208dfc7814 --- /dev/null +++ b/tests/test_models/test_backbones/test_pidnet.py @@ -0,0 +1,87 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
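+# The tests below check PIDNet's mode-dependent outputs: in eval mode the
+# backbone returns a single fused feature map, while in train mode it
+# returns the three branch features (P: detail, I: context, D: boundary)
+# consumed by the auxiliary losses. Raising `channels` and the block counts
+# switches the config from PIDNet-S to the larger PIDNet-M/L variants.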
+import os
+import tempfile
+
+import torch
+from mmengine.registry import init_default_scope
+
+from mmseg.registry import MODELS
+
+init_default_scope('mmseg')
+
+
+def test_pidnet_backbone():
+    # Test PIDNet Standard Forward
+    norm_cfg = dict(type='BN', requires_grad=True)
+    backbone_cfg = dict(
+        type='PIDNet',
+        in_channels=3,
+        channels=32,
+        ppm_channels=96,
+        num_stem_blocks=2,
+        num_branch_blocks=3,
+        align_corners=False,
+        norm_cfg=norm_cfg,
+        act_cfg=dict(type='ReLU', inplace=True))
+    model = MODELS.build(backbone_cfg)
+    model.init_weights()
+
+    # Test init weights
+    temp_file = tempfile.NamedTemporaryFile()
+    temp_file.close()
+    torch.save(model.state_dict(), temp_file.name)
+    backbone_cfg.update(
+        init_cfg=dict(type='Pretrained', checkpoint=temp_file.name))
+    model = MODELS.build(backbone_cfg)
+    model.init_weights()
+    os.remove(temp_file.name)
+
+    # Test eval mode
+    model.eval()
+    batch_size = 1
+    imgs = torch.randn(batch_size, 3, 64, 128)
+    feats = model(imgs)
+
+    assert isinstance(feats, torch.Tensor)
+    assert feats.shape == torch.Size([batch_size, 128, 8, 16])
+
+    # Test train mode
+    model.train()
+    batch_size = 2
+    imgs = torch.randn(batch_size, 3, 64, 128)
+    feats = model(imgs)
+
+    assert len(feats) == 3
+    # test output for P branch
+    assert feats[0].shape == torch.Size([batch_size, 64, 8, 16])
+    # test output for I branch
+    assert feats[1].shape == torch.Size([batch_size, 128, 8, 16])
+    # test output for D branch
+    assert feats[2].shape == torch.Size([batch_size, 64, 8, 16])
+
+    # Test pidnet-m
+    backbone_cfg.update(channels=64)
+    model = MODELS.build(backbone_cfg)
+    feats = model(imgs)
+
+    assert len(feats) == 3
+    # test output for P branch
+    assert feats[0].shape == torch.Size([batch_size, 128, 8, 16])
+    # test output for I branch
+    assert feats[1].shape == torch.Size([batch_size, 256, 8, 16])
+    # test output for D branch
+    assert feats[2].shape == torch.Size([batch_size, 128, 8, 16])
+
+    # Test pidnet-l
+    backbone_cfg.update(
+        channels=64, ppm_channels=112, num_stem_blocks=3, num_branch_blocks=4)
+    model = MODELS.build(backbone_cfg)
+    feats = model(imgs)
+
+    assert len(feats) == 3
+    # test output for P branch
+    assert feats[0].shape == torch.Size([batch_size, 128, 8, 16])
+    # test output for I branch
+    assert feats[1].shape == torch.Size([batch_size, 256, 8, 16])
+    # test output for D branch
+    assert feats[2].shape == torch.Size([batch_size, 128, 8, 16])
diff --git a/tests/test_models/test_heads/test_ham_head.py b/tests/test_models/test_heads/test_ham_head.py
new file mode 100644
index 0000000000..f802d2d8db
--- /dev/null
+++ b/tests/test_models/test_heads/test_ham_head.py
@@ -0,0 +1,44 @@
+# Copyright (c) OpenMMLab. All rights reserved.
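+# The tests below cover LightHamHead, the decode head used by SegNeXt. The
+# ham_kwargs configure its matrix-decomposition ("Hamburger") module:
+# MD_S and MD_R set the number of spatial groups and the rank of the
+# low-rank reconstruction, while train_steps/eval_steps set the number of
+# NMF optimization iterations used in train and eval mode respectively.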
+import torch + +from mmseg.models.decode_heads import LightHamHead +from .utils import _conv_has_norm, to_cuda + +ham_norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) + + +def test_ham_head(): + + # test without sync_bn + head = LightHamHead( + in_channels=[16, 32, 64], + in_index=[1, 2, 3], + channels=64, + ham_channels=64, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=ham_norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ham_kwargs=dict( + MD_S=1, + MD_R=64, + train_steps=6, + eval_steps=7, + inv_t=100, + rand_init=True)) + assert not _conv_has_norm(head, sync_bn=False) + + inputs = [ + torch.randn(1, 8, 32, 32), + torch.randn(1, 16, 16, 16), + torch.randn(1, 32, 8, 8), + torch.randn(1, 64, 4, 4) + ] + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + assert head.in_channels == [16, 32, 64] + assert head.hamburger.ham_in.in_channels == 64 + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 16, 16) diff --git a/tests/test_models/test_heads/test_pidnet_head.py b/tests/test_models/test_heads/test_pidnet_head.py new file mode 100644 index 0000000000..a6247371c5 --- /dev/null +++ b/tests/test_models/test_heads/test_pidnet_head.py @@ -0,0 +1,89 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from mmengine.registry import init_default_scope + +from mmseg.registry import MODELS + + +def test_pidnet_head(): + init_default_scope('mmseg') + + # Test PIDNet decode head Standard Forward + norm_cfg = dict(type='BN', requires_grad=True) + backbone_cfg = dict( + type='PIDNet', + in_channels=3, + channels=32, + ppm_channels=96, + num_stem_blocks=2, + num_branch_blocks=3, + align_corners=False, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU', inplace=True)) + decode_head_cfg = dict( + type='PIDHead', + in_channels=128, + channels=128, + num_classes=19, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU', inplace=True), + align_corners=True, + loss_decode=[ + dict( + type='CrossEntropyLoss', + use_sigmoid=False, + class_weight=[ + 0.8373, 0.918, 0.866, 1.0345, 1.0166, 0.9969, 0.9754, + 1.0489, 0.8786, 1.0023, 0.9539, 0.9843, 1.1116, 0.9037, + 1.0865, 1.0955, 1.0865, 1.1529, 1.0507 + ], + loss_weight=0.4), + dict( + type='OhemCrossEntropy', + thres=0.9, + min_kept=131072, + class_weight=[ + 0.8373, 0.918, 0.866, 1.0345, 1.0166, 0.9969, 0.9754, + 1.0489, 0.8786, 1.0023, 0.9539, 0.9843, 1.1116, 0.9037, + 1.0865, 1.0955, 1.0865, 1.1529, 1.0507 + ], + loss_weight=1.0), + dict(type='BoundaryLoss', loss_weight=20.0), + dict( + type='OhemCrossEntropy', + thres=0.9, + min_kept=131072, + class_weight=[ + 0.8373, 0.918, 0.866, 1.0345, 1.0166, 0.9969, 0.9754, + 1.0489, 0.8786, 1.0023, 0.9539, 0.9843, 1.1116, 0.9037, + 1.0865, 1.0955, 1.0865, 1.1529, 1.0507 + ], + loss_weight=1.0) + ]) + backbone = MODELS.build(backbone_cfg) + head = MODELS.build(decode_head_cfg) + + # Test train mode + backbone.train() + head.train() + batch_size = 2 + imgs = torch.randn(batch_size, 3, 64, 128) + feats = backbone(imgs) + seg_logit = head(feats) + + assert isinstance(seg_logit, tuple) + assert len(seg_logit) == 3 + + p_logits, i_logits, d_logits = seg_logit + assert p_logits.shape == (batch_size, 19, 8, 16) + assert i_logits.shape == (batch_size, 19, 8, 16) + assert d_logits.shape == (batch_size, 1, 8, 16) + + # Test eval mode + backbone.eval() + head.eval() + feats = backbone(imgs) + seg_logit = head(feats) + + assert isinstance(seg_logit, torch.Tensor) + assert seg_logit.shape == (batch_size, 
19, 8, 16) diff --git a/tools/analysis_tools/get_flops.py b/tools/analysis_tools/get_flops.py index 1e8f188e18..66b2d52fcd 100644 --- a/tools/analysis_tools/get_flops.py +++ b/tools/analysis_tools/get_flops.py @@ -1,10 +1,23 @@ # Copyright (c) OpenMMLab. All rights reserved. import argparse +import tempfile +from pathlib import Path -from mmcv.cnn import get_model_complexity_info -from mmengine import Config +import torch +from mmengine import Config, DictAction +from mmengine.logging import MMLogger +from mmengine.model import revert_sync_batchnorm +from mmengine.registry import init_default_scope -from mmseg.models import build_segmentor +from mmseg.models import BaseSegmentor +from mmseg.registry import MODELS +from mmseg.structures import SegDataSample + +try: + from mmengine.analysis import get_model_complexity_info + from mmengine.analysis.print_helper import _format_size +except ImportError: + raise ImportError('Please upgrade mmengine >= 0.6.0 to use this script.') def parse_args(): @@ -17,13 +30,33 @@ def parse_args(): nargs='+', default=[2048, 1024], help='input image size') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') args = parser.parse_args() return args -def main(): +def inference(args: argparse.Namespace, logger: MMLogger) -> dict: + config_name = Path(args.config) - args = parse_args() + if not config_name.exists(): + logger.error(f'Config file {config_name} does not exist') + + cfg: Config = Config.fromfile(config_name) + cfg.work_dir = tempfile.TemporaryDirectory().name + cfg.log_level = 'WARN' + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + + init_default_scope(cfg.get('scope', 'mmseg')) if len(args.shape) == 1: input_shape = (3, args.shape[0], args.shape[0]) @@ -31,29 +64,60 @@ def main(): input_shape = (3, ) + tuple(args.shape) else: raise ValueError('invalid input shape') + result = {} - cfg = Config.fromfile(args.config) - cfg.model.pretrained = None - model = build_segmentor( - cfg.model, - train_cfg=cfg.get('train_cfg'), - test_cfg=cfg.get('test_cfg')).cuda() + model: BaseSegmentor = MODELS.build(cfg.model) + if hasattr(model, 'auxiliary_head'): + model.auxiliary_head = None + if torch.cuda.is_available(): + model.cuda() + model = revert_sync_batchnorm(model) + result['ori_shape'] = input_shape[-2:] + result['pad_shape'] = input_shape[-2:] + data_batch = { + 'inputs': [torch.rand(input_shape)], + 'data_samples': [SegDataSample(metainfo=result)] + } + data = model.data_preprocessor(data_batch) model.eval() + if cfg.model.decode_head.type in ['MaskFormerHead', 'Mask2FormerHead']: + # TODO: Support MaskFormer and Mask2Former + raise NotImplementedError('MaskFormer and Mask2Former are not ' + 'supported yet.') + outputs = get_model_complexity_info( + model, + input_shape, + inputs=data['inputs'], + show_table=False, + show_arch=False) + result['flops'] = _format_size(outputs['flops']) + result['params'] = _format_size(outputs['params']) + result['compute_type'] = 'direct: randomly generate a picture' + return result - if hasattr(model, 'forward_dummy'): - model.forward = model.forward_dummy - else: - raise NotImplementedError( - 'FLOPs 
counter is currently not supported with {}'.
-            format(model.__class__.__name__))
-    flops, params = get_model_complexity_info(model, input_shape)
+def main():
+
+    args = parse_args()
+    logger = MMLogger.get_instance(name='MMLogger')
+
+    result = inference(args, logger)
 
     split_line = '=' * 30
-    print('{0}\nInput shape: {1}\nFlops: {2}\nParams: {3}\n{0}'.format(
-        split_line, input_shape, flops, params))
+    ori_shape = result['ori_shape']
+    pad_shape = result['pad_shape']
+    flops = result['flops']
+    params = result['params']
+    compute_type = result['compute_type']
+
+    if pad_shape != ori_shape:
+        print(f'{split_line}\nUse size divisor set input shape '
+              f'from {ori_shape} to {pad_shape}')
+    print(f'{split_line}\nCompute type: {compute_type}\n'
+          f'Input shape: {pad_shape}\nFlops: {flops}\n'
+          f'Params: {params}\n{split_line}')
     print('!!!Please be cautious if you use the results in papers. '
-          'You may need to check if all ops are supported and verify that the '
-          'flops computation is correct.')
+          'You may need to check if all ops are supported and verify '
+          'that the flops computation is correct.')
 
 
 if __name__ == '__main__':
diff --git a/tools/test.py b/tools/test.py
index 7bfde58206..058fdfc864 100644
--- a/tools/test.py
+++ b/tools/test.py
@@ -17,6 +17,10 @@ def parse_args():
         '--work-dir',
         help=('if specified, the evaluation metric results will be dumped'
               'into the directory as json'))
+    parser.add_argument(
+        '--out',
+        type=str,
+        help='The directory to save output prediction for offline evaluation')
     parser.add_argument(
         '--show', action='store_true', help='show prediction results')
     parser.add_argument(
@@ -43,7 +47,10 @@ def parse_args():
         help='job launcher')
     parser.add_argument(
         '--tta', action='store_true', help='Test time augmentation')
-    parser.add_argument('--local_rank', type=int, default=0)
+    # When using PyTorch version >= 2.0.0, the `torch.distributed.launch`
+    # will pass the `--local-rank` parameter to `tools/test.py` instead
+    # of `--local_rank`.
+    parser.add_argument('--local_rank', '--local-rank', type=int, default=0)
     args = parser.parse_args()
     if 'LOCAL_RANK' not in os.environ:
         os.environ['LOCAL_RANK'] = str(args.local_rank)
@@ -100,6 +107,11 @@ def main():
         cfg.tta_model.module = cfg.model
         cfg.model = cfg.tta_model
 
+    # add output_dir in metric
+    if args.out is not None:
+        cfg.test_evaluator['output_dir'] = args.out
+        cfg.test_evaluator['keep_results'] = True
+
     # build the runner from config
     runner = Runner.from_cfg(cfg)
diff --git a/tools/train.py b/tools/train.py
index 1721306664..10fdaa1874 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -40,7 +40,10 @@ def parse_args():
         choices=['none', 'pytorch', 'slurm', 'mpi'],
         default='none',
         help='job launcher')
-    parser.add_argument('--local_rank', type=int, default=0)
+    # When using PyTorch version >= 2.0.0, the `torch.distributed.launch`
+    # will pass the `--local-rank` parameter to `tools/train.py` instead
+    # of `--local_rank`.
+    parser.add_argument('--local_rank', '--local-rank', type=int, default=0)
     args = parser.parse_args()
     if 'LOCAL_RANK' not in os.environ:
         os.environ['LOCAL_RANK'] = str(args.local_rank)
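Reviewer note: the RGB-to-mask conversion removed from projects/mapillary_dataset (and the docstring arithmetic quoted above) relies on packing each RGB triple into a single integer index, (r * 256 + g) * 256 + b, so one 256**3-entry lookup table converts a whole label image to class indices in a single vectorized step. A minimal self-contained sketch of that idea follows; the names palette, color2label and rgb_to_mask are illustrative and not part of this patch.

import numpy as np

# Toy two-class palette: 0 -> 'Bird' [165, 42, 42], 1 -> 'Road' [128, 64, 128].
palette = np.array([[165, 42, 42], [128, 64, 128]])

# Build the 256**3-entry lookup table once; unassigned colors map to 0.
color2label = np.zeros(256**3, dtype=np.int64)
for label, (r, g, b) in enumerate(palette):
    color2label[(r * 256 + g) * 256 + b] = label


def rgb_to_mask(rgb: np.ndarray) -> np.ndarray:
    """Convert an (H, W, 3) RGB label image to an (H, W) index mask."""
    rgb = rgb.astype(np.uint32)
    idx = (rgb[..., 0] * 256 + rgb[..., 1]) * 256 + rgb[..., 2]
    return color2label[idx]


# A 'Bird' pixel encodes to (165 * 256 + 42) * 256 + 42 == 10824234, so the
# lookup returns class 0; the 'Road' pixel likewise returns class 1.
demo = np.array([[[165, 42, 42], [128, 64, 128]]], dtype=np.uint8)
assert rgb_to_mask(demo).tolist() == [[0, 1]]

With the masks converted, the new `tools/test.py --out <dir>` flag added above drives the offline-evaluation flow end to end: it sets `output_dir` and `keep_results` on the configured test evaluator so predictions are written to disk.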