Clean train set #3682

Open · wants to merge 27 commits into base: main
2 changes: 1 addition & 1 deletion configs/_base_/datasets/ade20k.py
@@ -1,6 +1,6 @@
 # dataset settings
 dataset_type = 'ADE20KDataset'
-data_root = 'data/ade/ADEChallengeData2016'
+data_root = "/media/ids/Ubuntu files/data/ADEChallengeData2016/"
 crop_size = (512, 512)
 train_pipeline = [
     dict(type='LoadImageFromFile'),
73 changes: 73 additions & 0 deletions configs/_base_/datasets/hots_v1_640x480.py
@@ -0,0 +1,73 @@
dataset_type = "HOTSDataset"
data_root = "/media/ids/Ubuntu files/data/HOTS_v1/SemanticSegmentation/"
crop_size = (640, 480)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(
        type='RandomResize',
        scale=(2048, 480),
        ratio_range=(0.5, 2.0),
        keep_ratio=True),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PackSegInputs')
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=(2048, 480), keep_ratio=True),
    # add loading annotation after ``Resize`` because ground truth
    # does not need to do resize data transform
    dict(type='LoadAnnotations'),
    dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
    dict(type='LoadImageFromFile', backend_args=None),
    dict(
        type='TestTimeAug',
        transforms=[
            [
                dict(type='Resize', scale_factor=r, keep_ratio=True)
                for r in img_ratios
            ],
            [
                dict(type='RandomFlip', prob=0., direction='horizontal'),
                dict(type='RandomFlip', prob=1., direction='horizontal')
            ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
        ])
]
train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='InfiniteSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='img_dir/train', seg_map_path='ann_dir/train'),
        pipeline=train_pipeline))
val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(img_path='img_dir/eval', seg_map_path='ann_dir/eval'),
        pipeline=test_pipeline))
test_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(img_path='img_dir/test', seg_map_path='ann_dir/test'),
        pipeline=test_pipeline))

val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator
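
This base config assumes a `HOTSDataset` class is already registered with mmseg; the registration itself is not part of this hunk. As a hedged illustration only, a custom dataset in mmseg 1.x is typically wired up like the sketch below (the class metadata and file suffixes are assumptions, not the PR's actual code):

# Illustrative sketch only: registering a custom dataset in mmseg 1.x.
# The class names and palette are placeholders, not the PR's real METAINFO.
from mmseg.datasets import BaseSegDataset
from mmseg.registry import DATASETS


@DATASETS.register_module()
class HOTSDataset(BaseSegDataset):
    """HOTS v1 tabletop scenes; 46 classes assumed from num_classes=46."""
    METAINFO = dict(
        classes=('background', 'object_1', 'object_2'),  # truncated placeholder
        palette=[[0, 0, 0], [220, 20, 60], [119, 11, 32]])

    def __init__(self, **kwargs) -> None:
        super().__init__(img_suffix='.png', seg_map_suffix='.png', **kwargs)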
73 changes: 73 additions & 0 deletions configs/_base_/datasets/irl_vision_sim_512x512.py
@@ -0,0 +1,73 @@
dataset_type = "IRLVisionSimDataset"
data_root = "/media/ids/Ubuntu files/data/irl_vision_sim/SemanticSegmentation/"
crop_size = (512, 512)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(
        type='RandomResize',
        scale=(2048, 512),
        ratio_range=(0.5, 2.0),
        keep_ratio=True),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PackSegInputs')
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=(2048, 512), keep_ratio=True),
    # add loading annotation after ``Resize`` because ground truth
    # does not need to do resize data transform
    dict(type='LoadAnnotations'),
    dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
    dict(type='LoadImageFromFile', backend_args=None),
    dict(
        type='TestTimeAug',
        transforms=[
            [
                dict(type='Resize', scale_factor=r, keep_ratio=True)
                for r in img_ratios
            ],
            [
                dict(type='RandomFlip', prob=0., direction='horizontal'),
                dict(type='RandomFlip', prob=1., direction='horizontal')
            ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
        ])
]
train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='InfiniteSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='img_dir/train', seg_map_path='ann_dir/train'),
        pipeline=train_pipeline))
val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(img_path='img_dir/eval', seg_map_path='ann_dir/eval'),
        pipeline=test_pipeline))
test_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(img_path='img_dir/test', seg_map_path='ann_dir/test'),
        pipeline=test_pipeline))

val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator
73 changes: 73 additions & 0 deletions configs/_base_/datasets/irl_vision_sim_640x480.py
@@ -0,0 +1,73 @@
dataset_type = "IRLVisionSimDataset"
data_root = "/media/ids/Ubuntu files/data/irl_vision_sim/SemanticSegmentation/"
crop_size = (640, 480)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(
        type='RandomResize',
        scale=(2048, 480),
        ratio_range=(0.5, 2.0),
        keep_ratio=True),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PackSegInputs')
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=(2048, 480), keep_ratio=True),
    # add loading annotation after ``Resize`` because ground truth
    # does not need to do resize data transform
    dict(type='LoadAnnotations'),
    dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
    dict(type='LoadImageFromFile', backend_args=None),
    dict(
        type='TestTimeAug',
        transforms=[
            [
                dict(type='Resize', scale_factor=r, keep_ratio=True)
                for r in img_ratios
            ],
            [
                dict(type='RandomFlip', prob=0., direction='horizontal'),
                dict(type='RandomFlip', prob=1., direction='horizontal')
            ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
        ])
]
train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='InfiniteSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='img_dir/train', seg_map_path='ann_dir/train'),
        pipeline=train_pipeline))
val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(img_path='img_dir/eval', seg_map_path='ann_dir/eval'),
        pipeline=test_pipeline))
test_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(img_path='img_dir/test', seg_map_path='ann_dir/test'),
        pipeline=test_pipeline))

val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator
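
All three new dataset configs hardcode an absolute `data_root` under `/media/ids/`, so reproducing them on another machine means overriding those values at load time. A minimal sketch with mmengine's `Config` (the relative replacement path is an assumption):

# Sketch: overriding the machine-specific data_root of one of these configs.
# data_root was already interpolated into each dataloader when the file was
# written, so the nested keys must be overridden, not just the top variable.
from mmengine.config import Config

cfg = Config.fromfile('configs/_base_/datasets/hots_v1_640x480.py')
new_root = 'data/HOTS_v1/SemanticSegmentation/'  # assumed local layout
cfg.merge_from_dict({
    'train_dataloader.dataset.data_root': new_root,
    'val_dataloader.dataset.data_root': new_root,
    'test_dataloader.dataset.data_root': new_root,
})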
2 changes: 1 addition & 1 deletion configs/_base_/models/dpt_vit-b16.py
@@ -9,7 +9,7 @@
 model = dict(
     type='EncoderDecoder',
     data_preprocessor=data_preprocessor,
-    pretrained='pretrain/vit-b16_p16_224-80ecf9dd.pth',  # noqa
+    # pretrained='pretrain/vit-b16_p16_224-80ecf9dd.pth',  # noqa
     backbone=dict(
         type='VisionTransformer',
         img_size=224,
3 changes: 2 additions & 1 deletion configs/_base_/models/fast_scnn.py
@@ -33,7 +33,8 @@
         norm_cfg=norm_cfg,
         align_corners=False,
         loss_decode=dict(
-            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1)),
+            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1)
+    ),
     auxiliary_head=[
         dict(
             type='FCNHead',
2 changes: 1 addition & 1 deletion configs/_base_/models/upernet_vit-b16_ln_mln.py
@@ -10,7 +10,7 @@
 model = dict(
     type='EncoderDecoder',
     data_preprocessor=data_preprocessor,
-    pretrained='pretrain/jx_vit_base_p16_224-80ecf9dd.pth',
+    # pretrained='pretrain/jx_vit_base_p16_224-80ecf9dd.pth',
     backbone=dict(
         type='VisionTransformer',
         img_size=(512, 512),
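
With `pretrained` commented out here (and in dpt_vit-b16.py above), the ViT backbone initializes randomly unless weights are supplied another way. For illustration only, the usual mmseg 1.x alternative is an `init_cfg` on the backbone, as this sketch assumes (the checkpoint path is taken from the removed line):

# Hedged sketch: loading pretrained weights via init_cfg instead of the
# removed top-level `pretrained` key (mmseg 1.x convention, not this PR's code).
backbone = dict(
    type='VisionTransformer',
    img_size=(512, 512),
    init_cfg=dict(
        type='Pretrained',
        checkpoint='pretrain/jx_vit_base_p16_224-80ecf9dd.pth'))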
24 changes: 24 additions & 0 deletions configs/_base_/schedules/schedule_10k.py
@@ -0,0 +1,24 @@
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
# learning policy
param_scheduler = [
    dict(
        type='PolyLR',
        eta_min=1e-4,
        power=0.9,
        begin=0,
        end=10000,
        by_epoch=False)
]
# training schedule for 10k
train_cfg = dict(type='IterBasedTrainLoop', max_iters=10000, val_interval=1000)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=1000),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='SegVisualizationHook'))
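
For reference, `PolyLR` in this schedule (and the 1k/5k variants below) decays the SGD learning rate from 0.01 toward `eta_min=1e-4` over the training run. A small sketch of the decay rule, assuming mmengine's usual polynomial formulation:

# Sketch of the assumed PolyLR decay rule behind these schedules.
def poly_lr(t, base_lr=0.01, eta_min=1e-4, power=0.9, begin=0, end=10000):
    # progress runs from 0 at `begin` to 1 at `end`; lr decays polynomially
    progress = min(max(t - begin, 0), end - begin) / (end - begin)
    return (base_lr - eta_min) * (1 - progress) ** power + eta_min

print(poly_lr(0))      # 0.01 at the first iteration
print(poly_lr(10000))  # reaches eta_min = 1e-4 at the last iteration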
24 changes: 24 additions & 0 deletions configs/_base_/schedules/schedule_1k.py
@@ -0,0 +1,24 @@
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
# learning policy
param_scheduler = [
    dict(
        type='PolyLR',
        eta_min=1e-4,
        power=0.9,
        begin=0,
        end=1000,
        by_epoch=False)
]
# training schedule for 1k
train_cfg = dict(type='IterBasedTrainLoop', max_iters=1000, val_interval=200)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=200),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='SegVisualizationHook'))
24 changes: 24 additions & 0 deletions configs/_base_/schedules/schedule_5k.py
@@ -0,0 +1,24 @@
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
# learning policy
param_scheduler = [
    dict(
        type='PolyLR',
        eta_min=1e-4,
        power=0.9,
        begin=0,
        end=5000,
        by_epoch=False)
]
# training schedule for 5k
train_cfg = dict(type='IterBasedTrainLoop', max_iters=5000, val_interval=1000)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=1000),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='SegVisualizationHook'))
@@ -0,0 +1,54 @@
_base_ = [
    '../_base_/models/upernet_convnext.py',
    '../_base_/datasets/hots_v1_640x480.py',
    '../_base_/default_runtime.py', '../_base_/schedules/schedule_10k.py'
]
crop_size = (640, 480)
data_preprocessor = dict(size=crop_size)
checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-tiny_3rdparty_32xb128-noema_in1k_20220301-795e9634.pth'  # noqa
model = dict(
    data_preprocessor=data_preprocessor,
    backbone=dict(
        type='mmpretrain.ConvNeXt',
        arch='tiny',
        out_indices=[0, 1, 2, 3],
        drop_path_rate=0.4,
        layer_scale_init_value=1.0,
        gap_before_final_norm=False,
        init_cfg=dict(
            type='Pretrained', checkpoint=checkpoint_file,
            prefix='backbone.')),
    decode_head=dict(
        in_channels=[96, 192, 384, 768],
        num_classes=46,
    ),
    auxiliary_head=dict(in_channels=384, num_classes=46),
    test_cfg=dict(mode='slide', crop_size=crop_size, stride=(341, 341)),
)

optim_wrapper = dict(
    _delete_=True,
    type='AmpOptimWrapper',
    optimizer=dict(
        type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05),
    paramwise_cfg={
        'decay_rate': 0.9,
        'decay_type': 'stage_wise',
        'num_layers': 6
    },
    constructor='LearningRateDecayOptimizerConstructor',
    loss_scale='dynamic')

param_scheduler = [
    dict(
        type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500),
    dict(
        type='PolyLR',
        power=1.0,
        begin=1500,
        end=10000,
        eta_min=0.0,
        by_epoch=False,
    )
]
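
The two schedulers chain here: `LinearLR` warms the AdamW learning rate up from `lr * 1e-6` over the first 1500 iterations, then `PolyLR` with `power=1.0` decays it linearly to zero by iteration 10000. A sketch of the combined factor applied to the base lr, assuming the schedulers compose as in mmengine:

# Sketch of the assumed combined warmup + linear-decay lr factor.
def lr_factor(t, warmup_end=1500, total=10000, start_factor=1e-6):
    if t < warmup_end:
        # LinearLR: ramp from start_factor up to 1.0
        return start_factor + (1.0 - start_factor) * t / warmup_end
    # PolyLR with power=1.0 and eta_min=0.0: linear decay to zero
    return 1.0 - (t - warmup_end) / (total - warmup_end)

assert abs(lr_factor(1500) - 1.0) < 1e-12  # warmup hands off at full lr
assert lr_factor(10000) == 0.0             # fully decayed at max_iters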

