You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
I use mmsegmentation and added FocalNet to it, then I ran tools/train.py, but I found the results on most categories of my dataset were zero.
I don't know why. I changed mmseg/model/backbone and the related init.py, as well as all configs and dataset settings,
and I also checked the encoder and decoder and found the output format was right, but the validation result on most categories was still zero.
My dataset is a video dataset containing 124 categories. I will give the configs as follows.
This discussion was converted from issue #2661 on March 06, 2023 04:57.
Heading
Bold
Italic
Quote
Code
Link
Numbered list
Unordered list
Task list
Attach files
Mention
Reference
Menu
reacted with thumbs up emoji reacted with thumbs down emoji reacted with laugh emoji reacted with hooray emoji reacted with confused emoji reacted with heart emoji reacted with rocket emoji reacted with eyes emoji
-
I use mmsegmentation and added FocalNet to it, then I ran tools/train.py, but I found the results on most categories of my dataset were zero.
I don't know why. I changed mmseg/model/backbone and the related init.py, as well as all configs and dataset settings,
and I also checked the encoder and decoder and found the output format was right, but the validation result on most categories was still zero.
My dataset is a video dataset containing 124 categories. I will give the configs as follows.
# model settings
# SyncBN synchronizes batch-norm statistics across GPUs (multi-GPU training).
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained=None,  # no backbone checkpoint is loaded here
    backbone=dict(
        type='FocalNet',
        embed_dim=96,                 # FocalNet-Tiny sizing
        depths=[2, 2, 6, 2],
        mlp_ratio=4.,
        drop_rate=0.,
        drop_path_rate=0.3,
        patch_norm=True,
        out_indices=(0, 1, 2, 3),     # feed all four stages to the heads
        use_checkpoint=False),
    decode_head=dict(
        type='UPerHead',
        # channel widths of the four backbone stages (96 * 2**i)
        in_channels=[96, 192, 384, 768],
        in_index=[0, 1, 2, 3],
        pool_scales=(1, 2, 3, 6),
        channels=512,
        dropout_ratio=0.1,
        num_classes=124,              # VSPW has 124 categories
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=384,              # taps backbone stage 2
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=124,
        norm_cfg=norm_cfg,
        align_corners=False,
        # auxiliary loss is down-weighted relative to the decode head
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
# dataset settings
dataset_type = 'VSPWDataset'
data_root = '/data1/VSPW_Dataset'
# standard ImageNet mean/std normalization
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (480, 853)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    # reduce_zero_label maps label 0 to the ignore index and shifts the
    # remaining labels down by one; it must match the dataset setting.
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(type='Resize', img_scale=(853, 480), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(853, 480),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,  # single-scale, no-flip evaluation
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# Data loader configuration: 4 samples and 4 worker processes per GPU.
# Each split points at the same VSPW root but its own file list.
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        split='/data1/VSPW_Dataset/train.txt',
        reduce_zero_label=True,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        split='/data1/VSPW_Dataset/val.txt',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        split='/data1/VSPW_Dataset/test.txt',
        pipeline=test_pipeline))
base = [
'../base/models/upernet_focalnet.py', '../base/datasets/vspw.py',
'../base/default_runtime.py', '../base/schedules/schedule_160k.py'
]
model = dict(
backbone=dict(
type='FocalNet',
embed_dim=96,
depths=[2, 2, 6, 2],
drop_path_rate=0.3,
patch_norm=True,
use_checkpoint=False,
focal_windows=[9, 9, 9, 9],
focal_levels=[2, 2, 2, 2],
),
decode_head=dict(
in_channels=[96, 192, 384, 768],
num_classes=124
),
auxiliary_head=dict(
in_channels=384,
num_classes=124
))
# AdamW optimizer; no weight decay for position embedding & layer norm in backbone.
# `_delete_=True` discards the optimizer/lr settings inherited from the base
# config instead of merging into them (mmcv Config semantics; markdown
# rendering had stripped the underscores from `_delete_`).
optimizer = dict(
    _delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999),
    weight_decay=0.01,
    paramwise_cfg=dict(custom_keys={
        'absolute_pos_embed': dict(decay_mult=0.),
        'relative_position_bias_table': dict(decay_mult=0.),
        'norm': dict(decay_mult=0.)}))
# Polynomial LR decay with a 1500-iteration linear warmup, stepped per
# iteration (by_epoch=False).
lr_config = dict(
    _delete_=True, policy='poly',
    warmup='linear',
    warmup_iters=1500,
    warmup_ratio=1e-6,
    power=1.0, min_lr=0.0, by_epoch=False)
# By default, models are trained on 8 GPUs with 2 images per GPU
# (overrides samples_per_gpu from the base dataset config).
data = dict(samples_per_gpu=2)
Beta Was this translation helpful? Give feedback.
All reactions