使用简单的自定义数据集训练模型出现IoU突变的问题 #2810
Unanswered
zhongqianli
asked this question in
General
Replies: 2 comments
-
We recommend using English or English & Chinese for issues so that we can have a broader discussion. |
Beta Was this translation helpful? Give feedback.
0 replies
-
Hi @zhongqianli , I am new to mmsegmentation and don't know the annotation format. Thanks in advance. |
Beta Was this translation helpful? Give feedback.
0 replies
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
-
训练集7张图像,验证集使用训练集的图像,两个类别,背景和前景。
尝试用UNet训练模型,batch size为4,图像resize为512x512,图像值范围由[0, 255]转变为[0, 1]。在第1282个epoch之前,前景的IoU一直是0;从第1282个epoch开始,前景的IoU才达到期望值。同样的数据集,使用PaddleSeg训练模型,要达到同样的mIoU,PaddleSeg只需要一百个左右的epoch。
=================
结果:
{"mode": "train", "epoch": 1281, "iter": 1282, "lr": 0.00099, "memory": 18236, "data_time": 4.4738, "decode.loss_ce": 0.01375, "decode.acc_seg": 98.41127, "loss": 0.01375, "time": 4.55988}
{"mode": "val", "epoch": 1281, "iter": 7, "lr": 0.00099, "aAcc": 0.9833, "mIoU": 0.4916, "mAcc": 0.5, "IoU.background": 0.9833, "IoU.light": 0.0, "Acc.background": 1.0, "Acc.light": 0.0}
{"mode": "train", "epoch": 1282, "iter": 1283, "lr": 0.00099, "memory": 18236, "data_time": 4.37041, "decode.loss_ce": 0.01309, "decode.acc_seg": 98.53649, "loss": 0.01309, "time": 4.45932}
{"mode": "val", "epoch": 1282, "iter": 7, "lr": 0.00099, "aAcc": 0.9977, "mIoU": 0.9319, "mAcc": 0.9427, "IoU.background": 0.9977, "IoU.light": 0.8662, "Acc.background": 0.9996, "Acc.light": 0.8857}
================
配置:
# MMSegmentation-style training config: a UNet backbone with an FCN decode
# head, two output classes, trained on the custom "light" dataset.
# Shared settings are declared once and referenced below so the copies
# cannot drift apart.

# Normalisation-layer settings used by both the backbone and the decode head.
norm_cfg = dict(type='SyncBN', requires_grad=True)

model = dict(
    type='EncoderDecoder',
    pretrained=None,
    backbone=dict(
        type='UNet',
        in_channels=3,
        base_channels=32,
        num_stages=5,
        strides=(1, 1, 1, 1, 1),
        enc_num_convs=(2, 2, 2, 2, 2),
        dec_num_convs=(2, 2, 2, 2),
        downsamples=(True, True, True, True),
        enc_dilations=(1, 1, 1, 1, 1),
        dec_dilations=(1, 1, 1, 1),
        with_cp=False,
        conv_cfg=None,
        norm_cfg=norm_cfg,
        act_cfg=dict(type='ReLU'),
        upsample_cfg=dict(type='InterpConv'),
        norm_eval=False,
    ),
    decode_head=dict(
        type='FCNHead',
        in_channels=32,
        in_index=4,
        channels=32,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss',
            use_sigmoid=False,
            loss_weight=1.0,
        ),
    ),
    auxiliary_head=None,
    train_cfg=dict(),
    test_cfg=dict(mode='whole'),
)

# Dataset location and type.
dataset_type = 'LightDataset'
data_root = 'data/light'

# Zero mean and a std of 255 per channel; the accompanying description says
# this maps pixel values from [0, 255] to [0, 1].
img_norm_cfg = dict(
    mean=[0.0, 0.0, 0.0],
    std=[255.0, 255.0, 255.0],
    to_rgb=True,
)
img_scale = (512, 512)

# Training pipeline: load image + annotation, resize to a fixed 512x512
# (keep_ratio=False), random flip with probability 0.5, normalise, then
# bundle and collect the image and its segmentation map.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=img_scale, keep_ratio=False),
    dict(type='RandomFlip', prob=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]

# Validation/test pipeline: a single 512x512 scale with flipping disabled.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=False),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ],
    ),
]

# The val and test splits read from the same img_dir/val and ann_dir/val
# directories and use the same pipeline.
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='img_dir/train',
        ann_dir='ann_dir/train',
        pipeline=train_pipeline,
    ),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='img_dir/val',
        ann_dir='ann_dir/val',
        pipeline=test_pipeline,
    ),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='img_dir/val',
        ann_dir='ann_dir/val',
        pipeline=test_pipeline,
    ),
)

# Runtime settings: text logging at interval 1, NCCL backend.
log_config = dict(
    interval=1,
    hooks=[dict(type='TextLoggerHook', by_epoch=False)],
)
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True

# Optimisation: Adam with a 'poly' learning-rate policy, counted in iterations.
optimizer = dict(type='Adam', lr=0.001, eps=1e-08, weight_decay=0.0005)
optimizer_config = dict()
lr_config = dict(policy='poly', power=0.9, min_lr=0.0001, by_epoch=False)

# Schedule: 160000 iterations, a checkpoint every 16000, evaluation with
# interval 1 using the mIoU metric.
runner = dict(type='IterBasedRunner', max_iters=160000)
checkpoint_config = dict(by_epoch=False, interval=16000)
evaluation = dict(
    interval=1,
    metric='mIoU',
    pre_eval=True,
    save_best='auto',
)

# NOTE(review): the directory name mentions 512x1024 while img_scale above
# is (512, 512); the name is kept unchanged.
work_dir = './work_dirs/light_fcn_unet_s5-d16_4x4_512x1024_160k'
gpu_ids = [0]
auto_resume = False
Beta Was this translation helpful? Give feedback.
All reactions