使用简单的自定义数据集训练模型出现IoU突变的问题 #2810
Unanswered
zhongqianli
asked this question in
General
Replies: 2 comments
-
We recommend using English or English & Chinese for issues so that we can have a broader discussion.
Beta Was this translation helpful? Give feedback.
0 replies
-
Hi @zhongqianli, I am new to mmsegmentation and don't know the annotation format. Thanks in advance.
Beta Was this translation helpful? Give feedback.
0 replies
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Uh oh!
There was an error while loading. Please reload this page.
-
训练集7张图像,验证集使用训练集的图像,两个类别,背景和前景。
尝试用UNet训练模型,batch size为4,图像resize为512x512,图像值范围由[0, 255]归一化到[0, 1]。在epoch 1281及之前,前景(light)的IoU一直为0;从epoch 1282开始,前景的IoU才突然达到期望值。同样的数据集,使用PaddleSeg训练模型,要达到同样的mIoU只需要一百个左右的epoch。
=================
结果:
{"mode": "train", "epoch": 1281, "iter": 1282, "lr": 0.00099, "memory": 18236, "data_time": 4.4738, "decode.loss_ce": 0.01375, "decode.acc_seg": 98.41127, "loss": 0.01375, "time": 4.55988}
{"mode": "val", "epoch": 1281, "iter": 7, "lr": 0.00099, "aAcc": 0.9833, "mIoU": 0.4916, "mAcc": 0.5, "IoU.background": 0.9833, "IoU.light": 0.0, "Acc.background": 1.0, "Acc.light": 0.0}
{"mode": "train", "epoch": 1282, "iter": 1283, "lr": 0.00099, "memory": 18236, "data_time": 4.37041, "decode.loss_ce": 0.01309, "decode.acc_seg": 98.53649, "loss": 0.01309, "time": 4.45932}
{"mode": "val", "epoch": 1282, "iter": 7, "lr": 0.00099, "aAcc": 0.9977, "mIoU": 0.9319, "mAcc": 0.9427, "IoU.background": 0.9977, "IoU.light": 0.8662, "Acc.background": 0.9996, "Acc.light": 0.8857}
================
配置:
# Model definition: an EncoderDecoder segmentor made of a 5-stage UNet
# backbone and a single FCN decode head predicting 2 classes.
# NOTE(review): SyncBN is configured while `gpu_ids` lists a single GPU;
# presumably plain 'BN' would be equivalent for a non-distributed run --
# confirm against how training is launched.
norm_cfg = {'type': 'SyncBN', 'requires_grad': True}

model = {
    'type': 'EncoderDecoder',
    'pretrained': None,  # trained from scratch, no pretrained weights
    'backbone': {
        'type': 'UNet',
        'in_channels': 3,
        'base_channels': 32,
        'num_stages': 5,
        'strides': (1, 1, 1, 1, 1),
        'enc_num_convs': (2, 2, 2, 2, 2),
        'dec_num_convs': (2, 2, 2, 2),
        'downsamples': (True, True, True, True),
        'enc_dilations': (1, 1, 1, 1, 1),
        'dec_dilations': (1, 1, 1, 1),
        'with_cp': False,
        'conv_cfg': None,
        'norm_cfg': norm_cfg,
        'act_cfg': {'type': 'ReLU'},
        'upsample_cfg': {'type': 'InterpConv'},
        'norm_eval': False,
    },
    'decode_head': {
        'type': 'FCNHead',
        'in_channels': 32,
        'in_index': 4,  # index of the backbone output fed to the head
        'channels': 32,
        'num_convs': 1,
        'concat_input': False,
        'dropout_ratio': 0.1,
        'num_classes': 2,
        'norm_cfg': norm_cfg,
        'align_corners': False,
        # Plain (unweighted) softmax cross-entropy.
        # NOTE(review): the validation log in this thread reports
        # aAcc 0.9833 with foreground IoU 0.0, i.e. an all-background
        # prediction; a `class_weight` may be worth trying -- TODO confirm.
        'loss_decode': {
            'type': 'CrossEntropyLoss',
            'use_sigmoid': False,
            'loss_weight': 1.0,
        },
    },
    'auxiliary_head': None,
    'train_cfg': {},
    'test_cfg': {'mode': 'whole'},
}
# Dataset settings: custom 'LightDataset' rooted at data/light.
dataset_type = 'LightDataset'
data_root = 'data/light'

# With mean 0 and std 255 the Normalize step presumably maps pixel values
# from [0, 255] to [0, 1] (assuming it computes (x - mean) / std).
img_norm_cfg = {
    'mean': [0.0, 0.0, 0.0],
    'std': [255.0, 255.0, 255.0],
    'to_rgb': True,
}
img_scale = (512, 512)

# Training pipeline: load image + mask, resize to a fixed 512x512 (aspect
# ratio not kept), random flip, normalize, then pack for the model.
train_pipeline = [
    {'type': 'LoadImageFromFile'},
    {'type': 'LoadAnnotations'},
    {'type': 'Resize', 'img_scale': img_scale, 'keep_ratio': False},
    {'type': 'RandomFlip', 'prob': 0.5},
    {'type': 'Normalize', **img_norm_cfg},
    {'type': 'DefaultFormatBundle'},
    {'type': 'Collect', 'keys': ['img', 'gt_semantic_seg']},
]

# Evaluation pipeline: single scale, flip augmentation disabled.
test_pipeline = [
    {'type': 'LoadImageFromFile'},
    {
        'type': 'MultiScaleFlipAug',
        'img_scale': img_scale,
        'flip': False,
        'transforms': [
            {'type': 'Resize', 'keep_ratio': False},
            {'type': 'RandomFlip'},
            {'type': 'Normalize', **img_norm_cfg},
            {'type': 'ImageToTensor', 'keys': ['img']},
            {'type': 'Collect', 'keys': ['img']},
        ],
    },
]

# Data loaders: 4 samples per GPU, 2 workers per GPU. The val and test
# splits read the same directories and share the evaluation pipeline.
data = {
    'samples_per_gpu': 4,
    'workers_per_gpu': 2,
    'train': {
        'type': dataset_type,
        'data_root': data_root,
        'img_dir': 'img_dir/train',
        'ann_dir': 'ann_dir/train',
        'pipeline': train_pipeline,
    },
    'val': {
        'type': dataset_type,
        'data_root': data_root,
        'img_dir': 'img_dir/val',
        'ann_dir': 'ann_dir/val',
        'pipeline': test_pipeline,
    },
    'test': {
        'type': dataset_type,
        'data_root': data_root,
        'img_dir': 'img_dir/val',
        'ann_dir': 'ann_dir/val',
        'pipeline': test_pipeline,
    },
}
# --- Runtime ---------------------------------------------------------------
# Text logging with interval 1 (iteration-based, by_epoch disabled).
log_config = {
    'interval': 1,
    'hooks': [{'type': 'TextLoggerHook', 'by_epoch': False}],
}
dist_params = {'backend': 'nccl'}
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True

# --- Optimization ----------------------------------------------------------
# Adam at lr 1e-3 with weight decay 5e-4; no extra optimizer hooks.
optimizer = {
    'type': 'Adam',
    'lr': 0.001,
    'eps': 1e-08,
    'weight_decay': 0.0005,
}
optimizer_config = {}
# Polynomial lr decay (power 0.9) with a floor of 1e-4, stepped per iteration.
lr_config = {
    'policy': 'poly',
    'power': 0.9,
    'min_lr': 0.0001,
    'by_epoch': False,
}

# --- Schedule --------------------------------------------------------------
runner = {'type': 'IterBasedRunner', 'max_iters': 160000}
checkpoint_config = {'by_epoch': False, 'interval': 16000}
# NOTE(review): interval=1 with an iteration-based runner presumably runs
# validation after every training iteration -- confirm this is intended.
evaluation = {
    'interval': 1,
    'metric': 'mIoU',
    'pre_eval': True,
    'save_best': 'auto',
}

# --- Misc ------------------------------------------------------------------
work_dir = './work_dirs/light_fcn_unet_s5-d16_4x4_512x1024_160k'
gpu_ids = [0]
auto_resume = False
Beta Was this translation helpful? Give feedback.
All reactions