Could you share the training log and evaluation script?

Question

Could you share the training log and evaluation script?

howardchina opened this issue 3 years ago · 0 comments

howardchina commented 3 years ago

I have trained TSD on the Docker.

baseline: TSD Faster R-CNN
dataset: stanford cars.

envs:

cuda=10.1
pytorch=1.3.0
mmcv=0.4.3

My training log is below; judging from it, I may have obtained a promising model.

{"mode": "train", "epoch": 12, "iter": 800, "lr": 0.0002, "time": 0.65021, "data_time": 0.00734, "memory": 3249, "loss_rpn_cls": 0.00914, "loss_rpn_bbox": 0.00379, "loss_cls": 0.16967, "acc": 97.57617, "loss_TSD_cls": 0.16994, "TSD_acc": 97.57617, "loss_bbox": 0.04268, "loss_TSD_bbox": 0.04255, "loss_pc_cls": 0.01316, "loss_pc_loc": 0.22021, "loss": 0.67113} {"mode": "train", "epoch": 12, "iter": 850, "lr": 0.0002, "time": 0.65292, "data_time": 0.00706, "memory": 3249, "loss_rpn_cls": 0.00864, "loss_rpn_bbox": 0.00349, "loss_cls": 0.1724, "acc": 97.51123, "loss_TSD_cls": 0.17322, "TSD_acc": 97.51123, "loss_bbox": 0.04279, "loss_TSD_bbox": 0.04291, "loss_pc_cls": 0.01329, "loss_pc_loc": 0.22873, "loss": 0.68547} {"mode": "train", "epoch": 12, "iter": 900, "lr": 0.0002, "time": 0.64935, "data_time": 0.00684, "memory": 3249, "loss_rpn_cls": 0.00942, "loss_rpn_bbox": 0.00344, "loss_cls": 0.16686, "acc": 97.57764, "loss_TSD_cls": 0.16697, "TSD_acc": 97.57764, "loss_bbox": 0.04192, "loss_TSD_bbox": 0.04198, "loss_pc_cls": 0.01312, "loss_pc_loc": 0.2191, "loss": 0.6628} {"mode": "train", "epoch": 12, "iter": 950, "lr": 0.0002, "time": 0.65193, "data_time": 0.00701, "memory": 3249, "loss_rpn_cls": 0.00878, "loss_rpn_bbox": 0.00356, "loss_cls": 0.17192, "acc": 97.50977, "loss_TSD_cls": 0.17272, "TSD_acc": 97.50977, "loss_bbox": 0.04384, "loss_TSD_bbox": 0.04405, "loss_pc_cls": 0.01319, "loss_pc_loc": 0.22383, "loss": 0.68188} {"mode": "train", "epoch": 12, "iter": 1000, "lr": 0.0002, "time": 0.64752, "data_time": 0.00715, "memory": 3249, "loss_rpn_cls": 0.00823, "loss_rpn_bbox": 0.00349, "loss_cls": 0.17404, "acc": 97.48682, "loss_TSD_cls": 0.17474, "TSD_acc": 97.48682, "loss_bbox": 0.04349, "loss_TSD_bbox": 0.04376, "loss_pc_cls": 0.01332, "loss_pc_loc": 0.21945, "loss": 0.68052}

But I can't test the model using the given command:

./tools/dist_test.sh configs/stanford_cars/faster_rcnn_r50_fpn_TSD_1x_stanford_cars.py work_dirs/faster_rcnn_r50_fpn_TSD_1x_stanford_cars/latest.pth 4 --eval bbox

The output is frustrating:

Evaluating bbox... Loading and preparing results... The testing results of the whole dataset is empty.

Could you share your training log and evaluation script here?
How can I evaluate the model after each epoch? (I have already enabled evaluation and set interval=1 in my config.)

here is my config:

# Model settings.
# Detector of type "FasterRCNN": ResNet-50 backbone, FPN neck, RPN head and a
# "TSDSharedFCBBoxHead" box head. Written as plain dict literals; the values
# are the same as the equivalent dict(...) calls would produce.
model = {
    "type": "FasterRCNN",
    "pretrained": "torchvision://resnet50",
    "backbone": {
        "type": "ResNet",
        "depth": 50,
        "num_stages": 4,
        "out_indices": (0, 1, 2, 3),
        "frozen_stages": 1,
        "norm_cfg": {"type": "BN", "requires_grad": True},
        "style": "pytorch",
    },
    "neck": {
        "type": "FPN",
        "in_channels": [256, 512, 1024, 2048],
        "out_channels": 256,
        "num_outs": 5,
    },
    "rpn_head": {
        "type": "RPNHead",
        "in_channels": 256,
        "feat_channels": 256,
        "anchor_scales": [8],
        "anchor_ratios": [0.5, 1.0, 2.0],
        "anchor_strides": [4, 8, 16, 32, 64],
        "target_means": [0.0, 0.0, 0.0, 0.0],
        "target_stds": [1.0, 1.0, 1.0, 1.0],
        "loss_cls": {
            "type": "CrossEntropyLoss",
            "use_sigmoid": True,
            "loss_weight": 1.0,
        },
        "loss_bbox": {
            "type": "SmoothL1Loss",
            "beta": 1.0 / 9.0,
            "loss_weight": 1.0,
        },
    },
    "bbox_roi_extractor": {
        "type": "SingleRoIExtractor",
        "roi_layer": {"type": "RoIAlign", "out_size": 7, "sample_num": 2},
        "out_channels": 256,
        "featmap_strides": [4, 8, 16, 32],
    },
    "bbox_head": {
        "type": "TSDSharedFCBBoxHead",
        "featmap_strides": [4, 8, 16, 32],
        "num_fcs": 2,
        "in_channels": 256,
        "fc_out_channels": 1024,
        "roi_feat_size": 7,
        # Foreground + background classes: 196 + 1.
        "num_classes": 197,
        "cls_pc_margin": 0.3,
        "loc_pc_margin": 0.3,
        "target_means": [0.0, 0.0, 0.0, 0.0],
        "target_stds": [0.1, 0.1, 0.2, 0.2],
        "reg_class_agnostic": False,
        "loss_cls": {
            "type": "CrossEntropyLoss",
            "use_sigmoid": False,
            "loss_weight": 1.0,
        },
        "loss_bbox": {
            "type": "SmoothL1Loss",
            "beta": 1.0,
            "loss_weight": 1.0,
        },
    },
}
# Training-time settings for the RPN, the RPN proposal stage and the R-CNN
# stage (the testing counterpart is configured separately in test_cfg).
train_cfg = {
    "rpn": {
        "assigner": {
            "type": "MaxIoUAssigner",
            "pos_iou_thr": 0.7,
            "neg_iou_thr": 0.3,
            "min_pos_iou": 0.3,
            "ignore_iof_thr": -1,
        },
        "sampler": {
            "type": "RandomSampler",
            "num": 256,
            "pos_fraction": 0.5,
            "neg_pos_ub": -1,
            "add_gt_as_proposals": False,
        },
        "allowed_border": 0,
        "pos_weight": -1,
        "debug": False,
    },
    "rpn_proposal": {
        "nms_across_levels": False,
        "nms_pre": 2000,
        "nms_post": 2000,
        "max_num": 2000,
        "nms_thr": 0.7,
        "min_bbox_size": 0,
    },
    "rcnn": {
        "assigner": {
            "type": "MaxIoUAssigner",
            "pos_iou_thr": 0.5,
            "neg_iou_thr": 0.5,
            "min_pos_iou": 0.5,
            "ignore_iof_thr": -1,
        },
        "sampler": {
            "type": "RandomSampler",
            "num": 512,
            "pos_fraction": 0.25,
            "neg_pos_ub": -1,
            "add_gt_as_proposals": True,
        },
        "pos_weight": -1,
        "debug": False,
    },
}
# Test-time settings for the RPN and the R-CNN stage.
test_cfg = {
    "rpn": {
        "nms_across_levels": False,
        "nms_pre": 1000,
        "nms_post": 1000,
        "max_num": 1000,
        "nms_thr": 0.7,
        "min_bbox_size": 0,
    },
    "rcnn": {
        "score_thr": 0.05,
        "nms": {"type": "nms", "iou_thr": 0.5},
        "max_per_img": 1,
    },
    # soft-nms is also supported for rcnn testing
    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
}
# Dataset settings.
dataset_type = "StanfordcarsDataset"
data_root = "data/stanford_car"

# Per-channel normalization statistics, unpacked into every "Normalize" step.
img_norm_cfg = {
    "mean": [123.675, 116.28, 103.53],
    "std": [58.395, 57.12, 57.375],
    "to_rgb": True,
}

# Training pipeline: load image and boxes, resize, random flip, normalize,
# pad, then bundle and collect the image with its ground-truth boxes/labels.
train_pipeline = [
    {"type": "LoadImageFromFile"},
    {"type": "LoadAnnotations", "with_bbox": True},
    {"type": "Resize", "img_scale": (1333, 800), "keep_ratio": True},
    {"type": "RandomFlip", "flip_ratio": 0.5},
    {"type": "Normalize", **img_norm_cfg},
    {"type": "Pad", "size_divisor": 32},
    {"type": "DefaultFormatBundle"},
    {"type": "Collect", "keys": ["img", "gt_bboxes", "gt_labels"]},
]

# Test pipeline: a single scale with flipping disabled.
test_pipeline = [
    {"type": "LoadImageFromFile"},
    {
        "type": "MultiScaleFlipAug",
        "img_scale": (1333, 800),
        "flip": False,
        "transforms": [
            {"type": "Resize", "keep_ratio": True},
            {"type": "RandomFlip"},
            {"type": "Normalize", **img_norm_cfg},
            {"type": "Pad", "size_divisor": 32},
            {"type": "ImageToTensor", "keys": ["img"]},
            {"type": "Collect", "keys": ["img"]},
        ],
    },
]

# val and test point at the same annotation file and image directory.
data = {
    "imgs_per_gpu": 2,
    "workers_per_gpu": 2,
    "train": {
        "type": dataset_type,
        "ann_file": "{}/annotations/train.json".format(data_root),
        "img_prefix": "{}/cars_train/".format(data_root),
        "pipeline": train_pipeline,
    },
    "val": {
        "type": dataset_type,
        "ann_file": "{}/annotations/test.json".format(data_root),
        "img_prefix": "{}/cars_test/".format(data_root),
        "pipeline": test_pipeline,
    },
    "test": {
        "type": dataset_type,
        "ann_file": "{}/annotations/test.json".format(data_root),
        "img_prefix": "{}/cars_test/".format(data_root),
        "pipeline": test_pipeline,
    },
}

# Evaluation: interval of 1 with the "bbox" metric.
evaluation = {"interval": 1, "metric": "bbox"}

# Optimizer: SGD with momentum and weight decay.
optimizer = {
    "type": "SGD",
    "lr": 0.02,
    "momentum": 0.9,
    "weight_decay": 0.0001,
}
# Gradient clipping is disabled; a clipped alternative would be
# optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
optimizer_config = {"grad_clip": None}

# Learning policy: step schedule with linear warmup.
lr_config = {
    "policy": "step",
    "warmup": "linear",
    "warmup_iters": 500,
    "warmup_ratio": 0.001,
    "step": [8, 11],
}

checkpoint_config = {"interval": 1}
# yapf:disable
log_config = {
    "interval": 50,
    "hooks": [
        {"type": "TextLoggerHook"},
        # dict(type='TensorboardLoggerHook')
    ],
}
# yapf:enable

# Runtime settings.
total_epochs = 12
dist_params = {"backend": "nccl"}
log_level = "INFO"
work_dir = "./work_dirs/faster_rcnn_r50_fpn_TSD_1x_stanford_cars"
load_from = None
resume_from = None
workflow = [("train", 1)]

I hope to hear from you soon. Thanks!