# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import division
from __future__ import print_function

import argparse
import contextlib
import os

import numpy as np

from paddle import fluid
from paddle.fluid.optimizer import Momentum
from paddle.io import DataLoader

from hapi.model import Model, Input, set_device
from hapi.distributed import DistributedBatchSampler
from hapi.download import is_url, get_weights_path
from hapi.datasets import COCODataset
from hapi.vision.transforms import *
from hapi.vision.models import yolov3_darknet53, YoloLoss

from coco_metric import COCOMetric

NUM_MAX_BOXES = 50


def make_optimizer(step_per_epoch, parameter_list=None):
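    """Create a Momentum optimizer with a warmup + piecewise-decay LR schedule.

    The base learning rate is linearly warmed up from 0 over the first
    1000 iterations, then decayed by 10x at epochs 200 and 250. Momentum
    of 0.9 and L2 weight decay of 5e-4 are used.
    """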
    base_lr = FLAGS.lr
    warm_up_iter = 1000
    momentum = 0.9
    weight_decay = 5e-4
    boundaries = [step_per_epoch * e for e in [200, 250]]
    values = [base_lr * (0.1 ** i) for i in range(len(boundaries) + 1)]
    learning_rate = fluid.layers.piecewise_decay(
        boundaries=boundaries,
        values=values)
    learning_rate = fluid.layers.linear_lr_warmup(
        learning_rate=learning_rate,
        warmup_steps=warm_up_iter,
        start_lr=0.0,
        end_lr=base_lr)
    optimizer = Momentum(
        learning_rate=learning_rate,
        regularization=fluid.regularizer.L2Decay(weight_decay),
        momentum=momentum,
        parameter_list=parameter_list)
    return optimizer


def main():
    device = set_device(FLAGS.device)
    if FLAGS.dynamic:
        fluid.enable_dygraph(device)

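    # static input/label specs consumed by Model.prepare: image id, original
    # image shape and the image tensor as inputs; ground-truth boxes, class
    # labels and per-box scores (padded to NUM_MAX_BOXES per image) as labels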
    inputs = [Input([None, 1], 'int64', name='img_id'),
              Input([None, 2], 'int32', name='img_shape'),
              Input([None, 3, None, None], 'float32', name='image')]
    labels = [Input([None, NUM_MAX_BOXES, 4], 'float32', name='gt_bbox'),
              Input([None, NUM_MAX_BOXES], 'int32', name='gt_label'),
              Input([None, NUM_MAX_BOXES], 'float32', name='gt_score')]

    if not FLAGS.eval_only: # training mode
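        # training pipeline: per-sample color/geometry augmentation plus image
        # mixup, with random target-size resizing (RandomShape) and image
        # normalization applied per batch via the collate function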
        train_transform = Compose([ColorDistort(),
                                   RandomExpand(),
                                   RandomCrop(),
                                   RandomFlip(),
                                   NormalizeBox(),
                                   PadBox(),
                                   BboxXYXY2XYWH()])
        train_collate_fn = BatchCompose([RandomShape(), NormalizeImage()])
        dataset = COCODataset(dataset_dir=FLAGS.data,
                              anno_path='annotations/instances_train2017.json',
                              image_dir='train2017',
                              with_background=False,
                              mixup=True,
                              transform=train_transform)
        batch_sampler = DistributedBatchSampler(dataset,
                                                batch_size=FLAGS.batch_size,
                                                shuffle=True,
                                                drop_last=True)
        loader = DataLoader(dataset,
                            batch_sampler=batch_sampler,
                            places=device,
                            num_workers=FLAGS.num_workers,
                            return_list=True,
                            collate_fn=train_collate_fn)
    else: # evaluation mode
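        # evaluation pipeline: deterministic resize to the 608 input size,
        # no augmentation; image normalization is applied per batch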
        eval_transform = Compose([ResizeImage(target_size=608),
                                  NormalizeBox(),
                                  PadBox(),
                                  BboxXYXY2XYWH()])
        eval_collate_fn = BatchCompose([NormalizeImage()])
        dataset = COCODataset(dataset_dir=FLAGS.data,
                              anno_path='annotations/instances_val2017.json',
                              image_dir='val2017',
                              with_background=False,
                              transform=eval_transform)
        # batch_size can only be 1 in evaluation for YOLOv3
        # prediction bbox is a LoDTensor
        batch_sampler = DistributedBatchSampler(dataset,
                                                batch_size=1,
                                                shuffle=False,
                                                drop_last=False)
        loader = DataLoader(dataset,
                            batch_sampler=batch_sampler,
                            places=device,
                            num_workers=FLAGS.num_workers,
                            return_list=True,
                            collate_fn=eval_collate_fn)

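    # load the fully pretrained detector weights only when evaluating without
    # user-provided weights; for training, pretrained backbone weights are
    # loaded separately via --pretrain_weights below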
    pretrained = FLAGS.eval_only and FLAGS.weights is None
    model = yolov3_darknet53(num_classes=dataset.num_classes,
                             model_mode='eval' if FLAGS.eval_only else 'train',
                             pretrained=pretrained)

    if FLAGS.pretrain_weights and not FLAGS.eval_only:
        pretrain_weights = FLAGS.pretrain_weights
        if is_url(pretrain_weights):
            pretrain_weights = get_weights_path(pretrain_weights)
        model.load(pretrain_weights, skip_mismatch=True, reset_optimizer=True)

    optim = make_optimizer(len(batch_sampler), parameter_list=model.parameters())

    model.prepare(optim,
                  YoloLoss(num_classes=dataset.num_classes),
                  inputs=inputs, labels=labels,
                  device=FLAGS.device)

    # NOTE: we implement the COCO metric of the YOLOv3 model here, separately
    # from the 'prepare' and 'fit' framework, for the following reasons:
    # 1. The YOLOv3 network structure differs between 'train' and 'eval'
    #    mode; in 'eval' mode the output is the predicted bboxes, not the
    #    feature maps used to compute YoloLoss.
    # 2. The COCO metric also behaves differently from a standard Metric:
    #    it should not accumulate on every iteration, only once at the
    #    end of an epoch.
    if FLAGS.eval_only:
        if FLAGS.weights is not None:
            model.load(FLAGS.weights, reset_optimizer=True)
        preds = model.predict(loader, stack_outputs=False)
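        # predict() returns a list per model output, each a list of per-batch
        # results; the last two outputs are the image ids and predicted bboxes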
        _, _, _, img_ids, bboxes = preds

        anno_path = os.path.join(FLAGS.data, 'annotations/instances_val2017.json')
        coco_metric = COCOMetric(anno_path=anno_path, with_background=False)
        for img_id, bbox in zip(img_ids, bboxes):
            coco_metric.update(img_id, bbox)
        coco_metric.accumulate()
        coco_metric.reset()
        return

    if FLAGS.resume is not None:
        model.load(FLAGS.resume)

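    # stage 1: train with image mixup for the first (epoch - no_mixup_epoch) epochs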
    model.fit(train_data=loader,
              epochs=FLAGS.epoch - FLAGS.no_mixup_epoch,
              save_dir="yolo_checkpoint/mixup",
              save_freq=10)

    # stage 2: disable the image mixup transform for the last FLAGS.no_mixup_epoch epochs
    dataset.mixup = False
    model.fit(train_data=loader,
              epochs=FLAGS.no_mixup_epoch,
              save_dir="yolo_checkpoint/no_mixup",
              save_freq=5)


if __name__ == '__main__':
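    # example invocations (paths below are illustrative):
    #   train:    python main.py --data dataset/coco --device gpu -b 8
    #   evaluate: python main.py --data dataset/coco --eval_only -w <weights_path>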
    parser = argparse.ArgumentParser("Yolov3 Training on COCO")
    parser.add_argument(
        "--data", type=str, default='dataset/coco',
        help="path to COCO dataset directory")
    parser.add_argument(
        "--device", type=str, default='gpu', help="device to use, gpu or cpu")
    parser.add_argument(
        "-d", "--dynamic", action='store_true', help="enable dygraph mode")
    parser.add_argument(
        "--eval_only", action='store_true', help="run evaluation only")
    parser.add_argument(
        "-e", "--epoch", default=300, type=int, help="number of epoch")
    parser.add_argument(
        "--no_mixup_epoch", default=30, type=int,
        help="number of the last N epoch without image mixup")
    parser.add_argument(
        '--lr', '--learning-rate', default=0.001, type=float, metavar='LR',
        help='initial learning rate')
    parser.add_argument(
        "-b", "--batch_size", default=8, type=int, help="batch size")
    parser.add_argument(
        "-j", "--num_workers", default=4, type=int, help="reader worker number")
    parser.add_argument(
        "-p", "--pretrain_weights",
        default="./pretrain_weights/darknet53_pretrained", type=str,
        help="path to pretrained weights")
    parser.add_argument(
        "-r", "--resume", default=None, type=str,
        help="path to model weights")
    parser.add_argument(
        "-w", "--weights", default=None, type=str,
        help="path to weights for evaluation")
    FLAGS = parser.parse_args()
    assert FLAGS.data, "error: must provide data path"
    main()