for caffe

ef0fa051 · chenyuntc · 14d409ee · ef0fa051 · ef0fa051 · ef0fa051
Showing 4 changed files with 26 additions and 33 deletions

data/dataset.py data/dataset.py +7 -13

model/faster_rcnn_vgg16.py model/faster_rcnn_vgg16.py +2 -1

model/utils/rpn_tools.py model/utils/rpn_tools.py +0 -2

train.py train.py +17 -17

未找到文件。
--- a/data/dataset.py
+++ b/data/dataset.py
@@ -4,6 +4,7 @@ from skimage import transform as sktsf
 from torchvision import transforms as tvtsf
 from . import util
 from util import array_tool as at
+ 


 def preprocess(img, min_size=600, max_size=1000):
@@ -32,20 +33,13 @@ def preprocess(img, min_size=600, max_size=1000):
    scale = min(scale1, scale2)
    # both the longer and shorter should be less than
    # max_size and min_size
-    img = img / 256.
+    img = img / 255.
+    img = img[[2,1,0],:,:] #RGB-BGR
    img = sktsf.resize(img, (C, H * scale, W * scale), mode='reflect')
-    normalize = tvtsf.Normalize(mean=[0.485, 0.456, 0.406],
-                                std=[0.229, 0.224, 0.225])
-
-    img = normalize(t.from_numpy(img))
-    return img.numpy()
-    # unNOTE: original implementation in chainer:
-    # mean=np.array([122.7717, 115.9465, 102.9801],
-    # img = (img - self.mean).astype(np.float32, copy=False)
-    # Answer: https://github.com/pytorch/vision/issues/223
-    # the input of vgg16 in pytorch:
-    # rgb 0 to 1, instead of bgr 0 to 255
-
+    img = img*255
+    mean=np.array([122.7717, 115.9465, 102.9801]).reshape(3,1,1)
+    img = (img - mean).astype(np.float32, copy=True)
+    return img

 class Transform(object):


--- a/model/faster_rcnn_vgg16.py
+++ b/model/faster_rcnn_vgg16.py
@@ -12,7 +12,8 @@ from config import opt

 def decom_vgg16(pretrained=True):
    # the 30th layer of features is relu of conv5_3
-    model = vgg16(pretrained)
+    model = vgg16(pretrained=False)
+    model.load_state_dict('/home/a/code/pytorch/faster-rcnn/pytorch-faster-rcnn/data/imagenet_weights/vgg16.pth')
    features = list(model.features)[:30]
    classifier = model.classifier


--- a/model/utils/rpn_tools.py
+++ b/model/utils/rpn_tools.py
@@ -54,7 +54,6 @@ class ProposalCreator():
                 n_train_post_nms=2000,
                 n_test_pre_nms=6000,
                 n_test_post_nms=300,
-                 force_cpu_nms=False,
                 min_size=16
                 ):
        self.parent_model = parent_model
@@ -63,7 +62,6 @@ class ProposalCreator():
        self.n_train_post_nms = n_train_post_nms
        self.n_test_pre_nms = n_test_pre_nms
        self.n_test_post_nms = n_test_post_nms
-        self.force_cpu_nms = force_cpu_nms
        self.min_size = min_size

    def __call__(self, loc, score,

--- a/train.py
+++ b/train.py
@@ -62,7 +62,7 @@ def train(**kwargs):
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

-    trainer.optimizer = trainer.faster_rcnn.get_great_optimizer()
+    # trainer.optimizer = trainer.faster_rcnn.get_great_optimizer()
    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    for epoch in range(opt.epoch):
@@ -80,20 +80,20 @@ def train(**kwargs):
                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

-                # plot groud truth bboxes
-                ori_img_ = (img * 0.225 + 0.45).clamp(min=0, max=1) * 255
-                gt_img = visdom_bbox(at.tonumpy(ori_img_)[0], 
-                                    at.tonumpy(bbox_)[0], 
-                                    label_[0].numpy())
-                trainer.vis.img('gt_img', gt_img)
-
-                # plot predicti bboxes
-                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(ori_img,visualize=True)
-                pred_img = visdom_bbox( at.tonumpy(ori_img[0]), 
-                                        at.tonumpy(_bboxes[0]),
-                                        at.tonumpy(_labels[0]).reshape(-1), 
-                                        at.tonumpy(_scores[0]))
-                trainer.vis.img('pred_img', pred_img)
+                # # plot groud truth bboxes
+                # ori_img_ = (img * 0.225 + 0.45).clamp(min=0, max=1) * 255
+                # gt_img = visdom_bbox(at.tonumpy(ori_img_)[0], 
+                #                     at.tonumpy(bbox_)[0], 
+                #                     label_[0].numpy())
+                # trainer.vis.img('gt_img', gt_img)
+
+                # # plot predicti bboxes
+                # _bboxes, _labels, _scores = trainer.faster_rcnn.predict(ori_img,visualize=True)
+                # pred_img = visdom_bbox( at.tonumpy(ori_img[0]), 
+                #                         at.tonumpy(_bboxes[0]),
+                #                         at.tonumpy(_labels[0]).reshape(-1), 
+                #                         at.tonumpy(_scores[0]))
+                # trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix(meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
@@ -110,8 +110,8 @@ def train(**kwargs):
        if epoch==8:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
-        if epoch ==0:
-            trainer.optimizer = trainer.faster_rcnn.get_optimizer()
+        # if epoch ==0:
+        #     trainer.optimizer = trainer.faster_rcnn.get_optimizer()

        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']