提交 ef0fa051 编写于 作者: C chenyuntc

for caffe

上级 14d409ee
......@@ -4,6 +4,7 @@ from skimage import transform as sktsf
from torchvision import transforms as tvtsf
from . import util
from util import array_tool as at
def preprocess(img, min_size=600, max_size=1000):
......@@ -32,20 +33,13 @@ def preprocess(img, min_size=600, max_size=1000):
scale = min(scale1, scale2)
# both the longer and shorter sides should be less than
# max_size and min_size, respectively
img = img / 256.
img = img / 255.
img = img[[2,1,0],:,:] #RGB-BGR
img = sktsf.resize(img, (C, H * scale, W * scale), mode='reflect')
normalize = tvtsf.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225])
img = normalize(t.from_numpy(img))
return img.numpy()
# unNOTE: original implementation in chainer:
# mean=np.array([122.7717, 115.9465, 102.9801],
# img = (img - self.mean).astype(np.float32, copy=False)
# Answer: https://github.com/pytorch/vision/issues/223
# the input of vgg16 in pytorch:
# rgb 0 to 1, instead of bgr 0 to 255
img = img*255
mean=np.array([122.7717, 115.9465, 102.9801]).reshape(3,1,1)
img = (img - mean).astype(np.float32, copy=True)
return img
class Transform(object):
......
......@@ -12,7 +12,8 @@ from config import opt
def decom_vgg16(pretrained=True):
# the 30th layer of features is relu of conv5_3
model = vgg16(pretrained)
model = vgg16(pretrained=False)
model.load_state_dict('/home/a/code/pytorch/faster-rcnn/pytorch-faster-rcnn/data/imagenet_weights/vgg16.pth')
features = list(model.features)[:30]
classifier = model.classifier
......
......@@ -54,7 +54,6 @@ class ProposalCreator():
n_train_post_nms=2000,
n_test_pre_nms=6000,
n_test_post_nms=300,
force_cpu_nms=False,
min_size=16
):
self.parent_model = parent_model
......@@ -63,7 +62,6 @@ class ProposalCreator():
self.n_train_post_nms = n_train_post_nms
self.n_test_pre_nms = n_test_pre_nms
self.n_test_post_nms = n_test_post_nms
self.force_cpu_nms = force_cpu_nms
self.min_size = min_size
def __call__(self, loc, score,
......
......@@ -62,7 +62,7 @@ def train(**kwargs):
trainer.load(opt.load_path)
print('load pretrained model from %s' % opt.load_path)
trainer.optimizer = trainer.faster_rcnn.get_great_optimizer()
# trainer.optimizer = trainer.faster_rcnn.get_great_optimizer()
trainer.vis.text(dataset.db.label_names, win='labels')
best_map = 0
for epoch in range(opt.epoch):
......@@ -80,20 +80,20 @@ def train(**kwargs):
# plot loss
trainer.vis.plot_many(trainer.get_meter_data())
# plot ground truth bboxes
ori_img_ = (img * 0.225 + 0.45).clamp(min=0, max=1) * 255
gt_img = visdom_bbox(at.tonumpy(ori_img_)[0],
at.tonumpy(bbox_)[0],
label_[0].numpy())
trainer.vis.img('gt_img', gt_img)
# plot predicted bboxes
_bboxes, _labels, _scores = trainer.faster_rcnn.predict(ori_img,visualize=True)
pred_img = visdom_bbox( at.tonumpy(ori_img[0]),
at.tonumpy(_bboxes[0]),
at.tonumpy(_labels[0]).reshape(-1),
at.tonumpy(_scores[0]))
trainer.vis.img('pred_img', pred_img)
# # plot ground truth bboxes
# ori_img_ = (img * 0.225 + 0.45).clamp(min=0, max=1) * 255
# gt_img = visdom_bbox(at.tonumpy(ori_img_)[0],
# at.tonumpy(bbox_)[0],
# label_[0].numpy())
# trainer.vis.img('gt_img', gt_img)
# # plot predicted bboxes
# _bboxes, _labels, _scores = trainer.faster_rcnn.predict(ori_img,visualize=True)
# pred_img = visdom_bbox( at.tonumpy(ori_img[0]),
# at.tonumpy(_bboxes[0]),
# at.tonumpy(_labels[0]).reshape(-1),
# at.tonumpy(_scores[0]))
# trainer.vis.img('pred_img', pred_img)
# rpn confusion matrix(meter)
trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
......@@ -110,8 +110,8 @@ def train(**kwargs):
if epoch==8:
trainer.load(best_path)
trainer.faster_rcnn.scale_lr(opt.lr_decay)
if epoch ==0:
trainer.optimizer = trainer.faster_rcnn.get_optimizer()
# if epoch ==0:
# trainer.optimizer = trainer.faster_rcnn.get_optimizer()
trainer.vis.plot('test_map', eval_result['map'])
lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册