未验证 提交 61ced380 编写于 作者: J JiaQi Xu 提交者: GitHub

Update ssd_training.py

上级 f741b610
...@@ -8,6 +8,8 @@ import torch.nn.functional as F ...@@ -8,6 +8,8 @@ import torch.nn.functional as F
from torch.autograd import Variable from torch.autograd import Variable
from utils.config import Config from utils.config import Config
from utils.box_utils import match, log_sum_exp from utils.box_utils import match, log_sum_exp
from matplotlib.colors import rgb_to_hsv, hsv_to_rgb
from PIL import Image
MEANS = (104, 117, 123) MEANS = (104, 117, 123)
...@@ -94,180 +96,116 @@ class MultiBoxLoss(nn.Module): ...@@ -94,180 +96,116 @@ class MultiBoxLoss(nn.Module):
targets_weighted = conf_t[(pos+neg).gt(0)] targets_weighted = conf_t[(pos+neg).gt(0)]
loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False) loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False)
# Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
N = num_pos.data.sum() N = num_pos.data.sum()
loss_l /= N loss_l /= N
loss_c /= N loss_c /= N
return loss_l, loss_c return loss_l, loss_c
def rand(a=0, b=1):
    """Draw one random number scaled from the unit interval onto [a, b).

    A single sample is taken from ``np.random.rand()`` (uniform on [0, 1)),
    stretched by the width ``b - a`` and shifted by ``a``. With the default
    arguments this is just a plain unit-interval sample.
    """
    width = b - a
    unit_sample = np.random.rand()
    return unit_sample * width + a
class Generator(object): class Generator(object):
def __init__(self,batch_size, def __init__(self,batch_size,
train_lines, image_size,num_classes, train_lines, image_size,num_classes,
saturation_var=0.5, ):
brightness_var=0.5,
contrast_var=0.5,
lighting_std=0.5,
hflip_prob=0.5,
vflip_prob=0.5,
do_crop=True,
crop_area_range=[0.75, 1.0],
aspect_ratio_range=[3./4., 4./3.]):
self.batch_size = batch_size self.batch_size = batch_size
self.train_lines = train_lines self.train_lines = train_lines
self.train_batches = len(train_lines) self.train_batches = len(train_lines)
self.image_size = image_size self.image_size = image_size
self.color_jitter = [] self.num_classes = num_classes-1
self.num_classes = num_classes
if saturation_var:
self.saturation_var = saturation_var
self.color_jitter.append(self.saturation)
if brightness_var:
self.brightness_var = brightness_var
self.color_jitter.append(self.brightness)
if contrast_var:
self.contrast_var = contrast_var
self.color_jitter.append(self.contrast)
self.lighting_std = lighting_std
self.hflip_prob = hflip_prob
self.vflip_prob = vflip_prob
self.do_crop = do_crop
self.crop_area_range = crop_area_range
self.aspect_ratio_range = aspect_ratio_range
def grayscale(self, rgb):
return rgb.dot([0.299, 0.587, 0.114])
def saturation(self, rgb):
gs = self.grayscale(rgb)
alpha = 2 * np.random.random() * self.saturation_var
alpha += 1 - self.saturation_var
rgb = rgb * alpha + (1 - alpha) * gs[:, :, None]
return np.clip(rgb, 0, 255)
def brightness(self, rgb):
alpha = 2 * np.random.random() * self.brightness_var
alpha += 1 - self.saturation_var
rgb = rgb * alpha
return np.clip(rgb, 0, 255)
def contrast(self, rgb):
gs = self.grayscale(rgb).mean() * np.ones_like(rgb)
alpha = 2 * np.random.random() * self.contrast_var
alpha += 1 - self.contrast_var
rgb = rgb * alpha + (1 - alpha) * gs
return np.clip(rgb, 0, 255)
def lighting(self, img):
cov = np.cov(img.reshape(-1, 3) / 255.0, rowvar=False)
eigval, eigvec = np.linalg.eigh(cov)
noise = np.random.randn(3) * self.lighting_std
noise = eigvec.dot(eigval * noise) * 255
img += noise
return np.clip(img, 0, 255)
def horizontal_flip(self, img, y):
if np.random.random() < self.hflip_prob:
img = img[:, ::-1]
y[:, [0, 2]] = 1 - y[:, [2, 0]]
return img, y
def vertical_flip(self, img, y):
if np.random.random() < self.vflip_prob:
img = img[::-1]
y[:, [1, 3]] = 1 - y[:, [3, 1]]
return img, y
def random_sized_crop(self, img, targets):
img_w = img.shape[1]
img_h = img.shape[0]
img_area = img_w * img_h
random_scale = np.random.random()
random_scale *= (self.crop_area_range[1] -
self.crop_area_range[0])
random_scale += self.crop_area_range[0]
target_area = random_scale * img_area
random_ratio = np.random.random()
random_ratio *= (self.aspect_ratio_range[1] -
self.aspect_ratio_range[0])
random_ratio += self.aspect_ratio_range[0]
w = np.round(np.sqrt(target_area * random_ratio))
h = np.round(np.sqrt(target_area / random_ratio))
if np.random.random() < 0.5:
w, h = h, w
w = min(w, img_w)
w = int(w)
h = min(h, img_h)
h = int(h)
h = min(h,w) def get_random_data(self, annotation_line, input_shape, random=True, jitter=.1, hue=.1, sat=1.1, val=1.1, proc_img=True):
w = h '''r实时数据增强的随机预处理'''
w_rel = w / img_w line = annotation_line.split()
h_rel = h / img_h image = Image.open(line[0])
x = np.random.random() * (img_w - w) iw, ih = image.size
x_rel = x / img_w h, w = input_shape
x = int(x) box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])
y = np.random.random() * (img_h - h)
y_rel = y / img_h # resize image
y = int(y) new_ar = w/h * rand(1-jitter,1+jitter)/rand(1-jitter,1+jitter)
img = img[y:y+h, x:x+w] scale = rand(.5, 1)
new_targets = [] if new_ar < 1:
for box in targets: nh = int(scale*h)
cx = 0.5 * (box[0] + box[2]) nw = int(nh*new_ar)
cy = 0.5 * (box[1] + box[3]) else:
if (x_rel < cx < x_rel + w_rel and nw = int(scale*w)
y_rel < cy < y_rel + h_rel): nh = int(nw/new_ar)
xmin = (box[0] - x_rel) / w_rel image = image.resize((nw,nh), Image.BICUBIC)
ymin = (box[1] - y_rel) / h_rel
xmax = (box[2] - x_rel) / w_rel # place image
ymax = (box[3] - y_rel) / h_rel dx = int(rand(0, w-nw))
xmin = max(0, xmin) dy = int(rand(0, h-nh))
ymin = max(0, ymin) new_image = Image.new('RGB', (w,h), (128,128,128))
xmax = min(1, xmax) new_image.paste(image, (dx, dy))
ymax = min(1, ymax) image = new_image
box[:4] = [xmin, ymin, xmax, ymax]
new_targets.append(box) # flip image or not
new_targets = np.asarray(new_targets).reshape(-1, targets.shape[1]) flip = rand()<.5
return img, new_targets if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)
def generate(self): # distort image
hue = rand(-hue, hue)
sat = rand(1, sat) if rand()<.5 else 1/rand(1, sat)
val = rand(1, val) if rand()<.5 else 1/rand(1, val)
x = rgb_to_hsv(np.array(image)/255.)
x[..., 0] += hue
x[..., 0][x[..., 0]>1] -= 1
x[..., 0][x[..., 0]<0] += 1
x[..., 1] *= sat
x[..., 2] *= val
x[x>1] = 1
x[x<0] = 0
image_data = hsv_to_rgb(x)*255 # numpy array, 0 to 255
# correct boxes
box_data = np.zeros((len(box),5))
if len(box)>0:
np.random.shuffle(box)
box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
if flip: box[:, [0,2]] = w - box[:, [2,0]]
box[:, 0:2][box[:, 0:2]<0] = 0
box[:, 2][box[:, 2]>w] = w
box[:, 3][box[:, 3]>h] = h
box_w = box[:, 2] - box[:, 0]
box_h = box[:, 3] - box[:, 1]
box = box[np.logical_and(box_w>1, box_h>1)] # discard invalid box
box_data = np.zeros((len(box),5))
box_data[:len(box)] = box
if len(box) == 0:
return image_data, []
if (box_data[:,:4]>0).any():
return image_data, box_data
else:
return image_data, []
def generate(self, train=True):
while True: while True:
shuffle(self.train_lines) shuffle(self.train_lines)
lines = self.train_lines lines = self.train_lines
inputs = [] inputs = []
targets = [] targets = []
for annotation_line in lines: for annotation_line in lines:
line = annotation_line.split() img,y=self.get_random_data(annotation_line,self.image_size[0:2])
img_path = line[0]
img = imread(img_path).astype('float32')
shape = np.shape(img)
y = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])
if len(y)==0: if len(y)==0:
continue continue
boxes = np.array(y[:,:4],dtype=np.float32) boxes = np.array(y[:,:4],dtype=np.float32)
boxes[:,0] = boxes[:,0]/shape[1] boxes[:,0] = boxes[:,0]/self.image_size[1]
boxes[:,1] = boxes[:,1]/shape[0] boxes[:,1] = boxes[:,1]/self.image_size[0]
boxes[:,2] = boxes[:,2]/shape[1] boxes[:,2] = boxes[:,2]/self.image_size[1]
boxes[:,3] = boxes[:,3]/shape[0] boxes[:,3] = boxes[:,3]/self.image_size[0]
boxes = np.maximum(np.minimum(boxes,1),0) boxes = np.maximum(np.minimum(boxes,1),0)
if ((boxes[:,3]-boxes[:,1])<=0).any() and ((boxes[:,2]-boxes[:,0])<=0).any():
y = np.concatenate([boxes,y[:,-1:]],axis=-1)
if self.do_crop:
img, y = self.random_sized_crop(img, y)
img = imresize(img, self.image_size).astype('float32')
shuffle(self.color_jitter)
for jitter in self.color_jitter:
img = jitter(img)
if self.lighting_std:
img = self.lighting(img)
if self.hflip_prob > 0:
img, y = self.horizontal_flip(img, y)
if self.vflip_prob > 0:
img, y = self.vertical_flip(img, y)
if len(y)==0:
continue continue
y = np.concatenate([boxes,y[:,-1:]],axis=-1)
inputs.append(np.transpose(img-MEANS,(2,0,1))) inputs.append(np.transpose(img-MEANS,(2,0,1)))
targets.append(y) targets.append(y)
if len(targets) == self.batch_size: if len(targets) == self.batch_size:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册