Update ssd_training.py

61ced380 · JiaQi Xu · GitHub · f741b610 · 61ced380
隐藏空白更改
内联并排

Showing with 87 addition and 149 deletion

nets/ssd_training.py nets/ssd_training.py +87 -149

未找到文件。
--- a/nets/ssd_training.py
+++ b/nets/ssd_training.py
@@ -8,6 +8,8 @@ import torch.nn.functional as F
 from torch.autograd import Variable
 from utils.config import  Config
 from utils.box_utils import match, log_sum_exp
+from matplotlib.colors import rgb_to_hsv, hsv_to_rgb
+from PIL import Image
 MEANS = (104, 117, 123)
@@ -94,180 +96,116 @@ class MultiBoxLoss(nn.Module):
        targets_weighted = conf_t[(pos+neg).gt(0)]
        loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False)
-        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        N = num_pos.data.sum()
        loss_l /= N
        loss_c /= N
        return loss_l, loss_c
+def rand(a=0, b=1):
+    return np.random.rand()*(b-a) + a
 class Generator(object):
    def __init__(self,batch_size,
                 train_lines, image_size,num_classes,
-                 saturation_var=0.5,
+                 ):
-                 brightness_var=0.5,
-                 contrast_var=0.5,
-                 lighting_std=0.5,
-                 hflip_prob=0.5,
-                 vflip_prob=0.5,
-                 do_crop=True,
-                 crop_area_range=[0.75, 1.0],
-                 aspect_ratio_range=[3./4., 4./3.]):
        self.batch_size = batch_size
        self.train_lines = train_lines
        self.train_batches = len(train_lines)
        self.image_size = image_size
-        self.color_jitter = []
+        self.num_classes = num_classes-1
-        self.num_classes = num_classes
-        if saturation_var:
-            self.saturation_var = saturation_var
-            self.color_jitter.append(self.saturation)
-        if brightness_var:
-            self.brightness_var = brightness_var
-            self.color_jitter.append(self.brightness)
-        if contrast_var:
-            self.contrast_var = contrast_var
-            self.color_jitter.append(self.contrast)
-        self.lighting_std = lighting_std
-        self.hflip_prob = hflip_prob
-        self.vflip_prob = vflip_prob
-        self.do_crop = do_crop
-        self.crop_area_range = crop_area_range
-        self.aspect_ratio_range = aspect_ratio_range
-    def grayscale(self, rgb):
-        return rgb.dot([0.299, 0.587, 0.114])
-    def saturation(self, rgb):
-        gs = self.grayscale(rgb)
-        alpha = 2 * np.random.random() * self.saturation_var 
-        alpha += 1 - self.saturation_var
-        rgb = rgb * alpha + (1 - alpha) * gs[:, :, None]
-        return np.clip(rgb, 0, 255)
-    def brightness(self, rgb):
-        alpha = 2 * np.random.random() * self.brightness_var 
-        alpha += 1 - self.saturation_var
-        rgb = rgb * alpha
-        return np.clip(rgb, 0, 255)
-    def contrast(self, rgb):
-        gs = self.grayscale(rgb).mean() * np.ones_like(rgb)
-        alpha = 2 * np.random.random() * self.contrast_var 
-        alpha += 1 - self.contrast_var
-        rgb = rgb * alpha + (1 - alpha) * gs
-        return np.clip(rgb, 0, 255)
-    def lighting(self, img):
-        cov = np.cov(img.reshape(-1, 3) / 255.0, rowvar=False)
-        eigval, eigvec = np.linalg.eigh(cov)
-        noise = np.random.randn(3) * self.lighting_std
-        noise = eigvec.dot(eigval * noise) * 255
-        img += noise
-        return np.clip(img, 0, 255)
-    def horizontal_flip(self, img, y):
-        if np.random.random() < self.hflip_prob:
-            img = img[:, ::-1]
-            y[:, [0, 2]] = 1 - y[:, [2, 0]]
-        return img, y
-    def vertical_flip(self, img, y):
-        if np.random.random() < self.vflip_prob:
-            img = img[::-1]
-            y[:, [1, 3]] = 1 - y[:, [3, 1]]
-        return img, y
-    def random_sized_crop(self, img, targets):
-        img_w = img.shape[1]
-        img_h = img.shape[0]
-        img_area = img_w * img_h
-        random_scale = np.random.random()
-        random_scale *= (self.crop_area_range[1] -
-                         self.crop_area_range[0])
-        random_scale += self.crop_area_range[0]
-        target_area = random_scale * img_area
-        random_ratio = np.random.random()
-        random_ratio *= (self.aspect_ratio_range[1] -
-                         self.aspect_ratio_range[0])
-        random_ratio += self.aspect_ratio_range[0]
-        w = np.round(np.sqrt(target_area * random_ratio))     
-        h = np.round(np.sqrt(target_area / random_ratio))
-        if np.random.random() < 0.5:
-            w, h = h, w
-        w = min(w, img_w)
-        w = int(w)
-        h = min(h, img_h)
-        h = int(h)
-        h = min(h,w)
+    def get_random_data(self, annotation_line, input_shape, random=True, jitter=.1, hue=.1, sat=1.1, val=1.1, proc_img=True):
-        w = h
+        '''r实时数据增强的随机预处理'''
-        w_rel = w / img_w
+        line = annotation_line.split()
-        h_rel = h / img_h
+        image = Image.open(line[0])
-        x = np.random.random() * (img_w - w)
+        iw, ih = image.size
-        x_rel = x / img_w
+        h, w = input_shape
-        x = int(x)
+        box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])
-        y = np.random.random() * (img_h - h)
-        y_rel = y / img_h
+        # resize image
-        y = int(y)
+        new_ar = w/h * rand(1-jitter,1+jitter)/rand(1-jitter,1+jitter)
-        img = img[y:y+h, x:x+w]
+        scale = rand(.5, 1)
-        new_targets = []
+        if new_ar < 1:
-        for box in targets:
+            nh = int(scale*h)
-            cx = 0.5 * (box[0] + box[2])
+            nw = int(nh*new_ar)
-            cy = 0.5 * (box[1] + box[3])
+        else:
-            if (x_rel < cx < x_rel + w_rel and
+            nw = int(scale*w)
-                y_rel < cy < y_rel + h_rel):
+            nh = int(nw/new_ar)
-                xmin = (box[0] - x_rel) / w_rel
+        image = image.resize((nw,nh), Image.BICUBIC)
-                ymin = (box[1] - y_rel) / h_rel
-                xmax = (box[2] - x_rel) / w_rel
+        # place image
-                ymax = (box[3] - y_rel) / h_rel
+        dx = int(rand(0, w-nw))
-                xmin = max(0, xmin)
+        dy = int(rand(0, h-nh))
-                ymin = max(0, ymin)
+        new_image = Image.new('RGB', (w,h), (128,128,128))
-                xmax = min(1, xmax)
+        new_image.paste(image, (dx, dy))
-                ymax = min(1, ymax)
+        image = new_image
-                box[:4] = [xmin, ymin, xmax, ymax]
-                new_targets.append(box)
+        # flip image or not
-        new_targets = np.asarray(new_targets).reshape(-1, targets.shape[1])
+        flip = rand()<.5
-        return img, new_targets
+        if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)
-    def generate(self):
+        # distort image
+        hue = rand(-hue, hue)
+        sat = rand(1, sat) if rand()<.5 else 1/rand(1, sat)
+        val = rand(1, val) if rand()<.5 else 1/rand(1, val)
+        x = rgb_to_hsv(np.array(image)/255.)
+        x[..., 0] += hue
+        x[..., 0][x[..., 0]>1] -= 1
+        x[..., 0][x[..., 0]<0] += 1
+        x[..., 1] *= sat
+        x[..., 2] *= val
+        x[x>1] = 1
+        x[x<0] = 0
+        image_data = hsv_to_rgb(x)*255 # numpy array, 0 to 1
+        # correct boxes
+        box_data = np.zeros((len(box),5))
+        if len(box)>0:
+            np.random.shuffle(box)
+            box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
+            box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
+            if flip: box[:, [0,2]] = w - box[:, [2,0]]
+            box[:, 0:2][box[:, 0:2]<0] = 0
+            box[:, 2][box[:, 2]>w] = w
+            box[:, 3][box[:, 3]>h] = h
+            box_w = box[:, 2] - box[:, 0]
+            box_h = box[:, 3] - box[:, 1]
+            box = box[np.logical_and(box_w>1, box_h>1)] # discard invalid box
+            box_data = np.zeros((len(box),5))
+            box_data[:len(box)] = box
+        if len(box) == 0:
+            return image_data, []
+        if (box_data[:,:4]>0).any():
+            return image_data, box_data
+        else:
+            return image_data, []
+    def generate(self, train=True):
        while True:
            shuffle(self.train_lines)
            lines = self.train_lines
            inputs = []
            targets = []
            for annotation_line in lines:  
-                line = annotation_line.split()
+                img,y=self.get_random_data(annotation_line,self.image_size[0:2])
-                img_path = line[0]
-                img = imread(img_path).astype('float32')
-                shape = np.shape(img)
-                y = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])
                if len(y)==0:
                    continue
                boxes = np.array(y[:,:4],dtype=np.float32)
-                boxes[:,0] = boxes[:,0]/shape[1]
+                boxes[:,0] = boxes[:,0]/self.image_size[1]
-                boxes[:,1] = boxes[:,1]/shape[0]
+                boxes[:,1] = boxes[:,1]/self.image_size[0]
-                boxes[:,2] = boxes[:,2]/shape[1]
+                boxes[:,2] = boxes[:,2]/self.image_size[1]
-                boxes[:,3] = boxes[:,3]/shape[0]
+                boxes[:,3] = boxes[:,3]/self.image_size[0]
                boxes = np.maximum(np.minimum(boxes,1),0)
+                if ((boxes[:,3]-boxes[:,1])<=0).any() and ((boxes[:,2]-boxes[:,0])<=0).any():
-                y = np.concatenate([boxes,y[:,-1:]],axis=-1)
-                if self.do_crop:
-                    img, y = self.random_sized_crop(img, y)
-                img = imresize(img, self.image_size).astype('float32')
-                shuffle(self.color_jitter)
-                for jitter in self.color_jitter:
-                    img = jitter(img)
-                if self.lighting_std:
-                    img = self.lighting(img)
-                if self.hflip_prob > 0:
-                    img, y = self.horizontal_flip(img, y)
-                if self.vflip_prob > 0:
-                    img, y = self.vertical_flip(img, y)
-                if len(y)==0:
                    continue
+                y = np.concatenate([boxes,y[:,-1:]],axis=-1)
                inputs.append(np.transpose(img-MEANS,(2,0,1)))                
                targets.append(y)
                if len(targets) == self.batch_size: