提交 9aca31da 编写于 作者: H HypoX64

make more training data

上级 d2beea9b
...@@ -141,6 +141,7 @@ test*/ ...@@ -141,6 +141,7 @@ test*/
video_tmp/ video_tmp/
result/ result/
#./ #./
/pix2pixHD
/tmp /tmp
/to_make_show /to_make_show
/test_media /test_media
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
# <img src="./imgs/icon.jpg" width="48">DeepMosaics # <img src="./imgs/icon.jpg" width="48">DeepMosaics
You can use it to automatically remove the mosaics in images and videos, or add mosaics to them.<br> You can use it to automatically remove the mosaics in images and videos, or add mosaics to them.<br>
This project is based on ‘semantic segmentation’ and ‘Image-to-Image Translation’.<br> This project is based on ‘semantic segmentation’ and ‘Image-to-Image Translation’.<br>
Master is not stable. Please use a [stable version](https://github.com/HypoX64/DeepMosaics/tree/stable)<br>
* [中文版](./README_CN.md)<br> * [中文版](./README_CN.md)<br>
### More example ### More example
......
![image](./imgs/hand.gif) ![image](./imgs/hand.gif)
# <img src="./imgs/icon.jpg" width="48">DeepMosaics # <img src="./imgs/icon.jpg" width="48">DeepMosaics
这是一个通过深度学习自动的为图片/视频添加马赛克,或消除马赛克的项目.<br>它基于“语义分割”以及“图像翻译”.<br> 这是一个通过深度学习自动的为图片/视频添加马赛克,或消除马赛克的项目.<br>它基于“语义分割”以及“图像翻译”.<br>
主分支并不稳定,请移步[稳定版本](https://github.com/HypoX64/DeepMosaics/tree/stable)<br>
### 更多例子 ### 更多例子
原始 | 自动打码 | 自动去码 原始 | 自动打码 | 自动去码
:-:|:-:|:-: :-:|:-:|:-:
......
import os
import numpy as np
import cv2
import random
import csv
import sys
sys.path.append("..")
from util import util,ffmpeg
from util import image_processing as impro
# Script body: dump still frames from every video found under a download
# directory into one shared image folder.

# Hard-coded, machine-specific input directory.
files = util.Traversal('/media/hypo/Media/download')
# NOTE(review): presumably keeps only paths with a video extension -- confirm in util.is_videos.
videos = util.is_videos(files)
output_dir = './dataset/v2im'
# Number of frames saved per second of video (passed to ffmpeg's fps filter).
FPS = 1
util.makedirs(output_dir)
# All videos write into the same output_dir.
for video in videos:
    ffmpeg.continuous_screenshot(video, output_dir, FPS)
\ No newline at end of file
...@@ -22,7 +22,7 @@ Area_Type = 'normal' ...@@ -22,7 +22,7 @@ Area_Type = 'normal'
suffix = '' suffix = ''
net = loadmodel.unet(opt) net = loadmodel.unet(opt)
for path in videos: for i,path in enumerate(videos,0):
try: try:
path = os.path.join('./video',path) path = os.path.join('./video',path)
util.clean_tempfiles() util.clean_tempfiles()
...@@ -37,14 +37,14 @@ for path in videos: ...@@ -37,14 +37,14 @@ for path in videos:
mask_avg = np.zeros((impro.resize(img_ori_example, 128)).shape[:2]) mask_avg = np.zeros((impro.resize(img_ori_example, 128)).shape[:2])
for imagepath in imagepaths: for imagepath in imagepaths:
imagepath = os.path.join('./tmp/video2image',imagepath) imagepath = os.path.join('./tmp/video2image',imagepath)
print('Find ROI location:',imagepath) #print('Find ROI location:',imagepath)
img = impro.imread(imagepath) img = impro.imread(imagepath)
x,y,size,mask = runmodel.get_mosaic_position(img,net,opt,threshold = 64) x,y,size,mask = runmodel.get_mosaic_position(img,net,opt,threshold = 80)
cv2.imwrite(os.path.join('./tmp/ROI_mask', cv2.imwrite(os.path.join('./tmp/ROI_mask',
os.path.basename(imagepath)),mask) os.path.basename(imagepath)),mask)
positions.append([x,y,size]) positions.append([x,y,size])
mask_avg = mask_avg + mask mask_avg = mask_avg + mask
print('Optimize ROI locations...') #print('Optimize ROI locations...')
mask_index = filt.position_medfilt(np.array(positions), 13) mask_index = filt.position_medfilt(np.array(positions), 13)
mask = np.clip(mask_avg/len(imagepaths),0,255).astype('uint8') mask = np.clip(mask_avg/len(imagepaths),0,255).astype('uint8')
...@@ -62,7 +62,7 @@ for path in videos: ...@@ -62,7 +62,7 @@ for path in videos:
os.makedirs(mask_path) os.makedirs(mask_path)
os.makedirs(ori_path) os.makedirs(ori_path)
os.makedirs(mosaic_path) os.makedirs(mosaic_path)
print('Add mosaic to images...') #print('Add mosaic to images...')
mosaic_size = mosaic.get_autosize(img_ori_example,mask,area_type = Area_Type)*random.uniform(1,2) mosaic_size = mosaic.get_autosize(img_ori_example,mask,area_type = Area_Type)*random.uniform(1,2)
models = ['squa_avg','rect_avg','squa_mid'] models = ['squa_avg','rect_avg','squa_mid']
mosaic_type = random.randint(0,len(models)-1) mosaic_type = random.randint(0,len(models)-1)
...@@ -81,4 +81,6 @@ for path in videos: ...@@ -81,4 +81,6 @@ for path in videos:
cv2.imwrite(os.path.join(mosaic_path,os.path.basename(imagepaths[i])),img_mosaic_crop) cv2.imwrite(os.path.join(mosaic_path,os.path.basename(imagepaths[i])),img_mosaic_crop)
cv2.imwrite(os.path.join(mask_path,os.path.basename(imagepaths[i])),mask_crop) cv2.imwrite(os.path.join(mask_path,os.path.basename(imagepaths[i])),mask_crop)
except Exception as e: except Exception as e:
print(e) print(e)
\ No newline at end of file
print(util.get_bar(100*i/len(videos),num=50))
\ No newline at end of file
...@@ -11,19 +11,20 @@ from util import util,mosaic ...@@ -11,19 +11,20 @@ from util import util,mosaic
import datetime import datetime
ir_mask_path = './Irregular_Holes_mask' ir_mask_path = './Irregular_Holes_mask'
img_path ='/home/hypo/桌面/FaceRankSample' img_dir ='/home/hypo/MyProject/Haystack/CV/output/all/face'
output_dir = './datasets_img'
util.makedirs(output_dir)
MOD = 'HD' #HD | pix2pix | mosaic MOD = 'HD' #HD | pix2pix | mosaic
MASK = False # if True, output mask,too MASK = False # if True, output mask,too
BOUNDING = False # if true the mosaic size will be more big BOUNDING = True # if true the mosaic size will be more big
suffix = ''
output_dir = os.path.join('./dataset_img',MOD)
util.makedirs(output_dir)
if MOD='HD': if MOD == 'HD':
train_A_path = os.path.join(output_dir,'train_A') train_A_path = os.path.join(output_dir,'train_A')
train_B_path = os.path.join(output_dir,'train_B') train_B_path = os.path.join(output_dir,'train_B')
util.makedirs(train_A_path) util.makedirs(train_A_path)
util.makedirs(train_B_path) util.makedirs(train_B_path)
elif MOD='pix2pix': elif MOD == 'pix2pix':
train_path = os.path.join(output_dir,'train') train_path = os.path.join(output_dir,'train')
util.makedirs(train_path) util.makedirs(train_path)
if MASK: if MASK:
...@@ -42,12 +43,12 @@ transform_img = transforms.Compose([ ...@@ -42,12 +43,12 @@ transform_img = transforms.Compose([
]) ])
mask_names = os.listdir(ir_mask_path) mask_names = os.listdir(ir_mask_path)
img_names = os.listdir(img_path) img_names = os.listdir(img_dir)
print('Find images:',len(img_names)) print('Find images:',len(img_names))
for i,img_name in enumerate(img_names,1): for i,img_name in enumerate(img_names,1):
try: try:
img = Image.open(os.path.join(img_path,img_name)) img = Image.open(os.path.join(img_dir,img_name))
img = transform_img(img) img = transform_img(img)
img = np.array(img) img = np.array(img)
img = img[...,::-1] img = img[...,::-1]
...@@ -60,22 +61,20 @@ for i,img_name in enumerate(img_names,1): ...@@ -60,22 +61,20 @@ for i,img_name in enumerate(img_names,1):
mask = np.array(mask) mask = np.array(mask)
mosaic_area = impro.mask_area(mask) mosaic_area = impro.mask_area(mask)
mosaic_img = mosaic.addmosaic_random(img, mask,'bounding') mosaic_img = mosaic.addmosaic_random(img, mask,'bounding')
BOUNDING_flag = '_bound'
else: else:
mask = Image.open(os.path.join(ir_mask_path,random.choices(mask_names)[0])) mask = Image.open(os.path.join(ir_mask_path,random.choices(mask_names)[0]))
mask = transform_mask(mask) mask = transform_mask(mask)
mask = np.array(mask) mask = np.array(mask)
mosaic_img = mosaic.addmosaic_random(img, mask) mosaic_img = mosaic.addmosaic_random(img, mask)
BOUNDING_flag = ''
if MOD == 'HD':#[128:384,128:384,:] --->256
if HD:#[128:384,128:384,:] --->256 cv2.imwrite(os.path.join(train_A_path,'%05d' % i+suffix+'.jpg'), mosaic_img)
cv2.imwrite(os.path.join(train_A_path,'%05d' % i+BOUNDING_flag+'.jpg'), mosaic_img) cv2.imwrite(os.path.join(train_B_path,'%05d' % i+suffix+'.jpg'), img)
cv2.imwrite(os.path.join(train_B_path,'%05d' % i+BOUNDING_flag+'.jpg'), img)
else: else:
merge_img = impro.makedataset(mosaic_img, img) merge_img = impro.makedataset(mosaic_img, img)
cv2.imwrite(os.path.join(train_path,'%05d' % i+BOUNDING_flag+'.jpg'), merge_img) cv2.imwrite(os.path.join(train_path,'%05d' % i+suffix+'.jpg'), merge_img)
if MASK: if MASK:
cv2.imwrite(os.path.join(mask_path,'%05d' % i+BOUNDING_flag+'.png'), mask) cv2.imwrite(os.path.join(mask_path,'%05d' % i+suffix+'.png'), mask)
print("Processing:",img_name," ","Remain:",len(img_names)-i) print('\r','Proc/all:'+str(i)+'/'+str(len(img_names)),util.get_bar(100*i/len(img_names),num=40),end='')
except Exception as e: except Exception as e:
print(img_name,e) print(img_name,e)
...@@ -2,7 +2,13 @@ import torch ...@@ -2,7 +2,13 @@ import torch
from .pix2pix_model import define_G from .pix2pix_model import define_G
from .pix2pixHD_model import define_G as define_G_HD from .pix2pixHD_model import define_G as define_G_HD
from .unet_model import UNet from .unet_model import UNet
from .video_model import HypoNet from .video_model import MosaicNet
def show_paramsnumber(net,netname='net'):
    """Print how many parameters a network has, in millions.

    Args:
        net: object exposing ``parameters()`` whose items provide ``numel()``
            (e.g. a ``torch.nn.Module``).
        netname: label printed in front of the count.

    The count is rounded to two decimals and printed as
    ``"<netname> parameters: <count>M"``; nothing is returned.
    """
    total = 0
    for tensor in net.parameters():
        total += tensor.numel()
    millions = round(total/1e6,2)
    print(netname+' parameters: '+str(millions)+'M')
def pix2pix(opt): def pix2pix(opt):
# print(opt.model_path,opt.netG) # print(opt.model_path,opt.netG)
...@@ -10,7 +16,7 @@ def pix2pix(opt): ...@@ -10,7 +16,7 @@ def pix2pix(opt):
netG = define_G_HD(3, 3, 64, 'global' ,4) netG = define_G_HD(3, 3, 64, 'global' ,4)
else: else:
netG = define_G(3, 3, 64, opt.netG, norm='batch',use_dropout=True, init_type='normal', gpu_ids=[]) netG = define_G(3, 3, 64, opt.netG, norm='batch',use_dropout=True, init_type='normal', gpu_ids=[])
show_paramsnumber(netG,'netG')
netG.load_state_dict(torch.load(opt.model_path)) netG.load_state_dict(torch.load(opt.model_path))
netG.eval() netG.eval()
if opt.use_gpu: if opt.use_gpu:
...@@ -18,7 +24,8 @@ def pix2pix(opt): ...@@ -18,7 +24,8 @@ def pix2pix(opt):
return netG return netG
def video(opt): def video(opt):
netG = HypoNet(3*25+1, 3) netG = MosaicNet(3*25+1, 3)
show_paramsnumber(netG,'netG')
netG.load_state_dict(torch.load(opt.model_path)) netG.load_state_dict(torch.load(opt.model_path))
netG.eval() netG.eval()
if opt.use_gpu: if opt.use_gpu:
...@@ -28,6 +35,7 @@ def video(opt): ...@@ -28,6 +35,7 @@ def video(opt):
def unet_clean(opt): def unet_clean(opt):
net = UNet(n_channels = 3, n_classes = 1) net = UNet(n_channels = 3, n_classes = 1)
show_paramsnumber(net,'segment')
net.load_state_dict(torch.load(opt.mosaic_position_model_path)) net.load_state_dict(torch.load(opt.mosaic_position_model_path))
net.eval() net.eval()
if opt.use_gpu: if opt.use_gpu:
...@@ -36,6 +44,7 @@ def unet_clean(opt): ...@@ -36,6 +44,7 @@ def unet_clean(opt):
def unet(opt): def unet(opt):
net = UNet(n_channels = 3, n_classes = 1) net = UNet(n_channels = 3, n_classes = 1)
show_paramsnumber(net,'segment')
net.load_state_dict(torch.load(opt.model_path)) net.load_state_dict(torch.load(opt.model_path))
net.eval() net.eval()
if opt.use_gpu: if opt.use_gpu:
......
...@@ -151,9 +151,9 @@ class encoder_3d(nn.Module): ...@@ -151,9 +151,9 @@ class encoder_3d(nn.Module):
class HypoNet(nn.Module): class MosaicNet(nn.Module):
def __init__(self, in_channel, out_channel): def __init__(self, in_channel, out_channel):
super(HypoNet, self).__init__() super(MosaicNet, self).__init__()
self.encoder_2d = encoder_2d(4,-1,64,n_blocks=9) self.encoder_2d = encoder_2d(4,-1,64,n_blocks=9)
self.encoder_3d = encoder_3d(in_channel) self.encoder_3d = encoder_3d(in_channel)
......
...@@ -11,15 +11,28 @@ import torch.backends.cudnn as cudnn ...@@ -11,15 +11,28 @@ import torch.backends.cudnn as cudnn
import torch.nn as nn import torch.nn as nn
from torch import optim from torch import optim
from unet import UNet import sys
sys.path.append("..")
sys.path.append("../..")
from util import mosaic,util,ffmpeg,filt,data
from util import image_processing as impro
from models import unet_model
from matplotlib import pyplot as plt
import torch.backends.cudnn as cudnn
LR = 0.0002
EPOCHS = 100
BATCHSIZE = 16
LOADSIZE = 256
FINESIZE = 224
CONTINUE = False
use_gpu = True
SAVE_FRE = 5
cudnn.benchmark = False
def resize(img,size): dir_img = './datasets/av/origin_image/'
h, w = img.shape[:2] dir_mask = './datasets/av/mask/'
if w >= h: dir_checkpoint = 'checkpoints/'
res = cv2.resize(img,(int(size*w/h), size))
else:
res = cv2.resize(img,(size, int(size*h/w)))
return res
def Totensor(img,use_gpu=True): def Totensor(img,use_gpu=True):
...@@ -29,20 +42,15 @@ def Totensor(img,use_gpu=True): ...@@ -29,20 +42,15 @@ def Totensor(img,use_gpu=True):
img = img.cuda() img = img.cuda()
return img return img
def random_color(img,random_num):
for i in range(3): img[:,:,i]=np.clip(img[:,:,i].astype('int')+random.randint(-random_num,random_num),0,255).astype('uint8')
bright = random.randint(-random_num*2,random_num*2)
for i in range(3): img[:,:,i]=np.clip(img[:,:,i].astype('int')+bright,0,255).astype('uint8')
return img
def Toinputshape(imgs,masks,finesize): def Toinputshape(imgs,masks,finesize):
batchsize = len(imgs) batchsize = len(imgs)
result_imgs=[];result_masks=[] result_imgs=[];result_masks=[]
for i in range(batchsize): for i in range(batchsize):
# print(imgs[i].shape,masks[i].shape) # print(imgs[i].shape,masks[i].shape)
img,mask = random_transform(imgs[i], masks[i], finesize) img,mask = data.random_transform_image(imgs[i], masks[i], finesize)
# print(img.shape,mask.shape) # print(img.shape,mask.shape)
mask = mask[:,:,0].reshape(1,finesize,finesize)/255.0 mask = mask.reshape(1,finesize,finesize)/255.0
img = img.transpose((2, 0, 1))/255.0 img = img.transpose((2, 0, 1))/255.0
result_imgs.append(img) result_imgs.append(img)
result_masks.append(mask) result_masks.append(mask)
...@@ -50,65 +58,6 @@ def Toinputshape(imgs,masks,finesize): ...@@ -50,65 +58,6 @@ def Toinputshape(imgs,masks,finesize):
result_masks = np.array(result_masks) result_masks = np.array(result_masks)
return result_imgs,result_masks return result_imgs,result_masks
def random_transform(img,mask,finesize):
# randomsize = int(finesize*(1.2+0.2*random.random())+2)
h,w = img.shape[:2]
loadsize = min((h,w))
a = (float(h)/float(w))*random.uniform(0.9, 1.1)
if h<w:
mask = cv2.resize(mask, (int(loadsize/a),loadsize))
img = cv2.resize(img, (int(loadsize/a),loadsize))
else:
mask = cv2.resize(mask, (loadsize,int(loadsize*a)))
img = cv2.resize(img, (loadsize,int(loadsize*a)))
# mask = randomsize(mask,loadsize)
# img = randomsize(img,loadsize)
#random crop
h,w = img.shape[:2]
h_move = int((h-finesize)*random.random())
w_move = int((w-finesize)*random.random())
# print(h,w,h_move,w_move)
img_crop = img[h_move:h_move+finesize,w_move:w_move+finesize]
mask_crop = mask[h_move:h_move+finesize,w_move:w_move+finesize]
#random rotation
if random.random()<0.2:
h,w = img_crop.shape[:2]
M = cv2.getRotationMatrix2D((w/2,h/2),90*int(4*random.random()),1)
img = cv2.warpAffine(img_crop,M,(w,h))
mask = cv2.warpAffine(mask_crop,M,(w,h))
else:
img,mask = img_crop,mask_crop
#random color
img=random_color(img, 15)
#random flip
if random.random()<0.5:
if random.random()<0.5:
img = cv2.flip(img,0)
mask = cv2.flip(mask,0)
else:
img = cv2.flip(img,1)
mask = cv2.flip(mask,1)
return img,mask
def randomresize(img):
size = np.min(img.shape[:2])
img = resize(img, int(size*random.uniform(1,1.2)))
img = resize(img, size)
return img
def batch_generator(images,masks,batchsize): def batch_generator(images,masks,batchsize):
dataset_images = [] dataset_images = []
dataset_masks = [] dataset_masks = []
...@@ -125,16 +74,17 @@ def batch_generator(images,masks,batchsize): ...@@ -125,16 +74,17 @@ def batch_generator(images,masks,batchsize):
def loadimage(dir_img,dir_mask,loadsize,eval_p): def loadimage(dir_img,dir_mask,loadsize,eval_p):
t1 = datetime.datetime.now() t1 = datetime.datetime.now()
imgnames = os.listdir(dir_img) imgnames = os.listdir(dir_img)
# imgnames = imgnames[:100]
print('images num:',len(imgnames)) print('images num:',len(imgnames))
random.shuffle(imgnames) random.shuffle(imgnames)
imgnames = (f[:-4] for f in imgnames) imgnames = (f[:-4] for f in imgnames)
images = [] images = []
masks = [] masks = []
for imgname in imgnames: for imgname in imgnames:
img = cv2.imread(dir_img+imgname+'.jpg') img = impro.imread(dir_img+imgname+'.jpg')
mask = cv2.imread(dir_mask+imgname+'.png') mask = impro.imread(dir_mask+imgname+'.png',mod = 'gray')
img = resize(img,loadsize) img = impro.resize(img,loadsize)
mask = resize(mask,loadsize) mask = impro.resize(mask,loadsize)
images.append(img) images.append(img)
masks.append(mask) masks.append(mask)
train_images,train_masks = images[0:int(len(masks)*(1-eval_p))],masks[0:int(len(masks)*(1-eval_p))] train_images,train_masks = images[0:int(len(masks)*(1-eval_p))],masks[0:int(len(masks)*(1-eval_p))]
...@@ -143,39 +93,7 @@ def loadimage(dir_img,dir_mask,loadsize,eval_p): ...@@ -143,39 +93,7 @@ def loadimage(dir_img,dir_mask,loadsize,eval_p):
print('load data cost time:',(t2 - t1).seconds,'s') print('load data cost time:',(t2 - t1).seconds,'s')
return train_images,train_masks,eval_images,eval_masks return train_images,train_masks,eval_images,eval_masks
def showresult(img,mask,mask_pred):
img = (img.cpu().detach().numpy()*255)
mask = (mask.cpu().detach().numpy()*255)
mask_pred = (mask_pred.cpu().detach().numpy()*255)
batchsize = img.shape[0]
size = img.shape[3]
ran =int(batchsize*random.random())
showimg=np.zeros((size,size*3,3))
showimg[0:size,0:size] =img[ran].transpose((1, 2, 0))
showimg[0:size,size:size*2,1] = mask[ran].reshape(size,size)
showimg[0:size,size*2:size*3,1] = mask_pred[ran].reshape(size,size)
# cv2.imshow("", showimg.astype('uint8'))
# key = cv2.waitKey(1)
# if key == ord('q'):
# exit()
cv2.imwrite('./result.jpg', showimg)
LR = 0.001
EPOCHS = 100
BATCHSIZE = 12
LOADSIZE = 144
FINESIZE = 128
CONTINUE = True
use_gpu = True
SAVE_FRE = 5
cudnn.benchmark = False
dir_img = './origin_image/'
dir_mask = './mask/'
dir_checkpoint = 'checkpoints/'
print('loading data......') print('loading data......')
train_images,train_masks,eval_images,eval_masks = loadimage(dir_img,dir_mask,LOADSIZE,0.2) train_images,train_masks,eval_images,eval_masks = loadimage(dir_img,dir_mask,LOADSIZE,0.2)
...@@ -183,7 +101,7 @@ dataset_eval_images,dataset_eval_masks = batch_generator(eval_images,eval_masks, ...@@ -183,7 +101,7 @@ dataset_eval_images,dataset_eval_masks = batch_generator(eval_images,eval_masks,
dataset_train_images,dataset_train_masks = batch_generator(train_images,train_masks,BATCHSIZE) dataset_train_images,dataset_train_masks = batch_generator(train_images,train_masks,BATCHSIZE)
net = UNet(n_channels = 3, n_classes = 1) net = unet_model.UNet(n_channels = 3, n_classes = 1)
if CONTINUE: if CONTINUE:
...@@ -192,7 +110,7 @@ if use_gpu: ...@@ -192,7 +110,7 @@ if use_gpu:
net.cuda() net.cuda()
optimizer = torch.optim.Adam(net.parameters(), lr=LR, betas=(0.9, 0.99)) optimizer = torch.optim.Adam(net.parameters(), lr=LR, betas=(0.9, 0.999))
criterion = nn.BCELoss() criterion = nn.BCELoss()
# criterion = nn.L1Loss() # criterion = nn.L1Loss()
...@@ -220,8 +138,8 @@ for epoch in range(EPOCHS): ...@@ -220,8 +138,8 @@ for epoch in range(EPOCHS):
loss.backward() loss.backward()
optimizer.step() optimizer.step()
if i%10 == 0: if i%100 == 0:
showresult(img,mask,mask_pred) data.showresult(img,mask,mask_pred,os.path.join(dir_checkpoint,'result.png'))
# torch.cuda.empty_cache() # torch.cuda.empty_cache()
# # net.eval() # # net.eval()
...@@ -243,11 +161,8 @@ for epoch in range(EPOCHS): ...@@ -243,11 +161,8 @@ for epoch in range(EPOCHS):
epoch_loss_eval/len(dataset_eval_images), epoch_loss_eval/len(dataset_eval_images),
(endtime - starttime).seconds)), (endtime - starttime).seconds)),
torch.save(net.cpu().state_dict(),dir_checkpoint+'last.pth') torch.save(net.cpu().state_dict(),dir_checkpoint+'last.pth')
# print('--- Epoch loss: {0:.6f}'.format(epoch_loss/i))
# print('Cost time: ',(endtime - starttime).seconds,'s')
if (epoch+1)%SAVE_FRE == 0: if (epoch+1)%SAVE_FRE == 0:
torch.save(net.cpu().state_dict(),dir_checkpoint+'epoch'+str(epoch+1)+'.pth') torch.save(net.cpu().state_dict(),dir_checkpoint+'epoch'+str(epoch+1)+'.pth')
data.showresult(img,mask,mask_pred,os.path.join(dir_checkpoint,'epoch_'+str(epoch+1)+'.png'))
print('network saved.') print('network saved.')
# torch.save(net.cpu().state_dict(),dir_checkpoint+'last.pth')
# print('network saved.')
...@@ -9,11 +9,10 @@ import time ...@@ -9,11 +9,10 @@ import time
import sys import sys
sys.path.append("..") sys.path.append("..")
sys.path.append("../..") sys.path.append("../..")
from models import runmodel,loadmodel
from util import mosaic,util,ffmpeg,filt,data from util import mosaic,util,ffmpeg,filt,data
from util import image_processing as impro from util import image_processing as impro
from cores import Options from cores import Options
from models import pix2pix_model,video_model,unet_model from models import pix2pix_model,video_model,unet_model,loadmodel
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
import torch.backends.cudnn as cudnn import torch.backends.cudnn as cudnn
...@@ -32,8 +31,8 @@ SAVE_FRE = 10000 ...@@ -32,8 +31,8 @@ SAVE_FRE = 10000
start_iter = 0 start_iter = 0
finesize = 128 finesize = 128
loadsize = int(finesize*1.1) loadsize = int(finesize*1.1)
perload_num = 32
savename = 'MosaicNet_test' savename = 'MosaicNet_noL2'
dir_checkpoint = 'checkpoints/'+savename dir_checkpoint = 'checkpoints/'+savename
util.makedirs(dir_checkpoint) util.makedirs(dir_checkpoint)
...@@ -51,7 +50,8 @@ for video in videos: ...@@ -51,7 +50,8 @@ for video in videos:
#unet_128 #unet_128
#resnet_9blocks #resnet_9blocks
#netG = pix2pix_model.define_G(3*N+1, 3, 128, 'resnet_6blocks', norm='instance',use_dropout=True, init_type='normal', gpu_ids=[]) #netG = pix2pix_model.define_G(3*N+1, 3, 128, 'resnet_6blocks', norm='instance',use_dropout=True, init_type='normal', gpu_ids=[])
netG = video_model.HypoNet(3*N+1, 3) netG = video_model.MosaicNet(3*N+1, 3)
loadmodel.show_paramsnumber(netG,'netG')
# netG = unet_model.UNet(3*N+1, 3) # netG = unet_model.UNet(3*N+1, 3)
if use_gan: if use_gan:
netD = pix2pix_model.define_D(3*2+1, 64, 'basic', n_layers_D=3, norm='instance', init_type='normal', init_gain=0.02, gpu_ids=[]) netD = pix2pix_model.define_D(3*2+1, 64, 'basic', n_layers_D=3, norm='instance', init_type='normal', init_gain=0.02, gpu_ids=[])
...@@ -77,43 +77,6 @@ if use_gan: ...@@ -77,43 +77,6 @@ if use_gan:
optimizer_D = torch.optim.Adam(netG.parameters(), lr=LR,betas=(beta1, 0.999)) optimizer_D = torch.optim.Adam(netG.parameters(), lr=LR,betas=(beta1, 0.999))
criterionGAN = pix2pix_model.GANLoss(gan_mode='lsgan').cuda() criterionGAN = pix2pix_model.GANLoss(gan_mode='lsgan').cuda()
def random_transform(src,target,finesize):
#random crop
h,w = target.shape[:2]
h_move = int((h-finesize)*random.random())
w_move = int((w-finesize)*random.random())
# print(h,w,h_move,w_move)
target = target[h_move:h_move+finesize,w_move:w_move+finesize,:]
src = src[h_move:h_move+finesize,w_move:w_move+finesize,:]
#random flip
if random.random()<0.5:
src = src[:,::-1,:]
target = target[:,::-1,:]
#random color
random_num = 15
bright = random.randint(-random_num*2,random_num*2)
for i in range(N*3): src[:,:,i]=np.clip(src[:,:,i].astype('int')+bright,0,255).astype('uint8')
for i in range(3): target[:,:,i]=np.clip(target[:,:,i].astype('int')+bright,0,255).astype('uint8')
return src,target
def showresult(img1,img2,img3,name):
img1 = (img1.cpu().detach().numpy()*255)
img2 = (img2.cpu().detach().numpy()*255)
img3 = (img3.cpu().detach().numpy()*255)
batchsize = img1.shape[0]
size = img1.shape[3]
ran =int(batchsize*random.random())
showimg=np.zeros((size,size*3,3))
showimg[0:size,0:size] =img1[ran].transpose((1, 2, 0))
showimg[0:size,size:size*2] = img2[ran].transpose((1, 2, 0))
showimg[0:size,size*2:size*3] = img3[ran].transpose((1, 2, 0))
cv2.imwrite(os.path.join(dir_checkpoint,name), showimg)
def loaddata(): def loaddata():
video_index = random.randint(0,len(videos)-1) video_index = random.randint(0,len(videos)-1)
...@@ -121,7 +84,7 @@ def loaddata(): ...@@ -121,7 +84,7 @@ def loaddata():
img_index = random.randint(N,lengths[video_index]- N) img_index = random.randint(N,lengths[video_index]- N)
input_img = np.zeros((loadsize,loadsize,3*N+1), dtype='uint8') input_img = np.zeros((loadsize,loadsize,3*N+1), dtype='uint8')
for i in range(0,N): for i in range(0,N):
# print('./dataset/'+video+'/mosaic/output_'+'%05d'%(img_index+i-int(N/2))+'.png')
img = cv2.imread('./dataset/'+video+'/mosaic/output_'+'%05d'%(img_index+i-int(N/2))+'.png') img = cv2.imread('./dataset/'+video+'/mosaic/output_'+'%05d'%(img_index+i-int(N/2))+'.png')
img = impro.resize(img,loadsize) img = impro.resize(img,loadsize)
input_img[:,:,i*3:(i+1)*3] = img input_img[:,:,i*3:(i+1)*3] = img
...@@ -133,7 +96,7 @@ def loaddata(): ...@@ -133,7 +96,7 @@ def loaddata():
ground_true = cv2.imread('./dataset/'+video+'/ori/output_'+'%05d'%(img_index)+'.png') ground_true = cv2.imread('./dataset/'+video+'/ori/output_'+'%05d'%(img_index)+'.png')
ground_true = impro.resize(ground_true,loadsize) ground_true = impro.resize(ground_true,loadsize)
input_img,ground_true = random_transform(input_img,ground_true,finesize) input_img,ground_true = data.random_transform_video(input_img,ground_true,finesize,N)
input_img = data.im2tensor(input_img,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False) input_img = data.im2tensor(input_img,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False)
ground_true = data.im2tensor(ground_true,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False) ground_true = data.im2tensor(ground_true,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False)
...@@ -150,7 +113,7 @@ def preload(): ...@@ -150,7 +113,7 @@ def preload():
input_img,ground_true = loaddata() input_img,ground_true = loaddata()
input_imgs.append(input_img) input_imgs.append(input_img)
ground_trues.append(ground_true) ground_trues.append(ground_true)
if len(input_imgs)>10: if len(input_imgs)>perload_num:
del(input_imgs[0]) del(input_imgs[0])
del(ground_trues[0]) del(ground_trues[0])
load_cnt += 1 load_cnt += 1
...@@ -162,7 +125,7 @@ import threading ...@@ -162,7 +125,7 @@ import threading
t = threading.Thread(target=preload,args=()) #t为新创建的线程 t = threading.Thread(target=preload,args=()) #t为新创建的线程
t.daemon = True t.daemon = True
t.start() t.start()
while load_cnt < 10: while load_cnt < perload_num:
time.sleep(0.1) time.sleep(0.1)
netG.train() netG.train()
...@@ -171,7 +134,7 @@ print("Begin training...") ...@@ -171,7 +134,7 @@ print("Begin training...")
for iter in range(start_iter+1,ITER): for iter in range(start_iter+1,ITER):
# input_img,ground_true = loaddata() # input_img,ground_true = loaddata()
ran = random.randint(1, 8) ran = random.randint(1, perload_num-2)
input_img = input_imgs[ran] input_img = input_imgs[ran]
ground_true = ground_trues[ran] ground_true = ground_trues[ran]
...@@ -231,7 +194,8 @@ for iter in range(start_iter+1,ITER): ...@@ -231,7 +194,8 @@ for iter in range(start_iter+1,ITER):
if (iter+1)%100 == 0: if (iter+1)%100 == 0:
try: try:
showresult(input_img[:,int((N-1)/2)*3:(int((N-1)/2)+1)*3,:,:], ground_true, pred,'result_train.png') data.showresult(input_img[:,int((N-1)/2)*3:(int((N-1)/2)+1)*3,:,:],
ground_true, pred,os.path.join(dir_checkpoint,'result_train.png'))
except Exception as e: except Exception as e:
print(e) print(e)
...@@ -266,7 +230,6 @@ for iter in range(start_iter+1,ITER): ...@@ -266,7 +230,6 @@ for iter in range(start_iter+1,ITER):
time_start=time.time() time_start=time.time()
if (iter+1)%SAVE_FRE == 0: if (iter+1)%SAVE_FRE == 0:
if iter+1 != SAVE_FRE: if iter+1 != SAVE_FRE:
os.rename(os.path.join(dir_checkpoint,'last_G.pth'),os.path.join(dir_checkpoint,str(iter+1-SAVE_FRE)+'G.pth')) os.rename(os.path.join(dir_checkpoint,'last_G.pth'),os.path.join(dir_checkpoint,str(iter+1-SAVE_FRE)+'G.pth'))
...@@ -282,7 +245,6 @@ for iter in range(start_iter+1,ITER): ...@@ -282,7 +245,6 @@ for iter in range(start_iter+1,ITER):
f = open(os.path.join(dir_checkpoint,'iter'),'w+') f = open(os.path.join(dir_checkpoint,'iter'),'w+')
f.write(str(iter+1)) f.write(str(iter+1))
f.close() f.close()
# torch.save(netG.cpu().state_dict(),dir_checkpoint+'iter'+str(iter+1)+'.pth')
print('network saved.') print('network saved.')
#test #test
...@@ -292,6 +254,7 @@ for iter in range(start_iter+1,ITER): ...@@ -292,6 +254,7 @@ for iter in range(start_iter+1,ITER):
for cnt,test_name in enumerate(test_names,0): for cnt,test_name in enumerate(test_names,0):
img_names = os.listdir(os.path.join('./test',test_name,'image')) img_names = os.listdir(os.path.join('./test',test_name,'image'))
img_names.sort()
input_img = np.zeros((finesize,finesize,3*N+1), dtype='uint8') input_img = np.zeros((finesize,finesize,3*N+1), dtype='uint8')
img_names.sort() img_names.sort()
for i in range(0,N): for i in range(0,N):
...@@ -307,7 +270,7 @@ for iter in range(start_iter+1,ITER): ...@@ -307,7 +270,7 @@ for iter in range(start_iter+1,ITER):
input_img = data.im2tensor(input_img,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False) input_img = data.im2tensor(input_img,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False)
pred = netG(input_img) pred = netG(input_img)
pred = (pred.cpu().detach().numpy()*255)[0].transpose((1, 2, 0)) pred = data.tensor2im(pred,rgb2bgr = False, is0_1 = True)
result[finesize:finesize*2,finesize*cnt:finesize*(cnt+1),:] = pred result[finesize:finesize*2,finesize*cnt:finesize*(cnt+1),:] = pred
cv2.imwrite(os.path.join(dir_checkpoint,str(iter+1)+'_test.png'), result) cv2.imwrite(os.path.join(dir_checkpoint,str(iter+1)+'_test.png'), result)
......
import random
import numpy as np import numpy as np
import torch import torch
import torchvision.transforms as transforms import torchvision.transforms as transforms
import cv2
transform = transforms.Compose([ transform = transforms.Compose([
transforms.ToTensor(), transforms.ToTensor(),
...@@ -8,7 +10,7 @@ transform = transforms.Compose([ ...@@ -8,7 +10,7 @@ transform = transforms.Compose([
] ]
) )
def tensor2im(image_tensor, imtype=np.uint8, gray=False, rgb2bgr = True): def tensor2im(image_tensor, imtype=np.uint8, gray=False, rgb2bgr = True ,is0_1 = False):
image_tensor =image_tensor.data image_tensor =image_tensor.data
image_numpy = image_tensor[0].cpu().float().numpy() image_numpy = image_tensor[0].cpu().float().numpy()
# if gray: # if gray:
...@@ -16,7 +18,12 @@ def tensor2im(image_tensor, imtype=np.uint8, gray=False, rgb2bgr = True): ...@@ -16,7 +18,12 @@ def tensor2im(image_tensor, imtype=np.uint8, gray=False, rgb2bgr = True):
# else: # else:
if image_numpy.shape[0] == 1: if image_numpy.shape[0] == 1:
image_numpy = np.tile(image_numpy, (3, 1, 1)) image_numpy = np.tile(image_numpy, (3, 1, 1))
image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0
image_numpy = image_numpy.transpose((1, 2, 0))
if not is0_1:
image_numpy = (image_numpy + 1)/2.0
image_numpy = np.clip(image_numpy * 255.0,0,255)
if rgb2bgr and not gray: if rgb2bgr and not gray:
image_numpy = image_numpy[...,::-1]-np.zeros_like(image_numpy) image_numpy = image_numpy[...,::-1]-np.zeros_like(image_numpy)
return image_numpy.astype(imtype) return image_numpy.astype(imtype)
...@@ -46,12 +53,88 @@ def im2tensor(image_numpy, imtype=np.uint8, gray=False,bgr2rgb = True, reshape = ...@@ -46,12 +53,88 @@ def im2tensor(image_numpy, imtype=np.uint8, gray=False,bgr2rgb = True, reshape =
image_tensor = image_tensor.cuda() image_tensor = image_tensor.cuda()
return image_tensor return image_tensor
# def im2tensor(image_numpy, use_gpu=False):
# h, w ,ch = image_numpy.shape def random_transform_video(src,target,finesize,N):
# image_numpy = image_numpy/255.0
# image_numpy = image_numpy.transpose((2, 0, 1)) #random crop
# image_numpy = image_numpy.reshape(-1,ch,h,w) h,w = target.shape[:2]
# img_tensor = torch.from_numpy(image_numpy).float() h_move = int((h-finesize)*random.random())
# if use_gpu: w_move = int((w-finesize)*random.random())
# img_tensor = img_tensor.cuda() # print(h,w,h_move,w_move)
# return img_tensor target = target[h_move:h_move+finesize,w_move:w_move+finesize,:]
\ No newline at end of file src = src[h_move:h_move+finesize,w_move:w_move+finesize,:]
#random flip
if random.random()<0.5:
src = src[:,::-1,:]
target = target[:,::-1,:]
#random color
random_num = 15
bright = random.randint(-random_num*2,random_num*2)
for i in range(N*3): src[:,:,i]=np.clip(src[:,:,i].astype('int')+bright,0,255).astype('uint8')
for i in range(3): target[:,:,i]=np.clip(target[:,:,i].astype('int')+bright,0,255).astype('uint8')
return src,target
def random_transform_image(img,mask,finesize):
    """Randomly augment an image/mask pair and crop both to ``finesize``.

    Steps, in order: aspect-ratio jitter, random crop, occasional rotation by
    a multiple of 90 degrees, color/brightness jitter (image only), and an
    occasional horizontal or vertical flip. The same geometric transform is
    applied to ``img`` and ``mask`` so they stay aligned.

    Args:
        img: image array indexed as ``[row, col, channel]``; the first three
            channels are color-jittered.
        mask: array with the same height/width as ``img``.
            NOTE(review): the flip code indexes it as 2-D -- confirm callers
            pass a single-channel mask.
        finesize: side length of the square crop.
            NOTE(review): assumes both sides of the resized image are at
            least ``finesize``; smaller inputs are not checked.

    Returns:
        Tuple ``(img, mask)`` after augmentation.
    """
    # randomsize = int(finesize*(1.2+0.2*random.random())+2)
    h,w = img.shape[:2]
    loadsize = min((h,w))
    # Aspect ratio perturbed by a random factor in [0.9, 1.1].
    a = (float(h)/float(w))*random.uniform(0.9, 1.1)
    # Keep the shorter side at its original length and rescale the longer
    # side with the jittered ratio (cv2.resize takes the size as (width, height)).
    if h<w:
        mask = cv2.resize(mask, (int(loadsize/a),loadsize))
        img = cv2.resize(img, (int(loadsize/a),loadsize))
    else:
        mask = cv2.resize(mask, (loadsize,int(loadsize*a)))
        img = cv2.resize(img, (loadsize,int(loadsize*a)))
    # mask = randomsize(mask,loadsize)
    # img = randomsize(img,loadsize)
    #random crop
    h,w = img.shape[:2]
    # Random top-left corner; the same offsets are used for image and mask.
    h_move = int((h-finesize)*random.random())
    w_move = int((w-finesize)*random.random())
    # print(h,w,h_move,w_move)
    img_crop = img[h_move:h_move+finesize,w_move:w_move+finesize]
    mask_crop = mask[h_move:h_move+finesize,w_move:w_move+finesize]
    #random rotation
    # With probability 0.2, rotate about the crop centre by 0/90/180/270 degrees.
    if random.random()<0.2:
        h,w = img_crop.shape[:2]
        M = cv2.getRotationMatrix2D((w/2,h/2),90*int(4*random.random()),1)
        img = cv2.warpAffine(img_crop,M,(w,h))
        mask = cv2.warpAffine(mask_crop,M,(w,h))
    else:
        img,mask = img_crop,mask_crop
    #random color
    random_num = 15
    # Independent shift per channel in [-15, 15]; computed in int and clipped
    # to 0..255 before casting back to uint8.
    for i in range(3): img[:,:,i]=np.clip(img[:,:,i].astype('int')+random.randint(-random_num,random_num),0,255).astype('uint8')
    # One shared brightness shift in [-30, 30] applied to all three channels.
    bright = random.randint(-random_num*2,random_num*2)
    for i in range(3): img[:,:,i]=np.clip(img[:,:,i].astype('int')+bright,0,255).astype('uint8')
    #random flip
    # With probability 0.5 flip; horizontal and vertical are equally likely.
    # Flips are done by reversed slicing, so the results are array views.
    if random.random()<0.5:
        if random.random()<0.5:
            img = img[:,::-1,:]
            mask = mask[:,::-1]
        else:
            img = img[::-1,:,:]
            mask = mask[::-1,:]
    return img,mask
def showresult(img1,img2,img3,name):
    """Write three tensors side by side into one image file.

    Each tensor is converted with ``tensor2im`` (values treated as 0..1, no
    channel reordering) and pasted left to right into a canvas that is one
    tile high and three tiles wide, which is then saved with ``cv2.imwrite``.

    Args:
        img1, img2, img3: tensors to visualise; the tile side length is read
            from ``img1.shape[3]``.
            NOTE(review): assumes 4-D tensors with square spatial dims of
            equal size -- confirm against callers.
        name: output file path.
    """
    side = img1.shape[3]
    canvas = np.zeros((side,side*3,3))
    for slot,tensor in enumerate((img1,img2,img3)):
        left = slot*side
        canvas[0:side,left:left+side] = tensor2im(tensor,rgb2bgr = False, is0_1 = True)
    cv2.imwrite(name, canvas)
...@@ -39,4 +39,10 @@ def cut_video(in_path,start_time,last_time,out_path,vcodec='h265'): ...@@ -39,4 +39,10 @@ def cut_video(in_path,start_time,last_time,out_path,vcodec='h265'):
os.system('ffmpeg -ss '+start_time+' -t '+last_time+' -i "'+in_path+'" -vcodec libx265 -b 12M '+out_path) os.system('ffmpeg -ss '+start_time+' -t '+last_time+' -i "'+in_path+'" -vcodec libx265 -b 12M '+out_path)
def continuous_screenshot(videopath,savedir,fps): def continuous_screenshot(videopath,savedir,fps):
os.system('ffmpeg -i '+videopath+' -vf fps='+str(fps)+' '+savedir+'/'+'%05d.jpg') '''
videopath: input video path
savedir: images will save here
fps: save how many images per second
'''
videoname = os.path.splitext(os.path.basename(videopath))[0]
os.system('ffmpeg -i '+videopath+' -vf fps='+str(fps)+' '+savedir+'/'+videoname+'%05d.jpg')
...@@ -70,3 +70,13 @@ def file_init(opt): ...@@ -70,3 +70,13 @@ def file_init(opt):
os.makedirs(opt.result_dir) os.makedirs(opt.result_dir)
print('makedir:',opt.result_dir) print('makedir:',opt.result_dir)
clean_tempfiles() clean_tempfiles()
def get_bar(percent,num = 25):
    """Render a textual progress bar such as ``[#####-----] 50%``.

    Args:
        percent: progress in percent (0..100; values outside that range
            yield an empty or a full bar).
        num: number of cells in the bar.

    Returns:
        The bar in square brackets, followed by a space and ``percent``
        rounded to two decimals with a trailing ``%``.
    """
    if num > 0:
        # Number of filled cells, computed once and clamped to the bar width.
        filled = min(max(round(percent/(100/num)), 0), num)
    else:
        # A non-positive width draws no cells (and avoids dividing by zero).
        filled = 0
    empty = max(num - filled, 0)
    return '[' + '#'*filled + '-'*empty + '] ' + str(round(percent,2)) + '%'
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册