diff --git a/.gitignore b/.gitignore
index ae22b8c373b5739cf500900ddc875b0bf7213b7a..5398998da214ef502789cc982ed3c2e8951b6ef9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -134,6 +134,7 @@ __pycache__/
 tmp/
 checkpoints/
 mask/
+mask_old/
 origin_image/
 datasets/
 dataset/
@@ -149,6 +150,8 @@ result/
 /result
 /reference
 /python_test.py
+/pretrained_models_old
+/deepmosaic_window
 #./make_datasets
 /make_datasets/video
 /make_datasets/tmp
diff --git a/cores/core.py b/cores/core.py
index 57bb510d40a5449f184d11df2cf7c0db03326b4c..ce3853b085a1bd3105a9c9670f8c04a5ec5be765 100644
--- a/cores/core.py
+++ b/cores/core.py
@@ -6,17 +6,15 @@ from models import runmodel,loadmodel
 from util import mosaic,util,ffmpeg,filt,data
 from util import image_processing as impro
 
-def addmosaic_img(opt):
-    net = loadmodel.unet(opt)
+def addmosaic_img(opt,netS):
     path = opt.media_path
     print('Add Mosaic:',path)
     img = impro.imread(path)
-    mask = runmodel.get_ROI_position(img,net,opt)[0]
+    mask = runmodel.get_ROI_position(img,netS,opt)[0]
     img = mosaic.addmosaic(img,mask,opt)
     cv2.imwrite(os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_add.jpg'),img)
 
-def addmosaic_video(opt):
-    net = loadmodel.unet(opt)
+def addmosaic_video(opt,netS):
     path = opt.media_path
     util.clean_tempfiles()
     fps = ffmpeg.get_video_infos(path)[0]
@@ -30,7 +28,7 @@ def addmosaic_video(opt):
     for imagepath in imagepaths:
         print('Find ROI location:',imagepath)
         img = impro.imread(os.path.join('./tmp/video2image',imagepath))
-        mask,x,y,area = runmodel.get_ROI_position(img,net,opt)
+        mask,x,y,area = runmodel.get_ROI_position(img,netS,opt)
         positions.append([x,y,area])
         cv2.imwrite(os.path.join('./tmp/ROI_mask',imagepath),mask)
     print('Optimize ROI locations...')
@@ -49,13 +47,13 @@ def addmosaic_video(opt):
                        './tmp/voice_tmp.mp3',
                        os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_add.mp4'))
 
-def cleanmosaic_img(opt):
-    netG = loadmodel.pix2pix(opt)
-    net_mosaic_pos = loadmodel.unet_clean(opt)
+def cleanmosaic_img(opt,netG,netM):
+
     path = opt.media_path
     print('Clean Mosaic:',path)
     img_origin = impro.imread(path)
-    x,y,size = runmodel.get_mosaic_position(img_origin,net_mosaic_pos,opt)[:3]
+    x,y,size,mask = runmodel.get_mosaic_position(img_origin,netM,opt)
+    cv2.imwrite('./mask/'+os.path.basename(path), mask)
     img_result = img_origin.copy()
     if size != 0 :
         img_mosaic = img_origin[y-size:y+size,x-size:x+size]
@@ -65,9 +63,7 @@ def cleanmosaic_img(opt):
         print('Do not find mosaic')
     cv2.imwrite(os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_clean.jpg'),img_result)
 
-def cleanmosaic_video_byframe(opt):
-    netG = loadmodel.pix2pix(opt)
-    net_mosaic_pos = loadmodel.unet_clean(opt)
+def cleanmosaic_video_byframe(opt,netG,netM):
     path = opt.media_path
     util.clean_tempfiles()
     fps = ffmpeg.get_video_infos(path)[0]
@@ -80,7 +76,7 @@ def cleanmosaic_video_byframe(opt):
     # get position
     for imagepath in imagepaths:
         img_origin = impro.imread(os.path.join('./tmp/video2image',imagepath))
-        x,y,size = runmodel.get_mosaic_position(img_origin,net_mosaic_pos,opt)[:3]
+        x,y,size = runmodel.get_mosaic_position(img_origin,netM,opt)[:3]
        positions.append([x,y,size])
         print('Find mosaic location:',imagepath)
     print('Optimize mosaic locations...')
@@ -103,9 +99,7 @@ def cleanmosaic_video_byframe(opt):
                        './tmp/voice_tmp.mp3',
                        os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_clean.mp4'))
 
-def cleanmosaic_video_fusion(opt):
-    net = loadmodel.video(opt)
-    net_mosaic_pos = loadmodel.unet_clean(opt)
+def cleanmosaic_video_fusion(opt,netG,netM):
     path = opt.media_path
     N = 25
     INPUT_SIZE = 128
@@ -122,7 +116,7 @@ def cleanmosaic_video_fusion(opt):
     for imagepath in imagepaths:
         img_origin = impro.imread(os.path.join('./tmp/video2image',imagepath))
         # x,y,size = runmodel.get_mosaic_position(img_origin,net_mosaic_pos,opt)[:3]
-        x,y,size,mask = runmodel.get_mosaic_position(img_origin,net_mosaic_pos,opt)
+        x,y,size,mask = runmodel.get_mosaic_position(img_origin,netM,opt)
         cv2.imwrite(os.path.join('./tmp/mosaic_mask',imagepath), mask)
         positions.append([x,y,size])
         print('Find mosaic location:',imagepath)
@@ -151,11 +145,12 @@ def cleanmosaic_video_fusion(opt):
             mask = mask[y-size:y+size,x-size:x+size]
             mask = impro.resize(mask, INPUT_SIZE)
             mosaic_input[:,:,-1] = mask
-            mosaic_input = data.im2tensor(mosaic_input,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False)
-            unmosaic_pred = net(mosaic_input)
+            mosaic_input = data.im2tensor(mosaic_input,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False,is0_1 = False)
+            unmosaic_pred = netG(mosaic_input)
 
-            unmosaic_pred = (unmosaic_pred.cpu().detach().numpy()*255)[0]
-            img_fake = unmosaic_pred.transpose((1, 2, 0))
+            #unmosaic_pred = (unmosaic_pred.cpu().detach().numpy()*255)[0]
+            #img_fake = unmosaic_pred.transpose((1, 2, 0))
+            img_fake = data.tensor2im(unmosaic_pred,rgb2bgr = False ,is0_1 = False)
             img_result = impro.replace_mosaic(img_origin,img_fake,x,y,size,opt.no_feather)
             cv2.imwrite(os.path.join('./tmp/replace_mosaic',imagepath),img_result)
diff --git a/cores/options.py b/cores/options.py
index a1bd9abae58ed70979880ffda2928f58fc2c19ac..f4740c6fae7c0110b38b40aaf0810403ad8fb0f9 100644
--- a/cores/options.py
+++ b/cores/options.py
@@ -10,7 +10,7 @@ class Options():
     def initialize(self):
 
         #base
-        self.parser.add_argument('--use_gpu',type=bool,default=True, help='if True, use gpu')
+        self.parser.add_argument('--use_gpu',type=int,default=1, help='if 0, do not use gpu')
         # self.parser.add_argument('--use_gpu', action='store_true', help='if input it, use gpu')
         self.parser.add_argument('--media_path', type=str, default='./hands_test.mp4',help='your videos or images path')
         self.parser.add_argument('--mode', type=str, default='auto',help='add or clean mosaic into your media auto | add | clean')
diff --git a/deepmosaic.py b/deepmosaic.py
index 82bde84e6b75bbcc6ab71d8defc579ffe4d29e53..9eff4fdc0f7a78df00001f1b033ca3319c822cb9 100644
--- a/deepmosaic.py
+++ b/deepmosaic.py
@@ -1,38 +1,56 @@
 import os
 from cores import Options,core
 from util import util
+from models import loadmodel
 
 opt = Options().getparse()
 util.file_init(opt)
 
 def main():
+    if os.path.isdir(opt.media_path):
+        files = util.Traversal(opt.media_path)
+    else:
+        files = [opt.media_path]
     if opt.mode == 'add':
-        if util.is_img(opt.media_path):
-            core.addmosaic_img(opt)
-        elif util.is_video(opt.media_path):
-            core.addmosaic_video(opt)
-        else:
-            print('This type of file is not supported')
+        netS = loadmodel.unet(opt)
+        for file in files:
+            opt.media_path = file
+            if util.is_img(file):
+                core.addmosaic_img(opt,netS)
+            elif util.is_video(file):
+                core.addmosaic_video(opt,netS)
+                util.clean_tempfiles(tmp_init = False)
+            else:
+                print('This type of file is not supported')
 
     elif opt.mode == 'clean':
-        if util.is_img(opt.media_path):
-            core.cleanmosaic_img(opt)
-        elif util.is_video(opt.media_path):
-            if opt.netG == 'video':
-                core.cleanmosaic_video_fusion(opt)
-            else:
-                core.cleanmosaic_video_byframe(opt)
+        netM = loadmodel.unet_clean(opt)
+        if opt.netG == 'video':
+            netG = loadmodel.video(opt)
         else:
-            print('This type of file is not supported')
-
-    util.clean_tempfiles(tmp_init = False)
+            netG = loadmodel.pix2pix(opt)
+
+        for file in files:
+            opt.media_path = file
+            if util.is_img(file):
+                core.cleanmosaic_img(opt,netG,netM)
+            elif util.is_video(file):
+                if opt.netG == 'video':
+                    core.cleanmosaic_video_fusion(opt,netG,netM)
+                else:
+                    core.cleanmosaic_video_byframe(opt,netG,netM)
+                util.clean_tempfiles(tmp_init = False)
+            else:
+                print('This type of file is not supported')
 
-if __name__ == '__main__':
-    try:
-        main()
-    except Exception as e:
-        print('Error:',e)
-        input('Please press any key to exit.\n')
-        util.clean_tempfiles(tmp_init = False)
-        exit(0)
+
+main()
+# if __name__ == '__main__':
+#     try:
+#         main()
+#     except Exception as e:
+#         print('Error:',e)
+#         input('Please press any key to exit.\n')
+#         util.clean_tempfiles(tmp_init = False)
+#         exit(0)
diff --git a/make_datasets/use_drawn_mask_make_dataset.py b/make_datasets/use_drawn_mask_make_dataset.py
index faf6eac8df24d4c94dffeeb963e9ddf0870e8288..2117a337fafd033169110790b76e73bce252008f 100644
--- a/make_datasets/use_drawn_mask_make_dataset.py
+++ b/make_datasets/use_drawn_mask_make_dataset.py
@@ -11,14 +11,15 @@ from util import util,mosaic
 import datetime
 import shutil
 
-mask_path = '/media/hypo/Porject/Datasets/unet/av/mask'
-img_path ='/media/hypo/Porject/Datasets/unet/av/origin_image'
+mask_dir = '/media/hypo/Project/MyProject/DeepMosaics/DeepMosaics/train/add/datasets/av/mask'
+img_dir ='/media/hypo/Project/MyProject/DeepMosaics/DeepMosaics/train/add/datasets/av/origin_image'
 output_dir = './datasets_img'
 util.makedirs(output_dir)
 HD = True # if false make dataset for pix2pix, if Ture for pix2pix_HD
-MASK = False # if True, output mask,too
+MASK = True # if True, output mask,too
 OUT_SIZE = 256
-FOLD_NUM = 5
+FOLD_NUM = 2
+Bounding = True
 
 if HD:
     train_A_path = os.path.join(output_dir,'train_A')
@@ -32,8 +33,8 @@ if MASK:
     mask_path = os.path.join(output_dir,'mask')
     util.makedirs(mask_path)
 
-mask_names = os.listdir(mask_path)
-img_names = os.listdir(img_path)
+mask_names = os.listdir(mask_dir)
+img_names = os.listdir(img_dir)
 mask_names.sort()
 img_names.sort()
 print('Find images:',len(img_names))
@@ -42,13 +43,14 @@ cnt = 0
 for fold in range(FOLD_NUM):
     for img_name,mask_name in zip(img_names,mask_names):
         try:
-            img = impro.imread(os.path.join(img_path,img_name))
-            mask = impro.imread(os.path.join(mask_path,mask_name),'gray')
+            img = impro.imread(os.path.join(img_dir,img_name))
+            mask = impro.imread(os.path.join(mask_dir,mask_name),'gray')
             mask = impro.resize_like(mask, img)
             x,y,size,area = impro.boundingSquare(mask, 1.5)
             if area > 100:
-                img = impro.resize(img[y-size:y+size,x-size:x+size],OUT_SIZE)
-                mask = impro.resize(mask[y-size:y+size,x-size:x+size],OUT_SIZE)
+                if Bounding:
+                    img = impro.resize(img[y-size:y+size,x-size:x+size],OUT_SIZE)
+                    mask = impro.resize(mask[y-size:y+size,x-size:x+size],OUT_SIZE)
                 img_mosaic = mosaic.addmosaic_random(img, mask)
 
                 if HD:
diff --git a/make_datasets/use_irregular_holes_mask_make_dataset.py b/make_datasets/use_irregular_holes_mask_make_dataset.py
index 37d92a4b1d07457a54373231818b23774e743a9c..0925f731c279072e762a119adfd24ecdc76b6f57 100644
--- a/make_datasets/use_irregular_holes_mask_make_dataset.py
+++ b/make_datasets/use_irregular_holes_mask_make_dataset.py
@@ -11,11 +11,11 @@ from util import util,mosaic
 import datetime
 
 ir_mask_path = './Irregular_Holes_mask'
-img_dir ='/home/hypo/MyProject/Haystack/CV/output/all/face'
-MOD = 'HD' #HD | pix2pix | mosaic
+img_dir ='/media/hypo/Hypoyun/Datasets/other/face512'
+MOD = 'mosaic' #HD | pix2pix | mosaic
 MASK = False # if True, output mask,too
-BOUNDING = True # if true the mosaic size will be more big
-suffix = ''
+BOUNDING = False # if true the mosaic size will be more big
+suffix = '_1'
 
 output_dir = os.path.join('./datasets_img',MOD)
 util.makedirs(output_dir)
@@ -27,6 +27,13 @@ if MOD == 'HD':
 elif MOD == 'pix2pix':
     train_path = os.path.join(output_dir,'train')
     util.makedirs(train_path)
+elif MOD == 'mosaic':
+    ori_path = os.path.join(output_dir,'ori')
+    mosaic_path = os.path.join(output_dir,'mosaic')
+    mask_path = os.path.join(output_dir,'mask')
+    util.makedirs(ori_path)
+    util.makedirs(mosaic_path)
+    util.makedirs(mask_path)
 if MASK:
     mask_path = os.path.join(output_dir,'mask')
     util.makedirs(mask_path)
@@ -43,12 +50,13 @@ transform_img = transforms.Compose([
 ])
 
 mask_names = os.listdir(ir_mask_path)
-img_names = os.listdir(img_dir)
-print('Find images:',len(img_names))
+img_paths = util.Traversal(img_dir)
+img_paths = util.is_imgs(img_paths)
+print('Find images:',len(img_paths))
 
-for i,img_name in enumerate(img_names,1):
+for i,img_path in enumerate(img_paths,1):
     try:
-        img = Image.open(os.path.join(img_dir,img_name))
+        img = Image.open(img_path)
         img = transform_img(img)
         img = np.array(img)
         img = img[...,::-1]
@@ -70,11 +78,16 @@ for i,img_name in enumerate(img_names,1):
         if MOD == 'HD':#[128:384,128:384,:] --->256
             cv2.imwrite(os.path.join(train_A_path,'%05d' % i+suffix+'.jpg'), mosaic_img)
             cv2.imwrite(os.path.join(train_B_path,'%05d' % i+suffix+'.jpg'), img)
-        else:
+            if MASK:
+                cv2.imwrite(os.path.join(mask_path,'%05d' % i+suffix+'.png'), mask)
+        elif MOD == 'pix2pix':
             merge_img = impro.makedataset(mosaic_img, img)
             cv2.imwrite(os.path.join(train_path,'%05d' % i+suffix+'.jpg'), merge_img)
-        if MASK:
+        elif MOD == 'mosaic':
+            cv2.imwrite(os.path.join(mosaic_path,'%05d' % i+suffix+'.jpg'), mosaic_img)
+            cv2.imwrite(os.path.join(ori_path,'%05d' % i+suffix+'.jpg'), img)
             cv2.imwrite(os.path.join(mask_path,'%05d' % i+suffix+'.png'), mask)
-        print('\r','Proc/all:'+str(i)+'/'+str(len(img_names)),util.get_bar(100*i/len(img_names),num=40),end='')
+
+        print('\r','Proc/all:'+str(i)+'/'+str(len(img_paths)),util.get_bar(100*i/len(img_paths),num=40),end='')
     except Exception as e:
-        print(img_name,e)
+        print(img_path,e)
diff --git a/models/runmodel.py b/models/runmodel.py
index 31a4f8c453784755cc7fe2977d08166da16588d8..993a64e5feaf1cfdc7fb522c39fa72373f7a5b15 100755
--- a/models/runmodel.py
+++ b/models/runmodel.py
@@ -5,7 +5,7 @@ from util import mosaic
 from util import data
 import torch
 
-def run_unet(img,net,size = 128,use_gpu = True):
+def run_unet(img,net,size = 224,use_gpu = True):
     img=impro.image2folat(img,3)
     img=img.reshape(1,3,size,size)
     img = torch.from_numpy(img)
@@ -16,12 +16,12 @@
-def run_unet_rectim(img,net,size = 128,use_gpu = True):
+def run_unet_rectim(img,net,size = 224,use_gpu = True):
     img = impro.resize(img,size)
-    img1,img2 = impro.spiltimage(img)
-    mask1 = run_unet(img1,net,size = 128,use_gpu = use_gpu)
-    mask2 = run_unet(img2,net,size = 128,use_gpu = use_gpu)
-    mask = impro.mergeimage(mask1,mask2,img)
+    img1,img2 = impro.spiltimage(img,size)
+    mask1 = run_unet(img1,net,size,use_gpu = use_gpu)
+    mask2 = run_unet(img2,net,size,use_gpu = use_gpu)
+    mask = impro.mergeimage(mask1,mask2,img,size)
     return mask
 
 def run_pix2pix(img,net,opt):
@@ -42,8 +42,9 @@
 def get_mosaic_position(img_origin,net_mosaic_pos,opt,threshold = 128 ):
     mask = run_unet_rectim(img_origin,net_mosaic_pos,use_gpu = opt.use_gpu)
-    mask = impro.mask_threshold(mask,10,threshold)
+    mask_1 = mask.copy()
+    mask = impro.mask_threshold(mask,20,threshold)
     x,y,size,area = impro.boundingSquare(mask,Ex_mul=1.5)
-    rat = min(img_origin.shape[:2])/128.0
+    rat = min(img_origin.shape[:2])/224.0
     x,y,size = int(rat*x),int(rat*y),int(rat*size)
-    return x,y,size,mask
\ No newline at end of file
+    return x,y,size,mask_1
\ No newline at end of file
diff --git a/models/unet_model.py b/models/unet_model.py
index a0b5870bfbcdaedb4446c406e999d40cf829b1d2..de16f646f3b40d2a7a39e5a008974ac9c0aa2dd0 100755
--- a/models/unet_model.py
+++ b/models/unet_model.py
@@ -31,4 +31,4 @@ class UNet(nn.Module):
         x = self.up3(x, x2)
         x = self.up4(x, x1)
         x = self.outc(x)
-        return torch.Tanh(x)
\ No newline at end of file
+        return x
\ No newline at end of file
diff --git a/models/unet_parts.py b/models/unet_parts.py
index 80165e5c36f992c14e8fd10f4092fc9d5fe72fe2..2d93833babf7f16e753571517c7e3925f7d80b0d 100755
--- a/models/unet_parts.py
+++ b/models/unet_parts.py
@@ -90,7 +90,12 @@ class up(nn.Module):
 
 class outconv(nn.Module):
     def __init__(self, in_ch, out_ch):
         super(outconv, self).__init__()
-        self.conv = nn.Conv2d(in_ch, out_ch, 1)
+        self.conv = nn.Sequential(
+            nn.Conv2d(in_ch, out_ch, 1),
+            nn.Sigmoid()
+        )
+
+
     def forward(self, x):
         x = self.conv(x)
diff --git a/train/add/train.py b/train/add/train.py
index 4629e95fb7d8b673a9ad875d3095739c32b6b841..410573c3aec2aabd65e5be9896ab2b36e89588a7 100644
--- a/train/add/train.py
+++ b/train/add/train.py
@@ -22,17 +22,19 @@ import torch.backends.cudnn as cudnn
 
 LR = 0.0002
 EPOCHS = 100
-BATCHSIZE = 16
+BATCHSIZE = 8
 LOADSIZE = 256
 FINESIZE = 224
-CONTINUE = False
+CONTINUE = True
 use_gpu = True
-SAVE_FRE = 5
-cudnn.benchmark = False
+SAVE_FRE = 1
+MAX_LOAD = 35000
+#cudnn.benchmark = True
 
-dir_img = './datasets/av/origin_image/'
-dir_mask = './datasets/av/mask/'
-dir_checkpoint = 'checkpoints/'
+
+dir_img = './datasets/mosaic/mosaic/'
+dir_mask = './datasets/mosaic/mask/'
+dir_checkpoint = 'checkpoints/mosaic/'
 
 
 def Totensor(img,use_gpu=True):
@@ -43,15 +45,15 @@ def Totensor(img,use_gpu=True):
     return img
 
 
-def Toinputshape(imgs,masks,finesize):
+def Toinputshape(imgs,masks,finesize,test_flag = False):
     batchsize = len(imgs)
     result_imgs=[];result_masks=[]
     for i in range(batchsize):
         # print(imgs[i].shape,masks[i].shape)
-        img,mask = data.random_transform_image(imgs[i], masks[i], finesize)
+        img,mask = data.random_transform_image(imgs[i], masks[i], finesize, test_flag)
         # print(img.shape,mask.shape)
-        mask = (mask.reshape(1,finesize,finesize)/255.0-0.5)/0.5
-        img = (img.transpose((2, 0, 1))/255.0-0.5)/0.5
+        mask = (mask.reshape(1,finesize,finesize)/255.0)
+        img = (img.transpose((2, 0, 1))/255.0)
         result_imgs.append(img)
         result_masks.append(mask)
     result_imgs = np.array(result_imgs)
@@ -74,9 +76,10 @@ def batch_generator(images,masks,batchsize):
 def loadimage(dir_img,dir_mask,loadsize,eval_p):
     t1 = datetime.datetime.now()
     imgnames = os.listdir(dir_img)
-    # imgnames = imgnames[:100]
-    print('images num:',len(imgnames))
+    # imgnames = imgnames[:100]
     random.shuffle(imgnames)
+    imgnames = imgnames[:MAX_LOAD]
+    print('load images:',len(imgnames))
     imgnames = (f[:-4] for f in imgnames)
     images = []
     masks = []
@@ -94,7 +97,7 @@ def loadimage(dir_img,dir_mask,loadsize,eval_p):
 
     return train_images,train_masks,eval_images,eval_masks
 
-
+util.makedirs(dir_checkpoint)
 print('loading data......')
 train_images,train_masks,eval_images,eval_masks = loadimage(dir_img,dir_mask,LOADSIZE,0.2)
 dataset_eval_images,dataset_eval_masks = batch_generator(eval_images,eval_masks,BATCHSIZE)
@@ -104,6 +107,10 @@ dataset_train_images,dataset_train_masks = batch_generator(train_images,train_ma
 
 net = unet_model.UNet(n_channels = 3, n_classes = 1)
 
+if CONTINUE:
+    if not os.path.isfile(os.path.join(dir_checkpoint,'last.pth')):
+        CONTINUE = False
+        print('can not load last.pth, training on init weight.')
 if CONTINUE:
     net.load_state_dict(torch.load(dir_checkpoint+'last.pth'))
 if use_gpu:
@@ -117,6 +124,7 @@ criterion = nn.BCELoss()
 
 print('begin training......')
 for epoch in range(EPOCHS):
+    random_save = random.randint(0, len(dataset_train_images))
     starttime = datetime.datetime.now()
     print('Epoch {}/{}.'.format(epoch + 1, EPOCHS))
 
@@ -139,15 +147,18 @@ for epoch in range(EPOCHS):
         optimizer.step()
 
         if i%100 == 0:
-            data.showresult(img,mask,mask_pred,os.path.join(dir_checkpoint,'result.png'))
+            data.showresult(img,mask,mask_pred,os.path.join(dir_checkpoint,'result.png'),True)
+        if i == random_save:
+            data.showresult(img,mask,mask_pred,os.path.join(dir_checkpoint,'epoch_'+str(epoch+1)+'.png'),True)
 
     # torch.cuda.empty_cache()
     # # net.eval()
     epoch_loss_eval = 0
     with torch.no_grad():
+        #net.eval()
         for i,(img,mask) in enumerate(zip(dataset_eval_images,dataset_eval_masks)):
             # print(epoch,i,img.shape,mask.shape)
-            img,mask = Toinputshape(img, mask, FINESIZE)
+            img,mask = Toinputshape(img, mask, FINESIZE,test_flag=True)
             img = Totensor(img,use_gpu)
             mask = Totensor(mask,use_gpu)
             mask_pred = net(img)
@@ -164,5 +175,5 @@ for epoch in range(EPOCHS):
 
     if (epoch+1)%SAVE_FRE == 0:
         torch.save(net.cpu().state_dict(),dir_checkpoint+'epoch'+str(epoch+1)+'.pth')
-        data.showresult(img,mask,mask_pred,os.path.join(dir_checkpoint,'epoch_'+str(epoch+1)+'.png'))
+
        print('network saved.')
diff --git a/train/clean/train.py b/train/clean/train.py
index 8c9bc5b36564cb467fa44d392ff5fd93aa29bf5e..e41801c7f52be3275dfb9d2a08263922817b87d4 100644
--- a/train/clean/train.py
+++ b/train/clean/train.py
@@ -21,19 +21,19 @@ ITER = 10000000
 LR = 0.0002
 beta1 = 0.5
 use_gpu = True
-use_gan = False
-use_L2 = True
+use_gan = True
+use_L2 = False
 CONTINUE = True
 lambda_L1 = 100.0
 lambda_gan = 1
 SAVE_FRE = 10000
 start_iter = 0
-finesize = 128
-loadsize = int(finesize*1.1)
-batchsize = 8
+finesize = 256
+loadsize = int(finesize*1.2)
+batchsize = 1
 perload_num = 16
-savename = 'MosaicNet_batch'
+savename = 'MosaicNet_instance_gan_256_D5'
 dir_checkpoint = 'checkpoints/'+savename
 util.makedirs(dir_checkpoint)
 
@@ -57,10 +57,14 @@ loadmodel.show_paramsnumber(netG,'netG')
 # netG = unet_model.UNet(3*N+1, 3)
 if use_gan:
     #netD = pix2pix_model.define_D(3*2+1, 64, 'pixel', norm='instance')
-    netD = pix2pix_model.define_D(3*2+1, 64, 'basic', norm='instance')
-    #netD = pix2pix_model.define_D(3*2+1, 64, 'n_layers', n_layers_D=5, norm='instance', init_type='normal', init_gain=0.02, gpu_ids=[])
+    #netD = pix2pix_model.define_D(3*2+1, 64, 'basic', norm='instance')
+    netD = pix2pix_model.define_D(3*2+1, 64, 'n_layers', n_layers_D=5, norm='instance')
 
 if CONTINUE:
+    if not os.path.isfile(os.path.join(dir_checkpoint,'last_G.pth')):
+        CONTINUE = False
+        print('can not load last_G, training on init weight.')
+if CONTINUE:
     netG.load_state_dict(torch.load(os.path.join(dir_checkpoint,'last_G.pth')))
     if use_gan:
         netD.load_state_dict(torch.load(os.path.join(dir_checkpoint,'last_D.pth')))
diff --git a/util/data.py b/util/data.py
index 64746f0c5855cad671de6e19d544a0094261421c..b60a0d1a2f463829c7f2eff704e9b872913400b1 100755
--- a/util/data.py
+++ b/util/data.py
@@ -37,7 +37,7 @@ def im2tensor(image_numpy, imtype=np.uint8, gray=False,bgr2rgb = True, reshape =
         image_numpy = (image_numpy/255.0-0.5)/0.5
         image_tensor = torch.from_numpy(image_numpy).float()
         if reshape:
-            image_tensor=image_tensor.reshape(1,1,h,w)
+            image_tensor = image_tensor.reshape(1,1,h,w)
     else:
         h, w ,ch = image_numpy.shape
         if bgr2rgb:
@@ -52,7 +52,7 @@ def im2tensor(image_numpy, imtype=np.uint8, gray=False,bgr2rgb = True, reshape =
         image_numpy = image_numpy.transpose((2, 0, 1))
         image_tensor = torch.from_numpy(image_numpy).float()
         if reshape:
-            image_tensor=image_tensor.reshape(1,ch,h,w)
+            image_tensor = image_tensor.reshape(1,ch,h,w)
     if use_gpu:
         image_tensor = image_tensor.cuda()
     return image_tensor
@@ -91,7 +91,7 @@ def random_transform_video(src,target,finesize,N):
 
     return src,target
 
-def random_transform_image(img,mask,finesize):
+def random_transform_image(img,mask,finesize,test_flag = False):
 
     # randomsize = int(finesize*(1.2+0.2*random.random())+2)
 
@@ -118,6 +118,9 @@ def random_transform_image(img,mask,finesize):
     # print(h,w,h_move,w_move)
     img_crop = img[h_move:h_move+finesize,w_move:w_move+finesize]
     mask_crop = mask[h_move:h_move+finesize,w_move:w_move+finesize]
+
+    if test_flag:
+        return img_crop,mask_crop
 
     #random rotation
     if random.random()<0.2:
@@ -143,12 +146,19 @@ def random_transform_image(img,mask,finesize):
     else:
         img = img[::-1,:,:]
         mask = mask[::-1,:]
+
+    #random blur
+    if random.random()>0.5:
+        size_ran = random.uniform(0.5,1.5)
+        img = cv2.resize(img, (int(finesize*size_ran),int(finesize*size_ran)))
+        img = cv2.resize(img, (finesize,finesize))
+        #img = cv2.blur(img, (random.randint(1,3), random.randint(1,3)))
 
     return img,mask
 
-def showresult(img1,img2,img3,name):
+def showresult(img1,img2,img3,name,is0_1 = False):
     size = img1.shape[3]
     showimg=np.zeros((size,size*3,3))
-    showimg[0:size,0:size] = tensor2im(img1,rgb2bgr = False, is0_1 = False)
-    showimg[0:size,size:size*2] = tensor2im(img2,rgb2bgr = False, is0_1 = False)
-    showimg[0:size,size*2:size*3] = tensor2im(img3,rgb2bgr = False, is0_1 = False)
+    showimg[0:size,0:size] = tensor2im(img1,rgb2bgr = False, is0_1 = is0_1)
+    showimg[0:size,size:size*2] = tensor2im(img2,rgb2bgr = False, is0_1 = is0_1)
+    showimg[0:size,size*2:size*3] = tensor2im(img3,rgb2bgr = False, is0_1 = is0_1)
     cv2.imwrite(name, showimg)
diff --git a/util/image_processing.py b/util/image_processing.py
index d03e7717360aeafbf1e9f475d9c426543fcb39ff..fdb840c59f79250cdad21d6f402b4b4132db1d68 100755
--- a/util/image_processing.py
+++ b/util/image_processing.py
@@ -84,9 +84,9 @@ def image2folat(img,ch):
     img = (img.transpose((2, 0, 1))/255.0).astype(np.float32)
     return img
 
-def spiltimage(img):
+def spiltimage(img,size = 128):
     h, w = img.shape[:2]
-    size = min(h,w)
+    # size = min(h,w)
     if w >= h:
         img1 = img[:,0:size]
         img2 = img[:,w-size:w]
@@ -96,12 +96,12 @@
-def mergeimage(img1,img2,orgin_image):
+def mergeimage(img1,img2,orgin_image,size = 128):
     h, w = orgin_image.shape[:2]
     new_img1 = np.zeros((h,w), dtype = "uint8")
     new_img2 = np.zeros((h,w), dtype = "uint8")
-    size = min(h,w)
+    # size = min(h,w)
     if w >= h:
         new_img1[:,0:size]=img1
         new_img2[:,w-size:w]=img2
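
Note on the model/training changes above: models/unet_parts.py now ends outconv with nn.Sigmoid(), models/unet_model.py no longer applies torch.Tanh to the output, train/add/train.py scales images and masks to [0,1] instead of [-1,1], and the loss is nn.BCELoss(). A minimal, self-contained sketch of how those pieces fit together, using random stand-in tensors rather than project code (the batch size and 224x224 shape here are illustrative assumptions):

    # Sketch only: random tensors stand in for the UNet output and the binary mask target.
    import torch
    import torch.nn as nn

    pred = torch.sigmoid(torch.randn(4, 1, 224, 224))    # like outconv's Conv2d -> Sigmoid, values in (0,1)
    target = (torch.rand(4, 1, 224, 224) > 0.5).float()  # like mask/255.0 in Toinputshape, values in [0,1]
    loss = nn.BCELoss()(pred, target)                     # BCELoss expects both tensors in [0,1]
    print(loss.item())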