diff --git a/.gitignore b/.gitignore index 03ab31855bc6f5fff72cb3a63b1e323593d9b219..973701941c8c07162d56b2656be17c5ea6bdf5e2 100644 --- a/.gitignore +++ b/.gitignore @@ -153,6 +153,7 @@ result/ /python_test.py /pretrained_models_old /deepmosaic_window +/sftp-config.json #./make_datasets /make_datasets/video /make_datasets/tmp @@ -160,6 +161,9 @@ result/ /make_datasets/datasets /make_datasets/dataset /make_datasets/datasets_img +/make_datasets/videos +#./models +/models/videoHD_model.py #./train /train/clean/dataset #mediafile @@ -177,4 +181,5 @@ result/ *.rmvb *.JPG *.MP4 -*.JPEG \ No newline at end of file +*.JPEG +*.exe \ No newline at end of file diff --git a/README.md b/README.md index 6ac5fb35e288262f4639dbdc606b29b2b7a6f99b..15977a2819e6f1e469117d76a40e5aed069ef58f 100755 --- a/README.md +++ b/README.md @@ -48,17 +48,17 @@ git clone https://github.com/HypoX64/DeepMosaics cd DeepMosaics ``` #### Get pre_trained models and test video -You can download pre_trained models and test video and replace the files in the project.
+You can download pre_trained models and put them into './pretrained_models'.
[[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ) #### Simple example * Add Mosaic (output video will save in './result') ```bash -python3 deepmosaic.py +python3 deepmosaic.py --media_path ./imgs/ruoruo.jpg --model_path ./pretrained_models/mosaic/add_face.pth --use_gpu -1 ``` * Clean Mosaic (output video will save in './result') ```bash -python3 deepmosaic.py --mode clean --model_path ./pretrained_models/clean_hands_unet_128.pth --media_path ./result/hands_test_AddMosaic.mp4 +python3 deepmosaic.py --media_path ./result/ruoruo_add.jpg --model_path ./pretrained_models/mosaic/clean_face_HD.pth --use_gpu -1 ``` #### More parameters If you want to test other image or video, please refer to this file. diff --git a/README_CN.md b/README_CN.md index e6661f0cd54853a791862ed4b987cf8cbea69627..b4bef9bc188dad0d4d5eb3251e944c36b2707cb4 100644 --- a/README_CN.md +++ b/README_CN.md @@ -49,18 +49,18 @@ git clone https://github.com/HypoX64/DeepMosaics cd DeepMosaics ``` -#### 下载测试视频以及预训练模型 -可以通过以下两种方法下载测试视频以及预训练模型,并将他们置于项目文件夹中.
+#### 下载预训练模型 +可以通过以下两种方法下载预训练模型,并将它们置于'./pretrained_models'文件夹中.
[[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ)
#### 简单的例子 -* 为视频添加马赛克,例子中认为手是需要打码的区域 ,可以通过切换预训练模型切换自动打码区域(输出结果将储存到 './result') +* 为视频添加马赛克,例子中认为脸是需要打码的区域 ,可以通过切换预训练模型切换自动打码区域(输出结果将储存到 './result') ```bash -python3 deepmosaic.py +python3 deepmosaic.py --media_path ./imgs/ruoruo.jpg --model_path ./pretrained_models/mosaic/add_face.pth --use_gpu -1 ``` * 将视频中的马赛克移除,对于不同的打码物体需要使用对应的预训练模型进行马赛克消除(输出结果将储存到 './result') ```bash -python3 deepmosaic.py --mode clean --model_path ./pretrained_models/clean_hands_unet_128.pth --media_path ./result/hands_test_AddMosaic.mp4 +python3 deepmosaic.py --media_path ./result/ruoruo_add.jpg --model_path ./pretrained_models/mosaic/clean_face_HD.pth --use_gpu -1 ``` #### 更多的参数 如果想要测试其他的图片或视频,请参照以下文件输入参数. diff --git a/cores/core.py b/cores/core.py index b320dbd1d0a3113a79f2f3fc788346c218a95470..e5ed377cf784d0f0d8c437eaf2d673d6c45df209 100644 --- a/cores/core.py +++ b/cores/core.py @@ -6,15 +6,23 @@ from models import runmodel,loadmodel from util import mosaic,util,ffmpeg,filt,data from util import image_processing as impro +''' +---------------------Video Init--------------------- +''' def video_init(opt,path): util.clean_tempfiles() - fps = ffmpeg.get_video_infos(path)[0] + fps,endtime,height,width = ffmpeg.get_video_infos(path) + if opt.fps !=0: + fps = opt.fps ffmpeg.video2voice(path,'./tmp/voice_tmp.mp3') - ffmpeg.video2image(path,'./tmp/video2image/output_%05d.'+opt.tempimage_type) + ffmpeg.video2image(path,'./tmp/video2image/output_%05d.'+opt.tempimage_type,fps) imagepaths=os.listdir('./tmp/video2image') imagepaths.sort() - return fps,imagepaths + return fps,imagepaths,height,width +''' +---------------------Add Mosaic--------------------- +''' def addmosaic_img(opt,netS): path = opt.media_path print('Add Mosaic:',path) @@ -25,7 +33,7 @@ def addmosaic_img(opt,netS): def addmosaic_video(opt,netS): path = opt.media_path - fps,imagepaths = video_init(opt,path) + fps,imagepaths = video_init(opt,path)[:2] # get position positions = [] for i,imagepath in enumerate(imagepaths,1): @@ -33,7 +41,7 @@ def addmosaic_video(opt,netS): mask,x,y,area = runmodel.get_ROI_position(img,netS,opt) positions.append([x,y,area]) cv2.imwrite(os.path.join('./tmp/ROI_mask',imagepath),mask) - print('\r','Find ROI location:'+str(i)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=40),end='') + print('\r','Find ROI location:'+str(i)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=35),end='') print('\nOptimize ROI locations...') mask_index = filt.position_medfilt(np.array(positions), 7) @@ -44,13 +52,16 @@ def addmosaic_video(opt,netS): if impro.mask_area(mask)>100: img = mosaic.addmosaic(img, mask, opt) cv2.imwrite(os.path.join('./tmp/addmosaic_image',imagepaths[i]),img) - print('\r','Add Mosaic:'+str(i+1)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=40),end='') + print('\r','Add Mosaic:'+str(i+1)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=35),end='') print() ffmpeg.image2video( fps, './tmp/addmosaic_image/output_%05d.'+opt.tempimage_type, './tmp/voice_tmp.mp3', os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_add.mp4')) +''' +---------------------Style Transfer--------------------- +''' def styletransfer_img(opt,netG): print('Style Transfer_img:',opt.media_path) img = impro.imread(opt.media_path) @@ -61,13 +72,13 @@ def styletransfer_img(opt,netG): def styletransfer_video(opt,netG): path = opt.media_path positions = [] - fps,imagepaths = video_init(opt,path) + fps,imagepaths = video_init(opt,path)[:2] for i,imagepath in 
enumerate(imagepaths,1): img = impro.imread(os.path.join('./tmp/video2image',imagepath)) img = runmodel.run_styletransfer(opt, netG, img) cv2.imwrite(os.path.join('./tmp/style_transfer',imagepath),img) - print('\r','Transfer:'+str(i)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=40),end='') + print('\r','Transfer:'+str(i)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=35),end='') print() suffix = os.path.basename(opt.model_path).replace('.pth','').replace('style_','') ffmpeg.image2video( fps, @@ -75,6 +86,24 @@ def styletransfer_video(opt,netG): './tmp/voice_tmp.mp3', os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_'+suffix+'.mp4')) +''' +---------------------Clean Mosaic--------------------- +''' +def get_mosaic_positions(opt,netM,imagepaths,savemask=True): + # get mosaic position + positions = [] + for i,imagepath in enumerate(imagepaths,1): + img_origin = impro.imread(os.path.join('./tmp/video2image',imagepath)) + x,y,size,mask = runmodel.get_mosaic_position(img_origin,netM,opt) + if savemask: + cv2.imwrite(os.path.join('./tmp/mosaic_mask',imagepath), mask) + positions.append([x,y,size]) + print('\r','Find mosaic location:'+str(i)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=35),end='') + print('\nOptimize mosaic locations...') + positions =np.array(positions) + for i in range(3):positions[:,i] = filt.medfilt(positions[:,i],opt.medfilt_num) + return positions + def cleanmosaic_img(opt,netG,netM): path = opt.media_path @@ -85,7 +114,10 @@ def cleanmosaic_img(opt,netG,netM): img_result = img_origin.copy() if size != 0 : img_mosaic = img_origin[y-size:y+size,x-size:x+size] - img_fake = runmodel.run_pix2pix(img_mosaic,netG,opt) + if opt.traditional: + img_fake = runmodel.traditional_cleaner(img_mosaic,opt) + else: + img_fake = runmodel.run_pix2pix(img_mosaic,netG,opt) img_result = impro.replace_mosaic(img_origin,img_fake,x,y,size,opt.no_feather) else: print('Do not find mosaic') @@ -93,19 +125,8 @@ def cleanmosaic_img(opt,netG,netM): def cleanmosaic_video_byframe(opt,netG,netM): path = opt.media_path - fps,imagepaths = video_init(opt,path) - positions = [] - # get position - for i,imagepath in enumerate(imagepaths,1): - img_origin = impro.imread(os.path.join('./tmp/video2image',imagepath)) - x,y,size = runmodel.get_mosaic_position(img_origin,netM,opt)[:3] - positions.append([x,y,size]) - print('\r','Find mosaic location:'+str(i)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=40),end='') - - print('\nOptimize mosaic locations...') - positions =np.array(positions) - for i in range(3):positions[:,i] = filt.medfilt(positions[:,i],opt.medfilt_num) - + fps,imagepaths = video_init(opt,path)[:2] + positions = get_mosaic_positions(opt,netM,imagepaths,savemask=False) # clean mosaic for i,imagepath in enumerate(imagepaths,0): x,y,size = positions[i][0],positions[i][1],positions[i][2] @@ -113,10 +134,13 @@ def cleanmosaic_video_byframe(opt,netG,netM): img_result = img_origin.copy() if size != 0: img_mosaic = img_origin[y-size:y+size,x-size:x+size] - img_fake = runmodel.run_pix2pix(img_mosaic,netG,opt) - img_result = impro.replace_mosaic(img_origin,img_fake,x,y,size,opt.no_feather) + if opt.traditional: + img_fake = runmodel.traditional_cleaner(img_mosaic,opt) + else: + img_fake = runmodel.run_pix2pix(img_mosaic,netG,opt) + img_result = impro.replace_mosaic(img_origin,img_fake,x,y,size,opt.no_feather) cv2.imwrite(os.path.join('./tmp/replace_mosaic',imagepath),img_result) - print('\r','Clean 
Mosaic:'+str(i+1)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=40),end='') + print('\r','Clean Mosaic:'+str(i+1)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=35),end='') print() ffmpeg.image2video( fps, './tmp/replace_mosaic/output_%05d.'+opt.tempimage_type, @@ -127,48 +151,39 @@ def cleanmosaic_video_fusion(opt,netG,netM): path = opt.media_path N = 25 INPUT_SIZE = 128 - fps,imagepaths = video_init(opt,path) - positions = [] - # get position - for i,imagepath in enumerate(imagepaths,1): - img_origin = impro.imread(os.path.join('./tmp/video2image',imagepath)) - # x,y,size = runmodel.get_mosaic_position(img_origin,net_mosaic_pos,opt)[:3] - x,y,size,mask = runmodel.get_mosaic_position(img_origin,netM,opt) - cv2.imwrite(os.path.join('./tmp/mosaic_mask',imagepath), mask) - positions.append([x,y,size]) - print('\r','Find mosaic location:'+str(i)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=40),end='') - print('\nOptimize mosaic locations...') - positions =np.array(positions) - for i in range(3):positions[:,i] = filt.medfilt(positions[:,i],opt.medfilt_num) - + fps,imagepaths,height,width = video_init(opt,path) + positions = get_mosaic_positions(opt,netM,imagepaths,savemask=True) + # clean mosaic + img_pool = np.zeros((height,width,3*N), dtype='uint8') for i,imagepath in enumerate(imagepaths,0): x,y,size = positions[i][0],positions[i][1],positions[i][2] - img_origin = impro.imread(os.path.join('./tmp/video2image',imagepath)) + + # image read stream mask = cv2.imread(os.path.join('./tmp/mosaic_mask',imagepath),0) + if i==0 : + for j in range(0,N): + img_pool[:,:,j*3:(j+1)*3] = impro.imread(os.path.join('./tmp/video2image',imagepaths[np.clip(i+j-12,0,len(imagepaths)-1)])) + else: + img_pool[:,:,0:(N-1)*3] = img_pool[:,:,3:N*3] + img_pool[:,:,(N-1)*3:] = impro.imread(os.path.join('./tmp/video2image',imagepaths[np.clip(i+12,0,len(imagepaths)-1)])) + img_origin = img_pool[:,:,int((N-1)/2)*3:(int((N-1)/2)+1)*3] - if size==0: + if size==0: # can not find mosaic, cv2.imwrite(os.path.join('./tmp/replace_mosaic',imagepath),img_origin) else: + mosaic_input = np.zeros((INPUT_SIZE,INPUT_SIZE,3*N+1), dtype='uint8') - for j in range(0,N): - img = impro.imread(os.path.join('./tmp/video2image',imagepaths[np.clip(i+j-12,0,len(imagepaths)-1)])) - img = img[y-size:y+size,x-size:x+size] - img = impro.resize(img,INPUT_SIZE) - mosaic_input[:,:,j*3:(j+1)*3] = img - mask = impro.resize(mask,np.min(img_origin.shape[:2])) - mask = mask[y-size:y+size,x-size:x+size] - mask = impro.resize(mask, INPUT_SIZE) - mosaic_input[:,:,-1] = mask + mosaic_input[:,:,0:N*3] = impro.resize(img_pool[y-size:y+size,x-size:x+size,:], INPUT_SIZE) + mask = impro.resize(mask,np.min(img_origin.shape[:2]))[y-size:y+size,x-size:x+size] + mosaic_input[:,:,-1] = impro.resize(mask, INPUT_SIZE) + mosaic_input = data.im2tensor(mosaic_input,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False,is0_1 = False) unmosaic_pred = netG(mosaic_input) - - #unmosaic_pred = (unmosaic_pred.cpu().detach().numpy()*255)[0] - #img_fake = unmosaic_pred.transpose((1, 2, 0)) img_fake = data.tensor2im(unmosaic_pred,rgb2bgr = False ,is0_1 = False) img_result = impro.replace_mosaic(img_origin,img_fake,x,y,size,opt.no_feather) cv2.imwrite(os.path.join('./tmp/replace_mosaic',imagepath),img_result) - print('\r','Clean Mosaic:'+str(i+1)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=40),end='') + print('\r','Clean Mosaic:'+str(i+1)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=35),end='') 
print() ffmpeg.image2video( fps, './tmp/replace_mosaic/output_%05d.'+opt.tempimage_type, diff --git a/cores/options.py b/cores/options.py index b621a026d2a0c813235ed35113f8700db3ed238d..cf31aacf7fb1a2319e89a29f6be75635a2a8865d 100644 --- a/cores/options.py +++ b/cores/options.py @@ -10,15 +10,16 @@ class Options(): def initialize(self): #base - self.parser.add_argument('--use_gpu',type=int,default=1, help='if 0 or -1, do not use gpu') + self.parser.add_argument('--use_gpu',type=int,default=0, help='if -1, do not use gpu') # self.parser.add_argument('--use_gpu', action='store_true', help='if input it, use gpu') - self.parser.add_argument('--media_path', type=str, default='./hands_test.mp4',help='your videos or images path') + self.parser.add_argument('--media_path', type=str, default='./imgs/ruoruo.jpg',help='your videos or images path') self.parser.add_argument('--mode', type=str, default='auto',help='auto | add | clean | style') - self.parser.add_argument('--model_path', type=str, default='./pretrained_models/add_hands_128.pth',help='pretrained model path') + self.parser.add_argument('--model_path', type=str, default='./pretrained_models/mosaic/add_face.pth',help='pretrained model path') self.parser.add_argument('--result_dir', type=str, default='./result',help='output media will be saved here') self.parser.add_argument('--tempimage_type', type=str, default='png',help='type of temp image, png | jpg, png is better but occupy more storage space') self.parser.add_argument('--netG', type=str, default='auto', help='select model to use for netG(Clean mosaic and Transfer style) -> auto | unet_128 | unet_256 | resnet_9blocks | HD | video') + self.parser.add_argument('--fps', type=int, default=0,help='read and output fps, if 0-> origin') self.parser.add_argument('--output_size', type=int, default=0,help='size of output file,if 0 -> origin') #AddMosaic @@ -29,8 +30,11 @@ class Options(): #CleanMosaic self.parser.add_argument('--mosaic_position_model_path', type=str, default='auto',help='name of model use to find mosaic position') + self.parser.add_argument('--traditional', action='store_true', help='if true, use traditional image processing methods to clean mosaic') + self.parser.add_argument('--tr_blur', type=int, default=10, help='ksize of blur when using traditional method, it will affect final quality') + self.parser.add_argument('--tr_down', type=int, default=10, help='downsample when using traditional method,it will affect final quality') self.parser.add_argument('--no_feather', action='store_true', help='if true, no edge feather and color correction, but run faster') - self.parser.add_argument('--no_large_area', action='store_true', help='if true, do not find the largest mosaic area') + self.parser.add_argument('--all_mosaic_area', action='store_true', help='if true, find all mosaic area, else only find the largest area') self.parser.add_argument('--medfilt_num', type=int, default=11,help='medfilt window of mosaic movement in the video') self.parser.add_argument('--ex_mult', type=str, default='auto',help='mosaic area expansion') @@ -50,17 +54,16 @@ class Options(): model_name = os.path.basename(self.opt.model_path) - if torch.cuda.is_available() and self.opt.use_gpu > 0: + if torch.cuda.is_available() and self.opt.use_gpu > -1: self.opt.use_gpu = True else: self.opt.use_gpu = False - if self.opt.mode == 'auto': - if 'add' in model_name: - self.opt.mode = 'add' - elif 'clean' in model_name: + if 'clean' in model_name or self.opt.traditional: self.opt.mode = 'clean' + elif 'add' in model_name: 
+ self.opt.mode = 'add' elif 'style' in model_name or 'edges' in model_name: self.opt.mode = 'style' else: diff --git a/deepmosaic.py b/deepmosaic.py index c75a06897dc9cd131fa31a02443682c898982c64..dec510296deeb08d011adcaa4ea064a9d9782039 100644 --- a/deepmosaic.py +++ b/deepmosaic.py @@ -25,7 +25,9 @@ def main(): elif opt.mode == 'clean': netM = loadmodel.unet_clean(opt) - if opt.netG == 'video': + if opt.traditional: + netG = None + elif opt.netG == 'video': netG = loadmodel.video(opt) else: netG = loadmodel.pix2pix(opt) @@ -35,7 +37,7 @@ def main(): if util.is_img(file): core.cleanmosaic_img(opt,netG,netM) elif util.is_video(file): - if opt.netG == 'video': + if opt.netG == 'video' and not opt.traditional: core.cleanmosaic_video_fusion(opt,netG,netM) else: core.cleanmosaic_video_byframe(opt,netG,netM) @@ -56,12 +58,12 @@ def main(): util.clean_tempfiles(tmp_init = False) -# main() -if __name__ == '__main__': - try: - main() - except Exception as e: - print('Error:',e) - input('Please press any key to exit.\n') - util.clean_tempfiles(tmp_init = False) - exit(0) +main() +# if __name__ == '__main__': +# try: +# main() +# except Exception as e: +# print('Error:',e) +# input('Please press any key to exit.\n') +# util.clean_tempfiles(tmp_init = False) +# exit(0) diff --git a/make_datasets/use_irregular_holes_mask_make_dataset.py b/make_datasets/use_irregular_holes_make_dataset.py similarity index 100% rename from make_datasets/use_irregular_holes_mask_make_dataset.py rename to make_datasets/use_irregular_holes_make_dataset.py diff --git a/models/runmodel.py b/models/runmodel.py index ff156f5a77ea83754a30dfd85e9674b9579e389e..11b14dd2878c69b59c7460b72201b7c8108bcdb7 100755 --- a/models/runmodel.py +++ b/models/runmodel.py @@ -8,22 +8,10 @@ import torch import numpy as np def run_unet(img,net,size = 224,use_gpu = True): - img=impro.image2folat(img,3) - img=img.reshape(1,3,size,size) - img = torch.from_numpy(img) - if use_gpu: - img=img.cuda() - pred = net(img) - pred = (pred.cpu().detach().numpy()*255) - pred = pred.reshape(size,size).astype('uint8') - return pred - -def run_unet_rectim(img,net,size = 224,use_gpu = True): img = impro.resize(img,size) - img1,img2 = impro.spiltimage(img,size) - mask1 = run_unet(img1,net,size,use_gpu = use_gpu) - mask2 = run_unet(img2,net,size,use_gpu = use_gpu) - mask = impro.mergeimage(mask1,mask2,img,size) + img = data.im2tensor(img,use_gpu = use_gpu, bgr2rgb = False,use_transform = False , is0_1 = True) + mask = net(img) + mask = data.tensor2im(mask, gray=True,rgb2bgr = False, is0_1 = True) return mask def run_pix2pix(img,net,opt): @@ -36,6 +24,13 @@ def run_pix2pix(img,net,opt): img_fake = data.tensor2im(img_fake) return img_fake +def traditional_cleaner(img,opt): + h,w = img.shape[:2] + img = cv2.blur(img, (opt.tr_blur,opt.tr_blur)) + img = img[::opt.tr_down,::opt.tr_down,:] + img = cv2.resize(img, (w,h),interpolation=cv2.INTER_LANCZOS4) + return img + def run_styletransfer(opt, net, img): if opt.output_size != 0: @@ -60,23 +55,22 @@ def run_styletransfer(opt, net, img): return img img = data.im2tensor(img,use_gpu=opt.use_gpu,gray=True,use_transform = False,is0_1 = False) else: - img = data.im2tensor(img,use_gpu=opt.use_gpu) + img = data.im2tensor(img,use_gpu=opt.use_gpu,gray=False,use_transform = True) img = net(img) img = data.tensor2im(img) return img def get_ROI_position(img,net,opt): - mask = run_unet_rectim(img,net,use_gpu = opt.use_gpu) + mask = run_unet(img,net,size=224,use_gpu = opt.use_gpu) mask = 
impro.mask_threshold(mask,opt.mask_extend,opt.mask_threshold) x,y,halfsize,area = impro.boundingSquare(mask, 1) return mask,x,y,area def get_mosaic_position(img_origin,net_mosaic_pos,opt,threshold = 128 ): - mask = run_unet_rectim(img_origin,net_mosaic_pos,use_gpu = opt.use_gpu) - #mask_1 = mask.copy() + mask = run_unet(img_origin,net_mosaic_pos,size=224,use_gpu = opt.use_gpu) mask = impro.mask_threshold(mask,30,threshold) - if not opt.no_large_area: - mask = impro.find_best_ROI(mask) + if not opt.all_mosaic_area: + mask = impro.find_mostlikely_ROI(mask) x,y,size,area = impro.boundingSquare(mask,Ex_mul=opt.ex_mult) rat = min(img_origin.shape[:2])/224.0 x,y,size = int(rat*x),int(rat*y),int(rat*size) diff --git a/models/video_model_unet.py b/models/video_model_unet.py deleted file mode 100644 index 8e338b89df83dd425b3e291dead5216998b84a8f..0000000000000000000000000000000000000000 --- a/models/video_model_unet.py +++ /dev/null @@ -1,108 +0,0 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F -from .unet_parts import * - - -class conv_3d(nn.Module): - def __init__(self,inchannel,outchannel,kernel_size=3,stride=2,padding=1): - super(conv_3d, self).__init__() - self.conv = nn.Sequential( - nn.Conv3d(inchannel, outchannel, kernel_size=kernel_size, stride=stride, padding=padding, bias=False), - nn.BatchNorm3d(outchannel), - nn.ReLU(inplace=True), - ) - - def forward(self, x): - x = self.conv(x) - return x - - -class encoder_3d(nn.Module): - def __init__(self,in_channel): - super(encoder_3d, self).__init__() - self.down1 = conv_3d(1, 64, 3, 2, 1) - self.down2 = conv_3d(64, 128, 3, 2, 1) - self.down3 = conv_3d(128, 256, 3, 2, 1) - self.down4 = conv_3d(256, 512, 3, 2, 1) - self.conver2d = nn.Sequential( - nn.Conv2d(int(in_channel/16)+1, 1, kernel_size=3, stride=1, padding=1, bias=False), - nn.BatchNorm2d(1), - nn.ReLU(inplace=True), - ) - - - def forward(self, x): - x = x.view(x.size(0),1,x.size(1),x.size(2),x.size(3)) - x = self.down1(x) - x = self.down2(x) - x = self.down3(x) - x = self.down4(x) - x = x.view(x.size(1),x.size(2),x.size(3),x.size(4)) - x = self.conver2d(x) - x = x.view(x.size(1),x.size(0),x.size(2),x.size(3)) - # print(x.size()) - # x = self.avgpool(x) - return x - - - - -class encoder_2d(nn.Module): - def __init__(self, in_channel): - super(encoder_2d, self).__init__() - self.inc = inconv(in_channel, 64) - self.down1 = down(64, 128) - self.down2 = down(128, 256) - self.down3 = down(256, 512) - self.down4 = down(512, 512) - - def forward(self, x): - x1 = self.inc(x) - x2 = self.down1(x1) - x3 = self.down2(x2) - x4 = self.down3(x3) - x5 = self.down4(x4) - - return x1,x2,x3,x4,x5 - -class decoder_2d(nn.Module): - def __init__(self, out_channel): - super(decoder_2d, self).__init__() - self.up1 = up(1024, 256,bilinear=False) - self.up2 = up(512, 128,bilinear=False) - self.up3 = up(256, 64,bilinear=False) - self.up4 = up(128, 64,bilinear=False) - self.outc = outconv(64, out_channel) - - def forward(self,x5,x4,x3,x2,x1): - x = self.up1(x5, x4) - x = self.up2(x, x3) - x = self.up3(x, x2) - x = self.up4(x, x1) - x = self.outc(x) - - return x - - -class HypoNet(nn.Module): - def __init__(self, in_channel, out_channel): - super(HypoNet, self).__init__() - - self.encoder_2d = encoder_2d(4) - self.encoder_3d = encoder_3d(in_channel) - self.decoder_2d = decoder_2d(out_channel) - - def forward(self, x): - - N = int((x.size()[1])/3) - x_2d = torch.cat((x[:,int((N-1)/2)*3:(int((N-1)/2)+1)*3,:,:], x[:,N-1:N,:,:]), 1) - # print(x_2d.size()) - x_3d = self.encoder_3d(x) - - 
x1,x2,x3,x4,x5 = self.encoder_2d(x_2d) - x5 = x5 + x_3d - x_2d = self.decoder_2d(x5,x4,x3,x2,x1) - - return x_2d - diff --git a/train/clean/train.py b/train/clean/train.py index 587e55b1219114c70c17e2c0595d1916a446a3c3..962aeeb12cb13e1df7f0a36395932adea649f850 100644 --- a/train/clean/train.py +++ b/train/clean/train.py @@ -12,37 +12,41 @@ sys.path.append("../..") from util import mosaic,util,ffmpeg,filt,data from util import image_processing as impro from cores import Options -from models import pix2pix_model,pix2pixHD_model,video_model,unet_model,loadmodel +from models import pix2pix_model,pix2pixHD_model,video_model,unet_model,loadmodel,videoHD_model from matplotlib import pyplot as plt import torch.backends.cudnn as cudnn -N = 25 -ITER = 10000000 -LR = 0.0002 -beta1 = 0.5 -use_gpu = True -use_gan = False -use_L2 = False -CONTINUE = True -lambda_L1 = 100.0 -lambda_gan = 0.5 +opt = Options() +opt.parser.add_argument('--N',type=int,default=25, help='') +opt.parser.add_argument('--lr',type=float,default=0.0002, help='') +opt.parser.add_argument('--beta1',type=float,default=0.5, help='') +opt.parser.add_argument('--gan', action='store_true', help='if input it, use gan') +opt.parser.add_argument('--l2', action='store_true', help='if input it, use L2 loss') +opt.parser.add_argument('--lambda_L1',type=float,default=100, help='') +opt.parser.add_argument('--lambda_gan',type=float,default=1, help='') +opt.parser.add_argument('--finesize',type=int,default=256, help='') +opt.parser.add_argument('--loadsize',type=int,default=286, help='') +opt.parser.add_argument('--batchsize',type=int,default=1, help='') +opt.parser.add_argument('--perload_num',type=int,default=16, help='') +opt.parser.add_argument('--norm',type=str,default='instance', help='') -SAVE_FRE = 10000 -start_iter = 0 -finesize = 256 -loadsize = int(finesize*1.2) -batchsize = 6 -perload_num = 16 -# savename = 'MosaicNet_instance_gan_256_hdD' -savename = 'MosaicNet_instance_test' -dir_checkpoint = 'checkpoints/'+savename +opt.parser.add_argument('--maxiter',type=int,default=10000000, help='') +opt.parser.add_argument('--savefreq',type=int,default=10000, help='') +opt.parser.add_argument('--startiter',type=int,default=0, help='') +opt.parser.add_argument('--continuetrain', action='store_true', help='') +opt.parser.add_argument('--savename',type=str,default='MosaicNet', help='') + +opt = opt.getparse() +dir_checkpoint = os.path.join('checkpoints/',opt.savename) util.makedirs(dir_checkpoint) +util.writelog(os.path.join(dir_checkpoint,'loss.txt'), + str(time.asctime(time.localtime(time.time())))+'\n'+util.opt2str(opt)) +N = opt.N loss_sum = [0.,0.,0.,0.] 
loss_plot = [[],[]] item_plot = [] -opt = Options().getparse() videos = os.listdir('./dataset') videos.sort() lengths = [] @@ -53,39 +57,39 @@ for video in videos: #unet_128 #resnet_9blocks #netG = pix2pix_model.define_G(3*N+1, 3, 128, 'resnet_6blocks', norm='instance',use_dropout=True, init_type='normal', gpu_ids=[]) -netG = video_model.MosaicNet(3*N+1, 3, norm='instance') +netG = videoHD_model.MosaicNet(3*N+1, 3, norm=opt.norm) loadmodel.show_paramsnumber(netG,'netG') # netG = unet_model.UNet(3*N+1, 3) -if use_gan: - netD = pix2pixHD_model.define_D(6, 64, 3, norm='instance', use_sigmoid=False, num_D=2) +if opt.gan: + netD = pix2pixHD_model.define_D(6, 64, 3, norm=opt.norm, use_sigmoid=False, num_D=2) #netD = pix2pix_model.define_D(3*2+1, 64, 'pixel', norm='instance') #netD = pix2pix_model.define_D(3*2, 64, 'basic', norm='instance') #netD = pix2pix_model.define_D(3*2+1, 64, 'n_layers', n_layers_D=5, norm='instance') -if CONTINUE: +if opt.continuetrain: if not os.path.isfile(os.path.join(dir_checkpoint,'last_G.pth')): - CONTINUE = False + opt.continuetrain = False print('can not load last_G, training on init weight.') -if CONTINUE: +if opt.continuetrain: netG.load_state_dict(torch.load(os.path.join(dir_checkpoint,'last_G.pth'))) - if use_gan: + if opt.gan: netD.load_state_dict(torch.load(os.path.join(dir_checkpoint,'last_D.pth'))) f = open(os.path.join(dir_checkpoint,'iter'),'r') - start_iter = int(f.read()) + opt.startiter = int(f.read()) f.close() -optimizer_G = torch.optim.Adam(netG.parameters(), lr=LR,betas=(beta1, 0.999)) +optimizer_G = torch.optim.Adam(netG.parameters(), lr=opt.lr,betas=(opt.beta1, 0.999)) criterion_L1 = nn.L1Loss() criterion_L2 = nn.MSELoss() -if use_gan: - optimizer_D = torch.optim.Adam(netG.parameters(), lr=LR,betas=(beta1, 0.999)) +if opt.gan: + optimizer_D = torch.optim.Adam(netG.parameters(), lr=opt.lr,betas=(opt.beta1, 0.999)) # criterionGAN = pix2pix_model.GANLoss(gan_mode='lsgan').cuda() criterionGAN = pix2pixHD_model.GANLoss(tensor=torch.cuda.FloatTensor) netD.train() -if use_gpu: +if opt.use_gpu: netG.cuda() - if use_gan: + if opt.gan: netD.cuda() criterionGAN.cuda() cudnn.benchmark = True @@ -93,22 +97,22 @@ if use_gpu: def loaddata(): video_index = random.randint(0,len(videos)-1) video = videos[video_index] - img_index = random.randint(N,lengths[video_index]- N) - input_img = np.zeros((loadsize,loadsize,3*N+1), dtype='uint8') + img_index = random.randint(int(N/2)+1,lengths[video_index]- int(N/2)-1) + input_img = np.zeros((opt.loadsize,opt.loadsize,3*N+1), dtype='uint8') for i in range(0,N): img = cv2.imread('./dataset/'+video+'/mosaic/output_'+'%05d'%(img_index+i-int(N/2))+'.png') - img = impro.resize(img,loadsize) + img = impro.resize(img,opt.loadsize) input_img[:,:,i*3:(i+1)*3] = img mask = cv2.imread('./dataset/'+video+'/mask/output_'+'%05d'%(img_index)+'.png',0) - mask = impro.resize(mask,loadsize) + mask = impro.resize(mask,opt.loadsize) mask = impro.mask_threshold(mask,15,128) input_img[:,:,-1] = mask ground_true = cv2.imread('./dataset/'+video+'/ori/output_'+'%05d'%(img_index)+'.png') - ground_true = impro.resize(ground_true,loadsize) + ground_true = impro.resize(ground_true,opt.loadsize) - input_img,ground_true = data.random_transform_video(input_img,ground_true,finesize,N) + input_img,ground_true = data.random_transform_video(input_img,ground_true,opt.finesize,N) input_img = data.im2tensor(input_img,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False,is0_1=False) ground_true = 
data.im2tensor(ground_true,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False,is0_1=False) @@ -116,17 +120,17 @@ def loaddata(): print('preloading data, please wait 5s...') -if perload_num <= batchsize: - perload_num = batchsize*2 -input_imgs = torch.rand(perload_num,N*3+1,finesize,finesize).cuda() -ground_trues = torch.rand(perload_num,3,finesize,finesize).cuda() +if opt.perload_num <= opt.batchsize: + opt.perload_num = opt.batchsize*2 +input_imgs = torch.rand(opt.perload_num,N*3+1,opt.finesize,opt.finesize).cuda() +ground_trues = torch.rand(opt.perload_num,3,opt.finesize,opt.finesize).cuda() load_cnt = 0 def preload(): global load_cnt while 1: try: - ran = random.randint(0, perload_num-1) + ran = random.randint(0, opt.perload_num-1) input_imgs[ran],ground_trues[ran] = loaddata() load_cnt += 1 # time.sleep(0.1) @@ -139,24 +143,24 @@ t.daemon = True t.start() time_start=time.time() -while load_cnt < perload_num: +while load_cnt < opt.perload_num: time.sleep(0.1) time_end=time.time() -print('load speed:',round((time_end-time_start)/perload_num,3),'s/it') +print('load speed:',round((time_end-time_start)/opt.perload_num,3),'s/it') util.copyfile('./train.py', os.path.join(dir_checkpoint,'train.py')) -util.copyfile('../../models/video_model.py', os.path.join(dir_checkpoint,'model.py')) +util.copyfile('../../models/videoHD_model.py', os.path.join(dir_checkpoint,'model.py')) netG.train() time_start=time.time() print("Begin training...") -for iter in range(start_iter+1,ITER): +for iter in range(opt.startiter+1,opt.maxiter): - ran = random.randint(0, perload_num-batchsize-1) - inputdata = input_imgs[ran:ran+batchsize].clone() - target = ground_trues[ran:ran+batchsize].clone() + ran = random.randint(0, opt.perload_num-opt.batchsize-1) + inputdata = input_imgs[ran:ran+opt.batchsize].clone() + target = ground_trues[ran:ran+opt.batchsize].clone() - if use_gan: + if opt.gan: # compute fake images: G(A) pred = netG(inputdata) # update D @@ -186,12 +190,12 @@ for iter in range(start_iter+1,ITER): real_A = inputdata[:,int((N-1)/2)*3:(int((N-1)/2)+1)*3,:,:] fake_AB = torch.cat((real_A, pred), 1) pred_fake = netD(fake_AB) - loss_G_GAN = criterionGAN(pred_fake, True)*lambda_gan + loss_G_GAN = criterionGAN(pred_fake, True)*opt.lambda_gan # Second, G(A) = B - if use_L2: - loss_G_L1 = (criterion_L1(pred, target)+criterion_L2(pred, target)) * lambda_L1 + if opt.l2: + loss_G_L1 = (criterion_L1(pred, target)+criterion_L2(pred, target)) * opt.lambda_L1 else: - loss_G_L1 = criterion_L1(pred, target) * lambda_L1 + loss_G_L1 = criterion_L1(pred, target) * opt.lambda_L1 # combine loss and calculate gradients loss_G = loss_G_GAN + loss_G_L1 loss_sum[0] += loss_G_L1.item() @@ -202,10 +206,10 @@ for iter in range(start_iter+1,ITER): else: pred = netG(inputdata) - if use_L2: - loss_G_L1 = (criterion_L1(pred, target)+criterion_L2(pred, target)) * lambda_L1 + if opt.l2: + loss_G_L1 = (criterion_L1(pred, target)+criterion_L2(pred, target)) * opt.lambda_L1 else: - loss_G_L1 = criterion_L1(pred, target) * lambda_L1 + loss_G_L1 = criterion_L1(pred, target) * opt.lambda_L1 loss_sum[0] += loss_G_L1.item() optimizer_G.zero_grad() @@ -215,15 +219,16 @@ for iter in range(start_iter+1,ITER): if (iter+1)%100 == 0: try: data.showresult(inputdata[:,int((N-1)/2)*3:(int((N-1)/2)+1)*3,:,:], - target, pred,os.path.join(dir_checkpoint,'result_train.png')) + target, pred,os.path.join(dir_checkpoint,'result_train.jpg')) except Exception as e: print(e) if (iter+1)%1000 == 0: time_end = time.time() - if use_gan: - print('iter:',iter+1,' 
L1_loss:', round(loss_sum[0]/1000,4),' G_loss:', round(loss_sum[1]/1000,4), - ' D_f:',round(loss_sum[2]/1000,4),' D_r:',round(loss_sum[3]/1000,4),' time:',round((time_end-time_start)/1000,2)) + if opt.gan: + savestr ='iter:{0:d} L1_loss:{1:.4f} G_loss:{2:.4f} D_f:{3:.4f} D_r:{4:.4f} time:{5:.2f}'.format( + iter+1,loss_sum[0]/1000,loss_sum[1]/1000,loss_sum[2]/1000,loss_sum[3]/1000,(time_end-time_start)/1000) + util.writelog(os.path.join(dir_checkpoint,'loss.txt'), savestr,True) if (iter+1)/1000 >= 10: loss_plot[0].append(loss_sum[0]/1000) loss_plot[1].append(loss_sum[1]/1000) @@ -231,18 +236,19 @@ for iter in range(start_iter+1,ITER): try: plt.plot(item_plot,loss_plot[0]) plt.plot(item_plot,loss_plot[1]) - plt.savefig(os.path.join(dir_checkpoint,'loss.png')) + plt.savefig(os.path.join(dir_checkpoint,'loss.jpg')) plt.close() except Exception as e: print("error:",e) else: - print('iter:',iter+1,' L1_loss:',round(loss_sum[0]/1000,4),' time:',round((time_end-time_start)/1000,2)) + savestr ='iter:{0:d} L1_loss:{1:.4f} time:{2:.2f}'.format(iter+1,loss_sum[0]/1000,(time_end-time_start)/1000) + util.writelog(os.path.join(dir_checkpoint,'loss.txt'), savestr,True) if (iter+1)/1000 >= 10: loss_plot[0].append(loss_sum[0]/1000) item_plot.append(iter+1) try: plt.plot(item_plot,loss_plot[0]) - plt.savefig(os.path.join(dir_checkpoint,'loss.png')) + plt.savefig(os.path.join(dir_checkpoint,'loss.jpg')) plt.close() except Exception as e: print("error:",e) @@ -250,17 +256,17 @@ for iter in range(start_iter+1,ITER): time_start=time.time() - if (iter+1)%SAVE_FRE == 0: - if iter+1 != SAVE_FRE: - os.rename(os.path.join(dir_checkpoint,'last_G.pth'),os.path.join(dir_checkpoint,str(iter+1-SAVE_FRE)+'G.pth')) + if (iter+1)%opt.savefreq == 0: + if iter+1 != opt.savefreq: + os.rename(os.path.join(dir_checkpoint,'last_G.pth'),os.path.join(dir_checkpoint,str(iter+1-opt.savefreq)+'G.pth')) torch.save(netG.cpu().state_dict(),os.path.join(dir_checkpoint,'last_G.pth')) - if use_gan: - if iter+1 != SAVE_FRE: - os.rename(os.path.join(dir_checkpoint,'last_D.pth'),os.path.join(dir_checkpoint,str(iter+1-SAVE_FRE)+'D.pth')) + if opt.gan: + if iter+1 != opt.savefreq: + os.rename(os.path.join(dir_checkpoint,'last_D.pth'),os.path.join(dir_checkpoint,str(iter+1-opt.savefreq)+'D.pth')) torch.save(netD.cpu().state_dict(),os.path.join(dir_checkpoint,'last_D.pth')) - if use_gpu: + if opt.use_gpu: netG.cuda() - if use_gan: + if opt.gan: netD.cuda() f = open(os.path.join(dir_checkpoint,'iter'),'w+') f.write(str(iter+1)) @@ -272,27 +278,27 @@ for iter in range(start_iter+1,ITER): test_names = os.listdir('./test') test_names.sort() - result = np.zeros((finesize*2,finesize*len(test_names),3), dtype='uint8') + result = np.zeros((opt.finesize*2,opt.finesize*len(test_names),3), dtype='uint8') for cnt,test_name in enumerate(test_names,0): img_names = os.listdir(os.path.join('./test',test_name,'image')) img_names.sort() - inputdata = np.zeros((finesize,finesize,3*N+1), dtype='uint8') + inputdata = np.zeros((opt.finesize,opt.finesize,3*N+1), dtype='uint8') for i in range(0,N): img = impro.imread(os.path.join('./test',test_name,'image',img_names[i])) - img = impro.resize(img,finesize) + img = impro.resize(img,opt.finesize) inputdata[:,:,i*3:(i+1)*3] = img mask = impro.imread(os.path.join('./test',test_name,'mask.png'),'gray') - mask = impro.resize(mask,finesize) + mask = impro.resize(mask,opt.finesize) mask = impro.mask_threshold(mask,15,128) inputdata[:,:,-1] = mask - result[0:finesize,finesize*cnt:finesize*(cnt+1),:] = 
inputdata[:,:,int((N-1)/2)*3:(int((N-1)/2)+1)*3] + result[0:opt.finesize,opt.finesize*cnt:opt.finesize*(cnt+1),:] = inputdata[:,:,int((N-1)/2)*3:(int((N-1)/2)+1)*3] inputdata = data.im2tensor(inputdata,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False,is0_1 = False) pred = netG(inputdata) pred = data.tensor2im(pred,rgb2bgr = False, is0_1 = False) - result[finesize:finesize*2,finesize*cnt:finesize*(cnt+1),:] = pred + result[opt.finesize:opt.finesize*2,opt.finesize*cnt:opt.finesize*(cnt+1),:] = pred - cv2.imwrite(os.path.join(dir_checkpoint,str(iter+1)+'_test.png'), result) + cv2.imwrite(os.path.join(dir_checkpoint,str(iter+1)+'_test.jpg'), result) netG.train() diff --git a/clean_cache.py b/util/clean_cache.py similarity index 63% rename from clean_cache.py rename to util/clean_cache.py index a6f5104fc62c647aa1c2f270338b07702e2b4a3b..80c47b93d2693997cba79b3f4328d970baa572a3 100644 --- a/clean_cache.py +++ b/util/clean_cache.py @@ -35,16 +35,17 @@ def is_video(path): else: return False -file_list,dir_list = Traversal('./') -for file in file_list: - if ('tmp' in file) | ('pth' in file)|('pycache' in file) | is_video(file) | is_img(file): - if os.path.exists(file): - if 'imgs' not in file: - os.remove(file) - print('remove file:',file) +def cleanall(): + file_list,dir_list = Traversal('./') + for file in file_list: + if ('tmp' in file) | ('pth' in file)|('pycache' in file) | is_video(file) | is_img(file): + if os.path.exists(file): + if 'imgs' not in file: + os.remove(file) + print('remove file:',file) -for dir in dir_list: - if ('tmp'in dir)|('pycache'in dir): - if os.path.exists(dir): - shutil.rmtree(dir) - print('remove dir:',dir) \ No newline at end of file + for dir in dir_list: + if ('tmp'in dir)|('pycache'in dir): + if os.path.exists(dir): + shutil.rmtree(dir) + print('remove dir:',dir) \ No newline at end of file diff --git a/util/data.py b/util/data.py index b60a0d1a2f463829c7f2eff704e9b872913400b1..567c397cf7b804cca388a74c93e9e7014aff4b44 100755 --- a/util/data.py +++ b/util/data.py @@ -14,17 +14,21 @@ transform = transforms.Compose([ def tensor2im(image_tensor, imtype=np.uint8, gray=False, rgb2bgr = True ,is0_1 = False): image_tensor =image_tensor.data image_numpy = image_tensor[0].cpu().float().numpy() - # if gray: - # image_numpy = (image_numpy+1.0)/2.0 * 255.0 - # else: - if image_numpy.shape[0] == 1: - image_numpy = np.tile(image_numpy, (3, 1, 1)) - - image_numpy = image_numpy.transpose((1, 2, 0)) - + if not is0_1: image_numpy = (image_numpy + 1)/2.0 - image_numpy = np.clip(image_numpy * 255.0,0,255) + image_numpy = np.clip(image_numpy * 255.0,0,255) + + # gray -> output 1ch + if gray: + h, w = image_numpy.shape[1:] + image_numpy = image_numpy.reshape(h,w) + return image_numpy.astype(imtype) + + # output 3ch + if image_numpy.shape[0] == 1: + image_numpy = np.tile(image_numpy, (3, 1, 1)) + image_numpy = image_numpy.transpose((1, 2, 0)) if rgb2bgr and not gray: image_numpy = image_numpy[...,::-1]-np.zeros_like(image_numpy) return image_numpy.astype(imtype) diff --git a/util/ffmpeg.py b/util/ffmpeg.py index 9b02153e567779deb145910408628fb3feb2f217..f91f888aab84f285487f6033e6b98ee0366a6ea1 100755 --- a/util/ffmpeg.py +++ b/util/ffmpeg.py @@ -2,20 +2,28 @@ import os,json # ffmpeg 3.4.6 -def video2image(videopath,imagepath): - os.system('ffmpeg -i "'+videopath+'" -f image2 '+imagepath) +def video2image(videopath,imagepath,fps=0): + if fps == 0: + os.system('ffmpeg -i "'+videopath+'" -f image2 '+imagepath) + else: + os.system('ffmpeg -i "'+videopath+'" -r '+str(fps)+' -f image2 
'+imagepath) def video2voice(videopath,voicepath): os.system('ffmpeg -i "'+videopath+'" -f mp3 '+voicepath) def image2video(fps,imagepath,voicepath,videopath): - os.system('ffmpeg -y -r '+str(fps)+' -i '+imagepath+' -vcodec libx264 -b 12M '+'./tmp/video_tmp.mp4') + os.system('ffmpeg -y -r '+str(fps)+' -i '+imagepath+' -vcodec libx264 '+'./tmp/video_tmp.mp4') #os.system('ffmpeg -f image2 -i '+imagepath+' -vcodec libx264 -r '+str(fps)+' ./tmp/video_tmp.mp4') os.system('ffmpeg -i ./tmp/video_tmp.mp4 -i "'+voicepath+'" -vcodec copy -acodec copy '+videopath) def get_video_infos(videopath): cmd_str = 'ffprobe -v quiet -print_format json -show_format -show_streams -i "' + videopath + '"' - out_string = os.popen(cmd_str).read() + #out_string = os.popen(cmd_str).read() + #For Chinese paths in Windows + #https://blog.csdn.net/weixin_43903378/article/details/91979025 + stream = os.popen(cmd_str)._stream + out_string = stream.buffer.read().decode(encoding='utf-8') + infos = json.loads(out_string) try: fps = eval(infos['streams'][0]['avg_frame_rate']) @@ -28,7 +36,7 @@ width = int(infos['streams'][1]['width']) height = int(infos['streams'][1]['height']) - return fps,endtime,width,height + return fps,endtime,height,width def cut_video(in_path,start_time,last_time,out_path,vcodec='h265'): if vcodec == 'copy': diff --git a/util/image_processing.py b/util/image_processing.py index 56e7b7c580adebfc791f44c4955e441cc8e0722a..8c6455f79831a2eba224ec0bf34351810eae3d75 100755 --- a/util/image_processing.py +++ b/util/image_processing.py @@ -19,7 +19,7 @@ def imread(file_path,mod = 'normal'): elif mod == 'all': img = cv2.imread(file_path,-1) - #For chinese path, use cv2.imdecode in windows. + #In Windows, for Chinese paths, use cv2.imdecode instead.
#It will loss EXIF, I can't fix it else: if mod == 'gray': @@ -133,7 +133,7 @@ def mergeimage(img1,img2,orgin_image,size = 128): result_img = cv2.add(new_img1,new_img2) return result_img -def find_best_ROI(mask): +def find_mostlikely_ROI(mask): contours,hierarchy=cv2.findContours(mask, cv2.RETR_LIST,cv2.CHAIN_APPROX_SIMPLE) if len(contours)>0: areas = [] @@ -182,9 +182,9 @@ def boundingSquare(mask,Ex_mul): center = ((point0+point1)/2).astype('int') return center[0],center[1],halfsize,area -def mask_threshold(mask,blur,threshold): +def mask_threshold(mask,ex_mun,threshold): mask = cv2.threshold(mask,threshold,255,cv2.THRESH_BINARY)[1] - mask = cv2.blur(mask, (blur, blur)) + mask = cv2.blur(mask, (ex_mun, ex_mun)) mask = cv2.threshold(mask,threshold/5,255,cv2.THRESH_BINARY)[1] return mask @@ -200,7 +200,7 @@ def mask_area(mask): def replace_mosaic(img_origin,img_fake,x,y,size,no_father): - img_fake = resize(img_fake,size*2) + img_fake = resize(img_fake,size*2,interpolation=cv2.INTER_LANCZOS4) if no_father: img_origin[y-size:y+size,x-size:x+size]=img_fake img_result = img_origin diff --git a/util/util.py b/util/util.py index 91461684e094a44d7a011656174b2b87a3b6f336..7ec3cb87ec3451af4e3cbc9e49967a4630c9058d 100755 --- a/util/util.py +++ b/util/util.py @@ -40,10 +40,12 @@ def is_videos(paths): tmp.append(path) return tmp -def writelog(path,log): +def writelog(path,log,isprint=False): f = open(path,'a+') f.write(log+'\n') f.close() + if isprint: + print(log) def makedirs(path): if os.path.isdir(path): @@ -87,3 +89,11 @@ def copyfile(src,dst): shutil.copyfile(src, dst) except Exception as e: print(e) + +def opt2str(opt): + message = '' + message += '---------------------- Options --------------------\n' + for k, v in sorted(vars(opt).items()): + message += '{:>25}: {:<35}\n'.format(str(k), str(v)) + message += '----------------- End -------------------' + return message
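One detail of the `cleanmosaic_video_fusion` rewrite above that is easy to miss in patch form is the rolling frame pool: instead of re-reading all N=25 neighbouring frames from disk for every output frame, the new code keeps them stacked along the channel axis of a single `(H, W, 3N)` array and advances the stack by one frame per step. Below is a minimal standalone sketch of that mechanism; `read_frame` is a hypothetical stand-in for `impro.imread` on the extracted temp images, not a function from the patch.

```python
import numpy as np

N = 25                 # temporal window, as in cleanmosaic_video_fusion
HALF = (N - 1) // 2    # 12 frames on each side of the center frame

def update_pool(pool, i, num_frames, read_frame):
    """Advance a (H, W, 3N) uint8 pool so it holds frames [i-HALF, i+HALF],
    clipped to the video bounds, and return the center frame."""
    if i == 0:
        # first step: fill the whole window
        for j in range(N):
            k = int(np.clip(i + j - HALF, 0, num_frames - 1))
            pool[:, :, j*3:(j+1)*3] = read_frame(k)
    else:
        # later steps: drop the oldest frame by shifting 3 channels left,
        # then append the newest frame on the right
        pool[:, :, 0:(N-1)*3] = pool[:, :, 3:N*3]
        k = int(np.clip(i + HALF, 0, num_frames - 1))
        pool[:, :, (N-1)*3:] = read_frame(k)
    # the frame currently being cleaned sits in the middle of the window
    return pool[:, :, HALF*3:(HALF+1)*3]
```

Each step now costs one image read instead of N, which is where most of the speedup of the rewritten loop comes from; the price is holding 25 full-resolution frames in memory at once.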
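The new `--traditional` mode needs no generator network at all; as `runmodel.traditional_cleaner` in the patch shows, it is just blur, decimate, and Lanczos upsample. Restated here as a self-contained function for clarity (the defaults mirror the new `--tr_blur` and `--tr_down` options):

```python
import cv2

def traditional_clean_sketch(img, tr_blur=10, tr_down=10):
    """Blur away the mosaic grid, discard most pixels, then interpolate
    back to the original size (mirrors runmodel.traditional_cleaner)."""
    h, w = img.shape[:2]
    img = cv2.blur(img, (tr_blur, tr_blur))   # smear the mosaic blocks
    img = img[::tr_down, ::tr_down, :]        # keep every tr_down-th pixel
    return cv2.resize(img, (w, h), interpolation=cv2.INTER_LANCZOS4)
```

Larger `tr_blur` and `tr_down` values hide the mosaic grid more thoroughly but also discard more real detail, which is the quality trade-off the two new help strings point at.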