diff --git a/.gitignore b/.gitignore
index 03ab31855bc6f5fff72cb3a63b1e323593d9b219..973701941c8c07162d56b2656be17c5ea6bdf5e2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -153,6 +153,7 @@ result/
/python_test.py
/pretrained_models_old
/deepmosaic_window
+/sftp-config.json
#./make_datasets
/make_datasets/video
/make_datasets/tmp
@@ -160,6 +161,9 @@ result/
/make_datasets/datasets
/make_datasets/dataset
/make_datasets/datasets_img
+/make_datasets/videos
+#./models
+/models/videoHD_model.py
#./train
/train/clean/dataset
#mediafile
@@ -177,4 +181,5 @@ result/
*.rmvb
*.JPG
*.MP4
-*.JPEG
\ No newline at end of file
+*.JPEG
+*.exe
\ No newline at end of file
diff --git a/README.md b/README.md
index 6ac5fb35e288262f4639dbdc606b29b2b7a6f99b..15977a2819e6f1e469117d76a40e5aed069ef58f 100755
--- a/README.md
+++ b/README.md
@@ -48,17 +48,17 @@ git clone https://github.com/HypoX64/DeepMosaics
cd DeepMosaics
```
#### Get pre_trained models and test video
-You can download pre_trained models and test video and replace the files in the project.
+You can download pre_trained models and put them into './pretrained_models'.
[[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ)
#### Simple example
* Add Mosaic (output video will save in './result')
```bash
-python3 deepmosaic.py
+python3 deepmosaic.py --media_path ./imgs/ruoruo.jpg --model_path ./pretrained_models/mosaic/add_face.pth --use_gpu -1
```
* Clean Mosaic (output video will save in './result')
```bash
-python3 deepmosaic.py --mode clean --model_path ./pretrained_models/clean_hands_unet_128.pth --media_path ./result/hands_test_AddMosaic.mp4
+python3 deepmosaic.py --media_path ./result/ruoruo_add.jpg --model_path ./pretrained_models/mosaic/clean_face_HD.pth --use_gpu -1
```
#### More parameters
If you want to test other image or video, please refer to this file.
diff --git a/README_CN.md b/README_CN.md
index e6661f0cd54853a791862ed4b987cf8cbea69627..b4bef9bc188dad0d4d5eb3251e944c36b2707cb4 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -49,18 +49,18 @@
git clone https://github.com/HypoX64/DeepMosaics
cd DeepMosaics
```
-#### 下载测试视频以及预训练模型
-可以通过以下两种方法下载测试视频以及预训练模型,并将他们置于项目文件夹中.
+#### 下载预训练模型
+可以通过以下两种方法下载预训练模型,并将他们置于'./pretrained_models'文件夹中.
[[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ)
#### 简单的例子
-* 为视频添加马赛克,例子中认为手是需要打码的区域 ,可以通过切换预训练模型切换自动打码区域(输出结果将储存到 './result')
+* 为视频添加马赛克,例子中认为脸是需要打码的区域 ,可以通过切换预训练模型切换自动打码区域(输出结果将储存到 './result')
```bash
-python3 deepmosaic.py
+python3 deepmosaic.py --media_path ./imgs/ruoruo.jpg --model_path ./pretrained_models/mosaic/add_face.pth --use_gpu -1
```
* 将视频中的马赛克移除,对于不同的打码物体需要使用对应的预训练模型进行马赛克消除(输出结果将储存到 './result')
```bash
-python3 deepmosaic.py --mode clean --model_path ./pretrained_models/clean_hands_unet_128.pth --media_path ./result/hands_test_AddMosaic.mp4
+python3 deepmosaic.py --media_path ./result/ruoruo_add.jpg --model_path ./pretrained_models/mosaic/clean_face_HD.pth --use_gpu -1
```
#### 更多的参数
如果想要测试其他的图片或视频,请参照以下文件输入参数.
diff --git a/cores/core.py b/cores/core.py
index b320dbd1d0a3113a79f2f3fc788346c218a95470..e5ed377cf784d0f0d8c437eaf2d673d6c45df209 100644
--- a/cores/core.py
+++ b/cores/core.py
@@ -6,15 +6,23 @@ from models import runmodel,loadmodel
from util import mosaic,util,ffmpeg,filt,data
from util import image_processing as impro
+'''
+---------------------Video Init---------------------
+'''
def video_init(opt,path):
util.clean_tempfiles()
- fps = ffmpeg.get_video_infos(path)[0]
+ fps,endtime,height,width = ffmpeg.get_video_infos(path)
+ if opt.fps !=0:
+ fps = opt.fps
ffmpeg.video2voice(path,'./tmp/voice_tmp.mp3')
- ffmpeg.video2image(path,'./tmp/video2image/output_%05d.'+opt.tempimage_type)
+ ffmpeg.video2image(path,'./tmp/video2image/output_%05d.'+opt.tempimage_type,fps)
imagepaths=os.listdir('./tmp/video2image')
imagepaths.sort()
- return fps,imagepaths
+ return fps,imagepaths,height,width
+'''
+---------------------Add Mosaic---------------------
+'''
def addmosaic_img(opt,netS):
path = opt.media_path
print('Add Mosaic:',path)
@@ -25,7 +33,7 @@ def addmosaic_img(opt,netS):
def addmosaic_video(opt,netS):
path = opt.media_path
- fps,imagepaths = video_init(opt,path)
+ fps,imagepaths = video_init(opt,path)[:2]
# get position
positions = []
for i,imagepath in enumerate(imagepaths,1):
@@ -33,7 +41,7 @@ def addmosaic_video(opt,netS):
mask,x,y,area = runmodel.get_ROI_position(img,netS,opt)
positions.append([x,y,area])
cv2.imwrite(os.path.join('./tmp/ROI_mask',imagepath),mask)
- print('\r','Find ROI location:'+str(i)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=40),end='')
+ print('\r','Find ROI location:'+str(i)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=35),end='')
print('\nOptimize ROI locations...')
mask_index = filt.position_medfilt(np.array(positions), 7)
@@ -44,13 +52,16 @@ def addmosaic_video(opt,netS):
if impro.mask_area(mask)>100:
img = mosaic.addmosaic(img, mask, opt)
cv2.imwrite(os.path.join('./tmp/addmosaic_image',imagepaths[i]),img)
- print('\r','Add Mosaic:'+str(i+1)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=40),end='')
+ print('\r','Add Mosaic:'+str(i+1)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=35),end='')
print()
ffmpeg.image2video( fps,
'./tmp/addmosaic_image/output_%05d.'+opt.tempimage_type,
'./tmp/voice_tmp.mp3',
os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_add.mp4'))
+'''
+---------------------Style Transfer---------------------
+'''
def styletransfer_img(opt,netG):
print('Style Transfer_img:',opt.media_path)
img = impro.imread(opt.media_path)
@@ -61,13 +72,13 @@ def styletransfer_img(opt,netG):
def styletransfer_video(opt,netG):
path = opt.media_path
positions = []
- fps,imagepaths = video_init(opt,path)
+ fps,imagepaths = video_init(opt,path)[:2]
for i,imagepath in enumerate(imagepaths,1):
img = impro.imread(os.path.join('./tmp/video2image',imagepath))
img = runmodel.run_styletransfer(opt, netG, img)
cv2.imwrite(os.path.join('./tmp/style_transfer',imagepath),img)
- print('\r','Transfer:'+str(i)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=40),end='')
+ print('\r','Transfer:'+str(i)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=35),end='')
print()
suffix = os.path.basename(opt.model_path).replace('.pth','').replace('style_','')
ffmpeg.image2video( fps,
@@ -75,6 +86,24 @@ def styletransfer_video(opt,netG):
'./tmp/voice_tmp.mp3',
os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_'+suffix+'.mp4'))
+'''
+---------------------Clean Mosaic---------------------
+'''
+def get_mosaic_positions(opt,netM,imagepaths,savemask=True):
+ # get mosaic position
+ positions = []
+ for i,imagepath in enumerate(imagepaths,1):
+ img_origin = impro.imread(os.path.join('./tmp/video2image',imagepath))
+ x,y,size,mask = runmodel.get_mosaic_position(img_origin,netM,opt)
+ if savemask:
+ cv2.imwrite(os.path.join('./tmp/mosaic_mask',imagepath), mask)
+ positions.append([x,y,size])
+ print('\r','Find mosaic location:'+str(i)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=35),end='')
+ print('\nOptimize mosaic locations...')
+ positions =np.array(positions)
+ for i in range(3):positions[:,i] = filt.medfilt(positions[:,i],opt.medfilt_num)
+ return positions
+
def cleanmosaic_img(opt,netG,netM):
path = opt.media_path
@@ -85,7 +114,10 @@ def cleanmosaic_img(opt,netG,netM):
img_result = img_origin.copy()
if size != 0 :
img_mosaic = img_origin[y-size:y+size,x-size:x+size]
- img_fake = runmodel.run_pix2pix(img_mosaic,netG,opt)
+ if opt.traditional:
+ img_fake = runmodel.traditional_cleaner(img_mosaic,opt)
+ else:
+ img_fake = runmodel.run_pix2pix(img_mosaic,netG,opt)
img_result = impro.replace_mosaic(img_origin,img_fake,x,y,size,opt.no_feather)
else:
print('Do not find mosaic')
@@ -93,19 +125,8 @@ def cleanmosaic_img(opt,netG,netM):
def cleanmosaic_video_byframe(opt,netG,netM):
path = opt.media_path
- fps,imagepaths = video_init(opt,path)
- positions = []
- # get position
- for i,imagepath in enumerate(imagepaths,1):
- img_origin = impro.imread(os.path.join('./tmp/video2image',imagepath))
- x,y,size = runmodel.get_mosaic_position(img_origin,netM,opt)[:3]
- positions.append([x,y,size])
- print('\r','Find mosaic location:'+str(i)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=40),end='')
-
- print('\nOptimize mosaic locations...')
- positions =np.array(positions)
- for i in range(3):positions[:,i] = filt.medfilt(positions[:,i],opt.medfilt_num)
-
+ fps,imagepaths = video_init(opt,path)[:2]
+ positions = get_mosaic_positions(opt,netM,imagepaths,savemask=False)
# clean mosaic
for i,imagepath in enumerate(imagepaths,0):
x,y,size = positions[i][0],positions[i][1],positions[i][2]
@@ -113,10 +134,13 @@ def cleanmosaic_video_byframe(opt,netG,netM):
img_result = img_origin.copy()
if size != 0:
img_mosaic = img_origin[y-size:y+size,x-size:x+size]
- img_fake = runmodel.run_pix2pix(img_mosaic,netG,opt)
- img_result = impro.replace_mosaic(img_origin,img_fake,x,y,size,opt.no_feather)
+ if opt.traditional:
+ img_fake = runmodel.traditional_cleaner(img_mosaic,opt)
+ else:
+ img_fake = runmodel.run_pix2pix(img_mosaic,netG,opt)
+ img_result = impro.replace_mosaic(img_origin,img_fake,x,y,size,opt.no_feather)
cv2.imwrite(os.path.join('./tmp/replace_mosaic',imagepath),img_result)
- print('\r','Clean Mosaic:'+str(i+1)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=40),end='')
+ print('\r','Clean Mosaic:'+str(i+1)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=35),end='')
print()
ffmpeg.image2video( fps,
'./tmp/replace_mosaic/output_%05d.'+opt.tempimage_type,
@@ -127,48 +151,39 @@ def cleanmosaic_video_fusion(opt,netG,netM):
path = opt.media_path
N = 25
INPUT_SIZE = 128
- fps,imagepaths = video_init(opt,path)
- positions = []
- # get position
- for i,imagepath in enumerate(imagepaths,1):
- img_origin = impro.imread(os.path.join('./tmp/video2image',imagepath))
- # x,y,size = runmodel.get_mosaic_position(img_origin,net_mosaic_pos,opt)[:3]
- x,y,size,mask = runmodel.get_mosaic_position(img_origin,netM,opt)
- cv2.imwrite(os.path.join('./tmp/mosaic_mask',imagepath), mask)
- positions.append([x,y,size])
- print('\r','Find mosaic location:'+str(i)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=40),end='')
- print('\nOptimize mosaic locations...')
- positions =np.array(positions)
- for i in range(3):positions[:,i] = filt.medfilt(positions[:,i],opt.medfilt_num)
-
+ fps,imagepaths,height,width = video_init(opt,path)
+ positions = get_mosaic_positions(opt,netM,imagepaths,savemask=True)
+
# clean mosaic
+ img_pool = np.zeros((height,width,3*N), dtype='uint8')
for i,imagepath in enumerate(imagepaths,0):
x,y,size = positions[i][0],positions[i][1],positions[i][2]
- img_origin = impro.imread(os.path.join('./tmp/video2image',imagepath))
+
+ # image read stream
mask = cv2.imread(os.path.join('./tmp/mosaic_mask',imagepath),0)
+ if i==0 :
+ for j in range(0,N):
+ img_pool[:,:,j*3:(j+1)*3] = impro.imread(os.path.join('./tmp/video2image',imagepaths[np.clip(i+j-12,0,len(imagepaths)-1)]))
+ else:
+ img_pool[:,:,0:(N-1)*3] = img_pool[:,:,3:N*3]
+ img_pool[:,:,(N-1)*3:] = impro.imread(os.path.join('./tmp/video2image',imagepaths[np.clip(i+12,0,len(imagepaths)-1)]))
+ img_origin = img_pool[:,:,int((N-1)/2)*3:(int((N-1)/2)+1)*3]
- if size==0:
+ if size==0: # can not find mosaic,
cv2.imwrite(os.path.join('./tmp/replace_mosaic',imagepath),img_origin)
else:
+
mosaic_input = np.zeros((INPUT_SIZE,INPUT_SIZE,3*N+1), dtype='uint8')
- for j in range(0,N):
- img = impro.imread(os.path.join('./tmp/video2image',imagepaths[np.clip(i+j-12,0,len(imagepaths)-1)]))
- img = img[y-size:y+size,x-size:x+size]
- img = impro.resize(img,INPUT_SIZE)
- mosaic_input[:,:,j*3:(j+1)*3] = img
- mask = impro.resize(mask,np.min(img_origin.shape[:2]))
- mask = mask[y-size:y+size,x-size:x+size]
- mask = impro.resize(mask, INPUT_SIZE)
- mosaic_input[:,:,-1] = mask
+ mosaic_input[:,:,0:N*3] = impro.resize(img_pool[y-size:y+size,x-size:x+size,:], INPUT_SIZE)
+ mask = impro.resize(mask,np.min(img_origin.shape[:2]))[y-size:y+size,x-size:x+size]
+ mosaic_input[:,:,-1] = impro.resize(mask, INPUT_SIZE)
+
mosaic_input = data.im2tensor(mosaic_input,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False,is0_1 = False)
unmosaic_pred = netG(mosaic_input)
-
- #unmosaic_pred = (unmosaic_pred.cpu().detach().numpy()*255)[0]
- #img_fake = unmosaic_pred.transpose((1, 2, 0))
img_fake = data.tensor2im(unmosaic_pred,rgb2bgr = False ,is0_1 = False)
img_result = impro.replace_mosaic(img_origin,img_fake,x,y,size,opt.no_feather)
cv2.imwrite(os.path.join('./tmp/replace_mosaic',imagepath),img_result)
- print('\r','Clean Mosaic:'+str(i+1)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=40),end='')
+ print('\r','Clean Mosaic:'+str(i+1)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=35),end='')
print()
ffmpeg.image2video( fps,
'./tmp/replace_mosaic/output_%05d.'+opt.tempimage_type,
diff --git a/cores/options.py b/cores/options.py
index b621a026d2a0c813235ed35113f8700db3ed238d..cf31aacf7fb1a2319e89a29f6be75635a2a8865d 100644
--- a/cores/options.py
+++ b/cores/options.py
@@ -10,15 +10,16 @@ class Options():
def initialize(self):
#base
- self.parser.add_argument('--use_gpu',type=int,default=1, help='if 0 or -1, do not use gpu')
+ self.parser.add_argument('--use_gpu',type=int,default=0, help='if -1, do not use gpu')
# self.parser.add_argument('--use_gpu', action='store_true', help='if input it, use gpu')
- self.parser.add_argument('--media_path', type=str, default='./hands_test.mp4',help='your videos or images path')
+ self.parser.add_argument('--media_path', type=str, default='./imgs/ruoruo.jpg',help='your videos or images path')
self.parser.add_argument('--mode', type=str, default='auto',help='auto | add | clean | style')
- self.parser.add_argument('--model_path', type=str, default='./pretrained_models/add_hands_128.pth',help='pretrained model path')
+ self.parser.add_argument('--model_path', type=str, default='./pretrained_models/mosaic/add_face.pth',help='pretrained model path')
self.parser.add_argument('--result_dir', type=str, default='./result',help='output media will be saved here')
self.parser.add_argument('--tempimage_type', type=str, default='png',help='type of temp image, png | jpg, png is better but occupy more storage space')
self.parser.add_argument('--netG', type=str, default='auto',
help='select model to use for netG(Clean mosaic and Transfer style) -> auto | unet_128 | unet_256 | resnet_9blocks | HD | video')
+ self.parser.add_argument('--fps', type=int, default=0,help='read and output fps, if 0-> origin')
self.parser.add_argument('--output_size', type=int, default=0,help='size of output file,if 0 -> origin')
#AddMosaic
@@ -29,8 +30,11 @@ class Options():
#CleanMosaic
self.parser.add_argument('--mosaic_position_model_path', type=str, default='auto',help='name of model use to find mosaic position')
+ self.parser.add_argument('--traditional', action='store_true', help='if true, use traditional image processing methods to clean mosaic')
+ self.parser.add_argument('--tr_blur', type=int, default=10, help='ksize of blur when using traditional method, it will affect final quality')
+ self.parser.add_argument('--tr_down', type=int, default=10, help='downsample when using traditional method,it will affect final quality')
self.parser.add_argument('--no_feather', action='store_true', help='if true, no edge feather and color correction, but run faster')
- self.parser.add_argument('--no_large_area', action='store_true', help='if true, do not find the largest mosaic area')
+ self.parser.add_argument('--all_mosaic_area', action='store_true', help='if true, find all mosaic area, else only find the largest area')
self.parser.add_argument('--medfilt_num', type=int, default=11,help='medfilt window of mosaic movement in the video')
self.parser.add_argument('--ex_mult', type=str, default='auto',help='mosaic area expansion')
@@ -50,17 +54,16 @@ class Options():
model_name = os.path.basename(self.opt.model_path)
- if torch.cuda.is_available() and self.opt.use_gpu > 0:
+ if torch.cuda.is_available() and self.opt.use_gpu > -1:
self.opt.use_gpu = True
else:
self.opt.use_gpu = False
-
if self.opt.mode == 'auto':
- if 'add' in model_name:
- self.opt.mode = 'add'
- elif 'clean' in model_name:
+ if 'clean' in model_name or self.opt.traditional:
self.opt.mode = 'clean'
+ elif 'add' in model_name:
+ self.opt.mode = 'add'
elif 'style' in model_name or 'edges' in model_name:
self.opt.mode = 'style'
else:
diff --git a/deepmosaic.py b/deepmosaic.py
index c75a06897dc9cd131fa31a02443682c898982c64..dec510296deeb08d011adcaa4ea064a9d9782039 100644
--- a/deepmosaic.py
+++ b/deepmosaic.py
@@ -25,7 +25,9 @@ def main():
elif opt.mode == 'clean':
netM = loadmodel.unet_clean(opt)
- if opt.netG == 'video':
+ if opt.traditional:
+ netG = None
+ elif opt.netG == 'video':
netG = loadmodel.video(opt)
else:
netG = loadmodel.pix2pix(opt)
@@ -35,7 +37,7 @@ def main():
if util.is_img(file):
core.cleanmosaic_img(opt,netG,netM)
elif util.is_video(file):
- if opt.netG == 'video':
+ if opt.netG == 'video' and not opt.traditional:
core.cleanmosaic_video_fusion(opt,netG,netM)
else:
core.cleanmosaic_video_byframe(opt,netG,netM)
@@ -56,12 +58,12 @@ def main():
util.clean_tempfiles(tmp_init = False)
-# main()
-if __name__ == '__main__':
- try:
- main()
- except Exception as e:
- print('Error:',e)
- input('Please press any key to exit.\n')
- util.clean_tempfiles(tmp_init = False)
- exit(0)
+main()
+# if __name__ == '__main__':
+# try:
+# main()
+# except Exception as e:
+# print('Error:',e)
+# input('Please press any key to exit.\n')
+# util.clean_tempfiles(tmp_init = False)
+# exit(0)
diff --git a/make_datasets/use_irregular_holes_mask_make_dataset.py b/make_datasets/use_irregular_holes_make_dataset.py
similarity index 100%
rename from make_datasets/use_irregular_holes_mask_make_dataset.py
rename to make_datasets/use_irregular_holes_make_dataset.py
diff --git a/models/runmodel.py b/models/runmodel.py
index ff156f5a77ea83754a30dfd85e9674b9579e389e..11b14dd2878c69b59c7460b72201b7c8108bcdb7 100755
--- a/models/runmodel.py
+++ b/models/runmodel.py
@@ -8,22 +8,10 @@ import torch
import numpy as np
def run_unet(img,net,size = 224,use_gpu = True):
- img=impro.image2folat(img,3)
- img=img.reshape(1,3,size,size)
- img = torch.from_numpy(img)
- if use_gpu:
- img=img.cuda()
- pred = net(img)
- pred = (pred.cpu().detach().numpy()*255)
- pred = pred.reshape(size,size).astype('uint8')
- return pred
-
-def run_unet_rectim(img,net,size = 224,use_gpu = True):
img = impro.resize(img,size)
- img1,img2 = impro.spiltimage(img,size)
- mask1 = run_unet(img1,net,size,use_gpu = use_gpu)
- mask2 = run_unet(img2,net,size,use_gpu = use_gpu)
- mask = impro.mergeimage(mask1,mask2,img,size)
+ img = data.im2tensor(img,use_gpu = use_gpu, bgr2rgb = False,use_transform = False , is0_1 = True)
+ mask = net(img)
+ mask = data.tensor2im(mask, gray=True,rgb2bgr = False, is0_1 = True)
return mask
def run_pix2pix(img,net,opt):
@@ -36,6 +24,13 @@ def run_pix2pix(img,net,opt):
img_fake = data.tensor2im(img_fake)
return img_fake
+def traditional_cleaner(img,opt):
+ h,w = img.shape[:2]
+ img = cv2.blur(img, (opt.tr_blur,opt.tr_blur))
+ img = img[::opt.tr_down,::opt.tr_down,:]
+ img = cv2.resize(img, (w,h),interpolation=cv2.INTER_LANCZOS4)
+ return img
+
def run_styletransfer(opt, net, img):
if opt.output_size != 0:
@@ -60,23 +55,22 @@ def run_styletransfer(opt, net, img):
return img
img = data.im2tensor(img,use_gpu=opt.use_gpu,gray=True,use_transform = False,is0_1 = False)
else:
- img = data.im2tensor(img,use_gpu=opt.use_gpu)
+ img = data.im2tensor(img,use_gpu=opt.use_gpu,gray=False,use_transform = True)
img = net(img)
img = data.tensor2im(img)
return img
def get_ROI_position(img,net,opt):
- mask = run_unet_rectim(img,net,use_gpu = opt.use_gpu)
+ mask = run_unet(img,net,size=224,use_gpu = opt.use_gpu)
mask = impro.mask_threshold(mask,opt.mask_extend,opt.mask_threshold)
x,y,halfsize,area = impro.boundingSquare(mask, 1)
return mask,x,y,area
def get_mosaic_position(img_origin,net_mosaic_pos,opt,threshold = 128 ):
- mask = run_unet_rectim(img_origin,net_mosaic_pos,use_gpu = opt.use_gpu)
- #mask_1 = mask.copy()
+ mask = run_unet(img_origin,net_mosaic_pos,size=224,use_gpu = opt.use_gpu)
mask = impro.mask_threshold(mask,30,threshold)
- if not opt.no_large_area:
- mask = impro.find_best_ROI(mask)
+ if not opt.all_mosaic_area:
+ mask = impro.find_mostlikely_ROI(mask)
x,y,size,area = impro.boundingSquare(mask,Ex_mul=opt.ex_mult)
rat = min(img_origin.shape[:2])/224.0
x,y,size = int(rat*x),int(rat*y),int(rat*size)
diff --git a/models/video_model_unet.py b/models/video_model_unet.py
deleted file mode 100644
index 8e338b89df83dd425b3e291dead5216998b84a8f..0000000000000000000000000000000000000000
--- a/models/video_model_unet.py
+++ /dev/null
@@ -1,108 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from .unet_parts import *
-
-
-class conv_3d(nn.Module):
- def __init__(self,inchannel,outchannel,kernel_size=3,stride=2,padding=1):
- super(conv_3d, self).__init__()
- self.conv = nn.Sequential(
- nn.Conv3d(inchannel, outchannel, kernel_size=kernel_size, stride=stride, padding=padding, bias=False),
- nn.BatchNorm3d(outchannel),
- nn.ReLU(inplace=True),
- )
-
- def forward(self, x):
- x = self.conv(x)
- return x
-
-
-class encoder_3d(nn.Module):
- def __init__(self,in_channel):
- super(encoder_3d, self).__init__()
- self.down1 = conv_3d(1, 64, 3, 2, 1)
- self.down2 = conv_3d(64, 128, 3, 2, 1)
- self.down3 = conv_3d(128, 256, 3, 2, 1)
- self.down4 = conv_3d(256, 512, 3, 2, 1)
- self.conver2d = nn.Sequential(
- nn.Conv2d(int(in_channel/16)+1, 1, kernel_size=3, stride=1, padding=1, bias=False),
- nn.BatchNorm2d(1),
- nn.ReLU(inplace=True),
- )
-
-
- def forward(self, x):
- x = x.view(x.size(0),1,x.size(1),x.size(2),x.size(3))
- x = self.down1(x)
- x = self.down2(x)
- x = self.down3(x)
- x = self.down4(x)
- x = x.view(x.size(1),x.size(2),x.size(3),x.size(4))
- x = self.conver2d(x)
- x = x.view(x.size(1),x.size(0),x.size(2),x.size(3))
- # print(x.size())
- # x = self.avgpool(x)
- return x
-
-
-
-
-class encoder_2d(nn.Module):
- def __init__(self, in_channel):
- super(encoder_2d, self).__init__()
- self.inc = inconv(in_channel, 64)
- self.down1 = down(64, 128)
- self.down2 = down(128, 256)
- self.down3 = down(256, 512)
- self.down4 = down(512, 512)
-
- def forward(self, x):
- x1 = self.inc(x)
- x2 = self.down1(x1)
- x3 = self.down2(x2)
- x4 = self.down3(x3)
- x5 = self.down4(x4)
-
- return x1,x2,x3,x4,x5
-
-class decoder_2d(nn.Module):
- def __init__(self, out_channel):
- super(decoder_2d, self).__init__()
- self.up1 = up(1024, 256,bilinear=False)
- self.up2 = up(512, 128,bilinear=False)
- self.up3 = up(256, 64,bilinear=False)
- self.up4 = up(128, 64,bilinear=False)
- self.outc = outconv(64, out_channel)
-
- def forward(self,x5,x4,x3,x2,x1):
- x = self.up1(x5, x4)
- x = self.up2(x, x3)
- x = self.up3(x, x2)
- x = self.up4(x, x1)
- x = self.outc(x)
-
- return x
-
-
-class HypoNet(nn.Module):
- def __init__(self, in_channel, out_channel):
- super(HypoNet, self).__init__()
-
- self.encoder_2d = encoder_2d(4)
- self.encoder_3d = encoder_3d(in_channel)
- self.decoder_2d = decoder_2d(out_channel)
-
- def forward(self, x):
-
- N = int((x.size()[1])/3)
- x_2d = torch.cat((x[:,int((N-1)/2)*3:(int((N-1)/2)+1)*3,:,:], x[:,N-1:N,:,:]), 1)
- # print(x_2d.size())
- x_3d = self.encoder_3d(x)
-
- x1,x2,x3,x4,x5 = self.encoder_2d(x_2d)
- x5 = x5 + x_3d
- x_2d = self.decoder_2d(x5,x4,x3,x2,x1)
-
- return x_2d
-
diff --git a/train/clean/train.py b/train/clean/train.py
index 587e55b1219114c70c17e2c0595d1916a446a3c3..962aeeb12cb13e1df7f0a36395932adea649f850 100644
--- a/train/clean/train.py
+++ b/train/clean/train.py
@@ -12,37 +12,41 @@ sys.path.append("../..")
from util import mosaic,util,ffmpeg,filt,data
from util import image_processing as impro
from cores import Options
-from models import pix2pix_model,pix2pixHD_model,video_model,unet_model,loadmodel
+from models import pix2pix_model,pix2pixHD_model,video_model,unet_model,loadmodel,videoHD_model
from matplotlib import pyplot as plt
import torch.backends.cudnn as cudnn
-N = 25
-ITER = 10000000
-LR = 0.0002
-beta1 = 0.5
-use_gpu = True
-use_gan = False
-use_L2 = False
-CONTINUE = True
-lambda_L1 = 100.0
-lambda_gan = 0.5
+opt = Options()
+opt.parser.add_argument('--N',type=int,default=25, help='')
+opt.parser.add_argument('--lr',type=float,default=0.0002, help='')
+opt.parser.add_argument('--beta1',type=float,default=0.5, help='')
+opt.parser.add_argument('--gan', action='store_true', help='if input it, use gan')
+opt.parser.add_argument('--l2', action='store_true', help='if input it, use L2 loss')
+opt.parser.add_argument('--lambda_L1',type=float,default=100, help='')
+opt.parser.add_argument('--lambda_gan',type=float,default=1, help='')
+opt.parser.add_argument('--finesize',type=int,default=256, help='')
+opt.parser.add_argument('--loadsize',type=int,default=286, help='')
+opt.parser.add_argument('--batchsize',type=int,default=1, help='')
+opt.parser.add_argument('--perload_num',type=int,default=16, help='')
+opt.parser.add_argument('--norm',type=str,default='instance', help='')
-SAVE_FRE = 10000
-start_iter = 0
-finesize = 256
-loadsize = int(finesize*1.2)
-batchsize = 6
-perload_num = 16
-# savename = 'MosaicNet_instance_gan_256_hdD'
-savename = 'MosaicNet_instance_test'
-dir_checkpoint = 'checkpoints/'+savename
+opt.parser.add_argument('--maxiter',type=int,default=10000000, help='')
+opt.parser.add_argument('--savefreq',type=int,default=10000, help='')
+opt.parser.add_argument('--startiter',type=int,default=0, help='')
+opt.parser.add_argument('--continuetrain', action='store_true', help='')
+opt.parser.add_argument('--savename',type=str,default='MosaicNet', help='')
+
+opt = opt.getparse()
+dir_checkpoint = os.path.join('checkpoints/',opt.savename)
util.makedirs(dir_checkpoint)
+util.writelog(os.path.join(dir_checkpoint,'loss.txt'),
+ str(time.asctime(time.localtime(time.time())))+'\n'+util.opt2str(opt))
+N = opt.N
loss_sum = [0.,0.,0.,0.]
loss_plot = [[],[]]
item_plot = []
-opt = Options().getparse()
videos = os.listdir('./dataset')
videos.sort()
lengths = []
@@ -53,39 +57,39 @@ for video in videos:
#unet_128
#resnet_9blocks
#netG = pix2pix_model.define_G(3*N+1, 3, 128, 'resnet_6blocks', norm='instance',use_dropout=True, init_type='normal', gpu_ids=[])
-netG = video_model.MosaicNet(3*N+1, 3, norm='instance')
+netG = videoHD_model.MosaicNet(3*N+1, 3, norm=opt.norm)
loadmodel.show_paramsnumber(netG,'netG')
# netG = unet_model.UNet(3*N+1, 3)
-if use_gan:
- netD = pix2pixHD_model.define_D(6, 64, 3, norm='instance', use_sigmoid=False, num_D=2)
+if opt.gan:
+ netD = pix2pixHD_model.define_D(6, 64, 3, norm=opt.norm, use_sigmoid=False, num_D=2)
#netD = pix2pix_model.define_D(3*2+1, 64, 'pixel', norm='instance')
#netD = pix2pix_model.define_D(3*2, 64, 'basic', norm='instance')
#netD = pix2pix_model.define_D(3*2+1, 64, 'n_layers', n_layers_D=5, norm='instance')
-if CONTINUE:
+if opt.continuetrain:
if not os.path.isfile(os.path.join(dir_checkpoint,'last_G.pth')):
- CONTINUE = False
+ opt.continuetrain = False
print('can not load last_G, training on init weight.')
-if CONTINUE:
+if opt.continuetrain:
netG.load_state_dict(torch.load(os.path.join(dir_checkpoint,'last_G.pth')))
- if use_gan:
+ if opt.gan:
netD.load_state_dict(torch.load(os.path.join(dir_checkpoint,'last_D.pth')))
f = open(os.path.join(dir_checkpoint,'iter'),'r')
- start_iter = int(f.read())
+ opt.startiter = int(f.read())
f.close()
-optimizer_G = torch.optim.Adam(netG.parameters(), lr=LR,betas=(beta1, 0.999))
+optimizer_G = torch.optim.Adam(netG.parameters(), lr=opt.lr,betas=(opt.beta1, 0.999))
criterion_L1 = nn.L1Loss()
criterion_L2 = nn.MSELoss()
-if use_gan:
- optimizer_D = torch.optim.Adam(netG.parameters(), lr=LR,betas=(beta1, 0.999))
+if opt.gan:
+ optimizer_D = torch.optim.Adam(netG.parameters(), lr=opt.lr,betas=(opt.beta1, 0.999))
# criterionGAN = pix2pix_model.GANLoss(gan_mode='lsgan').cuda()
criterionGAN = pix2pixHD_model.GANLoss(tensor=torch.cuda.FloatTensor)
netD.train()
-if use_gpu:
+if opt.use_gpu:
netG.cuda()
- if use_gan:
+ if opt.gan:
netD.cuda()
criterionGAN.cuda()
cudnn.benchmark = True
@@ -93,22 +97,22 @@ if use_gpu:
def loaddata():
video_index = random.randint(0,len(videos)-1)
video = videos[video_index]
- img_index = random.randint(N,lengths[video_index]- N)
- input_img = np.zeros((loadsize,loadsize,3*N+1), dtype='uint8')
+ img_index = random.randint(int(N/2)+1,lengths[video_index]- int(N/2)-1)
+ input_img = np.zeros((opt.loadsize,opt.loadsize,3*N+1), dtype='uint8')
for i in range(0,N):
img = cv2.imread('./dataset/'+video+'/mosaic/output_'+'%05d'%(img_index+i-int(N/2))+'.png')
- img = impro.resize(img,loadsize)
+ img = impro.resize(img,opt.loadsize)
input_img[:,:,i*3:(i+1)*3] = img
mask = cv2.imread('./dataset/'+video+'/mask/output_'+'%05d'%(img_index)+'.png',0)
- mask = impro.resize(mask,loadsize)
+ mask = impro.resize(mask,opt.loadsize)
mask = impro.mask_threshold(mask,15,128)
input_img[:,:,-1] = mask
ground_true = cv2.imread('./dataset/'+video+'/ori/output_'+'%05d'%(img_index)+'.png')
- ground_true = impro.resize(ground_true,loadsize)
+ ground_true = impro.resize(ground_true,opt.loadsize)
- input_img,ground_true = data.random_transform_video(input_img,ground_true,finesize,N)
+ input_img,ground_true = data.random_transform_video(input_img,ground_true,opt.finesize,N)
input_img = data.im2tensor(input_img,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False,is0_1=False)
ground_true = data.im2tensor(ground_true,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False,is0_1=False)
@@ -116,17 +120,17 @@ def loaddata():
print('preloading data, please wait 5s...')
-if perload_num <= batchsize:
- perload_num = batchsize*2
-input_imgs = torch.rand(perload_num,N*3+1,finesize,finesize).cuda()
-ground_trues = torch.rand(perload_num,3,finesize,finesize).cuda()
+if opt.perload_num <= opt.batchsize:
+ opt.perload_num = opt.batchsize*2
+input_imgs = torch.rand(opt.perload_num,N*3+1,opt.finesize,opt.finesize).cuda()
+ground_trues = torch.rand(opt.perload_num,3,opt.finesize,opt.finesize).cuda()
load_cnt = 0
def preload():
global load_cnt
while 1:
try:
- ran = random.randint(0, perload_num-1)
+ ran = random.randint(0, opt.perload_num-1)
input_imgs[ran],ground_trues[ran] = loaddata()
load_cnt += 1
# time.sleep(0.1)
@@ -139,24 +143,24 @@ t.daemon = True
t.start()
time_start=time.time()
-while load_cnt < perload_num:
+while load_cnt < opt.perload_num:
time.sleep(0.1)
time_end=time.time()
-print('load speed:',round((time_end-time_start)/perload_num,3),'s/it')
+print('load speed:',round((time_end-time_start)/opt.perload_num,3),'s/it')
util.copyfile('./train.py', os.path.join(dir_checkpoint,'train.py'))
-util.copyfile('../../models/video_model.py', os.path.join(dir_checkpoint,'model.py'))
+util.copyfile('../../models/videoHD_model.py', os.path.join(dir_checkpoint,'model.py'))
netG.train()
time_start=time.time()
print("Begin training...")
-for iter in range(start_iter+1,ITER):
+for iter in range(opt.startiter+1,opt.maxiter):
- ran = random.randint(0, perload_num-batchsize-1)
- inputdata = input_imgs[ran:ran+batchsize].clone()
- target = ground_trues[ran:ran+batchsize].clone()
+ ran = random.randint(0, opt.perload_num-opt.batchsize-1)
+ inputdata = input_imgs[ran:ran+opt.batchsize].clone()
+ target = ground_trues[ran:ran+opt.batchsize].clone()
- if use_gan:
+ if opt.gan:
# compute fake images: G(A)
pred = netG(inputdata)
# update D
@@ -186,12 +190,12 @@ for iter in range(start_iter+1,ITER):
real_A = inputdata[:,int((N-1)/2)*3:(int((N-1)/2)+1)*3,:,:]
fake_AB = torch.cat((real_A, pred), 1)
pred_fake = netD(fake_AB)
- loss_G_GAN = criterionGAN(pred_fake, True)*lambda_gan
+ loss_G_GAN = criterionGAN(pred_fake, True)*opt.lambda_gan
# Second, G(A) = B
- if use_L2:
- loss_G_L1 = (criterion_L1(pred, target)+criterion_L2(pred, target)) * lambda_L1
+ if opt.l2:
+ loss_G_L1 = (criterion_L1(pred, target)+criterion_L2(pred, target)) * opt.lambda_L1
else:
- loss_G_L1 = criterion_L1(pred, target) * lambda_L1
+ loss_G_L1 = criterion_L1(pred, target) * opt.lambda_L1
# combine loss and calculate gradients
loss_G = loss_G_GAN + loss_G_L1
loss_sum[0] += loss_G_L1.item()
@@ -202,10 +206,10 @@ for iter in range(start_iter+1,ITER):
else:
pred = netG(inputdata)
- if use_L2:
- loss_G_L1 = (criterion_L1(pred, target)+criterion_L2(pred, target)) * lambda_L1
+ if opt.l2:
+ loss_G_L1 = (criterion_L1(pred, target)+criterion_L2(pred, target)) * opt.lambda_L1
else:
- loss_G_L1 = criterion_L1(pred, target) * lambda_L1
+ loss_G_L1 = criterion_L1(pred, target) * opt.lambda_L1
loss_sum[0] += loss_G_L1.item()
optimizer_G.zero_grad()
@@ -215,15 +219,16 @@ for iter in range(start_iter+1,ITER):
if (iter+1)%100 == 0:
try:
data.showresult(inputdata[:,int((N-1)/2)*3:(int((N-1)/2)+1)*3,:,:],
- target, pred,os.path.join(dir_checkpoint,'result_train.png'))
+ target, pred,os.path.join(dir_checkpoint,'result_train.jpg'))
except Exception as e:
print(e)
if (iter+1)%1000 == 0:
time_end = time.time()
- if use_gan:
- print('iter:',iter+1,' L1_loss:', round(loss_sum[0]/1000,4),' G_loss:', round(loss_sum[1]/1000,4),
- ' D_f:',round(loss_sum[2]/1000,4),' D_r:',round(loss_sum[3]/1000,4),' time:',round((time_end-time_start)/1000,2))
+ if opt.gan:
+ savestr ='iter:{0:d} L1_loss:{1:.4f} G_loss:{2:.4f} D_f:{3:.4f} D_r:{4:.4f} time:{5:.2f}'.format(
+ iter+1,loss_sum[0]/1000,loss_sum[1]/1000,loss_sum[2]/1000,loss_sum[3]/1000,(time_end-time_start)/1000)
+ util.writelog(os.path.join(dir_checkpoint,'loss.txt'), savestr,True)
if (iter+1)/1000 >= 10:
loss_plot[0].append(loss_sum[0]/1000)
loss_plot[1].append(loss_sum[1]/1000)
@@ -231,18 +236,19 @@ for iter in range(start_iter+1,ITER):
try:
plt.plot(item_plot,loss_plot[0])
plt.plot(item_plot,loss_plot[1])
- plt.savefig(os.path.join(dir_checkpoint,'loss.png'))
+ plt.savefig(os.path.join(dir_checkpoint,'loss.jpg'))
plt.close()
except Exception as e:
print("error:",e)
else:
- print('iter:',iter+1,' L1_loss:',round(loss_sum[0]/1000,4),' time:',round((time_end-time_start)/1000,2))
+ savestr ='iter:{0:d} L1_loss:{1:.4f} time:{2:.2f}'.format(iter+1,loss_sum[0]/1000,(time_end-time_start)/1000)
+ util.writelog(os.path.join(dir_checkpoint,'loss.txt'), savestr,True)
if (iter+1)/1000 >= 10:
loss_plot[0].append(loss_sum[0]/1000)
item_plot.append(iter+1)
try:
plt.plot(item_plot,loss_plot[0])
- plt.savefig(os.path.join(dir_checkpoint,'loss.png'))
+ plt.savefig(os.path.join(dir_checkpoint,'loss.jpg'))
plt.close()
except Exception as e:
print("error:",e)
@@ -250,17 +256,17 @@ for iter in range(start_iter+1,ITER):
time_start=time.time()
- if (iter+1)%SAVE_FRE == 0:
- if iter+1 != SAVE_FRE:
- os.rename(os.path.join(dir_checkpoint,'last_G.pth'),os.path.join(dir_checkpoint,str(iter+1-SAVE_FRE)+'G.pth'))
+ if (iter+1)%opt.savefreq == 0:
+ if iter+1 != opt.savefreq:
+ os.rename(os.path.join(dir_checkpoint,'last_G.pth'),os.path.join(dir_checkpoint,str(iter+1-opt.savefreq)+'G.pth'))
torch.save(netG.cpu().state_dict(),os.path.join(dir_checkpoint,'last_G.pth'))
- if use_gan:
- if iter+1 != SAVE_FRE:
- os.rename(os.path.join(dir_checkpoint,'last_D.pth'),os.path.join(dir_checkpoint,str(iter+1-SAVE_FRE)+'D.pth'))
+ if opt.gan:
+ if iter+1 != opt.savefreq:
+ os.rename(os.path.join(dir_checkpoint,'last_D.pth'),os.path.join(dir_checkpoint,str(iter+1-opt.savefreq)+'D.pth'))
torch.save(netD.cpu().state_dict(),os.path.join(dir_checkpoint,'last_D.pth'))
- if use_gpu:
+ if opt.use_gpu:
netG.cuda()
- if use_gan:
+ if opt.gan:
netD.cuda()
f = open(os.path.join(dir_checkpoint,'iter'),'w+')
f.write(str(iter+1))
@@ -272,27 +278,27 @@ for iter in range(start_iter+1,ITER):
test_names = os.listdir('./test')
test_names.sort()
- result = np.zeros((finesize*2,finesize*len(test_names),3), dtype='uint8')
+ result = np.zeros((opt.finesize*2,opt.finesize*len(test_names),3), dtype='uint8')
for cnt,test_name in enumerate(test_names,0):
img_names = os.listdir(os.path.join('./test',test_name,'image'))
img_names.sort()
- inputdata = np.zeros((finesize,finesize,3*N+1), dtype='uint8')
+ inputdata = np.zeros((opt.finesize,opt.finesize,3*N+1), dtype='uint8')
for i in range(0,N):
img = impro.imread(os.path.join('./test',test_name,'image',img_names[i]))
- img = impro.resize(img,finesize)
+ img = impro.resize(img,opt.finesize)
inputdata[:,:,i*3:(i+1)*3] = img
mask = impro.imread(os.path.join('./test',test_name,'mask.png'),'gray')
- mask = impro.resize(mask,finesize)
+ mask = impro.resize(mask,opt.finesize)
mask = impro.mask_threshold(mask,15,128)
inputdata[:,:,-1] = mask
- result[0:finesize,finesize*cnt:finesize*(cnt+1),:] = inputdata[:,:,int((N-1)/2)*3:(int((N-1)/2)+1)*3]
+ result[0:opt.finesize,opt.finesize*cnt:opt.finesize*(cnt+1),:] = inputdata[:,:,int((N-1)/2)*3:(int((N-1)/2)+1)*3]
inputdata = data.im2tensor(inputdata,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False,is0_1 = False)
pred = netG(inputdata)
pred = data.tensor2im(pred,rgb2bgr = False, is0_1 = False)
- result[finesize:finesize*2,finesize*cnt:finesize*(cnt+1),:] = pred
+ result[opt.finesize:opt.finesize*2,opt.finesize*cnt:opt.finesize*(cnt+1),:] = pred
- cv2.imwrite(os.path.join(dir_checkpoint,str(iter+1)+'_test.png'), result)
+ cv2.imwrite(os.path.join(dir_checkpoint,str(iter+1)+'_test.jpg'), result)
netG.train()
diff --git a/clean_cache.py b/util/clean_cache.py
similarity index 63%
rename from clean_cache.py
rename to util/clean_cache.py
index a6f5104fc62c647aa1c2f270338b07702e2b4a3b..80c47b93d2693997cba79b3f4328d970baa572a3 100644
--- a/clean_cache.py
+++ b/util/clean_cache.py
@@ -35,16 +35,17 @@ def is_video(path):
else:
return False
-file_list,dir_list = Traversal('./')
-for file in file_list:
- if ('tmp' in file) | ('pth' in file)|('pycache' in file) | is_video(file) | is_img(file):
- if os.path.exists(file):
- if 'imgs' not in file:
- os.remove(file)
- print('remove file:',file)
+def cleanall():
+ file_list,dir_list = Traversal('./')
+ for file in file_list:
+ if ('tmp' in file) | ('pth' in file)|('pycache' in file) | is_video(file) | is_img(file):
+ if os.path.exists(file):
+ if 'imgs' not in file:
+ os.remove(file)
+ print('remove file:',file)
-for dir in dir_list:
- if ('tmp'in dir)|('pycache'in dir):
- if os.path.exists(dir):
- shutil.rmtree(dir)
- print('remove dir:',dir)
\ No newline at end of file
+ for dir in dir_list:
+ if ('tmp'in dir)|('pycache'in dir):
+ if os.path.exists(dir):
+ shutil.rmtree(dir)
+ print('remove dir:',dir)
\ No newline at end of file
diff --git a/util/data.py b/util/data.py
index b60a0d1a2f463829c7f2eff704e9b872913400b1..567c397cf7b804cca388a74c93e9e7014aff4b44 100755
--- a/util/data.py
+++ b/util/data.py
@@ -14,17 +14,21 @@ transform = transforms.Compose([
def tensor2im(image_tensor, imtype=np.uint8, gray=False, rgb2bgr = True ,is0_1 = False):
image_tensor =image_tensor.data
image_numpy = image_tensor[0].cpu().float().numpy()
- # if gray:
- # image_numpy = (image_numpy+1.0)/2.0 * 255.0
- # else:
- if image_numpy.shape[0] == 1:
- image_numpy = np.tile(image_numpy, (3, 1, 1))
-
- image_numpy = image_numpy.transpose((1, 2, 0))
-
+
if not is0_1:
image_numpy = (image_numpy + 1)/2.0
- image_numpy = np.clip(image_numpy * 255.0,0,255)
+ image_numpy = np.clip(image_numpy * 255.0,0,255)
+
+ # gray -> output 1ch
+ if gray:
+ h, w = image_numpy.shape[1:]
+ image_numpy = image_numpy.reshape(h,w)
+ return image_numpy.astype(imtype)
+
+ # output 3ch
+ if image_numpy.shape[0] == 1:
+ image_numpy = np.tile(image_numpy, (3, 1, 1))
+ image_numpy = image_numpy.transpose((1, 2, 0))
if rgb2bgr and not gray:
image_numpy = image_numpy[...,::-1]-np.zeros_like(image_numpy)
return image_numpy.astype(imtype)
diff --git a/util/ffmpeg.py b/util/ffmpeg.py
index 9b02153e567779deb145910408628fb3feb2f217..f91f888aab84f285487f6033e6b98ee0366a6ea1 100755
--- a/util/ffmpeg.py
+++ b/util/ffmpeg.py
@@ -2,20 +2,28 @@ import os,json
# ffmpeg 3.4.6
-def video2image(videopath,imagepath):
- os.system('ffmpeg -i "'+videopath+'" -f image2 '+imagepath)
+def video2image(videopath,imagepath,fps=0):
+ if fps == 0:
+ os.system('ffmpeg -i "'+videopath+'" -f image2 '+imagepath)
+ else:
+ os.system('ffmpeg -i "'+videopath+'" -r '+str(fps)+' -f image2 '+imagepath)
def video2voice(videopath,voicepath):
os.system('ffmpeg -i "'+videopath+'" -f mp3 '+voicepath)
def image2video(fps,imagepath,voicepath,videopath):
- os.system('ffmpeg -y -r '+str(fps)+' -i '+imagepath+' -vcodec libx264 -b 12M '+'./tmp/video_tmp.mp4')
+ os.system('ffmpeg -y -r '+str(fps)+' -i '+imagepath+' -vcodec libx264 '+'./tmp/video_tmp.mp4')
#os.system('ffmpeg -f image2 -i '+imagepath+' -vcodec libx264 -r '+str(fps)+' ./tmp/video_tmp.mp4')
os.system('ffmpeg -i ./tmp/video_tmp.mp4 -i "'+voicepath+'" -vcodec copy -acodec copy '+videopath)
def get_video_infos(videopath):
cmd_str = 'ffprobe -v quiet -print_format json -show_format -show_streams -i "' + videopath + '"'
- out_string = os.popen(cmd_str).read()
+ #out_string = os.popen(cmd_str).read()
+ #For chinese path in Windows
+ #https://blog.csdn.net/weixin_43903378/article/details/91979025
+ stream = os.popen(cmd_str)._stream
+ out_string = stream.buffer.read().decode(encoding='utf-8')
+
infos = json.loads(out_string)
try:
fps = eval(infos['streams'][0]['avg_frame_rate'])
@@ -28,7 +36,7 @@ def get_video_infos(videopath):
width = int(infos['streams'][1]['width'])
height = int(infos['streams'][1]['height'])
- return fps,endtime,width,height
+ return fps,endtime,height,width
def cut_video(in_path,start_time,last_time,out_path,vcodec='h265'):
if vcodec == 'copy':
diff --git a/util/image_processing.py b/util/image_processing.py
index 56e7b7c580adebfc791f44c4955e441cc8e0722a..8c6455f79831a2eba224ec0bf34351810eae3d75 100755
--- a/util/image_processing.py
+++ b/util/image_processing.py
@@ -19,7 +19,7 @@ def imread(file_path,mod = 'normal'):
elif mod == 'all':
img = cv2.imread(file_path,-1)
- #For chinese path, use cv2.imdecode in windows.
+ #In windows, for chinese path, use cv2.imdecode insteaded.
#It will loss EXIF, I can't fix it
else:
if mod == 'gray':
@@ -133,7 +133,7 @@ def mergeimage(img1,img2,orgin_image,size = 128):
result_img = cv2.add(new_img1,new_img2)
return result_img
-def find_best_ROI(mask):
+def find_mostlikely_ROI(mask):
contours,hierarchy=cv2.findContours(mask, cv2.RETR_LIST,cv2.CHAIN_APPROX_SIMPLE)
if len(contours)>0:
areas = []
@@ -182,9 +182,9 @@ def boundingSquare(mask,Ex_mul):
center = ((point0+point1)/2).astype('int')
return center[0],center[1],halfsize,area
-def mask_threshold(mask,blur,threshold):
+def mask_threshold(mask,ex_mun,threshold):
mask = cv2.threshold(mask,threshold,255,cv2.THRESH_BINARY)[1]
- mask = cv2.blur(mask, (blur, blur))
+ mask = cv2.blur(mask, (ex_mun, ex_mun))
mask = cv2.threshold(mask,threshold/5,255,cv2.THRESH_BINARY)[1]
return mask
@@ -200,7 +200,7 @@ def mask_area(mask):
def replace_mosaic(img_origin,img_fake,x,y,size,no_father):
- img_fake = resize(img_fake,size*2)
+ img_fake = resize(img_fake,size*2,interpolation=cv2.INTER_LANCZOS4)
if no_father:
img_origin[y-size:y+size,x-size:x+size]=img_fake
img_result = img_origin
diff --git a/util/util.py b/util/util.py
index 91461684e094a44d7a011656174b2b87a3b6f336..7ec3cb87ec3451af4e3cbc9e49967a4630c9058d 100755
--- a/util/util.py
+++ b/util/util.py
@@ -40,10 +40,12 @@ def is_videos(paths):
tmp.append(path)
return tmp
-def writelog(path,log):
+def writelog(path,log,isprint=False):
f = open(path,'a+')
f.write(log+'\n')
f.close()
+ if isprint:
+ print(log)
def makedirs(path):
if os.path.isdir(path):
@@ -87,3 +89,11 @@ def copyfile(src,dst):
shutil.copyfile(src, dst)
except Exception as e:
print(e)
+
+def opt2str(opt):
+ message = ''
+ message += '---------------------- Options --------------------\n'
+ for k, v in sorted(vars(opt).items()):
+ message += '{:>25}: {:<35}\n'.format(str(k), str(v))
+ message += '----------------- End -------------------'
+ return message