From 57467a8f1ab5a172a4620615cf6bed23d9b15b39 Mon Sep 17 00:00:00 2001 From: hypox64 Date: Tue, 28 Apr 2020 23:47:45 +0800 Subject: [PATCH] New version V0.3.0 #5 #8 --- .gitignore | 1 + README.md | 17 +- README_CN.md | 12 +- cores/core.py | 17 +- cores/options.py | 6 +- deepmosaic.py | 4 +- docs/Release_notes.txt | 23 ++ docs/exe_help.md | 182 ++++++------ docs/exe_help_CN.md | 184 ++++++------ docs/how_to_train.md | 1 + docs/options_introduction.md | 72 ++--- docs/options_introduction_CN.md | 72 ++--- docs/pre-trained_models_introduction.md | 56 ++-- docs/pre-trained_models_introduction_CN.md | 56 ++-- docs/training_with_your_own_dataset.md | 73 +++++ make_datasets/csv/video_used_time.csv | 40 --- make_datasets/draw_mask.py | 121 ++++---- make_datasets/get_image_from_video.py | 22 +- make_datasets/make_pix2pix_dataset.py | 190 ++++++++++++ make_datasets/make_video_dataset.py | 145 +++++++++ .../use_addmosaic_model_make_dataset.py | 73 ----- .../use_addmosaic_model_make_video_dataset.py | 86 ------ make_datasets/use_drawn_mask_make_dataset.py | 68 ----- .../use_irregular_holes_make_dataset.py | 93 ------ models/BiSeNet_model.py | 264 +++++++++++++++++ models/__init__.py | 3 +- models/components.py | 234 +++++++++++++++ models/loadmodel.py | 39 ++- models/runmodel.py | 24 +- models/unet_model.py | 97 +++++- models/unet_parts.py | 102 ------- models/videoHD_model.py | 12 +- models/video_model.py | 1 - train/add/train.py | 276 ++++++++++-------- train/clean/train.py | 72 +++-- util/data.py | 92 +++--- util/ffmpeg.py | 17 +- util/image_processing.py | 102 +++++-- util/mosaic.py | 118 ++++---- util/util.py | 17 +- 40 files changed, 1888 insertions(+), 1196 deletions(-) create mode 100644 docs/Release_notes.txt create mode 100644 docs/how_to_train.md create mode 100644 docs/training_with_your_own_dataset.md delete mode 100644 make_datasets/csv/video_used_time.csv create mode 100644 make_datasets/make_pix2pix_dataset.py create mode 100644 make_datasets/make_video_dataset.py delete mode 100644 make_datasets/use_addmosaic_model_make_dataset.py delete mode 100644 make_datasets/use_addmosaic_model_make_video_dataset.py delete mode 100644 make_datasets/use_drawn_mask_make_dataset.py delete mode 100644 make_datasets/use_irregular_holes_make_dataset.py create mode 100644 models/BiSeNet_model.py create mode 100644 models/components.py delete mode 100755 models/unet_parts.py diff --git a/.gitignore b/.gitignore index c1a7e93..1635c56 100644 --- a/.gitignore +++ b/.gitignore @@ -154,6 +154,7 @@ result/ /pretrained_models_old /deepmosaic_window /sftp-config.json +/exe #./make_datasets /make_datasets/video /make_datasets/tmp diff --git a/README.md b/README.md index 601f838..2b5aed7 100755 --- a/README.md +++ b/README.md @@ -6,25 +6,19 @@ This porject based on "semantic segmentation" and "Image-to-Image Translation".< * [中文版README](./README_CN.md)
### More example - origin | auto add mosaic | auto clean mosaic :-:|:-:|:-: ![image](./imgs/example/lena.jpg) | ![image](./imgs/example/lena_add.jpg) | ![image](./imgs/example/lena_clean.jpg) ![image](./imgs/example/youknow.png) | ![image](./imgs/example/youknow_add.png) | ![image](./imgs/example/youknow_clean.png) - * Compared with [DeepCreamPy](https://github.com/deeppomf/DeepCreamPy) - mosaic image | DeepCreamPy | ours :-:|:-:|:-: ![image](./imgs/example/face_a_mosaic.jpg) | ![image](./imgs/example/a_dcp.png) | ![image](./imgs/example/face_a_clean.jpg) ![image](./imgs/example/face_b_mosaic.jpg) | ![image](./imgs/example/b_dcp.png) | ![image](./imgs/example/face_b_clean.jpg) - * Style Transfer - origin | to Van Gogh | to winter :-:|:-:|:-: ![image](./imgs/example/SZU.jpg) | ![image](./imgs/example/SZU_vangogh.jpg) | ![image](./imgs/example/SZU_summer2winter.jpg) - An interesting example:[Ricardo Milos to cat](https://www.bilibili.com/video/BV1Q7411W7n6) ## Run DeepMosaics @@ -33,6 +27,7 @@ You can either run DeepMosaics via pre-built binary package or from source.
### Pre-built binary package
For Windows, we build a GUI version for easy testing.<br>
Download this version and pre-trained model via [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ)
+ * [[How to use]](./docs/exe_help.md)
![image](./imgs/GUI.png)
@@ -64,11 +59,11 @@ You can download pre-trained models and put them into './pretrained_models'.<br>
[[Introduction to pre-trained models]](./docs/pre-trained_models_introduction.md)
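After downloading, the layout should look roughly like this (a sketch — only the models you actually use are required, and the file name must match the `--model_path` you pass):
```
pretrained_models/
└── mosaic/
    ├── add_face.pth
    ├── clean_face_HD.pth
    └── ...
```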
#### Simple example -* Add Mosaic (output video will save in './result')
+* Add Mosaic (output media will be saved in './result')<br>
```bash python3 deepmosaic.py --media_path ./imgs/ruoruo.jpg --model_path ./pretrained_models/mosaic/add_face.pth --use_gpu -1 ``` -* Clean Mosaic (output video will save in './result')
+* Clean Mosaic (output media will be saved in './result')<br>
```bash
python3 deepmosaic.py --media_path ./result/ruoruo_add.jpg --model_path ./pretrained_models/mosaic/clean_face_HD.pth --use_gpu -1
```
@@ -76,5 +71,9 @@ If you want to test other images or videos, please refer to this file.<br>
[[options_introduction.md]](./docs/options_introduction.md)
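For example, a video can be cleaned on GPU 0 with the output capped at 30 fps like this (the media path is a placeholder; all flags are documented in the file above):
```bash
python3 deepmosaic.py --media_path ./video.mp4 --model_path ./pretrained_models/mosaic/clean_youknow_video.pth --mode clean --use_gpu 0 --fps 30 --result_dir ./result
```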
+## Training with your own dataset +If you want to train with your own dataset, please refer to [training_with_your_own_dataset.md](./docs/training_with_your_own_dataset.md) + ## Acknowledgments -This code borrows heavily from [[pytorch-CycleGAN-and-pix2pix]](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix) [[Pytorch-UNet]](https://github.com/milesial/Pytorch-UNet)[[pix2pixHD]](https://github.com/NVIDIA/pix2pixHD). +This code borrows heavily from [[pytorch-CycleGAN-and-pix2pix]](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix) [[Pytorch-UNet]](https://github.com/milesial/Pytorch-UNet) [[pix2pixHD]](https://github.com/NVIDIA/pix2pixHD) [[BiSeNet]](https://github.com/ooooverflow/BiSeNet). + diff --git a/README_CN.md b/README_CN.md index 94a2e50..fac53c0 100644 --- a/README_CN.md +++ b/README_CN.md @@ -3,25 +3,19 @@ 这是一个通过深度学习自动的为图片/视频添加马赛克,或消除马赛克的项目.
它基于“语义分割”以及“图像翻译”.
### 更多例子 - 原始 | 自动打码 | 自动去码 :-:|:-:|:-: ![image](./imgs/example/lena.jpg) | ![image](./imgs/example/lena_add.jpg) | ![image](./imgs/example/lena_clean.jpg) ![image](./imgs/example/youknow.png) | ![image](./imgs/example/youknow_add.png) | ![image](./imgs/example/youknow_clean.png) - * 与 [DeepCreamPy](https://github.com/deeppomf/DeepCreamPy)相比较 - 马赛克图片 | DeepCreamPy | ours :-:|:-:|:-: ![image](./imgs/example/face_a_mosaic.jpg) | ![image](./imgs/example/a_dcp.png) | ![image](./imgs/example/face_a_clean.jpg) ![image](./imgs/example/face_b_mosaic.jpg) | ![image](./imgs/example/b_dcp.png) | ![image](./imgs/example/face_b_clean.jpg) - * 风格转换 - 原始 | 梵高风格 | 转化为冬天 :-:|:-:|:-: ![image](./imgs/example/SZU.jpg) | ![image](./imgs/example/SZU_vangogh.jpg) | ![image](./imgs/example/SZU_summer2winter.jpg) - 一个有意思的尝试:[香蕉君♂猫](https://www.bilibili.com/video/BV1Q7411W7n6) ## 如何运行 @@ -74,5 +68,9 @@ python3 deepmosaic.py --media_path ./result/ruoruo_add.jpg --model_path ./pretra 如果想要测试其他的图片或视频,请参照以下文件输入参数.
[[options_introduction_CN.md]](./docs/options_introduction_CN.md)
+## 使用自己的数据训练模型 +如果需要使用自己的数据训练模型,请参照 [training_with_your_own_dataset.md](./docs/training_with_your_own_dataset.md) + ## 鸣谢 -代码大量的参考了以下项目:[[pytorch-CycleGAN-and-pix2pix]](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix) [[Pytorch-UNet]](https://github.com/milesial/Pytorch-UNet)[[pix2pixHD]](https://github.com/NVIDIA/pix2pixHD). \ No newline at end of file +代码大量的参考了以下项目:[[pytorch-CycleGAN-and-pix2pix]](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix) [[Pytorch-UNet]](https://github.com/milesial/Pytorch-UNet) [[pix2pixHD]](https://github.com/NVIDIA/pix2pixHD) [[BiSeNet]](https://github.com/ooooverflow/BiSeNet). + diff --git a/cores/core.py b/cores/core.py index 22a408a..d48ae51 100644 --- a/cores/core.py +++ b/cores/core.py @@ -38,7 +38,7 @@ def addmosaic_video(opt,netS): positions = [] for i,imagepath in enumerate(imagepaths,1): img = impro.imread(os.path.join('./tmp/video2image',imagepath)) - mask,x,y,area = runmodel.get_ROI_position(img,netS,opt) + mask,x,y,size,area = runmodel.get_ROI_position(img,netS,opt) positions.append([x,y,area]) cv2.imwrite(os.path.join('./tmp/ROI_mask',imagepath),mask) print('\r','Find ROI location:'+str(i)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=35),end='') @@ -110,7 +110,7 @@ def cleanmosaic_img(opt,netG,netM): print('Clean Mosaic:',path) img_origin = impro.imread(path) x,y,size,mask = runmodel.get_mosaic_position(img_origin,netM,opt) - #cv2.imwrite('./mask/'+os.path.basename(path), mask) + cv2.imwrite('./mask/'+os.path.basename(path), mask) img_result = img_origin.copy() if size != 0 : img_mosaic = img_origin[y-size:y+size,x-size:x+size] @@ -118,7 +118,7 @@ def cleanmosaic_img(opt,netG,netM): img_fake = runmodel.traditional_cleaner(img_mosaic,opt) else: img_fake = runmodel.run_pix2pix(img_mosaic,netG,opt) - img_result = impro.replace_mosaic(img_origin,img_fake,x,y,size,opt.no_feather) + img_result = impro.replace_mosaic(img_origin,img_fake,mask,x,y,size,opt.no_feather) else: print('Do not find mosaic') impro.imwrite(os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_clean.jpg'),img_result) @@ -126,7 +126,7 @@ def cleanmosaic_img(opt,netG,netM): def cleanmosaic_video_byframe(opt,netG,netM): path = opt.media_path fps,imagepaths = video_init(opt,path)[:2] - positions = get_mosaic_positions(opt,netM,imagepaths,savemask=False) + positions = get_mosaic_positions(opt,netM,imagepaths,savemask=True) # clean mosaic for i,imagepath in enumerate(imagepaths,0): x,y,size = positions[i][0],positions[i][1],positions[i][2] @@ -138,7 +138,8 @@ def cleanmosaic_video_byframe(opt,netG,netM): img_fake = runmodel.traditional_cleaner(img_mosaic,opt) else: img_fake = runmodel.run_pix2pix(img_mosaic,netG,opt) - img_result = impro.replace_mosaic(img_origin,img_fake,x,y,size,opt.no_feather) + mask = cv2.imread(os.path.join('./tmp/mosaic_mask',imagepath),0) + img_result = impro.replace_mosaic(img_origin,img_fake,mask,x,y,size,opt.no_feather) cv2.imwrite(os.path.join('./tmp/replace_mosaic',imagepath),img_result) print('\r','Clean Mosaic:'+str(i+1)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=35),end='') print() @@ -178,13 +179,13 @@ def cleanmosaic_video_fusion(opt,netG,netM): mosaic_input = np.zeros((INPUT_SIZE,INPUT_SIZE,3*N+1), dtype='uint8') mosaic_input[:,:,0:N*3] = impro.resize(img_pool[y-size:y+size,x-size:x+size,:], INPUT_SIZE) - mask = impro.resize(mask,np.min(img_origin.shape[:2]))[y-size:y+size,x-size:x+size] - mosaic_input[:,:,-1] = impro.resize(mask, INPUT_SIZE) + mask_input = 
impro.resize(mask,np.min(img_origin.shape[:2]))[y-size:y+size,x-size:x+size] + mosaic_input[:,:,-1] = impro.resize(mask_input, INPUT_SIZE) mosaic_input = data.im2tensor(mosaic_input,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False,is0_1 = False) unmosaic_pred = netG(mosaic_input) img_fake = data.tensor2im(unmosaic_pred,rgb2bgr = False ,is0_1 = False) - img_result = impro.replace_mosaic(img_origin,img_fake,x,y,size,opt.no_feather) + img_result = impro.replace_mosaic(img_origin,img_fake,mask,x,y,size,opt.no_feather) cv2.imwrite(os.path.join('./tmp/replace_mosaic',imagepath),img_result) print('\r','Clean Mosaic:'+str(i+1)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=35),end='') print() diff --git a/cores/options.py b/cores/options.py index b3cb051..d0a4a77 100644 --- a/cores/options.py +++ b/cores/options.py @@ -16,17 +16,17 @@ class Options(): self.parser.add_argument('--mode', type=str, default='auto',help='Program running mode. auto | add | clean | style') self.parser.add_argument('--model_path', type=str, default='./pretrained_models/mosaic/add_face.pth',help='pretrained model path') self.parser.add_argument('--result_dir', type=str, default='./result',help='output media will be saved here') - self.parser.add_argument('--tempimage_type', type=str, default='png',help='type of temp image, png | jpg, png is better but occupy more storage space') + self.parser.add_argument('--tempimage_type', type=str, default='jpg',help='type of temp image, png | jpg, png is better but occupy more storage space') self.parser.add_argument('--netG', type=str, default='auto', help='select model to use for netG(Clean mosaic and Transfer style) -> auto | unet_128 | unet_256 | resnet_9blocks | HD | video') self.parser.add_argument('--fps', type=int, default=0,help='read and output fps, if 0-> origin') self.parser.add_argument('--output_size', type=int, default=0,help='size of output media, if 0 -> origin') - + self.parser.add_argument('--mask_threshold', type=int, default=64,help='threshold of recognize clean or add mosaic position 0~255') + #AddMosaic self.parser.add_argument('--mosaic_mod', type=str, default='squa_avg',help='type of mosaic -> squa_avg | squa_random | squa_avg_circle_edge | rect_avg | random') self.parser.add_argument('--mosaic_size', type=int, default=0,help='mosaic size,if 0 auto size') self.parser.add_argument('--mask_extend', type=int, default=10,help='extend mosaic area') - self.parser.add_argument('--mask_threshold', type=int, default=64,help='threshold of recognize mosaic position 0~255') #CleanMosaic self.parser.add_argument('--mosaic_position_model_path', type=str, default='auto',help='name of model use to find mosaic position') diff --git a/deepmosaic.py b/deepmosaic.py index 58a6031..a92f941 100644 --- a/deepmosaic.py +++ b/deepmosaic.py @@ -15,7 +15,7 @@ def main(): else: files = [opt.media_path] if opt.mode == 'add': - netS = loadmodel.unet(opt) + netS = loadmodel.bisenet(opt,'roi') for file in files: opt.media_path = file if util.is_img(file): @@ -26,7 +26,7 @@ def main(): print('This type of file is not supported') elif opt.mode == 'clean': - netM = loadmodel.unet_clean(opt) + netM = loadmodel.bisenet(opt,'mosaic') if opt.traditional: netG = None elif opt.netG == 'video': diff --git a/docs/Release_notes.txt b/docs/Release_notes.txt new file mode 100644 index 0000000..b62014f --- /dev/null +++ b/docs/Release_notes.txt @@ -0,0 +1,23 @@ +DeepMosaics V0.3.0 +Core program building with windows10_1703_x86_64 + + python 3.68 + + pyinstaller 3.5 +GUI building with 
C#
+For more detail, please view on GitHub: https://github.com/HypoX64/DeepMosaics
+
+Releases History
+ V0.3.0
+ 1. Support BiSeNet (better recognition of mosaic regions).
+ 2. New videoHD model.
+ 3. Better feathering method.
+ V0.2.0
+ 1. Add video model.
+ 2. Chinese paths can now be used as input.
+ 3. Support style transfer.
+ 4. Support fps limit.
+ V0.1.2
+ 1. Support pix2pixHD model.
+ V0.1.1
+ 1. Check input paths; illegal paths are rejected.
+ V0.1.0
+ 1. Initial release.
\ No newline at end of file
diff --git a/docs/exe_help.md b/docs/exe_help.md
index 3f9f242..98ee6a2 100644
--- a/docs/exe_help.md
+++ b/docs/exe_help.md
@@ -1,92 +1,92 @@
-## DeepMosaics.exe Instructions
-[[中文版]](./exe_help_CN.md)
-This is a GUI version compiled in Windows.<br>
-Download this version and pre-trained model via [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ)
-Attentions:
- - - Require Windows_x86_64, Windows10 is better.
- - Different pre-trained models are suitable for different effects.
- - Run time depends on computer performance.
- - If output video cannot be played, you can try with [potplayer](https://daumpotplayer.com/download/).
- - GUI version update slower than source.
-### How to use -* step 1: Choose image or video. -* step 2: Choose model(Different pre-trained models are suitable for different effects) -* step3: Run program and wait. -* step4: Cheek reult in './result'. - -### Introduction to pre-trained models -* Mosaic - -| Name | Description | -| :------------------------------: | :---------------------------------------------------------: | -| add_face.pth | Add mosaic to all faces in images/videos. | -| clean_face_HD.pth | Clean mosaic to all faces in images/video.
(RAM > 8GB). | -| add_youknow.pth | Add mosaic to all (FBI Warning) in images/videos. | -| clean_youknow_resnet_9blocks.pth | Clean mosaic to all (FBI Warning) in images/videos. | -| clean_youknow_video.pth | Clean mosaic to all (FBI Warning) in videos. | -| clean_youknow_video_HD.pth | Clean mosaic to all (FBI Warning) in videos.
(RAM > 8GB) | - -* Style Transfer - -| Name | Description | -| :---------------------: | :-------------------------------------------------------: | -| style_apple2orange.pth | Convert apples to oranges. | -| style_orange2apple.pth | Convert oranges to apples | -| style_summer2winter.pth | Convert summer to winter. | -| style_winter2summer.pth | Convert winter to summer. | -| style_cezanne.pth | Convert photos/video to Paul Cézanne style. | -| style_monet.pth | Convert photos/video to Claude Monet style. | -| style_ukiyoe.pth | Convert photos/video to Ukiyoe style. | -| style_vangogh.pth | Convert photos/video to Van Gogh style. | -### Annotation -![image](../imgs/GUI_Instructions.jpg)
-* 1. Choose image or video. -* 2. Choose model(Different pre-trained models are suitable for different effects). -* 3. Program running mode. (auto | add | clean | style) -* 4. Use GPU to run deep learning model. (The current version does not support gpu, if you need to use gpu please run source). -* 5. Limit the fps of the output video(0->original fps). -* 6. More options. -* 7. More options can be input. -* 8. Run program. -* 9. Open help file. -* 10. Sponsor our project. -* 11. Version information. -* 12. Open the URL on github. - -### Introduction to options -If you need more effects, use '--option your-parameters' to enter what you need. -* Base - -| Option | Description | Default | -| :----------: | :----------------------------------------: | :-------------------------------------: | -| --use_gpu | if -1, do not use gpu | 0 | -| --media_path | your videos or images path | ./imgs/ruoruo.jpg | -| --mode | program running mode(auto/clean/add/style) | 'auto' | -| --model_path | pretrained model path | ./pretrained_models/mosaic/add_face.pth | -| --result_dir | output media will be saved here | ./result | -| --fps | read and output fps, if 0-> origin | 0 | - -* AddMosaic - -| Option | Description | Default | -| :--------------: | :----------------------------------------------------------: | :------: | -| --mosaic_mod | type of mosaic -> squa_avg/ squa_random/ squa_avg_circle_edge/ rect_avg/random | squa_avg | -| --mosaic_size | mosaic size,if 0 -> auto size | 0 | -| --mask_extend | extend mosaic area | 10 | -| --mask_threshold | threshold of recognize mosaic position 0~255 | 64 | - -* CleanMosaic - -| Option | Description | Default | -| :-----------: | :----------------------------------------------------------: | :-----: | -| --traditional | if specified, use traditional image processing methods to clean mosaic | | -| --tr_blur | ksize of blur when using traditional method, it will affect final quality | 10 | -| --tr_down | downsample when using traditional method,it will affect final quality | 10 | -| --medfilt_num | medfilt window of mosaic movement in the video | 11 | - -* Style Transfer - -| Option | Description | Default | -| :-----------: | :----------------------------------: | :-----: | +## DeepMosaics.exe Instructions +[[中文版]](./exe_help_CN.md) +This is a GUI version compiled in Windows.
+Download this version and pre-trained model via [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ)
+Attentions:
+ + - Require Windows_x86_64, Windows10 is better.
+ - Different pre-trained models are suitable for different effects.
+ - Run time depends on computer performance.
+ - If output video cannot be played, you can try with [potplayer](https://daumpotplayer.com/download/).
+ - GUI version update slower than source.
+### How to use
+* step 1: Choose an image or video.
+* step 2: Choose a model (different pre-trained models are suitable for different effects).
+* step 3: Run the program and wait.
+* step 4: Check the result in './result'.
+
+### Introduction to pre-trained models
+* Mosaic
+
+| Name | Description |
+| :------------------------------: | :---------------------------------------------------------: |
+| add_face.pth | Add mosaic to all faces in images/videos. |
+| clean_face_HD.pth | Clean mosaic to all faces in images/videos.<br>
(RAM > 8GB). | +| add_youknow.pth | Add mosaic to all (FBI Warning) in images/videos. | +| clean_youknow_resnet_9blocks.pth | Clean mosaic to all (FBI Warning) in images/videos. | +| clean_youknow_video.pth | Clean mosaic to all (FBI Warning) in videos. | +| clean_youknow_video_HD.pth | Clean mosaic to all (FBI Warning) in videos.
(RAM > 8GB) | + +* Style Transfer + +| Name | Description | +| :---------------------: | :-------------------------------------------------------: | +| style_apple2orange.pth | Convert apples to oranges. | +| style_orange2apple.pth | Convert oranges to apples | +| style_summer2winter.pth | Convert summer to winter. | +| style_winter2summer.pth | Convert winter to summer. | +| style_cezanne.pth | Convert photos/video to Paul Cézanne style. | +| style_monet.pth | Convert photos/video to Claude Monet style. | +| style_ukiyoe.pth | Convert photos/video to Ukiyoe style. | +| style_vangogh.pth | Convert photos/video to Van Gogh style. | +### Annotation +![image](../imgs/GUI_Instructions.jpg)
+* 1. Choose image or video. +* 2. Choose model(Different pre-trained models are suitable for different effects). +* 3. Program running mode. (auto | add | clean | style) +* 4. Use GPU to run deep learning model. (The current version does not support gpu, if you need to use gpu please run source). +* 5. Limit the fps of the output video(0->original fps). +* 6. More options. +* 7. More options can be input. +* 8. Run program. +* 9. Open help file. +* 10. Sponsor our project. +* 11. Version information. +* 12. Open the URL on github. + +### Introduction to options +If you need more effects, use '--option your-parameters' to enter what you need. +* Base + +| Option | Description | Default | +| :----------: | :----------------------------------------: | :-------------------------------------: | +| --use_gpu | if -1, do not use gpu | 0 | +| --media_path | your videos or images path | ./imgs/ruoruo.jpg | +| --mode | program running mode(auto/clean/add/style) | 'auto' | +| --model_path | pretrained model path | ./pretrained_models/mosaic/add_face.pth | +| --result_dir | output media will be saved here | ./result | +| --fps | read and output fps, if 0-> origin | 0 | + +* AddMosaic + +| Option | Description | Default | +| :--------------: | :----------------------------------------------------------: | :------: | +| --mosaic_mod | type of mosaic -> squa_avg/ squa_random/ squa_avg_circle_edge/ rect_avg/random | squa_avg | +| --mosaic_size | mosaic size,if 0 -> auto size | 0 | +| --mask_extend | extend mosaic area | 10 | +| --mask_threshold | threshold of recognize mosaic position 0~255 | 64 | + +* CleanMosaic + +| Option | Description | Default | +| :-----------: | :----------------------------------------------------------: | :-----: | +| --traditional | if specified, use traditional image processing methods to clean mosaic | | +| --tr_blur | ksize of blur when using traditional method, it will affect final quality | 10 | +| --tr_down | downsample when using traditional method,it will affect final quality | 10 | +| --medfilt_num | medfilt window of mosaic movement in the video | 11 | + +* Style Transfer + +| Option | Description | Default | +| :-----------: | :----------------------------------: | :-----: | | --output_size | size of output media, if 0 -> origin | 512 | \ No newline at end of file diff --git a/docs/exe_help_CN.md b/docs/exe_help_CN.md index a160c72..a38b6e5 100644 --- a/docs/exe_help_CN.md +++ b/docs/exe_help_CN.md @@ -1,93 +1,93 @@ -## DeepMosaics.exe 使用说明 -下载程序以及预训练模型 [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ)
-注意事项:
- - - 程序的运行要求在64位Windows操作系统,我仅在Windows10运行过,其他版本暂未经过测试
- - 请根据需求选择合适的预训练模型进行测试
- - 运行时间取决于电脑性能,对于视频文件,我们建议使用源码以及GPU运行
- - 如果输出的视频无法播放,这边建议您尝试[potplayer](https://daumpotplayer.com/download/).
- - 相比于源码,该版本的更新将会延后. - -### 如何使用 -* step 1: 选择需要处理的图片或视频 -* step 2: 选择预训练模型(不同的预训练模型有不同的效果) -* step3: 运行程序并等待 -* step4: 查看结果(储存在result文件夹下) - -## 预训练模型说明 -当前的预训练模型分为两类——添加/移除马赛克以及风格转换. - -* 马赛克 - -| 文件名 | 描述 | -| :------------------------------: | :-------------------------------------------: | -| add_face.pth | 对图片或视频中的脸部打码 | -| clean_face_HD.pth | 对图片或视频中的脸部去码
(要求内存 > 8GB). | -| add_youknow.pth | 对图片或视频中的十八禁内容打码 | -| clean_youknow_resnet_9blocks.pth | 对图片或视频中的十八禁内容去码 | -| clean_youknow_video.pth | 对视频中的十八禁内容去码 | -| clean_youknow_video_HD.pth | 对视频中的十八禁内容去码
(要求内存 > 8GB) | - -* 风格转换 - -| 文件名 | 描述 | -| :---------------------: | :-------------------------------------------------------: | -| style_apple2orange.pth | 苹果变橙子 | -| style_orange2apple.pth | 橙子变苹果 | -| style_summer2winter.pth | 夏天变冬天 | -| style_winter2summer.pth | 冬天变夏天 | -| style_cezanne.pth | 转化为Paul Cézanne 的绘画风格 | -| style_monet.pth | 转化为Claude Monet的绘画风格 | -| style_ukiyoe.pth | 转化为Ukiyoe的绘画风格 | -| style_vangogh.pth | 转化为Van Gogh的绘画风格 | - -### GUI界面注释 -![image](../imgs/GUI_Instructions.jpg)
-* 1. 选择需要处理的图片或视频 -* 2. 选择预训练模型 -* 3. 程序运行模式 (auto | add | clean | style) -* 4. 使用GPU (该版本目前不支持GPU,若需要使用GPU请使用源码运行). -* 5. 限制输出的视频帧率(0->原始帧率). -* 6. 更多的选项以及参数 -* 7. 自行输入更多参数,详见下文 -* 8. 运行 -* 9. 打开帮助文件 -* 10. 支持我们 -* 11. 版本信息 -* 12. 打开项目的github页面 - -### 参数说明 -如果需要更多的效果, 请按照 '--option your-parameters' 输入所需要的参数 -* 基本 - -| 选项 | 描述 | 默认 | -| :----------: | :------------------------: | :-------------------------------------: | -| --use_gpu | if -1, do not use gpu | 0 | -| --media_path | 需要处理的视频或者照片的路径 | ./imgs/ruoruo.jpg | -| --mode | 运行模式(auto/clean/add/style) | 'auto' | -| --model_path | 预训练模型的路径 | ./pretrained_models/mosaic/add_face.pth | -| --result_dir | 保存路径 | ./result | -| --fps | 限制视频输出的fps,0则为默认 | 0 | -* 添加马赛克 - -| 选项 | 描述 | 默认 | -| :----------: | :------------------------: | :-------------------------------------: | -| --mosaic_mod | 马赛克类型 -> squa_avg/ squa_random/ squa_avg_circle_edge/ rect_avg/random | squa_avg | -| --mosaic_size | 马赛克大小,0则为自动 | 0 | -| --mask_extend | 拓展马赛克区域 | 10 | -| --mask_threshold | 马赛克区域识别阈值 0~255 | 64 | - -* 去除马赛克 - -| 选项 | 描述 | 默认 | -| :----------: | :------------------------: | :-------------------------------------: | -| --traditional | 如果输入这个参数则使用传统方法清除马赛克 | | -| --tr_blur | 传统方法模糊尺寸 | 10 | -| --tr_down | 传统方法下采样尺寸 | 10 | -| --medfilt_num | medfilt window of mosaic movement in the video | 11 | - -* 风格转换 - -| 选项 | 描述 | 默认 | -| :----------: | :------------------------: | :-------------------------------------: | +## DeepMosaics.exe 使用说明 +下载程序以及预训练模型 [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ)
+注意事项:
+ + - 程序的运行要求在64位Windows操作系统,我仅在Windows10运行过,其他版本暂未经过测试
+ - 请根据需求选择合适的预训练模型进行测试
+ - 运行时间取决于电脑性能,对于视频文件,我们建议使用源码以及GPU运行
+ - 如果输出的视频无法播放,这边建议您尝试[potplayer](https://daumpotplayer.com/download/).
+ - 相比于源码,该版本的更新将会延后. + +### 如何使用 +* step 1: 选择需要处理的图片或视频 +* step 2: 选择预训练模型(不同的预训练模型有不同的效果) +* step3: 运行程序并等待 +* step4: 查看结果(储存在result文件夹下) + +## 预训练模型说明 +当前的预训练模型分为两类——添加/移除马赛克以及风格转换. + +* 马赛克 + +| 文件名 | 描述 | +| :------------------------------: | :-------------------------------------------: | +| add_face.pth | 对图片或视频中的脸部打码 | +| clean_face_HD.pth | 对图片或视频中的脸部去码
(要求内存 > 8GB). | +| add_youknow.pth | 对图片或视频中的十八禁内容打码 | +| clean_youknow_resnet_9blocks.pth | 对图片或视频中的十八禁内容去码 | +| clean_youknow_video.pth | 对视频中的十八禁内容去码 | +| clean_youknow_video_HD.pth | 对视频中的十八禁内容去码
(要求内存 > 8GB) | + +* 风格转换 + +| 文件名 | 描述 | +| :---------------------: | :-------------------------------------------------------: | +| style_apple2orange.pth | 苹果变橙子 | +| style_orange2apple.pth | 橙子变苹果 | +| style_summer2winter.pth | 夏天变冬天 | +| style_winter2summer.pth | 冬天变夏天 | +| style_cezanne.pth | 转化为Paul Cézanne 的绘画风格 | +| style_monet.pth | 转化为Claude Monet的绘画风格 | +| style_ukiyoe.pth | 转化为Ukiyoe的绘画风格 | +| style_vangogh.pth | 转化为Van Gogh的绘画风格 | + +### GUI界面注释 +![image](../imgs/GUI_Instructions.jpg)
+* 1. 选择需要处理的图片或视频 +* 2. 选择预训练模型 +* 3. 程序运行模式 (auto | add | clean | style) +* 4. 使用GPU (该版本目前不支持GPU,若需要使用GPU请使用源码运行). +* 5. 限制输出的视频帧率(0->原始帧率). +* 6. 更多的选项以及参数 +* 7. 自行输入更多参数,详见下文 +* 8. 运行 +* 9. 打开帮助文件 +* 10. 支持我们 +* 11. 版本信息 +* 12. 打开项目的github页面 + +### 参数说明 +如果需要更多的效果, 请按照 '--option your-parameters' 输入所需要的参数 +* 基本 + +| 选项 | 描述 | 默认 | +| :----------: | :------------------------: | :-------------------------------------: | +| --use_gpu | if -1, do not use gpu | 0 | +| --media_path | 需要处理的视频或者照片的路径 | ./imgs/ruoruo.jpg | +| --mode | 运行模式(auto/clean/add/style) | 'auto' | +| --model_path | 预训练模型的路径 | ./pretrained_models/mosaic/add_face.pth | +| --result_dir | 保存路径 | ./result | +| --fps | 限制视频输出的fps,0则为默认 | 0 | +* 添加马赛克 + +| 选项 | 描述 | 默认 | +| :----------: | :------------------------: | :-------------------------------------: | +| --mosaic_mod | 马赛克类型 -> squa_avg/ squa_random/ squa_avg_circle_edge/ rect_avg/random | squa_avg | +| --mosaic_size | 马赛克大小,0则为自动 | 0 | +| --mask_extend | 拓展马赛克区域 | 10 | +| --mask_threshold | 马赛克区域识别阈值 0~255 | 64 | + +* 去除马赛克 + +| 选项 | 描述 | 默认 | +| :----------: | :------------------------: | :-------------------------------------: | +| --traditional | 如果输入这个参数则使用传统方法清除马赛克 | | +| --tr_blur | 传统方法模糊尺寸 | 10 | +| --tr_down | 传统方法下采样尺寸 | 10 | +| --medfilt_num | medfilt window of mosaic movement in the video | 11 | + +* 风格转换 + +| 选项 | 描述 | 默认 | +| :----------: | :------------------------: | :-------------------------------------: | | --output_size | 输出媒体的尺寸,如果是0则为原始尺寸 |512| \ No newline at end of file diff --git a/docs/how_to_train.md b/docs/how_to_train.md new file mode 100644 index 0000000..ec94773 --- /dev/null +++ b/docs/how_to_train.md @@ -0,0 +1 @@ +### make datasets diff --git a/docs/options_introduction.md b/docs/options_introduction.md index 410d136..95ccfc0 100644 --- a/docs/options_introduction.md +++ b/docs/options_introduction.md @@ -1,37 +1,37 @@ -## Introduction to options -If you need more effects, use '--option your-parameters' to enter what you need. 
- -### Base - -| Option | Description | Default | -| :----------: | :------------------------: | :-------------------------------------: | -| --use_gpu | if -1, do not use gpu | 0 | -| --media_path | your videos or images path | ./imgs/ruoruo.jpg | -| --mode | program running mode(auto/clean/add/style) | 'auto' | -| --model_path | pretrained model path | ./pretrained_models/mosaic/add_face.pth | -| --result_dir | output media will be saved here| ./result | -| --fps | read and output fps, if 0-> origin | 0 | - -### AddMosaic - -| Option | Description | Default | -| :----------: | :------------------------: | :-------------------------------------: | -| --mosaic_mod | type of mosaic -> squa_avg/ squa_random/ squa_avg_circle_edge/ rect_avg/random | squa_avg | -| --mosaic_size | mosaic size,if 0 -> auto size | 0 | -| --mask_extend | extend mosaic area | 10 | -| --mask_threshold | threshold of recognize mosaic position 0~255 | 64 | - -### CleanMosaic - -| Option | Description | Default | -| :----------: | :------------------------: | :-------------------------------------: | -| --traditional | if specified, use traditional image processing methods to clean mosaic | | -| --tr_blur | ksize of blur when using traditional method, it will affect final quality | 10 | -| --tr_down | downsample when using traditional method,it will affect final quality | 10 | -| --medfilt_num | medfilt window of mosaic movement in the video | 11 | - -### Style Transfer - -| Option | Description | Default | -| :----------: | :------------------------: | :-------------------------------------: | +## Introduction to options +If you need more effects, use '--option your-parameters' to enter what you need. + +### Base + +| Option | Description | Default | +| :----------: | :------------------------: | :-------------------------------------: | +| --use_gpu | if -1, do not use gpu | 0 | +| --media_path | your videos or images path | ./imgs/ruoruo.jpg | +| --mode | program running mode(auto/clean/add/style) | 'auto' | +| --model_path | pretrained model path | ./pretrained_models/mosaic/add_face.pth | +| --result_dir | output media will be saved here| ./result | +| --fps | read and output fps, if 0-> origin | 0 | + +### AddMosaic + +| Option | Description | Default | +| :----------: | :------------------------: | :-------------------------------------: | +| --mosaic_mod | type of mosaic -> squa_avg/ squa_random/ squa_avg_circle_edge/ rect_avg/random | squa_avg | +| --mosaic_size | mosaic size,if 0 -> auto size | 0 | +| --mask_extend | extend mosaic area | 10 | +| --mask_threshold | threshold of recognize mosaic position 0~255 | 64 | + +### CleanMosaic + +| Option | Description | Default | +| :----------: | :------------------------: | :-------------------------------------: | +| --traditional | if specified, use traditional image processing methods to clean mosaic | | +| --tr_blur | ksize of blur when using traditional method, it will affect final quality | 10 | +| --tr_down | downsample when using traditional method,it will affect final quality | 10 | +| --medfilt_num | medfilt window of mosaic movement in the video | 11 | + +### Style Transfer + +| Option | Description | Default | +| :----------: | :------------------------: | :-------------------------------------: | | --output_size | size of output media, if 0 -> origin |512| \ No newline at end of file diff --git a/docs/options_introduction_CN.md b/docs/options_introduction_CN.md index b71e079..7695740 100644 --- a/docs/options_introduction_CN.md +++ 
b/docs/options_introduction_CN.md @@ -1,37 +1,37 @@ -## 参数说明 -如果需要更多的效果, 请按照 '--option your-parameters' 输入所需要的参数 - -### 基本 - -| 选项 | 描述 | 默认 | -| :----------: | :------------------------: | :-------------------------------------: | -| --use_gpu | if -1, do not use gpu | 0 | -| --media_path | 需要处理的视频或者照片的路径 | ./imgs/ruoruo.jpg | -| --mode | 运行模式(auto/clean/add/style) | 'auto' | -| --model_path | 预训练模型的路径 | ./pretrained_models/mosaic/add_face.pth | -| --result_dir | 保存路径 | ./result | -| --fps | 限制视频输出的fps,0则为默认 | 0 | - -### 添加马赛克 - -| 选项 | 描述 | 默认 | -| :----------: | :------------------------: | :-------------------------------------: | -| --mosaic_mod | 马赛克类型 -> squa_avg/ squa_random/ squa_avg_circle_edge/ rect_avg/random | squa_avg | -| --mosaic_size | 马赛克大小,0则为自动 | 0 | -| --mask_extend | 拓展马赛克区域 | 10 | -| --mask_threshold | 马赛克区域识别阈值 0~255 | 64 | - -### 去除马赛克 - -| 选项 | 描述 | 默认 | -| :----------: | :------------------------: | :-------------------------------------: | -| --traditional | 如果输入这个参数则使用传统方法清除马赛克 | | -| --tr_blur | 传统方法模糊尺寸 | 10 | -| --tr_down | 传统方法下采样尺寸 | 10 | -| --medfilt_num | medfilt window of mosaic movement in the video | 11 | - -### 风格转换 - -| 选项 | 描述 | 默认 | -| :----------: | :------------------------: | :-------------------------------------: | +## 参数说明 +如果需要更多的效果, 请按照 '--option your-parameters' 输入所需要的参数 + +### 基本 + +| 选项 | 描述 | 默认 | +| :----------: | :------------------------: | :-------------------------------------: | +| --use_gpu | if -1, do not use gpu | 0 | +| --media_path | 需要处理的视频或者照片的路径 | ./imgs/ruoruo.jpg | +| --mode | 运行模式(auto/clean/add/style) | 'auto' | +| --model_path | 预训练模型的路径 | ./pretrained_models/mosaic/add_face.pth | +| --result_dir | 保存路径 | ./result | +| --fps | 限制视频输出的fps,0则为默认 | 0 | + +### 添加马赛克 + +| 选项 | 描述 | 默认 | +| :----------: | :------------------------: | :-------------------------------------: | +| --mosaic_mod | 马赛克类型 -> squa_avg/ squa_random/ squa_avg_circle_edge/ rect_avg/random | squa_avg | +| --mosaic_size | 马赛克大小,0则为自动 | 0 | +| --mask_extend | 拓展马赛克区域 | 10 | +| --mask_threshold | 马赛克区域识别阈值 0~255 | 64 | + +### 去除马赛克 + +| 选项 | 描述 | 默认 | +| :----------: | :------------------------: | :-------------------------------------: | +| --traditional | 如果输入这个参数则使用传统方法清除马赛克 | | +| --tr_blur | 传统方法模糊尺寸 | 10 | +| --tr_down | 传统方法下采样尺寸 | 10 | +| --medfilt_num | medfilt window of mosaic movement in the video | 11 | + +### 风格转换 + +| 选项 | 描述 | 默认 | +| :----------: | :------------------------: | :-------------------------------------: | | --output_size | 输出媒体的尺寸,如果是0则为原始尺寸 |512| \ No newline at end of file diff --git a/docs/pre-trained_models_introduction.md b/docs/pre-trained_models_introduction.md index 0428573..2c4a3f3 100644 --- a/docs/pre-trained_models_introduction.md +++ b/docs/pre-trained_models_introduction.md @@ -1,28 +1,28 @@ -## Introduction to pre-trained models -The current pre-trained models are divided into two categories(Add/Clean mosaic and StyleTransfer). -Download pre-trained model via [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ)
- -### Mosaic - -| Name | Description | -| :------------------------------: | :---------------------------------------------------------: | -| add_face.pth | Add mosaic to all faces in images/videos. | -| clean_face_HD.pth | Clean mosaic to all faces in images/video.
(RAM > 8GB). | -| add_youknow.pth | Add mosaic to all (FBI Warning) in images/videos. | -| clean_youknow_resnet_9blocks.pth | Clean mosaic to all (FBI Warning) in images/videos. | -| clean_youknow_video.pth | Clean mosaic to all (FBI Warning) in videos. | -| clean_youknow_video_HD.pth | Clean mosaic to all (FBI Warning) in videos.
(RAM > 8GB) | - -### Style Transfer - -| Name | Description | -| :---------------------: | :-------------------------------------------------------: | -| style_apple2orange.pth | Convert apples to oranges. | -| style_orange2apple.pth | Convert oranges to apples | -| style_summer2winter.pth | Convert summer to winter. | -| style_winter2summer.pth | Convert winter to summer. | -| style_cezanne.pth | Convert photos/video to Paul Cézanne style. | -| style_monet.pth | Convert photos/video to Claude Monet style. | -| style_ukiyoe.pth | Convert photos/video to Ukiyoe style. | -| style_vangogh.pth | Convert photos/video to Van Gogh style. | - +## Introduction to pre-trained models +The current pre-trained models are divided into two categories(Add/Clean mosaic and StyleTransfer). +Download pre-trained model via [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ)
+### Mosaic
+
+| Name | Description |
+| :------------------------------: | :-----------------------------------------------------: |
+| add_face.pth | Add mosaic to faces in images/videos. |
+| clean_face_HD.pth | Clean mosaic to faces in images/videos.<br>
(RAM > 8GB). | +| add_youknow.pth | Add mosaic to ... in images/videos. | +| clean_youknow_resnet_9blocks.pth | Clean mosaic to ... in images/videos. | +| clean_youknow_video.pth | Clean mosaic to ... in videos. | +| clean_youknow_video_HD.pth | Clean mosaic to ... in videos.
(RAM > 8GB) | + +### Style Transfer + +| Name | Description | +| :---------------------: | :-------------------------------------------------------: | +| style_apple2orange.pth | Convert apples to oranges. | +| style_orange2apple.pth | Convert oranges to apples | +| style_summer2winter.pth | Convert summer to winter. | +| style_winter2summer.pth | Convert winter to summer. | +| style_cezanne.pth | Convert photos/video to Paul Cézanne style. | +| style_monet.pth | Convert photos/video to Claude Monet style. | +| style_ukiyoe.pth | Convert photos/video to Ukiyoe style. | +| style_vangogh.pth | Convert photos/video to Van Gogh style. | + diff --git a/docs/pre-trained_models_introduction_CN.md b/docs/pre-trained_models_introduction_CN.md index fa60d1e..9156391 100644 --- a/docs/pre-trained_models_introduction_CN.md +++ b/docs/pre-trained_models_introduction_CN.md @@ -1,28 +1,28 @@ -## 预训练模型说明 -当前的预训练模型分为两类——添加/移除马赛克以及风格转换. -可以通过以下方式下载预训练模型 [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ)
- -### 马赛克 - -| 文件名 | 描述 | -| :------------------------------: | :-------------------------------------------: | -| add_face.pth | 对图片或视频中的脸部打码 | -| clean_face_HD.pth | 对图片或视频中的脸部去码
(要求内存 > 8GB). | -| add_youknow.pth | 对图片或视频中的十八禁内容打码 | -| clean_youknow_resnet_9blocks.pth | 对图片或视频中的十八禁内容去码 | -| clean_youknow_video.pth | 对视频中的十八禁内容去码 | -| clean_youknow_video_HD.pth | 对视频中的十八禁内容去码
(要求内存 > 8GB) | - -### 风格转换 - -| 文件名 | 描述 | -| :---------------------: | :-------------------------------------------------------: | -| style_apple2orange.pth | 苹果变橙子 | -| style_orange2apple.pth | 橙子变苹果 | -| style_summer2winter.pth | 夏天变冬天 | -| style_winter2summer.pth | 冬天变夏天 | -| style_cezanne.pth | 转化为Paul Cézanne 的绘画风格 | -| style_monet.pth | 转化为Claude Monet的绘画风格 | -| style_ukiyoe.pth | 转化为Ukiyoe的绘画风格 | -| style_vangogh.pth | 转化为Van Gogh的绘画风格 | - +## 预训练模型说明 +当前的预训练模型分为两类——添加/移除马赛克以及风格转换. +可以通过以下方式下载预训练模型 [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ)
+ +### 马赛克 + +| 文件名 | 描述 | +| :------------------------------: | :-------------------------------------------: | +| add_face.pth | 对图片或视频中的脸部打码 | +| clean_face_HD.pth | 对图片或视频中的脸部去码
(要求内存 > 8GB). | +| add_youknow.pth | 对图片或视频中的...内容打码 | +| clean_youknow_resnet_9blocks.pth | 对图片或视频中的...内容去码 | +| clean_youknow_video.pth | 对视频中的...内容去码 | +| clean_youknow_video_HD.pth | 对视频中的...内容去码
(要求内存 > 8GB) | + +### 风格转换 + +| 文件名 | 描述 | +| :---------------------: | :-------------------------------------------------------: | +| style_apple2orange.pth | 苹果变橙子 | +| style_orange2apple.pth | 橙子变苹果 | +| style_summer2winter.pth | 夏天变冬天 | +| style_winter2summer.pth | 冬天变夏天 | +| style_cezanne.pth | 转化为Paul Cézanne 的绘画风格 | +| style_monet.pth | 转化为Claude Monet的绘画风格 | +| style_ukiyoe.pth | 转化为Ukiyoe的绘画风格 | +| style_vangogh.pth | 转化为Van Gogh的绘画风格 | + diff --git a/docs/training_with_your_own_dataset.md b/docs/training_with_your_own_dataset.md new file mode 100644 index 0000000..cc05289 --- /dev/null +++ b/docs/training_with_your_own_dataset.md @@ -0,0 +1,73 @@ +# Training with your own dataset +Training with your own dataset requires a GPU with 6G memory (above GTX1060).
+We will use "face" as an example. If you don't have any pictures, you can download [CelebA](http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html) or [WIDER](http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/WiderFace_Results.html).
+
+## Getting Started
+#### Prerequisites
+ - Linux, Mac OS, Windows
+ - Python 3.6+
+ - [ffmpeg 3.4.6](http://ffmpeg.org/)
+ - [Pytorch 1.0+](https://pytorch.org/)
+ - NVIDIA GPU (with more than 6G memory) + CUDA & cuDNN<br>
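Before training, a quick way to confirm that PyTorch can see the GPU (a generic check, not part of this repo) is:
```python
import torch
print(torch.__version__)              # expect 1.0+
print(torch.cuda.is_available())      # expect True
print(torch.cuda.get_device_name(0))  # e.g. a GTX 1060 6GB or better
```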
+#### Dependencies
+This code depends on opencv-python, torchvision and matplotlib, all available via pip install.
+#### Clone this repo
+```bash
+git clone https://github.com/HypoX64/DeepMosaics
+cd DeepMosaics
+```
+## Make training datasets
+```bash
+cd make_datasets
+```
+### Add mosaic dataset
+Please generate masks for the images you want to add mosaics to (the number of images should be above 1000), then put the images in ```face/origin_image``` and the masks in ```face/mask```.<br>
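The expected layout after annotation is roughly the following ("face" is the example dataset name; the file names are illustrative):
```
../datasets/draw/face/
├── origin_image/   # 1000+ source pictures
│   ├── 0001.jpg
│   └── ...
└── mask/           # one white-on-black mask per picture, same basename, saved as .png
    ├── 0001.png
    └── ...
```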
+* You can use ```draw_mask.py``` to generate them.
+```bash
+python draw_mask.py --datadir 'dir for your pictures' --savedir ../datasets/draw/face
+#Press the left mouse button to draw the mask. Press 'S' to save the mask, 'A' to reduce the brush size, 'D' to increase the brush size, and 'W' to cancel drawing.
+```
+* If you want to get images from videos, you can use ```get_image_from_video.py```
+```bash
+python get_image_from_video.py --datadir 'dir for your videos' --savedir ../datasets/video2image --fps 1
+```
+### Clean mosaic dataset
+We provide several methods for generating clean mosaic datasets. For best results, we recommend training an addmosaic model on a small dataset first and then using it to generate a large dataset automatically. (Recommended: Method 2 for images and Method 4 for videos.)
+* Method 1: Use drawn masks to make pix2pix(HD) datasets (requires ```origin_image``` and ```mask```)
+```bash
+python make_pix2pix_dataset.py --datadir ../datasets/draw/face --hd --outsize 512 --fold 1 --name face --savedir ../datasets/pix2pix/face --mod drawn --minsize 128 --square
+```
+* Method 2: Use an addmosaic model to make pix2pix(HD) datasets (requires an addmosaic pre-trained model)
+```bash
+python make_pix2pix_dataset.py --datadir 'dir for your pictures' --hd --outsize 512 --fold 1 --name face --savedir ../datasets/pix2pix/face --mod network --model_path ../pretrained_models/mosaic/add_face.pth --minsize 128 --square --mask_threshold 128
+```
+* Method 3: Use Irregular Masks to make pix2pix(HD) datasets (requires [Irregular Masks](https://nv-adlr.github.io/publication/partialconv-inpainting))
+```bash
+python make_pix2pix_dataset.py --datadir 'dir for your pictures' --hd --outsize 512 --fold 1 --name face --savedir ../datasets/pix2pix/face --mod irregular --irrholedir ../datasets/Irregular_Holes_mask --square
+```
+* Method 4: Use an addmosaic model to make video datasets (requires an addmosaic pre-trained model; this is better for processing video mosaics)
+```bash
+python make_video_dataset.py --datadir 'dir for your videos' --model_path ../pretrained_models/mosaic/add_face.pth --mask_threshold 96 --savedir ../datasets/video/face
+```
+## Training
+### Add
+```bash
+cd train/add
+python train.py --gpu_id 0 --dataset ../../datasets/draw/face --savename face --loadsize 512 --finesize 360 --batchsize 16
+```
+### Clean
+* For image datasets (generated by ```make_pix2pix_dataset.py```)
+We use [pix2pix](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix) or [pix2pixHD](https://github.com/NVIDIA/pix2pixHD) to train the model. We take pix2pixHD as an example.
+```bash +git clone https://github.com/NVIDIA/pix2pixHD +cd pix2pixHD +pip install dominate +python train.py --name face --resize_or_crop resize_and_crop --loadSize 563 --fineSize 512 --label_nc 0 --no_instance --dataroot ../datasets/pix2pix/face +``` +* For video datasets(generated by ```make_video_dataset.py```) +```bash +cd train/clean +python train.py --dataset ../../datasets/video/face --savename face --savefreq 100000 --gan --hd --lr 0.0002 --lambda_gan 1 --gpu_id 0 --perload_num 8 +``` +## Testing +Put saved network to ```./pretrained_models/mosaic/``` and rename it as ```add_face.pth``` or ```clean_face_HD.pth``` or ```clean_face_video_HD.pth``` diff --git a/make_datasets/csv/video_used_time.csv b/make_datasets/csv/video_used_time.csv deleted file mode 100644 index 3250579..0000000 --- a/make_datasets/csv/video_used_time.csv +++ /dev/null @@ -1,40 +0,0 @@ -010412_249-1pon-whole1_hd.avi,00:12:00,00:13:33,00:14:26,00:15:06,00:19:35,00:24:30,00:25:53,00:29:29,00:29:55,00:30:30,00:31:43,00:32:54,00:33:39,00:35:55,00:38:30,00:38:49,00:39:47,00:41:15,00:42:35,00:43:15,00:43:50,00:45:30,00:46:33,00:47:35,00:49:10,00:49:20,00:51:04,00:51:20,00:53:10,00:55:05 -011013_511-1pon-whole1_hd.avi,00:16:09,00:16:43,00:19:12,00:19:54,00:24:52,00:26:23,00:29:20,00:31:40,00:32:16,00:36:45,00:37:15,00:37:35,00:38:00,00:38:40,00:41:40,00:46:09,00:57:50,00:58:10 -012514_744-1pon-whole1_hd.mp4,00:08:12,00:12:00,00:12:30,00:17:40,00:19:35,00:20:50,00:21:50,00:24:35,00:29:10,00:30:25,00:33:10,00:39:35,00:40:35,00:42:25,00:42:35,00:57:05,00:58:25,00:59:15 -020916_242-1pon-1080p.mp4,00:13:35,00:15:10,00:18:20,00:26:50,00:31:25,00:33:15,00:34:55,00:37:15,00:38:25,00:39:35,00:41:05,00:41:55,00:42:10,00:43:10,00:43:20,00:45:15,00:45:20,00:46:10,00:47:50,00:49:10,00:50:00,00:50:20,00:52:10,00:56:55,00:57:05,00:57:35,00:59:15,00:59:30 -031516_262-1pon-1080p.mp4,00:09:30,00:13:00,00:13:50,00:14:50,00:16:00,00:20:55,00:25:50,00:26:35,00:30:30,00:32:40,00:38:20,00:38:30,00:39:55,00:42:10,00:43:45,00:45:40,00:46:20,00:47:50,00:48:05,00:49:50,00:51:45,00:51:50,00:57:00 -031716_001-1pon-1080p.mp4,00:02:30,00:02:40,00:02:55,00:04:00,00:04:20,00:05:40,00:06:05,00:06:50,00:08:10,00:08:20,00:08:30,00:08:47,00:10:00,00:10:05,00:10:20,00:10:30,00:11:50,00:12:00,00:12:35,00:13:20,00:14:20,00:15:35 -032113_554-1pon-whole1_hd.avi,00:13:20,00:21:20,00:23:15,00:23:35,00:24:00,00:25:10,0:25:30,00:25:50,00:26:35,00:26:50,00:31:40,00:35:15,00:35:25,00:37:10,00:45:35,00:46:05,00:48:00,00:49:50,00:50:30,00:51:50,00:52:30,00:52:40,00:52:50,00:58:20,00:58:30,00:59:30,00:59:45,01:01:45,01:02:00,01:03:50,01:04:05,01:04:20,01:04:30,01:05:35,01:07:40 -032313_556-1pon-whole1_hd.avi,00:04:05,00:05:00,00:06:40,00:06:50,00:07:50,00:09:10,00:10:30,00:13:15,00:16:05,00:17:35,00:18:20,00:20:25,00:20:30,00:22:30,00:26:50,00:27:30,00:35:30,00:42:40,00:44:09,00:50:00,00:52:50,00:53:40,00:54:15,00:58:00,00:58:25,01:04:05,01:05:05,01:06:15,01:06:50,01:07:51,01:08:10 -032715_001-1pon-1080p.mp4,00:09:20,00:10:35,00:10:45,00:13:25,00:21:20,00:24:50,00:28:10,00:29:26,00:29:52,00:30:55,00:31:10,00:31:55,00:32:20,00:32:40,00:33:10,00:34:30,00:35:40,00:35:50,00:48:30,00:48:50,00:49:45,00:50:15,00:53:55,00:57:13,00:57:20,00:59:00,00:59:55 
-032715_004-1pon-1080p.mp4,00:22:30,00:22:55,00:24:44,00:26:15,00:28:00,00:28:40,00:30:40,00:35:40,00:38:20,00:38:50,00:39:50,00:41:30,00:42:10,00:42:30,00:43:40,00:44:05,00:44:35,00:45:17,00:45:36,00:46:23,00:46:55,00:47:20,00:47:40,00:48:05,00:48:30,00:50:50,00:52:00,00:53:30,00:53:45,00:54:25,00:54:45,00:57:40,00:58:00,00:58:40,00:58:50 -040111_063-1pon-whole1_hd.avi,00:08:25,00:08:45,00:09:00,00:10:55,00:16:40,00:17:05,00:17:35,00:19:10,00:27:00,00:28:05,00:29:05,00:31:40,00:36:00,00:37:50,00:45:30,00:46:15,00:47:45,00:50:15,00:52:50,00:53:47,00:53:58,00:55:05,00:56:15,00:58:40,00:59:00,00:59:20,00:59:45 -040814_786-1pon-whole1_hd.avi,00:04:40,00:05:00,00:06:50,00:10:20,00:21:00,00:23:35,00:24:10,00:26:40,00:28:35,00:29:15,00:29:20,00:31:15,00:32:50,00:36:10,00:39:40,00:42:00,00:42:50,00:44:00,00:44:15,00:44:36,00:45:00,00:45:20,00:47:20,00:48:10,00:48:30,00:53:50,00:54:43,00:55:20,00:59:15,00:59:30 -050915_077-1pon-1080p,00:11:00,00:12:30,00:19:20,00:19:50,00:21:00,00:22:00,00:23:40,00:24:30,00:28:20,00:33:50,00:36:00,00:37:30,00:38:50,00:39:30,00:41:50,00:44:20,00:48:45,00:49:25,00:50:45,00:51:00,00:53:05,00:54:00,00:54:27,00:57:30,00:59:10,01:00:30,01:04:10,01:04:20,01:04:30,01:04:50,01:05:20 -052215_084-1pon-1080p.mp4,00:26:50,00:27:15,00:30:20,00:33:20,00:34:00,00:37:00,00:41:00,00:43:00,00:44:30,00:47:40,00:50:35,00:50:40,00:51:40,00:55:20,00:55:50,00:55:55,00:56:20,00:57:30,00:57:40,00:59:10,00:59:15,01:00:05 -062015_101-1pon-1080p.mp4,00:11:00,00:12:47,00:13:10,00:14:20,00:15:20,00:16:20,00:17:10,00:17:25,00:19:45,00:21:05,00:23:40,00:27:40,00:28:10,00:37:15,00:41:30,00:43:20,00:44:25,00:46:51,00:47:20,00:49:00,00:50:40,00:51:50,00:52:50,00:55:00,00:56:20,00:58:10,00:59:00,10:00:00 -062715_105-1pon-1080p.mp4,00:11:30,00:11:55,00:12:00,00:12:30,00:13:45,00:16:50,00:18:25,00:19:20,00:20:40,00:25:15,00:36:20,00:36:40,00:37:25,00:39:05,00:39:50,00:40:55,00:41:55,00:45:40,00:43:30,00:44:15,00:45:30,00:47:40,00:50:05,00:50:10,00:50:20,00:50:30,00:55:10,00:56:35,00:58:40,01:00:15,01:05:05,01:05:15,01:05:30,01:05:50 -1pondo_070315_108_1080p.mp4,00:11:10,00:11:50,00:13:50,00:14:20,00:14:35,00:15:50,00:17:20,00:18:35,00:20:45,00:24:35,00:25:05,00:29:15,00:30:40,00:31:55,00:35:20,00:42:55,00:43:05,00:46:15,00:48:00,00:51:45,00:52:33,00:54:20,00:59:25,00:59:40,01:00:05 -071114_842-1pon-whole1_hd.mp4,00:09:50,00:11:25,00:16:35,00:18:20,00:22:10,00:25:25,00:26:35,00:33:50,00:35:40,00:43:10 -071715_116-1pon-1080p.mp4,00:10:50,00:11:30,00:12:50,00:15:10,00:16:45,00:17:05,00:25:20,00:26:45,00:28:30,00:30:20,00:32:55,00:34:30,00:37:40,00:38:40,00:40:20,00:41:20,00:44:10,00:47:15,00:55:00,00:59:40,00:59:50 -071815_117-1pon-1080p.mp4,00:14:50,00:15:10,00:18:05,00:14:50,00:25:55,00:26:25,00:32:45,00:33:40,00:43:15,00:45:05,00:45:45,00:48:40,00:48:50,00:55:45,10:00:20,01:00:35,01:01:00,01:01:10 -080815_130-1pon-1080p,00:14:50,00:17:15,00:17:20,00:23:55,00:25:30,00:25:55,00:28:20,00:28:30,00:30:10,00:31:00,00:33:25,00:33:35,00:33:45,00:33:50,00:39:25,00:39:50,00:40:25,00:44:05,00:45:00,00:45:40,00:45:50,00:46:55,00:49:15,00:49:25,00:46:40,00:50:10,00:50:15,00:51:25,00:51:50,00:53:14,00:53:20,00:54:15,00:56:15,00:56:25,00:56:45,00:57:45,00:57:30,00:58:00,00:56:45,00:56:55,01:00:00,01:00:05,01:00:25,01:00:30 -081514_863-1pon-whole1_hd.avi,00:10:30,00:26:00,00:30:00,00:38:21,00:40:15,00:40:30,00:49:10,00:50:05,00:57:10,00:59:00 
-090614_877-1pon-whole1_hd.mp4,00:04:45,00:05:15,00:12:25,00:12:40,00:15:00,00:15:15,00:16:25,00:20:50,00:21:45,00:26:10,00:33:35,00:35:55,00:37:50,00:37:55,00:38:12,00:39:55,00:41:50,00:44:27,00:44:37,00:46:30,00:47:35,00:47:40,00:48:20,00:59:50 -091215_152-1pon-1080p.mp4,00:05:30,00:06:10,00:06:20,00:08:15,00:10:10,00:11:15,00:12:15,00:12:55,0:15:15,00:15:35,00:18:00,00:24:45,00:25:45,00:33:45,00:35:32,00:37:35,00:37:55,00:38:50,00:42:15,00:45:00,00:47:55,00:48:20,00:48:35,00:48:42,00:49:43,00:50:15,00:51:10,00:55:35,00:57:00,00:57:55,01:03:30,01:05:00 -092813_670-1pon-whole1_hd.avi,00:16:32,00:19:00,00:22:10,00:23:20,00:23:40,00:30:20,00:32:00,00:35:00,00:36:50,00:41:40,00:44:50,00:52:45,00:54:00 -103015_180-1pon-1080p.mp4,00:24:50,00:31:25,00:41:20,00:48:10,00:48:50,00:49:20,00:50:15,00:52:45,00:53:30,01:02:40,01:03:35,01:09:50,01:15:05,01:16:50 -110615_185-1pon-1080p.mp4,00:15:00,00:15:40,00:34:15,00:34:50,00:35:30,00:37:05,00:39:35,00:40:30,00:41:40,00:47:35,00:50:15,00:51:01,00:51:35,00:54:15,00:55:40,00:55:50,00:57:20,00:59:35,01:00:00,01:00:25 -120310_979-1pon-whole1_hd.avi,00:15:10,00:14:25,00:14:30,00:14:50,00:15:45,00:16:35,00:16:55,00:17:25,00:19:25,00:20:45,00:27:05,00:30:17,00:32:00,00:33:50,00:35:45,00:38:55,00:40:25,00:40:40,00:41:10,00:42:50,00:44:35,00:45:15,00:46:15,00:48:00,00:49:10,00:50:10,00:54:00,00:55:23,00:55:30,00:55:50 -021315-806-carib-1080p.mp4,00:13:30,00:15:20,00:17:40,00:21:50,00:22:25,00:24:35,00:28:50,00:28:52,00:31:00,00:37:25,00:37:35,00:38:20,00:38:45,00:43:30,00:48:35,00:51:30,00:51:50,00:52:19,00:56:20,00:58:35 -021715-809-carib-1080p.mp4,00:17:30,00:20:35,00:21:00,00:22:00,00:23:55,00:24:15,00:28:40,00:37:20,00:39:05,00:40:05,00:40:50,00:42:45,00:45:00,00:46:40,00:48:00,00:48:20,00:51:30,00:52:10,00:53:35,00:54:10,00:54:20,00:56:45,00:56:55,00:59:10,00:59:35,00:59:55 -022715-817-carib-1080p.mp4,00:57:52,00:08:50,00:10:00,00:12:50,00:14:05,00:18:25,00:20:45,00:20:57,00:22:15,00:23:30,00:23:55,00:24:18,00:24:50,00:25:25,00:26:30,00:26:55,00:28:50,00:31:55,00:34:00,00:34:35,00:42:45,00:44:33 -030914-558-carib-high_1.mp4,00:10:45,00:12:45,00:14:40,00:16:33,00:19:40,00:21:35,00:21:55,00:23:05,00:26:15,00:27:30,00:29:55,00:31:10,00:31:40,00:36:40,00:41:40,00:42:40,00:44:50,00:49:50,00:52:25,00:53:50,00:54:30,00:55:20,00:55:10,00:57:05,00:57:25,00:59:05,01:00:15,01:02:11,01:03:55,01:05:10 -031815-830-carib-1080p.mp4,00:13:15,00:13:25,00:13:55,00:14:40,00:15:40,00:17:30,00:18:20,00:19:10,00:21:00,00:22:10,00:22:25,00:23:25,00:27:10,00:28:33,00:35:05,00:35:40,00:37:50,00:38:00,00:39:35,00:41:35,00:42:40,00:47:40,00:50:33,00:55:50,01:02:10,01:05:20,01:05:30 -032016-121-carib-1080p.mp4,00:27:20,00:28:40,00:28:55,00:30:35,00:36:10,00:39:10,00:40:30,00:43:00,00:46:05,00:50:00,00:56:05,00:56:20,00:59:20 -032913-301-carib-whole_hd1.wmv,00:06:00,00:09:40,00:11:00,00:13:00,00:15:05,00:16:40,00:18:05,00:20:00,00:39:31,00:34:35,00:44:50,00:47:25,00:49:50,00:51:20,00:54:58,00:56:55,00:59:50,01:00:50 -032914-571-carib-high_1.mp4,00:13:30,00:13:55,00:16:40,00:15:25,00:20:40,00:26:45,00:32:05,00:33:15,00:36:40,00:38:55,00:39:00,00:39:25,00:47:30,00:49:20 -042514-588-carib-high_1.mp4,00:10:30,00:11:15,00:19:15,00:20:00,00:20:30,00:22:05,00:22:45,00:22:53,00:24:15,00:30:50,00:32:25,00:34:15,00:34:45,00:34:55,0:36:05,00:37:20,00:37:40,00:38:30,00:39:35,00:41:00,00:43:30,00:43:40 
-052315-884-carib-1080p.mp4,00:09:35,00:14:10,00:14:30,00:14:40,00:17:10,00:17:50,00:19:00,00:20:20,01:21:55,00:22:40,00:23:05,00:24:00,00:26:00,00:27:15,00:30:25,00:32:50,00:37:55,0:39:35,00:40:10,00:41:40,00:43:15,00:43:40,00:47:55,00:49:30,00:49:55,00:58:55,01:00:40
-053114-612-carib-high_1.mp4,00:08:35,00:13:35,00:15:25,00:16:40,00:20:35,00:22:25,00:26:10,00:29:10,00:32:55,00:34:10,00:37:05,00:37:40,00:39:40,00:40:52,00:42:08,00:42:15
-062615-908-carib-1080p.mp4,00:13:45,00:14:40,00:15:45,00:16:11,00:17:00,00:22:10,00:23:40,00:26:10,00:27:15,00:27:50,00:31:30,00:35:00,00:40:20,00:43:10,00:44:35,00:47:17,00:50:25,00:51:15,00:52:20,00:54:10,00:55:30,01:00:20
\ No newline at end of file
diff --git a/make_datasets/draw_mask.py b/make_datasets/draw_mask.py
index 75a6950..cb66ed9 100644
--- a/make_datasets/draw_mask.py
+++ b/make_datasets/draw_mask.py
@@ -6,18 +6,25 @@ import random
 import sys
 sys.path.append("..")
+from cores import Options
 from util import util
 from util import image_processing as impro
 
-image_dir = './datasets_img/v2im'
-mask_dir = './datasets_img/v2im_mask'
-util.makedirs(mask_dir)
-files = os.listdir(image_dir)
-files_new =files.copy()
-print('find image:',len(files))
-masks = os.listdir(mask_dir)
-print('mask:',len(masks))
+opt = Options()
+opt.parser.add_argument('--datadir',type=str,default=' ', help='your images dir')
+opt.parser.add_argument('--savedir',type=str,default='../datasets/draw/face', help='')
+opt = opt.getparse()
+
+mask_savedir = os.path.join(opt.savedir,'mask')
+img_savedir = os.path.join(opt.savedir,'origin_image')
+util.makedirs(mask_savedir)
+util.makedirs(img_savedir)
+
+filepaths = util.Traversal(opt.datadir)
+filepaths = util.is_imgs(filepaths)
+random.shuffle(filepaths)
+print('find image:',len(filepaths))
 
 # mouse callback function
 drawing = False # true if mouse is pressed
@@ -32,68 +39,58 @@ def draw_circle(event,x,y,flags,param):
 
     elif event == cv2.EVENT_MOUSEMOVE:
         if drawing == True:
-            cv2.circle(img,(x,y),brushsize,(0,255,0),-1)
+            cv2.circle(img_drawn,(x,y),brushsize,(0,255,0),-1)
 
     elif event == cv2.EVENT_LBUTTONUP:
         drawing = False
-        cv2.circle(img,(x,y),brushsize,(0,255,0),-1)
+        cv2.circle(img_drawn,(x,y),brushsize,(0,255,0),-1)
 
-def makemask(img):
+def makemask(img_drawn):
     # starttime = datetime.datetime.now()
-    mask = np.zeros(img.shape, np.uint8)
-    for row in range(img.shape[0]):
-        for col in range(img.shape[1]):
-            # if (img[row,col,:] == [0,255,0]).all(): #too slow
-            if img[row,col,0] == 0:
-                if img[row,col,1] == 255:
-                    if img[row,col,2] == 0:
+    mask = np.zeros(img_drawn.shape, np.uint8)
+    for row in range(img_drawn.shape[0]):
+        for col in range(img_drawn.shape[1]):
+            # if (img_drawn[row,col,:] == [0,255,0]).all(): #too slow
+            if img_drawn[row,col,0] == 0:
+                if img_drawn[row,col,1] == 255:
+                    if img_drawn[row,col,2] == 0:
                         mask[row,col,:] = [255,255,255]
-    # endtime = datetime.datetime.now()
-    # print('Cost time:',(endtime-starttime))
     return mask
-
-for i in range(len(masks)):
-    masks[i]=masks[i].replace('.png','.jpg')
-for file in files:
-    if file in masks:
-        files_new.remove(file)
-files = files_new
-# files = list(set(files)) #Distinct
-print('remain:',len(files))
-random.shuffle(files)
-# files.sort()
 cnt = 0
+for file in filepaths:
+    try:
+        cnt += 1
+        img = impro.imread(file,loadsize=512)
+        img_drawn = img.copy()
+        cv2.namedWindow('image')
+        cv2.setMouseCallback('image',draw_circle) #MouseCallback
+        while(1):
 
-for file in files:
-    cnt += 1
-    img = cv2.imread(os.path.join(image_dir,file))
-    img = impro.resize(img,512)
-
-    cv2.namedWindow('image')
-    cv2.setMouseCallback('image',draw_circle) #MouseCallback
-    while(1):
-
-        cv2.imshow('image',img)
-        k = cv2.waitKey(1) & 0xFF
-        if k == ord(' '):
-            img = impro.resize(img,256)
-            mask = makemask(img)
-            cv2.imwrite(os.path.join(mask_dir,os.path.splitext(file)[0]+'.png'),mask)
-            print(os.path.join(mask_dir,os.path.splitext(file)[0]+'.png'))
-            # cv2.destroyAllWindows()
-            print('remain:',len(files)-cnt)
-            brushsize = 20
-            break
-        elif k == ord('a'):
-            brushsize -= 5
-            if brushsize<5:
-                brushsize = 5
-            print('brushsize:',brushsize)
-        elif k == ord('d'):
-            brushsize += 5
-            print('brushsize:',brushsize)
-        elif k == ord('w'):
-            print('remain:',len(files)-cnt)
-            break
+            cv2.imshow('image',img_drawn)
+            k = cv2.waitKey(1) & 0xFF
+            if k == ord('s'):
+
+                img_drawn = impro.resize(img_drawn,256)
+                mask = makemask(img_drawn)
+                cv2.imwrite(os.path.join(mask_savedir,os.path.splitext(os.path.basename(file))[0]+'.png'),mask)
+                cv2.imwrite(os.path.join(img_savedir,os.path.basename(file)),img)
+                print('Saved:',os.path.join(mask_savedir,os.path.splitext(os.path.basename(file))[0]+'.png'))
+                # cv2.destroyAllWindows()
+                print('remain:',len(filepaths)-cnt)
+                brushsize = 20
+                break
+            elif k == ord('a'):
+                brushsize -= 5
+                if brushsize<5:
+                    brushsize = 5
+                print('brushsize:',brushsize)
+            elif k == ord('d'):
+                brushsize += 5
+                print('brushsize:',brushsize)
+            elif k == ord('w'):
+                print('remain:',len(filepaths)-cnt)
+                break
+    except Exception as e:
+        print(file,e)
 
-# cv2.destroyAllWindows()
\ No newline at end of file
diff --git a/make_datasets/get_image_from_video.py b/make_datasets/get_image_from_video.py
index fffcde5..b8dedbc 100644
--- a/make_datasets/get_image_from_video.py
+++ b/make_datasets/get_image_from_video.py
@@ -1,19 +1,17 @@
 import os
-import numpy as np
-import cv2
-import random
-import csv
-
 import sys
 sys.path.append("..")
+from cores import Options
 from util import util,ffmpeg
-from util import image_processing as impro
-files = util.Traversal('./videos')
+opt = Options()
+opt.parser.add_argument('--datadir',type=str,default='', help='your video dir')
+opt.parser.add_argument('--savedir',type=str,default='../datasets/video2image', help='')
+opt = opt.getparse()
+
+files = util.Traversal(opt.datadir)
 videos = util.is_videos(files)
-output_dir = './datasets_img/v2im'
-util.makedirs(output_dir)
-FPS = 1
-util.makedirs(output_dir)
+
+util.makedirs(opt.savedir)
 for video in videos:
-    ffmpeg.continuous_screenshot(video, output_dir, FPS)
\ No newline at end of file
+    ffmpeg.continuous_screenshot(video, opt.savedir, opt.fps)
\ No newline at end of file
diff --git a/make_datasets/make_pix2pix_dataset.py b/make_datasets/make_pix2pix_dataset.py
new file mode 100644
index 0000000..68c39c8
--- /dev/null
+++ b/make_datasets/make_pix2pix_dataset.py
@@ -0,0 +1,190 @@
+import os
+import random
+import sys
+import datetime
+import time
+import shutil
+import threading
+import warnings
+warnings.filterwarnings(action='ignore')
+
+import numpy as np
+import cv2
+
+sys.path.append("..")
+from models import runmodel,loadmodel
+import util.image_processing as impro
+from util import util,mosaic,data
+from cores import Options
+
+
+opt = Options()
+opt.parser.add_argument('--datadir',type=str,default='../datasets/draw/face', help='')
+opt.parser.add_argument('--savedir',type=str,default='../datasets/pix2pix/face', help='')
+opt.parser.add_argument('--name',type=str,default='', help='save name')
+opt.parser.add_argument('--mod',type=str,default='drawn', help='drawn | network | irregular | drawn,irregular | network,irregular')
+opt.parser.add_argument('--square', action='store_true', help='if specified, crop to square')
+opt.parser.add_argument('--irrholedir',type=str,default='../datasets/Irregular_Holes_mask', help='')
+opt.parser.add_argument('--hd', action='store_true', help='if specified, make dataset for pix2pix_HD; otherwise for pix2pix')
+opt.parser.add_argument('--savemask', action='store_true', help='if specified, save mask')
+opt.parser.add_argument('--outsize', type=int ,default= 512,help='')
+opt.parser.add_argument('--fold', type=int ,default= 1,help='')
+opt.parser.add_argument('--start', type=int ,default= 0,help='')
+opt.parser.add_argument('--minsize', type=int ,default= 128,help='when [square], minimal roi size')
+opt.parser.add_argument('--quality', type=int ,default= 40,help='when [square], minimal quality')
+
+opt = opt.getparse()
+
+util.makedirs(opt.savedir)
+util.writelog(os.path.join(opt.savedir,'opt.txt'),
+              str(time.asctime(time.localtime(time.time())))+'\n'+util.opt2str(opt))
+opt.mod = (opt.mod).split(',')
+
+#save dir
+if opt.hd:
+    train_A_path = os.path.join(opt.savedir,'train_A')
+    train_B_path = os.path.join(opt.savedir,'train_B')
+    util.makedirs(train_A_path)
+    util.makedirs(train_B_path)
+else:
+    train_path = os.path.join(opt.savedir,'train')
+    util.makedirs(train_path)
+if opt.savemask:
+    mask_save_path = os.path.join(opt.savedir,'mask')
+    util.makedirs(mask_save_path)
+
+#read dir
+if 'drawn' in opt.mod:
+    imgpaths = util.Traversal(os.path.join(opt.datadir,'origin_image'))
+    imgpaths.sort()
+    maskpaths = util.Traversal(os.path.join(opt.datadir,'mask'))
+    maskpaths.sort()
+if 'network' in opt.mod or 'irregular' in opt.mod:
+    imgpaths = util.Traversal(opt.datadir)
+    random.shuffle(imgpaths)
+if 'irregular' in opt.mod:
+    irrpaths = util.Traversal(opt.irrholedir)
+
+
+#def network
+if 'network' in opt.mod:
+    net = loadmodel.bisenet(opt,'roi')
+
+
+# def checksaveimage(opt,img,mask):
+
+#     #check
+#     saveflag = True
+#     x,y,size,area = impro.boundingSquare(mask, random.uniform(1.4,1.6))
+#     if area < 1000:
+#         saveflag = False
+#     else:
+#         if opt.square:
+#             if size < opt.minsize:
+#                 saveflag = False
+#             else:
+#                 img = impro.resize(img[y-size:y+size,x-size:x+size],opt.outsize,interpolation=cv2.INTER_CUBIC)
+#                 mask = impro.resize(mask[y-size:y+size,x-size:x+size],opt.outsize,interpolation=cv2.INTER_CUBIC)
+#                 if impro.Q_lapulase(img) opt.minmaskarea and size>opt.minsize and impro.Q_lapulase(img)>opt.quality:
+                cnt +=1
+                if cnt == opt.time:
+                    # print(second)
+                    timestamps.append(util.second2stamp(cut_point*opt.interval))
+        util.writelog(os.path.join(opt.savedir,'opt.txt'),videopath+'\n'+str(timestamps))
+        #print(timestamps)
+
+        # util.clean_tempfiles()
+        # fps,endtime,height,width = ffmpeg.get_video_infos(videopath)
+        # # print(fps,endtime,height,width)
+        # ffmpeg.continuous_screenshot(videopath, './tmp/video2image', 1)
+
+        # # find where to cut
+        # print('Find where to cut...')
+        # timestamps=[]
+        # imagepaths = util.Traversal('./tmp/video2image')
+        # for second in range(int(endtime)):
+        #     if second%opt.interval==0:
+        #         cnt = 0
+        #         for i in range(opt.time):
+        #             img = impro.imread(imagepaths[second+i])
+        #             mask = runmodel.get_ROI_position(img,net,opt)[0]
+        #             if not opt.all_mosaic_area:
+        #                 mask = impro.find_mostlikely_ROI(mask)
+        #             if impro.mask_area(mask) > opt.minmaskarea and impro.Q_lapulase(img)>opt.quality:
+        #                 # print(impro.mask_area(mask))
+        #                 cnt +=1
+        #             if cnt == opt.time:
+        #                 # print(second)
+        #                 timestamps.append(util.second2stamp(second))
+
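A cut point survives the scan above only when opt.time consecutive sampled frames pass the mask-area and quality checks, and the surviving index is then converted to an ffmpeg-style timestamp. A minimal sketch of that conversion, assuming util.second2stamp (imported from util above) maps an integer second count to 'HH:MM:SS':

    def second2stamp(second):
        # e.g. second2stamp(3725) -> '01:02:05'
        return '%02d:%02d:%02d' % (second//3600, (second%3600)//60, second%60)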
#generate datasets + print('Generate datasets...') + for timestamp in timestamps: + savecnt = '%05d' % result_cnt + origindir = os.path.join(opt.savedir,savecnt,'origin_image') + maskdir = os.path.join(opt.savedir,savecnt,'mask') + util.makedirs(origindir) + util.makedirs(maskdir) + + util.clean_tempfiles() + ffmpeg.video2image(videopath, './tmp/video2image/%05d.'+opt.tempimage_type, + start_time = timestamp,last_time = util.second2stamp(opt.time)) + + endtime = datetime.datetime.now() + print(str(video_cnt)+'/'+str(len(videopaths))+' ', + util.get_bar(100*video_cnt/len(videopaths),35),'', + util.second2stamp((endtime-starttime).seconds)+'/'+util.second2stamp((endtime-starttime).seconds/video_cnt*len(videopaths))) + + imagepaths = util.Traversal('./tmp/video2image') + imagepaths = sorted(imagepaths) + imgs=[];masks=[] + mask_flag = False + + for imagepath in imagepaths: + img = impro.imread(imagepath) + mask = runmodel.get_ROI_position(img,net,opt,keepsize=True)[0] + imgs.append(img) + masks.append(mask) + if not mask_flag: + mask_avg = mask.astype(np.float64) + mask_flag = True + else: + mask_avg += mask.astype(np.float64) + + mask_avg = np.clip(mask_avg/len(imagepaths),0,255).astype('uint8') + mask_avg = impro.mask_threshold(mask_avg,20,64) + if not opt.all_mosaic_area: + mask_avg = impro.find_mostlikely_ROI(mask_avg) + x,y,size,area = impro.boundingSquare(mask_avg,Ex_mul=random.uniform(1.1,1.5)) + + for i in range(len(imagepaths)): + img = impro.resize(imgs[i][y-size:y+size,x-size:x+size],opt.outsize,interpolation=cv2.INTER_CUBIC) + mask = impro.resize(masks[i][y-size:y+size,x-size:x+size],opt.outsize,interpolation=cv2.INTER_CUBIC) + impro.imwrite(os.path.join(origindir,'%05d'%(i+1)+'.jpg'), img) + impro.imwrite(os.path.join(maskdir,'%05d'%(i+1)+'.png'), mask) + + result_cnt+=1 + + except Exception as e: + video_cnt +=1 + util.writelog(os.path.join(opt.savedir,'opt.txt'), + videopath+'\n'+str(result_cnt)+'\n'+str(e)) + video_cnt +=1 diff --git a/make_datasets/use_addmosaic_model_make_dataset.py b/make_datasets/use_addmosaic_model_make_dataset.py deleted file mode 100644 index 7c04dc0..0000000 --- a/make_datasets/use_addmosaic_model_make_dataset.py +++ /dev/null @@ -1,73 +0,0 @@ -import sys -import os -import random -import datetime - -import numpy as np -import cv2 - -import torch -import torch.backends.cudnn as cudnn -import torch.nn as nn -from torch import optim - -from unet import UNet -from mosaic import random_mosaic -import image_processing as impro - - - -def runmodel(img,net): - img=impro.image2folat(img,3) - img=img.reshape(1,3,128,128) - img = torch.from_numpy(img) - img=img.cuda() - pred = net(img) - pred = (pred.cpu().detach().numpy()*255) - pred = pred.reshape(128,128).astype('uint8') - return pred - - - -dir_img = './origin_image/' -dir_mosaic = './mosaic/' -dir_mask = './mask/' -dir_dataset = './dataset/' -dir_checkpoint = 'checkpoints/' - -net = UNet(n_channels = 3, n_classes = 1) -net.load_state_dict(torch.load(dir_checkpoint+'mosaic_position.pth')) -net.cuda() -net.eval() -# cudnn.benchmark = True -files = os.listdir(dir_mosaic) - -for i,file in enumerate(files,1): - orgin_image = cv2.imread(dir_img+file) - mosaic_image = cv2.imread(dir_mosaic+file) - img = impro.resize(mosaic_image,128) - img1,img2 = impro.spiltimage(img) - mask1 =runmodel(img1,net) - mask2 =runmodel(img2,net) - mask = impro.mergeimage(mask1,mask2,img) - - # test_mask = mask.copy() - - mask = impro.mask_threshold(mask,blur=5,threshold=128) - if impro.mask_area(mask) > 1: - h,w = orgin_image.shape[:2] 
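Both this removed script and the reworked runmodel.get_mosaic_position later in this patch rescale the ROI square found on the downsized segmentation input back to the full-resolution frame. A minimal sketch of that mapping, assuming x, y give the square's center and size its half-width:

    def to_fullres(x, y, size, h, w, seg_size=128):
        # seg_size is the segmentation input's short side: 128 here, 360 in runmodel.py
        rat = min(h, w) / float(seg_size)
        return int(rat*x), int(rat*y), int(rat*size)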
- mosaic_image = cv2.resize(mosaic_image,(w,h)) - # test_mask = cv2.resize(test_mask,(w,h)) - # test_mask = impro.ch_one2three(test_mask) - - x,y,size,area = impro.boundingSquare(mask,Ex_mul=1.5) - rat = min(orgin_image.shape[:2])/128.0 - x,y,size = int(rat*x),int(rat*y),int(rat*size) - orgin_crop = orgin_image[y-size:y+size,x-size:x+size] - mosaic_crop = mosaic_image[y-size:y+size,x-size:x+size] - # mosaic_crop = test_mask[y-size:y+size,x-size:x+size] - - result = impro.makedataset(mosaic_crop,orgin_crop) - cv2.imwrite(dir_dataset+file,result) - if i%1000==0: - print(i,'image finished.') diff --git a/make_datasets/use_addmosaic_model_make_video_dataset.py b/make_datasets/use_addmosaic_model_make_video_dataset.py deleted file mode 100644 index c972fed..0000000 --- a/make_datasets/use_addmosaic_model_make_video_dataset.py +++ /dev/null @@ -1,86 +0,0 @@ -import os -import numpy as np -import cv2 -import random - -import sys -sys.path.append("..") -from models import runmodel,loadmodel -from util import mosaic,util,ffmpeg,filt -from util import image_processing as impro -from cores import options - -opt = options.Options().getparse() -util.file_init(opt) - -videos = os.listdir('./video') -videos.sort() -opt.model_path = '../pretrained_models/add_youknow_128.pth' -opt.use_gpu = True -Ex = 1.4 -Area_Type = 'normal' -suffix = '' - -net = loadmodel.unet(opt) -for i,path in enumerate(videos,0): - try: - path = os.path.join('./video',path) - util.clean_tempfiles() - ffmpeg.video2voice(path,'./tmp/voice_tmp.mp3') - ffmpeg.video2image(path,'./tmp/video2image/output_%05d.'+opt.tempimage_type) - imagepaths=os.listdir('./tmp/video2image') - imagepaths.sort() - - # get position - positions = [] - img_ori_example = impro.imread(os.path.join('./tmp/video2image',imagepaths[0])) - mask_avg = np.zeros((impro.resize(img_ori_example, 128)).shape[:2]) - for imagepath in imagepaths: - imagepath = os.path.join('./tmp/video2image',imagepath) - #print('Find ROI location:',imagepath) - img = impro.imread(imagepath) - x,y,size,mask = runmodel.get_mosaic_position(img,net,opt,threshold = 80) - cv2.imwrite(os.path.join('./tmp/ROI_mask', - os.path.basename(imagepath)),mask) - positions.append([x,y,size]) - mask_avg = mask_avg + mask - #print('Optimize ROI locations...') - mask_index = filt.position_medfilt(np.array(positions), 13) - - mask = np.clip(mask_avg/len(imagepaths),0,255).astype('uint8') - mask = impro.mask_threshold(mask,20,32) - x,y,size,area = impro.boundingSquare(mask,Ex_mul=Ex) - rat = min(img_ori_example.shape[:2])/128.0 - x,y,size = int(rat*x),int(rat*y),int(rat*size) - cv2.imwrite(os.path.join('./tmp/ROI_mask_check', - 'test_show.png'),mask) - if size !=0 : - mask_path = './dataset/'+os.path.splitext(os.path.basename(path))[0]+suffix+'/mask' - ori_path = './dataset/'+os.path.splitext(os.path.basename(path))[0]+suffix+'/ori' - mosaic_path = './dataset/'+os.path.splitext(os.path.basename(path))[0]+suffix+'/mosaic' - os.makedirs('./dataset/'+os.path.splitext(os.path.basename(path))[0]+suffix) - os.makedirs(mask_path) - os.makedirs(ori_path) - os.makedirs(mosaic_path) - #print('Add mosaic to images...') - mosaic_size = mosaic.get_autosize(img_ori_example,mask,area_type = Area_Type)*random.uniform(1,2) - models = ['squa_avg','rect_avg','squa_mid'] - mosaic_type = random.randint(0,len(models)-1) - rect_rat = random.uniform(1.2,1.6) - for i in range(len(imagepaths)): - mask = impro.imread(os.path.join('./tmp/ROI_mask',imagepaths[mask_index[i]]),mod = 'gray') - img_ori = 
impro.imread(os.path.join('./tmp/video2image',imagepaths[i])) - img_mosaic = mosaic.addmosaic_normal(img_ori,mask,mosaic_size,model = models[mosaic_type],rect_rat=rect_rat) - mask = impro.resize(mask, min(img_ori.shape[:2])) - - img_ori_crop = impro.resize(img_ori[y-size:y+size,x-size:x+size],256) - img_mosaic_crop = impro.resize(img_mosaic[y-size:y+size,x-size:x+size],256) - mask_crop = impro.resize(mask[y-size:y+size,x-size:x+size],256) - - cv2.imwrite(os.path.join(ori_path,os.path.basename(imagepaths[i])),img_ori_crop) - cv2.imwrite(os.path.join(mosaic_path,os.path.basename(imagepaths[i])),img_mosaic_crop) - cv2.imwrite(os.path.join(mask_path,os.path.basename(imagepaths[i])),mask_crop) - except Exception as e: - print(e) - - print(util.get_bar(100*i/len(videos),num=50)) \ No newline at end of file diff --git a/make_datasets/use_drawn_mask_make_dataset.py b/make_datasets/use_drawn_mask_make_dataset.py deleted file mode 100644 index 56dfe54..0000000 --- a/make_datasets/use_drawn_mask_make_dataset.py +++ /dev/null @@ -1,68 +0,0 @@ -import numpy as np -import cv2 -import os -from torchvision import transforms -from PIL import Image -import random -import sys -sys.path.append("..") -import util.image_processing as impro -from util import util,mosaic -import datetime -import shutil - -mask_dir = '/media/hypo/Project/MyProject/DeepMosaics/DeepMosaics/train/add/datasets/av/mask' -img_dir ='/media/hypo/Project/MyProject/DeepMosaics/DeepMosaics/train/add/datasets/av/origin_image' -output_dir = './datasets_img' -util.makedirs(output_dir) -HD = True # if false make dataset for pix2pix, if Ture for pix2pix_HD -MASK = True # if True, output mask,too -OUT_SIZE = 256 -FOLD_NUM = 2 -Bounding = False - -if HD: - train_A_path = os.path.join(output_dir,'train_A') - train_B_path = os.path.join(output_dir,'train_B') - util.makedirs(train_A_path) - util.makedirs(train_B_path) -else: - train_path = os.path.join(output_dir,'train') - util.makedirs(train_path) -if MASK: - mask_path = os.path.join(output_dir,'mask') - util.makedirs(mask_path) - -mask_names = os.listdir(mask_dir) -img_names = os.listdir(img_dir) -mask_names.sort() -img_names.sort() -print('Find images:',len(img_names)) - -cnt = 0 -for fold in range(FOLD_NUM): - for img_name,mask_name in zip(img_names,mask_names): - try: - img = impro.imread(os.path.join(img_dir,img_name)) - mask = impro.imread(os.path.join(mask_dir,mask_name),'gray') - mask = impro.resize_like(mask, img) - x,y,size,area = impro.boundingSquare(mask, 1.5) - if area > 100: - if Bounding: - img = impro.resize(img[y-size:y+size,x-size:x+size],OUT_SIZE) - mask = impro.resize(mask[y-size:y+size,x-size:x+size],OUT_SIZE) - img_mosaic = mosaic.addmosaic_random(img, mask) - - if HD: - cv2.imwrite(os.path.join(train_A_path,'%05d' % cnt+'.jpg'), img_mosaic) - cv2.imwrite(os.path.join(train_B_path,'%05d' % cnt+'.jpg'), img) - else: - merge_img = impro.makedataset(img_mosaic, img) - cv2.imwrite(os.path.join(train_path,'%05d' % cnt+'.jpg'), merge_img) - if MASK: - cv2.imwrite(os.path.join(mask_path,'%05d' % cnt+'.png'), mask) - print("Processing:",img_name," ","Remain:",len(img_names)*FOLD_NUM-cnt) - - except Exception as e: - print(img_name,e) - cnt += 1 diff --git a/make_datasets/use_irregular_holes_make_dataset.py b/make_datasets/use_irregular_holes_make_dataset.py deleted file mode 100644 index 4f9557f..0000000 --- a/make_datasets/use_irregular_holes_make_dataset.py +++ /dev/null @@ -1,93 +0,0 @@ -import numpy as np -import cv2 -import os -from torchvision import transforms -from PIL import 
Image -import random -import sys -sys.path.append("..") -import util.image_processing as impro -from util import util,mosaic -import datetime - -ir_mask_path = './Irregular_Holes_mask' -img_dir ='/media/hypo/Hypoyun/Datasets/other/face512' -MOD = 'mosaic' #HD | pix2pix | mosaic -MASK = False # if True, output mask,too -BOUNDING = True # if true the mosaic size will be more big -suffix = '_1' -output_dir = os.path.join('./datasets_img',MOD) -util.makedirs(output_dir) - -if MOD == 'HD': - train_A_path = os.path.join(output_dir,'train_A') - train_B_path = os.path.join(output_dir,'train_B') - util.makedirs(train_A_path) - util.makedirs(train_B_path) -elif MOD == 'pix2pix': - train_path = os.path.join(output_dir,'train') - util.makedirs(train_path) -elif MOD == 'mosaic': - ori_path = os.path.join(output_dir,'ori') - mosaic_path = os.path.join(output_dir,'mosaic') - mask_path = os.path.join(output_dir,'mask') - util.makedirs(ori_path) - util.makedirs(mosaic_path) - util.makedirs(mask_path) -if MASK: - mask_path = os.path.join(output_dir,'mask') - util.makedirs(mask_path) - -transform_mask = transforms.Compose([ - transforms.RandomResizedCrop(size=512, scale=(0.5,1)), - transforms.RandomHorizontalFlip(), - ]) - -transform_img = transforms.Compose([ - - transforms.Resize(512), - transforms.RandomCrop(512) - ]) - -mask_names = os.listdir(ir_mask_path) -img_paths = util.Traversal(img_dir) -img_paths = util.is_imgs(img_paths) -print('Find images:',len(img_paths)) - -for i,img_path in enumerate(img_paths,1): - try: - img = Image.open(img_path) - img = transform_img(img) - img = np.array(img) - img = img[...,::-1] - - if BOUNDING: - mosaic_area = 0 - while mosaic_area < 16384: - mask = Image.open(os.path.join(ir_mask_path,random.choices(mask_names)[0])) - mask = transform_mask(mask) - mask = np.array(mask) - mosaic_area = impro.mask_area(mask) - mosaic_img = mosaic.addmosaic_random(img, mask,'bounding') - else: - mask = Image.open(os.path.join(ir_mask_path,random.choices(mask_names)[0])) - mask = transform_mask(mask) - mask = np.array(mask) - mosaic_img = mosaic.addmosaic_random(img, mask) - - if MOD == 'HD':#[128:384,128:384,:] --->256 - cv2.imwrite(os.path.join(train_A_path,'%05d' % i+suffix+'.jpg'), mosaic_img) - cv2.imwrite(os.path.join(train_B_path,'%05d' % i+suffix+'.jpg'), img) - if MASK: - cv2.imwrite(os.path.join(mask_path,'%05d' % i+suffix+'.png'), mask) - elif MOD == 'pix2pix': - merge_img = impro.makedataset(mosaic_img, img) - cv2.imwrite(os.path.join(train_path,'%05d' % i+suffix+'.jpg'), merge_img) - elif MOD == 'mosaic': - cv2.imwrite(os.path.join(mosaic_path,'%05d' % i+suffix+'.jpg'), mosaic_img) - cv2.imwrite(os.path.join(ori_path,'%05d' % i+suffix+'.jpg'), img) - cv2.imwrite(os.path.join(mask_path,'%05d' % i+suffix+'.png'), mask) - - print('\r','Proc/all:'+str(i)+'/'+str(len(img_paths)),util.get_bar(100*i/len(img_paths),num=40),end='') - except Exception as e: - print(img_path,e) diff --git a/models/BiSeNet_model.py b/models/BiSeNet_model.py new file mode 100644 index 0000000..b58ea5b --- /dev/null +++ b/models/BiSeNet_model.py @@ -0,0 +1,264 @@ +# This code clone from https://github.com/ooooverflow/BiSeNet +import torch.nn as nn +import torch +import torch.nn.functional as F +from . import components +import warnings +warnings.filterwarnings(action='ignore') + +def flatten(tensor): + """Flattens a given tensor such that the channel axis is first. 
+ The shapes are transformed as follows: + (N, C, D, H, W) -> (C, N * D * H * W) + """ + C = tensor.size(1) + # new axis order + axis_order = (1, 0) + tuple(range(2, tensor.dim())) + # Transpose: (N, C, D, H, W) -> (C, N, D, H, W) + transposed = tensor.permute(axis_order) + # Flatten: (C, N, D, H, W) -> (C, N * D * H * W) + return transposed.contiguous().view(C, -1) + + +class DiceLoss(nn.Module): + def __init__(self): + super().__init__() + self.epsilon = 1e-5 + + def forward(self, output, target): + assert output.size() == target.size(), "'input' and 'target' must have the same shape" + output = F.softmax(output, dim=1) + output = flatten(output) + target = flatten(target) + # intersect = (output * target).sum(-1).sum() + self.epsilon + # denominator = ((output + target).sum(-1)).sum() + self.epsilon + + intersect = (output * target).sum(-1) + denominator = (output + target).sum(-1) + dice = intersect / denominator + dice = torch.mean(dice) + return 1 - dice + # return 1 - 2. * intersect / denominator + +class resnet18(torch.nn.Module): + def __init__(self, pretrained=True): + super().__init__() + self.features = components.resnet18(pretrained=pretrained) + self.conv1 = self.features.conv1 + self.bn1 = self.features.bn1 + self.relu = self.features.relu + self.maxpool1 = self.features.maxpool + self.layer1 = self.features.layer1 + self.layer2 = self.features.layer2 + self.layer3 = self.features.layer3 + self.layer4 = self.features.layer4 + + def forward(self, input): + x = self.conv1(input) + x = self.relu(self.bn1(x)) + x = self.maxpool1(x) + feature1 = self.layer1(x) # 1 / 4 + feature2 = self.layer2(feature1) # 1 / 8 + feature3 = self.layer3(feature2) # 1 / 16 + feature4 = self.layer4(feature3) # 1 / 32 + # global average pooling to build tail + tail = torch.mean(feature4, 3, keepdim=True) + tail = torch.mean(tail, 2, keepdim=True) + return feature3, feature4, tail + + +class resnet101(torch.nn.Module): + def __init__(self, pretrained=True): + super().__init__() + self.features = components.resnet101(pretrained=pretrained) + self.conv1 = self.features.conv1 + self.bn1 = self.features.bn1 + self.relu = self.features.relu + self.maxpool1 = self.features.maxpool + self.layer1 = self.features.layer1 + self.layer2 = self.features.layer2 + self.layer3 = self.features.layer3 + self.layer4 = self.features.layer4 + + def forward(self, input): + x = self.conv1(input) + x = self.relu(self.bn1(x)) + x = self.maxpool1(x) + feature1 = self.layer1(x) # 1 / 4 + feature2 = self.layer2(feature1) # 1 / 8 + feature3 = self.layer3(feature2) # 1 / 16 + feature4 = self.layer4(feature3) # 1 / 32 + # global average pooling to build tail + tail = torch.mean(feature4, 3, keepdim=True) + tail = torch.mean(tail, 2, keepdim=True) + return feature3, feature4, tail + +def build_contextpath(name,pretrained): + model = { + 'resnet18': resnet18(pretrained=pretrained), + 'resnet101': resnet101(pretrained=pretrained) + } + return model[name] + +class ConvBlock(torch.nn.Module): + def __init__(self, in_channels, out_channels, kernel_size=3, stride=2,padding=1): + super().__init__() + self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding, bias=False) + self.bn = nn.BatchNorm2d(out_channels) + self.relu = nn.ReLU() + + def forward(self, input): + x = self.conv1(input) + return self.relu(self.bn(x)) + +class Spatial_path(torch.nn.Module): + def __init__(self): + super().__init__() + self.convblock1 = ConvBlock(in_channels=3, out_channels=64) + self.convblock2 = 
ConvBlock(in_channels=64, out_channels=128)
+        self.convblock3 = ConvBlock(in_channels=128, out_channels=256)
+
+    def forward(self, input):
+        x = self.convblock1(input)
+        x = self.convblock2(x)
+        x = self.convblock3(x)
+        return x
+
+class AttentionRefinementModule(torch.nn.Module):
+    def __init__(self, in_channels, out_channels):
+        super().__init__()
+        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)
+        self.bn = nn.BatchNorm2d(out_channels)
+        self.sigmoid = nn.Sigmoid()
+        self.in_channels = in_channels
+        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
+
+    def forward(self, input):
+        # global average pooling
+        x = self.avgpool(input)
+        assert self.in_channels == x.size(1), 'in_channels and out_channels should all be {}'.format(x.size(1))
+        x = self.conv(x)
+        # x = self.sigmoid(self.bn(x))
+        x = self.sigmoid(x)
+        # channels of input and x should be same
+        x = torch.mul(input, x)
+        return x
+
+class FeatureFusionModule(torch.nn.Module):
+    def __init__(self, num_classes, in_channels):
+        super().__init__()
+        # self.in_channels = input_1.channels + input_2.channels
+        # resnet101 3328 = 256(from spatial path) + 1024(from context path) + 2048(from context path)
+        # resnet18  1024 = 256(from spatial path) + 256(from context path) + 512(from context path)
+        self.in_channels = in_channels
+
+        self.convblock = ConvBlock(in_channels=self.in_channels, out_channels=num_classes, stride=1)
+        self.conv1 = nn.Conv2d(num_classes, num_classes, kernel_size=1)
+        self.relu = nn.ReLU()
+        self.conv2 = nn.Conv2d(num_classes, num_classes, kernel_size=1)
+        self.sigmoid = nn.Sigmoid()
+        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
+
+
+    def forward(self, input_1, input_2):
+        x = torch.cat((input_1, input_2), dim=1)
+        assert self.in_channels == x.size(1), 'in_channels of ConvBlock should be {}'.format(x.size(1))
+        feature = self.convblock(x)
+        x = self.avgpool(feature)
+
+        x = self.relu(self.conv1(x))
+        x = self.sigmoid(self.conv2(x))
+        x = torch.mul(feature, x)
+        x = torch.add(x, feature)
+        return x
+
+class BiSeNet(torch.nn.Module):
+    def __init__(self, num_classes, context_path, train_flag=True):
+        super().__init__()
+        # build spatial path
+        self.saptial_path = Spatial_path()
+        self.sigmoid = nn.Sigmoid()
+        # build context path
+        if train_flag:
+            self.context_path = build_contextpath(name=context_path,pretrained=True)
+        else:
+            self.context_path = build_contextpath(name=context_path,pretrained=False)
+
+        # build attention refinement module for resnet 101
+        if context_path == 'resnet101':
+            self.attention_refinement_module1 = AttentionRefinementModule(1024, 1024)
+            self.attention_refinement_module2 = AttentionRefinementModule(2048, 2048)
+            # supervision block
+            self.supervision1 = nn.Conv2d(in_channels=1024, out_channels=num_classes, kernel_size=1)
+            self.supervision2 = nn.Conv2d(in_channels=2048, out_channels=num_classes, kernel_size=1)
+            # build feature fusion module
+            self.feature_fusion_module = FeatureFusionModule(num_classes, 3328)
+
+        elif context_path == 'resnet18':
+            # build attention refinement module for resnet 18
+            self.attention_refinement_module1 = AttentionRefinementModule(256, 256)
+            self.attention_refinement_module2 = AttentionRefinementModule(512, 512)
+            # supervision block
+            self.supervision1 = nn.Conv2d(in_channels=256, out_channels=num_classes, kernel_size=1)
+            self.supervision2 = nn.Conv2d(in_channels=512, out_channels=num_classes, kernel_size=1)
+            # build feature fusion module
+            self.feature_fusion_module = FeatureFusionModule(num_classes, 1024)
+        else:
+            print('Error: unsupported context_path network \n')
+
+        # build final convolution
+        self.conv = nn.Conv2d(in_channels=num_classes, out_channels=num_classes, kernel_size=1)
+
+        self.init_weight()
+
+        self.mul_lr = []
+        self.mul_lr.append(self.saptial_path)
+        self.mul_lr.append(self.attention_refinement_module1)
+        self.mul_lr.append(self.attention_refinement_module2)
+        self.mul_lr.append(self.supervision1)
+        self.mul_lr.append(self.supervision2)
+        self.mul_lr.append(self.feature_fusion_module)
+        self.mul_lr.append(self.conv)
+
+    def init_weight(self):
+        for name, m in self.named_modules():
+            if 'context_path' not in name:
+                if isinstance(m, nn.Conv2d):
+                    nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')
+                elif isinstance(m, nn.BatchNorm2d):
+                    m.eps = 1e-5
+                    m.momentum = 0.1
+                    nn.init.constant_(m.weight, 1)
+                    nn.init.constant_(m.bias, 0)
+
+    def forward(self, input):
+        # output of spatial path
+        sx = self.saptial_path(input)
+
+        # output of context path
+        cx1, cx2, tail = self.context_path(input)
+        cx1 = self.attention_refinement_module1(cx1)
+        cx2 = self.attention_refinement_module2(cx2)
+        cx2 = torch.mul(cx2, tail)
+        # upsampling
+        cx1 = torch.nn.functional.interpolate(cx1, size=sx.size()[-2:], mode='bilinear')
+        cx2 = torch.nn.functional.interpolate(cx2, size=sx.size()[-2:], mode='bilinear')
+        cx = torch.cat((cx1, cx2), dim=1)
+
+        if self.training == True:
+            cx1_sup = self.supervision1(cx1)
+            cx2_sup = self.supervision2(cx2)
+            cx1_sup = torch.nn.functional.interpolate(cx1_sup, size=input.size()[-2:], mode='bilinear')
+            cx2_sup = torch.nn.functional.interpolate(cx2_sup, size=input.size()[-2:], mode='bilinear')
+
+        # output of feature fusion module
+        result = self.feature_fusion_module(sx, cx)
+
+        # upsampling
+        result = torch.nn.functional.interpolate(result, scale_factor=8, mode='bilinear')
+        result = self.conv(result)
+
+        if self.training == True:
+            return self.sigmoid(result), self.sigmoid(cx1_sup), self.sigmoid(cx2_sup)
+
+        return self.sigmoid(result)
\ No newline at end of file
diff --git a/models/__init__.py b/models/__init__.py
index 54739ba..8b13789 100755
--- a/models/__init__.py
+++ b/models/__init__.py
@@ -1,2 +1 @@
-from .pix2pix_model import *
-from .unet_model import UNet
+
diff --git a/models/components.py b/models/components.py
new file mode 100644
index 0000000..59cb333
--- /dev/null
+++ b/models/components.py
@@ -0,0 +1,234 @@
+import torch.nn as nn
+import torch.utils.model_zoo as model_zoo
+
+
+__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
+           'resnet152']
+
+
+model_urls = {
+    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
+    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
+    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
+    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
+    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
+}
+
+
+def conv3x3(in_planes, out_planes, stride=1):
+    """3x3 convolution with padding"""
+    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
+                     padding=1, bias=False)
+
+
+def conv1x1(in_planes, out_planes, stride=1):
+    """1x1 convolution"""
+    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
+
+
+class BasicBlock(nn.Module):
+    expansion = 1
+
+    def __init__(self, inplanes, planes, stride=1, downsample=None, norm_layer=None):
+        super(BasicBlock, self).__init__()
+        if norm_layer is None:
+            norm_layer = nn.BatchNorm2d
+        # Both self.conv1
and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = norm_layer(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = norm_layer(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None, norm_layer=None): + super(Bottleneck, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + # Both self.conv2 and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv1x1(inplanes, planes) + self.bn1 = norm_layer(planes) + self.conv2 = conv3x3(planes, planes, stride) + self.bn2 = norm_layer(planes) + self.conv3 = conv1x1(planes, planes * self.expansion) + self.bn3 = norm_layer(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class ResNet(nn.Module): + + def __init__(self, block, layers, num_classes=1000, zero_init_residual=False, norm_layer=None): + super(ResNet, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + self.inplanes = 64 + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = norm_layer(64) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0], norm_layer=norm_layer) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2, norm_layer=norm_layer) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2, norm_layer=norm_layer) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2, norm_layer=norm_layer) + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.fc = nn.Linear(512 * block.expansion, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + # Zero-initialize the last BN in each residual branch, + # so that the residual branch starts with zeros, and each residual block behaves like an identity. 
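+        #   A minimal usage sketch of this flag, assuming the constructors defined
+        #   later in this file: with it set, each BasicBlock's bn2.weight starts at
+        #   zero, so the residual branch contributes nothing at initialization and
+        #   the block begins as relu(identity), e.g.
+        #
+        #       net = resnet18(pretrained=False, zero_init_residual=True)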
+ # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 + if zero_init_residual: + for m in self.modules(): + if isinstance(m, Bottleneck): + nn.init.constant_(m.bn3.weight, 0) + elif isinstance(m, BasicBlock): + nn.init.constant_(m.bn2.weight, 0) + + def _make_layer(self, block, planes, blocks, stride=1, norm_layer=None): + if norm_layer is None: + norm_layer = nn.BatchNorm2d + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + conv1x1(self.inplanes, planes * block.expansion, stride), + norm_layer(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample, norm_layer)) + self.inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append(block(self.inplanes, planes, norm_layer=norm_layer)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.avgpool(x) + x = x.view(x.size(0), -1) + x = self.fc(x) + + return x + + +def resnet18(pretrained=False, **kwargs): + """Constructs a ResNet-18 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) + return model + + +def resnet34(pretrained=False, **kwargs): + """Constructs a ResNet-34 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) + return model + + +def resnet50(pretrained=False, **kwargs): + """Constructs a ResNet-50 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) + return model + + +def resnet101(pretrained=False, **kwargs): + """Constructs a ResNet-101 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) + return model + + +def resnet152(pretrained=False, **kwargs): + """Constructs a ResNet-152 model. 
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet152'])) + return model \ No newline at end of file diff --git a/models/loadmodel.py b/models/loadmodel.py index 00607d7..eedd34e 100755 --- a/models/loadmodel.py +++ b/models/loadmodel.py @@ -4,6 +4,7 @@ from .pix2pixHD_model import define_G as define_G_HD from .unet_model import UNet from .video_model import MosaicNet from .videoHD_model import MosaicNet as MosaicNet_HD +from .BiSeNet_model import BiSeNet def show_paramsnumber(net,netname='net'): parameters = sum(param.numel() for param in net.parameters()) @@ -75,21 +76,35 @@ def video(opt): netG.cuda() return netG - -def unet_clean(opt): - net = UNet(n_channels = 3, n_classes = 1) +def bisenet(opt,type='roi'): + ''' + type: roi or mosaic + ''' + net = BiSeNet(num_classes=1, context_path='resnet18',train_flag=False) show_paramsnumber(net,'segment') - net.load_state_dict(torch.load(opt.mosaic_position_model_path)) + if type == 'roi': + net.load_state_dict(torch.load(opt.model_path)) + elif type == 'mosaic': + net.load_state_dict(torch.load(opt.mosaic_position_model_path)) net.eval() if opt.use_gpu: net.cuda() return net -def unet(opt): - net = UNet(n_channels = 3, n_classes = 1) - show_paramsnumber(net,'segment') - net.load_state_dict(torch.load(opt.model_path)) - net.eval() - if opt.use_gpu: - net.cuda() - return net +# def unet_clean(opt): +# net = UNet(n_channels = 3, n_classes = 1) +# show_paramsnumber(net,'segment') +# net.load_state_dict(torch.load(opt.mosaic_position_model_path)) +# net.eval() +# if opt.use_gpu: +# net.cuda() +# return net + +# def unet(opt): +# net = UNet(n_channels = 3, n_classes = 1) +# show_paramsnumber(net,'segment') +# net.load_state_dict(torch.load(opt.model_path)) +# net.eval() +# if opt.use_gpu: +# net.cuda() +# return net diff --git a/models/runmodel.py b/models/runmodel.py index 11b14dd..2ff8414 100755 --- a/models/runmodel.py +++ b/models/runmodel.py @@ -7,7 +7,7 @@ from util import data import torch import numpy as np -def run_unet(img,net,size = 224,use_gpu = True): +def run_segment(img,net,size = 360,use_gpu = True): img = impro.resize(img,size) img = data.im2tensor(img,use_gpu = use_gpu, bgr2rgb = False,use_transform = False , is0_1 = True) mask = net(img) @@ -60,18 +60,26 @@ def run_styletransfer(opt, net, img): img = data.tensor2im(img) return img -def get_ROI_position(img,net,opt): - mask = run_unet(img,net,size=224,use_gpu = opt.use_gpu) +def get_ROI_position(img,net,opt,keepsize=True): + mask = run_segment(img,net,size=360,use_gpu = opt.use_gpu) mask = impro.mask_threshold(mask,opt.mask_extend,opt.mask_threshold) + if keepsize: + mask = impro.resize_like(mask, img) x,y,halfsize,area = impro.boundingSquare(mask, 1) - return mask,x,y,area + return mask,x,y,halfsize,area -def get_mosaic_position(img_origin,net_mosaic_pos,opt,threshold = 128 ): - mask = run_unet(img_origin,net_mosaic_pos,size=224,use_gpu = opt.use_gpu) - mask = impro.mask_threshold(mask,30,threshold) +def get_mosaic_position(img_origin,net_mosaic_pos,opt): + h,w = img_origin.shape[:2] + mask = run_segment(img_origin,net_mosaic_pos,size=360,use_gpu = opt.use_gpu) + # mask_1 = mask.copy() + mask = impro.mask_threshold(mask,ex_mun=int(min(h,w)/20),threshold=opt.mask_threshold) if not opt.all_mosaic_area: mask = impro.find_mostlikely_ROI(mask) x,y,size,area = impro.boundingSquare(mask,Ex_mul=opt.ex_mult) - rat = 
min(img_origin.shape[:2])/224.0 + #Location fix + rat = min(h,w)/360.0 x,y,size = int(rat*x),int(rat*y),int(rat*size) + x,y = np.clip(x, 0, w),np.clip(y, 0, h) + size = np.clip(size, 0, min(w-x,h-y)) + # print(x,y,size) return x,y,size,mask \ No newline at end of file diff --git a/models/unet_model.py b/models/unet_model.py index de16f64..5dce46e 100755 --- a/models/unet_model.py +++ b/models/unet_model.py @@ -1,10 +1,101 @@ # This code clone from https://github.com/milesial/Pytorch-UNet # LICENSE file : https://github.com/milesial/Pytorch-UNet/blob/master/LICENSE -# full assembly of the sub-parts to form the complete net - +import torch +import torch.nn as nn import torch.nn.functional as F -from .unet_parts import * + +class double_conv(nn.Module): + '''(conv => BN => ReLU) * 2''' + def __init__(self, in_ch, out_ch): + super(double_conv, self).__init__() + self.conv = nn.Sequential( + nn.Conv2d(in_ch, out_ch, 3, padding=1), + nn.BatchNorm2d(out_ch), + nn.ReLU(inplace=True), + nn.Conv2d(out_ch, out_ch, 3, padding=1), + nn.BatchNorm2d(out_ch), + nn.ReLU(inplace=True) + ) + + def forward(self, x): + x = self.conv(x) + return x + + +class inconv(nn.Module): + def __init__(self, in_ch, out_ch): + super(inconv, self).__init__() + self.conv = double_conv(in_ch, out_ch) + + def forward(self, x): + x = self.conv(x) + return x + + +class down(nn.Module): + def __init__(self, in_ch, out_ch): + super(down, self).__init__() + self.mpconv = nn.Sequential( + nn.MaxPool2d(2), + double_conv(in_ch, out_ch) + ) + + def forward(self, x): + x = self.mpconv(x) + return x + +class Upsample(nn.Module): + def __init__(self, scale_factor): + super(Upsample, self).__init__() + self.scale_factor = scale_factor + def forward(self, x): + return F.interpolate(x, scale_factor=self.scale_factor,mode='bilinear', align_corners=True) + + +class up(nn.Module): + def __init__(self, in_ch, out_ch, bilinear=True): + super(up, self).__init__() + + # would be a nice idea if the upsampling could be learned too, + # but my machine do not have enough memory to handle all those weights + if bilinear: + self.up = Upsample(scale_factor=2) + else: + self.up = nn.ConvTranspose2d(in_ch//2, in_ch//2, 2, stride=2) + + self.conv = double_conv(in_ch, out_ch) + + def forward(self, x1, x2): + x1 = self.up(x1) + + # input is CHW + diffY = x2.size()[2] - x1.size()[2] + diffX = x2.size()[3] - x1.size()[3] + + x1 = F.pad(x1, (diffX // 2, diffX - diffX//2, + diffY // 2, diffY - diffY//2)) + + # for padding issues, see + # https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a + # https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd + + x = torch.cat([x2, x1], dim=1) + x = self.conv(x) + return x + + +class outconv(nn.Module): + def __init__(self, in_ch, out_ch): + super(outconv, self).__init__() + self.conv = nn.Sequential( + nn.Conv2d(in_ch, out_ch, 1), + nn.Sigmoid() + ) + + def forward(self, x): + x = self.conv(x) + return x class UNet(nn.Module): def __init__(self, n_channels, n_classes): diff --git a/models/unet_parts.py b/models/unet_parts.py deleted file mode 100755 index 2d93833..0000000 --- a/models/unet_parts.py +++ /dev/null @@ -1,102 +0,0 @@ -# This code clone from https://github.com/milesial/Pytorch-UNet -# LICENSE file : https://github.com/milesial/Pytorch-UNet/blob/master/LICENSE - -# sub-parts of the U-Net model -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class double_conv(nn.Module): - '''(conv => BN => 
ReLU) * 2''' - def __init__(self, in_ch, out_ch): - super(double_conv, self).__init__() - self.conv = nn.Sequential( - nn.Conv2d(in_ch, out_ch, 3, padding=1), - nn.BatchNorm2d(out_ch), - nn.ReLU(inplace=True), - nn.Conv2d(out_ch, out_ch, 3, padding=1), - nn.BatchNorm2d(out_ch), - nn.ReLU(inplace=True) - ) - - def forward(self, x): - x = self.conv(x) - return x - - -class inconv(nn.Module): - def __init__(self, in_ch, out_ch): - super(inconv, self).__init__() - self.conv = double_conv(in_ch, out_ch) - - def forward(self, x): - x = self.conv(x) - return x - - -class down(nn.Module): - def __init__(self, in_ch, out_ch): - super(down, self).__init__() - self.mpconv = nn.Sequential( - nn.MaxPool2d(2), - double_conv(in_ch, out_ch) - ) - - def forward(self, x): - x = self.mpconv(x) - return x - -class Upsample(nn.Module): - def __init__(self, scale_factor): - super(Upsample, self).__init__() - self.scale_factor = scale_factor - def forward(self, x): - return F.interpolate(x, scale_factor=self.scale_factor,mode='bilinear', align_corners=True) - - -class up(nn.Module): - def __init__(self, in_ch, out_ch, bilinear=True): - super(up, self).__init__() - - # would be a nice idea if the upsampling could be learned too, - # but my machine do not have enough memory to handle all those weights - if bilinear: - self.up = Upsample(scale_factor=2) - else: - self.up = nn.ConvTranspose2d(in_ch//2, in_ch//2, 2, stride=2) - - self.conv = double_conv(in_ch, out_ch) - - def forward(self, x1, x2): - x1 = self.up(x1) - - # input is CHW - diffY = x2.size()[2] - x1.size()[2] - diffX = x2.size()[3] - x1.size()[3] - - x1 = F.pad(x1, (diffX // 2, diffX - diffX//2, - diffY // 2, diffY - diffY//2)) - - # for padding issues, see - # https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a - # https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd - - x = torch.cat([x2, x1], dim=1) - x = self.conv(x) - return x - - -class outconv(nn.Module): - def __init__(self, in_ch, out_ch): - super(outconv, self).__init__() - self.conv = nn.Sequential( - nn.Conv2d(in_ch, out_ch, 1), - nn.Sigmoid() - ) - - - - def forward(self, x): - x = self.conv(x) - return x diff --git a/models/videoHD_model.py b/models/videoHD_model.py index 9f214c5..20e901f 100644 --- a/models/videoHD_model.py +++ b/models/videoHD_model.py @@ -15,7 +15,7 @@ class encoder_2d(nn.Module): ### downsample for i in range(n_downsampling): mult = 2**i - model += [nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2, padding=1), + model += [nn.ReflectionPad2d(1),nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2, padding=0), norm_layer(ngf * mult * 2), activation] self.model = nn.Sequential(*model) @@ -39,16 +39,6 @@ class decoder_2d(nn.Module): ### upsample for i in range(n_downsampling): mult = 2**(n_downsampling - i) - # if i%2 ==0: - # model += [ nn.Upsample(scale_factor = 2, mode='nearest'), - # nn.ReflectionPad2d(1), - # nn.Conv2d(ngf * mult, int(ngf * mult / 2),kernel_size=3, stride=1, padding=0), - # norm_layer(int(ngf * mult / 2)), - # nn.ReLU(True)] - # else: - - # model += [nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2), kernel_size=3, stride=2, padding=1, output_padding=1), - # norm_layer(int(ngf * mult / 2)), activation] # model += [ nn.Upsample(scale_factor = 2, mode='nearest'), # nn.ReflectionPad2d(1), diff --git a/models/video_model.py b/models/video_model.py index 5c15726..4a095c6 100644 --- a/models/video_model.py +++ b/models/video_model.py 
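With models/unet_parts.py deleted, all of the U-Net building blocks (double_conv, inconv, down, up, outconv) now live in models/unet_model.py, so this file only has to drop the old wildcard import. A minimal sketch of the surviving import surface, assuming the repository root is on sys.path:

    from models.unet_model import UNet

    net = UNet(n_channels=3, n_classes=1)  # the signature loadmodel.py expects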
@@ -1,7 +1,6 @@ import torch import torch.nn as nn import torch.nn.functional as F -from .unet_parts import * from .pix2pix_model import * diff --git a/train/add/train.py b/train/add/train.py index 4d57814..bb8e953 100644 --- a/train/add/train.py +++ b/train/add/train.py @@ -2,8 +2,10 @@ import sys import os import random import datetime +import time import numpy as np +from matplotlib import pyplot as plt import cv2 import torch @@ -11,137 +13,144 @@ import torch.backends.cudnn as cudnn import torch.nn as nn from torch import optim -import sys sys.path.append("..") sys.path.append("../..") +from cores import Options from util import mosaic,util,ffmpeg,filt,data from util import image_processing as impro -from models import unet_model -from matplotlib import pyplot as plt -import torch.backends.cudnn as cudnn - -LR = 0.0002 -EPOCHS = 100 -BATCHSIZE = 16 -LOADSIZE = 256 -FINESIZE = 224 -CONTINUE = True -use_gpu = True -SAVE_FRE = 1 -MAX_LOAD = 30000 - - - -dir_img = './datasets/face/origin_image/' -dir_mask = './datasets/face/mask/' -dir_checkpoint = 'checkpoints/face/' - +from models import unet_model,BiSeNet_model + + +''' +--------------------------Get options-------------------------- +''' +opt = Options() +opt.parser.add_argument('--gpu_id',type=int,default=0, help='') +opt.parser.add_argument('--lr',type=float,default=0.001, help='') +opt.parser.add_argument('--finesize',type=int,default=360, help='') +opt.parser.add_argument('--loadsize',type=int,default=400, help='') +opt.parser.add_argument('--batchsize',type=int,default=8, help='') +opt.parser.add_argument('--model',type=str,default='BiSeNet', help='BiSeNet or UNet') + +opt.parser.add_argument('--maxepoch',type=int,default=100, help='') +opt.parser.add_argument('--savefreq',type=int,default=5, help='') +opt.parser.add_argument('--maxload',type=int,default=1000000, help='') +opt.parser.add_argument('--continuetrain', action='store_true', help='') +opt.parser.add_argument('--startepoch',type=int,default=0, help='') +opt.parser.add_argument('--dataset',type=str,default='./datasets/face/', help='') +opt.parser.add_argument('--savename',type=str,default='face', help='') + + +''' +--------------------------Init-------------------------- +''' +opt = opt.getparse() +dir_img = os.path.join(opt.dataset,'origin_image') +dir_mask = os.path.join(opt.dataset,'mask') +dir_checkpoint = os.path.join('checkpoints/',opt.savename) +util.makedirs(dir_checkpoint) +util.writelog(os.path.join(dir_checkpoint,'loss.txt'), + str(time.asctime(time.localtime(time.time())))+'\n'+util.opt2str(opt)) +torch.cuda.set_device(opt.gpu_id) def Totensor(img,use_gpu=True): size=img.shape[0] img = torch.from_numpy(img).float() - if use_gpu: + if opt.use_gpu: img = img.cuda() return img - -def Toinputshape(imgs,masks,finesize,test_flag = False): - batchsize = len(imgs) - result_imgs=[];result_masks=[] - for i in range(batchsize): - # print(imgs[i].shape,masks[i].shape) - img,mask = data.random_transform_image(imgs[i], masks[i], finesize, test_flag) - # print(img.shape,mask.shape) - mask = (mask.reshape(1,finesize,finesize)/255.0) - img = (img.transpose((2, 0, 1))/255.0) - result_imgs.append(img) - result_masks.append(mask) - result_imgs = np.array(result_imgs) - result_masks = np.array(result_masks) - return result_imgs,result_masks - -def batch_generator(images,masks,batchsize): - dataset_images = [] - dataset_masks = [] - - for i in range(int(len(images)/batchsize)): - dataset_images.append(images[i*batchsize:(i+1)*batchsize]) - 
dataset_masks.append(masks[i*batchsize:(i+1)*batchsize]) - if len(images)%batchsize != 0: - dataset_images.append(images[len(images)-len(images)%batchsize:]) - dataset_masks.append(masks[len(images)-len(images)%batchsize:]) - - return dataset_images,dataset_masks - -def loadimage(dir_img,dir_mask,loadsize,eval_p): - t1 = datetime.datetime.now() - imgnames = os.listdir(dir_img) - # imgnames = imgnames[:100] - random.shuffle(imgnames) - imgnames = imgnames[:MAX_LOAD] - print('load images:',len(imgnames)) - imgnames = (f[:-4] for f in imgnames) - images = [] - masks = [] - for imgname in imgnames: - img = impro.imread(dir_img+imgname+'.jpg') - mask = impro.imread(dir_mask+imgname+'.png',mod = 'gray') - img = impro.resize(img,loadsize) - mask = impro.resize(mask,loadsize) - images.append(img) - masks.append(mask) - train_images,train_masks = images[0:int(len(masks)*(1-eval_p))],masks[0:int(len(masks)*(1-eval_p))] - eval_images,eval_masks = images[int(len(masks)*(1-eval_p)):len(masks)],masks[int(len(masks)*(1-eval_p)):len(masks)] - t2 = datetime.datetime.now() - print('load data cost time:',(t2 - t1).seconds,'s') - return train_images,train_masks,eval_images,eval_masks - - -util.makedirs(dir_checkpoint) -print('loading data......') -train_images,train_masks,eval_images,eval_masks = loadimage(dir_img,dir_mask,LOADSIZE,0.2) -dataset_eval_images,dataset_eval_masks = batch_generator(eval_images,eval_masks,BATCHSIZE) -dataset_train_images,dataset_train_masks = batch_generator(train_images,train_masks,BATCHSIZE) - - -net = unet_model.UNet(n_channels = 3, n_classes = 1) - - -if CONTINUE: +def loadimage(imagepaths,maskpaths,opt,test_flag = False): + batchsize = len(imagepaths) + images = np.zeros((batchsize,3,opt.finesize,opt.finesize), dtype=np.float32) + masks = np.zeros((batchsize,1,opt.finesize,opt.finesize), dtype=np.float32) + for i in range(len(imagepaths)): + img = impro.resize(impro.imread(imagepaths[i]),opt.loadsize) + mask = impro.resize(impro.imread(maskpaths[i],mod = 'gray'),opt.loadsize) + img,mask = data.random_transform_image(img, mask, opt.finesize, test_flag) + images[i] = (img.transpose((2, 0, 1))/255.0) + masks[i] = (mask.reshape(1,1,opt.finesize,opt.finesize)/255.0) + images = Totensor(images,opt.use_gpu) + masks = Totensor(masks,opt.use_gpu) + + return images,masks + + +''' +--------------------------checking dataset-------------------------- +''' +print('checking dataset...') +imagepaths = sorted(util.Traversal(dir_img))[:opt.maxload] +maskpaths = sorted(util.Traversal(dir_mask))[:opt.maxload] +data.shuffledata(imagepaths, maskpaths) +if len(imagepaths) != len(maskpaths) : + print('dataset error!') + exit(0) +img_num = len(imagepaths) +print('find images:',img_num) +imagepaths_train = (imagepaths[0:int(img_num*0.8)]).copy() +maskpaths_train = (maskpaths[0:int(img_num*0.8)]).copy() +imagepaths_eval = (imagepaths[int(img_num*0.8):]).copy() +maskpaths_eval = (maskpaths[int(img_num*0.8):]).copy() + +''' +--------------------------def network-------------------------- +''' +if opt.model =='UNet': + net = unet_model.UNet(n_channels = 3, n_classes = 1) +elif opt.model =='BiSeNet': + net = BiSeNet_model.BiSeNet(num_classes=1, context_path='resnet18') + +if opt.continuetrain: if not os.path.isfile(os.path.join(dir_checkpoint,'last.pth')): - CONTINUE = False + opt.continuetrain = False print('can not load last.pth, training on init weight.') -if CONTINUE: - net.load_state_dict(torch.load(dir_checkpoint+'last.pth')) -if use_gpu: +if opt.continuetrain: + 
net.load_state_dict(torch.load(os.path.join(dir_checkpoint,'last.pth'))) + f = open(os.path.join(dir_checkpoint,'epoch_log.txt'),'r') + opt.startepoch = int(f.read()) + f.close() +if opt.use_gpu: net.cuda() cudnn.benchmark = True +optimizer = torch.optim.Adam(net.parameters(), lr=opt.lr) -optimizer = torch.optim.Adam(net.parameters(), lr=LR, betas=(0.9, 0.999)) - -criterion = nn.BCELoss() -# criterion = nn.L1Loss() +if opt.model =='UNet': + criterion = nn.BCELoss() +elif opt.model =='BiSeNet': + criterion = nn.BCELoss() + # criterion = BiSeNet_model.DiceLoss() +''' +--------------------------train-------------------------- +''' +loss_plot = {'train':[],'eval':[]} print('begin training......') -for epoch in range(EPOCHS): - random_save = random.randint(0, len(dataset_train_images)) +for epoch in range(opt.startepoch,opt.maxepoch): + random_save = random.randint(0, int(img_num*0.8/opt.batchsize)) + data.shuffledata(imagepaths_train, maskpaths_train) starttime = datetime.datetime.now() - print('Epoch {}/{}.'.format(epoch + 1, EPOCHS)) + util.writelog(os.path.join(dir_checkpoint,'loss.txt'),'Epoch {}/{}.'.format(epoch + 1, opt.maxepoch),True) net.train() - if use_gpu: + if opt.use_gpu: net.cuda() epoch_loss = 0 - for i,(img,mask) in enumerate(zip(dataset_train_images,dataset_train_masks)): - # print(epoch,i,img.shape,mask.shape) - img,mask = Toinputshape(img, mask, FINESIZE) - img = Totensor(img,use_gpu) - mask = Totensor(mask,use_gpu) + for i in range(int(img_num*0.8/opt.batchsize)): + img,mask = loadimage(imagepaths_train[i*opt.batchsize:(i+1)*opt.batchsize], maskpaths_train[i*opt.batchsize:(i+1)*opt.batchsize], opt) - mask_pred = net(img) - loss = criterion(mask_pred, mask) - epoch_loss += loss.item() + if opt.model =='UNet': + mask_pred = net(img) + loss = criterion(mask_pred, mask) + epoch_loss += loss.item() + elif opt.model =='BiSeNet': + mask_pred, mask_pred_sup1, mask_pred_sup2 = net(img) + loss1 = criterion(mask_pred, mask) + loss2 = criterion(mask_pred_sup1, mask) + loss3 = criterion(mask_pred_sup2, mask) + loss = loss1 + loss2 + loss3 + epoch_loss += loss1.item() optimizer.zero_grad() loss.backward() @@ -151,30 +160,47 @@ for epoch in range(EPOCHS): data.showresult(img,mask,mask_pred,os.path.join(dir_checkpoint,'result.png'),True) if i == random_save: data.showresult(img,mask,mask_pred,os.path.join(dir_checkpoint,'epoch_'+str(epoch+1)+'.png'),True) + epoch_loss = epoch_loss/int(img_num*0.8/opt.batchsize) + loss_plot['train'].append(epoch_loss) - # torch.cuda.empty_cache() - # # net.eval() + #val epoch_loss_eval = 0 with torch.no_grad(): - #net.eval() - for i,(img,mask) in enumerate(zip(dataset_eval_images,dataset_eval_masks)): - # print(epoch,i,img.shape,mask.shape) - img,mask = Toinputshape(img, mask, FINESIZE,test_flag=True) - img = Totensor(img,use_gpu) - mask = Totensor(mask,use_gpu) - mask_pred = net(img) - loss = criterion(mask_pred, mask) + # net.eval() + for i in range(int(img_num*0.2/opt.batchsize)): + img,mask = loadimage(imagepaths_eval[i*opt.batchsize:(i+1)*opt.batchsize], maskpaths_eval[i*opt.batchsize:(i+1)*opt.batchsize], opt,test_flag=True) + if opt.model =='UNet': + mask_pred = net(img) + elif opt.model =='BiSeNet': + mask_pred, _, _ = net(img) + # mask_pred = net(img) + loss= criterion(mask_pred, mask) epoch_loss_eval += loss.item() + epoch_loss_eval = epoch_loss_eval/int(img_num*0.2/opt.batchsize) + loss_plot['eval'].append(epoch_loss_eval) # torch.cuda.empty_cache() + #savelog endtime = datetime.datetime.now() - print('--- Epoch train_loss: {0:.6f} eval_loss: 
diff --git a/train/clean/train.py b/train/clean/train.py
index 102d08a..6ec5e88 100644
--- a/train/clean/train.py
+++ b/train/clean/train.py
@@ -21,6 +21,7 @@ import torch.backends.cudnn as cudnn
 '''
 opt = Options()
+opt.parser.add_argument('--gpu_id',type=int,default=0, help='')
 opt.parser.add_argument('--N',type=int,default=25, help='')
 opt.parser.add_argument('--lr',type=float,default=0.0002, help='')
 opt.parser.add_argument('--beta1',type=float,default=0.5, help='')
@@ -32,14 +33,15 @@ opt.parser.add_argument('--lambda_gan',type=float,default=1, help='')
 opt.parser.add_argument('--finesize',type=int,default=256, help='')
 opt.parser.add_argument('--loadsize',type=int,default=286, help='')
 opt.parser.add_argument('--batchsize',type=int,default=1, help='')
-opt.parser.add_argument('--perload_num',type=int,default=16, help='')
+opt.parser.add_argument('--perload_num',type=int,default=16, help='size of the preloaded image pool')
 opt.parser.add_argument('--norm',type=str,default='instance', help='')
+opt.parser.add_argument('--dataset',type=str,default='./datasets/face/', help='')
 opt.parser.add_argument('--maxiter',type=int,default=10000000, help='')
 opt.parser.add_argument('--savefreq',type=int,default=10000, help='')
 opt.parser.add_argument('--startiter',type=int,default=0, help='')
 opt.parser.add_argument('--continuetrain', action='store_true', help='')
-opt.parser.add_argument('--savename',type=str,default='MosaicNet', help='')
+opt.parser.add_argument('--savename',type=str,default='face', help='')
 
 '''
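The new --dataset option points at a folder of per-video subdirectories; the layout below is inferred from loaddata() further down (video folder names are placeholders):

    datasets/face/
    |-- opt.txt            # options dump, skipped when scanning for videos
    |-- video_A/
    |   |-- origin_image/  # 00001.jpg, 00002.jpg, ...
    |   `-- mask/          # 00001.png, 00002.png, ...
    `-- video_B/
        `-- ...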
@@ -50,19 +52,27 @@
 dir_checkpoint = os.path.join('checkpoints/',opt.savename)
 util.makedirs(dir_checkpoint)
 util.writelog(os.path.join(dir_checkpoint,'loss.txt'),
               str(time.asctime(time.localtime(time.time())))+'\n'+util.opt2str(opt))
+torch.cuda.set_device(opt.gpu_id)
 
 N = opt.N
 loss_sum = [0.,0.,0.,0.]
 loss_plot = [[],[]]
 item_plot = []
 
-videos = os.listdir('./dataset')
-videos.sort()
-lengths = []
-print('check dataset...')
-for video in videos:
-    video_images = os.listdir('./dataset/'+video+'/ori')
-    lengths.append(len(video_images))
+# list video dir
+videonames = os.listdir(opt.dataset)
+videonames.sort()
+lengths = [];tmp = []
+print('Check dataset...')
+for video in videonames:
+    if video != 'opt.txt':  # skip the options dump saved beside the videos
+        video_images = os.listdir(os.path.join(opt.dataset,video,'origin_image'))
+        lengths.append(len(video_images))
+        tmp.append(video)
+videonames = tmp
+video_num = len(videonames)
 
+#def network
+print('Init network...')
 if opt.hd:
     netG = videoHD_model.MosaicNet(3*N+1, 3, norm=opt.norm)
 else:
@@ -71,7 +81,8 @@
 loadmodel.show_paramsnumber(netG,'netG')
 
 if opt.gan:
     if opt.hd:
-        netD = pix2pixHD_model.define_D(6, 64, 3, norm = opt.norm, use_sigmoid=False, num_D=2)
+        #netD = pix2pixHD_model.define_D(6, 64, 3, norm = opt.norm, use_sigmoid=False, num_D=1)
+        netD = pix2pixHD_model.define_D(6, 64, 3, norm = opt.norm, use_sigmoid=False, num_D=2,getIntermFeat=True)
     else:
         netD = pix2pix_model.define_D(3*2, 64, 'basic', norm = opt.norm)
     netD.train()
 
@@ -106,36 +117,38 @@ if opt.use_gpu:
     cudnn.benchmark = True
 
 '''
---------------------------preload data--------------------------
+--------------------------preload data & data pool--------------------------
 '''
-def loaddata():
-    video_index = random.randint(0,len(videos)-1)
-    video = videos[video_index]
+def loaddata(video_index):
+    videoname = videonames[video_index]
     img_index = random.randint(int(N/2)+1,lengths[video_index]- int(N/2)-1)
+    input_img = np.zeros((opt.loadsize,opt.loadsize,3*N+1), dtype='uint8')
+    # this frame
+    this_mask = impro.imread(os.path.join(opt.dataset,videoname,'mask','%05d'%(img_index)+'.png'),'gray',loadsize=opt.loadsize)
+    input_img[:,:,-1] = this_mask
+    ground_true = impro.imread(os.path.join(opt.dataset,videoname,'origin_image','%05d'%(img_index)+'.jpg'),loadsize=opt.loadsize)
+    mosaic_size,mod,rect_rat,father = mosaic.get_random_parameter(ground_true,this_mask)
+    # merge other frames
     for i in range(0,N):
-
-        img = cv2.imread('./dataset/'+video+'/mosaic/output_'+'%05d'%(img_index+i-int(N/2))+'.png')
-        img = impro.resize(img,opt.loadsize)
-        input_img[:,:,i*3:(i+1)*3] = img
-    mask = cv2.imread('./dataset/'+video+'/mask/output_'+'%05d'%(img_index)+'.png',0)
-    mask = impro.resize(mask,opt.loadsize)
-    mask = impro.mask_threshold(mask,15,128)
-    input_img[:,:,-1] = mask
-
-    ground_true = cv2.imread('./dataset/'+video+'/ori/output_'+'%05d'%(img_index)+'.png')
-    ground_true = impro.resize(ground_true,opt.loadsize)
-
+        img = impro.imread(os.path.join(opt.dataset,videoname,'origin_image','%05d'%(img_index+i-int(N/2))+'.jpg'),loadsize=opt.loadsize)
+        mask = impro.imread(os.path.join(opt.dataset,videoname,'mask','%05d'%(img_index+i-int(N/2))+'.png'),'gray',loadsize=opt.loadsize)
+        img_mosaic = mosaic.addmosaic_base(img, mask, mosaic_size,model = mod,rect_rat=rect_rat,father=father)
+        input_img[:,:,i*3:(i+1)*3] = img_mosaic
+    # to tensor
     input_img,ground_true = data.random_transform_video(input_img,ground_true,opt.finesize,N)
     input_img = data.im2tensor(input_img,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False,is0_1=False)
     ground_true = data.im2tensor(ground_true,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False,is0_1=False)
 
     return input_img,ground_true
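+# Note: loaddata() draws one random mosaic parameter set per sample and
+# applies it to all N frames in the loop above, so the synthetic mosaic
+# stays temporally consistent across the clip, as a real mosaic would.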
 
-print('preloading data, please wait 5s...')
+print('Preloading data, please wait...')
+
 if opt.perload_num <= opt.batchsize:
     opt.perload_num = opt.batchsize*2
+#data pool
 input_imgs = torch.rand(opt.perload_num,N*3+1,opt.finesize,opt.finesize).cuda()
 ground_trues = torch.rand(opt.perload_num,3,opt.finesize,opt.finesize).cuda()
 load_cnt = 0
@@ -144,14 +157,15 @@ def preload():
     global load_cnt
     while 1:
         try:
+            video_index = random.randint(0,video_num-1)
             ran = random.randint(0, opt.perload_num-1)
-            input_imgs[ran],ground_trues[ran] = loaddata()
+            input_imgs[ran],ground_trues[ran] = loaddata(video_index)
             load_cnt += 1
             # time.sleep(0.1)
         except Exception as e:
             print("error:",e)
 
 import threading
-t = threading.Thread(target=preload,args=())  # t is the newly created thread
+t = threading.Thread(target=preload,args=())
 t.daemon = True
 t.start()
 time_start=time.time()
diff --git a/util/data.py b/util/data.py
index 567c397..1ffb0e1 100755
--- a/util/data.py
+++ b/util/data.py
@@ -3,7 +3,7 @@ import numpy as np
 import torch
 import torchvision.transforms as transforms
 import cv2
-from .image_processing import color_adjust
+from .image_processing import color_adjust,dctblur
 
 transform = transforms.Compose([
     transforms.ToTensor(),
@@ -61,6 +61,11 @@ def im2tensor(image_numpy, imtype=np.uint8, gray=False,bgr2rgb = True, reshape =
         image_tensor = image_tensor.cuda()
     return image_tensor
 
+def shuffledata(data,target):
+    state = np.random.get_state()
+    np.random.shuffle(data)
+    np.random.set_state(state)
+    np.random.shuffle(target)
 
 def random_transform_video(src,target,finesize,N):
@@ -78,8 +83,8 @@ def random_transform_video(src,target,finesize,N):
         target = target[:,::-1,:]
 
     #random color
-    alpha = random.uniform(-0.3,0.3)
-    beta = random.uniform(-0.2,0.2)
+    alpha = random.uniform(-0.1,0.1)
+    beta = random.uniform(-0.1,0.1)
     b = random.uniform(-0.05,0.05)
     g = random.uniform(-0.05,0.05)
     r = random.uniform(-0.05,0.05)
@@ -87,39 +92,54 @@ def random_transform_video(src,target,finesize,N):
     for i in range(N):
         src[:,:,i*3:(i+1)*3] = color_adjust(src[:,:,i*3:(i+1)*3],alpha,beta,b,g,r)
     target = color_adjust(target,alpha,beta,b,g,r)
 
-    # random_num = 15
-    # bright = random.randint(-random_num*2,random_num*2)
-    # for i in range(N*3): src[:,:,i]=np.clip(src[:,:,i].astype('int')+bright,0,255).astype('uint8')
-    # for i in range(3): target[:,:,i]=np.clip(target[:,:,i].astype('int')+bright,0,255).astype('uint8')
-
-    return src,target
+    #random blur: rescale down/up with mismatched interpolation kernels
+    if random.random()<0.5:
+        interpolations = [cv2.INTER_LINEAR,cv2.INTER_CUBIC,cv2.INTER_LANCZOS4]
+        size_ran = random.uniform(0.7,1.5)
+        interpolation_up = interpolations[random.randint(0,2)]
+        interpolation_down = interpolations[random.randint(0,2)]
+        tmp = cv2.resize(src[:,:,:3*N], (int(finesize*size_ran),int(finesize*size_ran)),interpolation=interpolation_up)
+        src[:,:,:3*N] = cv2.resize(tmp, (finesize,finesize),interpolation=interpolation_down)
+        tmp = cv2.resize(target, (int(finesize*size_ran),int(finesize*size_ran)),interpolation=interpolation_up)
+        target = cv2.resize(tmp, (finesize,finesize),interpolation=interpolation_down)
 
-def random_transform_image(img,mask,finesize,test_flag = False):
+    return src,target
 
-    # randomsize = int(finesize*(1.2+0.2*random.random())+2)
+def random_transform_single(img,out_shape):
+    out_h,out_w = out_shape
+    img = cv2.resize(img,(int(out_w*random.uniform(1.1, 1.5)),int(out_h*random.uniform(1.1, 1.5))))
     h,w = img.shape[:2]
-    loadsize = min((h,w))
-    a = (float(h)/float(w))*random.uniform(0.9, 1.1)
-
-    if h<w:
-        ...
-    if random.random()>0.5:
-        size_ran = random.uniform(0.5,1.5)
-        img = cv2.resize(img, (int(finesize*size_ran),int(finesize*size_ran)))
-        img = cv2.resize(img, (finesize,finesize))
-    #img = cv2.blur(img, (random.randint(1,3), random.randint(1,3)))
+    if random.random()<0.5:
+        img = dctblur(img,random.randint(1,15))
+
+    # interpolations = [cv2.INTER_LINEAR,cv2.INTER_CUBIC,cv2.INTER_LANCZOS4]
+    # size_ran = random.uniform(0.7,1.5)
+    # img = cv2.resize(img, (int(finesize*size_ran),int(finesize*size_ran)),interpolation=interpolations[random.randint(0,2)])
+    # img = cv2.resize(img, (finesize,finesize),interpolation=interpolations[random.randint(0,2)])
+
+    #check shape
+    if img.shape[0]!= finesize or img.shape[1]!= finesize or mask.shape[0]!= finesize or mask.shape[1]!= finesize:
+        img = cv2.resize(img,(finesize,finesize))
+        mask = cv2.resize(mask,(finesize,finesize))
+        print('warning! shape error.')
     return img,mask
 
 def showresult(img1,img2,img3,name,is0_1 = False):
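The paired shuffle added to data.py works by replaying NumPy's RNG state, so both lists receive the identical permutation; a quick self-contained check (the function body is verbatim from the patch, the test harness is illustrative):

    import numpy as np

    def shuffledata(data, target):
        state = np.random.get_state()
        np.random.shuffle(data)
        np.random.set_state(state)   # replay the same RNG state...
        np.random.shuffle(target)    # ...so target gets the same permutation

    imgs  = list(range(10))
    masks = list(range(10))
    shuffledata(imgs, masks)
    assert imgs == masks             # image/mask pairs stay aligned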
diff --git a/util/ffmpeg.py b/util/ffmpeg.py
index f91f888..8baca40 100755
--- a/util/ffmpeg.py
+++ b/util/ffmpeg.py
@@ -2,11 +2,18 @@ import os,json
 # ffmpeg 3.4.6
 
-def video2image(videopath,imagepath,fps=0):
-    if fps == 0:
-        os.system('ffmpeg -i "'+videopath+'" -f image2 '+imagepath)
+def video2image(videopath,imagepath,fps=0,start_time=0,last_time=0):
+    if start_time == 0:
+        if fps == 0:
+            os.system('ffmpeg -i "'+videopath+'" -f image2 '+'-q:v 0 '+imagepath)
+        else:
+            os.system('ffmpeg -i "'+videopath+'" -r '+str(fps)+' -f image2 '+'-q:v 0 '+imagepath)
     else:
-        os.system('ffmpeg -i "'+videopath+'" -r '+str(fps)+' -f image2 '+imagepath)
+        if fps == 0:
+            os.system('ffmpeg -ss '+start_time+' -t '+last_time+' -i "'+videopath+'" -f image2 '+'-q:v 0 '+imagepath)
+        else:
+            os.system('ffmpeg -ss '+start_time+' -t '+last_time+' -i "'+videopath+'" -r '+str(fps)+' -f image2 '+'-q:v 0 '+imagepath)
+
 def video2voice(videopath,voicepath):
     os.system('ffmpeg -i "'+videopath+'" -f mp3 '+voicepath)
@@ -53,4 +60,4 @@ def continuous_screenshot(videopath,savedir,fps):
     fps: save how many images per second
     '''
     videoname = os.path.splitext(os.path.basename(videopath))[0]
-    os.system('ffmpeg -i "'+videopath+'" -vf fps='+str(fps)+' '+savedir+'/'+videoname+'_%05d.jpg')
+    os.system('ffmpeg -i "'+videopath+'" -vf fps='+str(fps)+' -q:v 0 '+savedir+'/'+videoname+'_%06d.jpg')
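With the extended signature, a subclip can be dumped straight to numbered frames. For example (paths hypothetical; start_time and last_time are passed through as ffmpeg-style time strings to -ss and -t):

    from util import ffmpeg

    # 1 fps JPEG frames from a 30 s segment starting at 00:01:00
    ffmpeg.video2image('video.mp4', './tmp/video_output_%06d.jpg',
                       fps=1, start_time='00:01:00', last_time='00:00:30')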
diff --git a/util/image_processing.py b/util/image_processing.py
index 8c6455f..4ba9c78 100755
--- a/util/image_processing.py
+++ b/util/image_processing.py
@@ -3,13 +3,24 @@ import numpy as np
 import random
 import platform
 
+
 system_type = 'Linux'
 if 'Windows' in platform.platform():
     system_type = 'Windows'
 
-def imread(file_path,mod = 'normal'):
+# JPEG-style luminance quantization table used by dctblur()
+DCT_Q = np.array([[8,16,19,22,26,27,29,34],
+                  [16,16,22,24,27,29,34,37],
+                  [19,22,26,27,29,34,34,38],
+                  [22,22,26,27,29,34,37,40],
+                  [22,26,27,29,32,35,40,48],
+                  [26,27,29,32,35,40,48,58],
+                  [26,27,29,34,38,46,56,59],
+                  [27,29,35,38,46,56,69,83]])
+
+def imread(file_path,mod = 'normal',loadsize = 0):
     '''
-    mod = 'normal' | 'gray' | 'all'
+    mod: 'normal' | 'gray' | 'all'
+    loadsize: 0 -> keep the original size
     '''
     if system_type == 'Linux':
         if mod == 'normal':
@@ -26,6 +37,9 @@
             img = cv2.imdecode(np.fromfile(file_path,dtype=np.uint8),0)
         else:
             img = cv2.imdecode(np.fromfile(file_path,dtype=np.uint8),-1)
+
+    if loadsize != 0:
+        img = resize(img, loadsize, interpolation=cv2.INTER_CUBIC)
     return img
 
@@ -40,6 +54,13 @@ def imwrite(file_path,img):
     cv2.imencode('.jpg', img)[1].tofile(file_path)
 
 def resize(img,size,interpolation=cv2.INTER_LINEAR):
+    '''
+    cv2.INTER_NEAREST   - nearest-neighbor interpolation
+    cv2.INTER_LINEAR    - bilinear interpolation
+    cv2.INTER_AREA      - resampling using pixel-area relation
+    cv2.INTER_CUBIC     - bicubic interpolation over a 4x4 neighborhood
+    cv2.INTER_LANCZOS4  - Lanczos interpolation over an 8x8 neighborhood
+    '''
     h, w = img.shape[:2]
     if np.min((w,h)) ==size:
         return img
@@ -55,8 +76,6 @@ def resize_like(img,img_like):
     return img
 
 def ch_one2three(img):
-    #zeros = np.zeros(img.shape[:2], dtype = "uint8")
-    # ret,thresh = cv2.threshold(img,127,255,cv2.THRESH_BINARY)
     res = cv2.merge([img, img, img])
     return res
 
@@ -78,11 +97,11 @@ def color_adjust(img,alpha=1,beta=0,b=0,g=0,r=0,ran = False):
     '''
     img = img.astype('float')
     if ran:
-        alpha = random.uniform(-0.2,0.2)
-        beta = random.uniform(-0.2,0.2)
-        b = random.uniform(-0.1,0.1)
-        g = random.uniform(-0.1,0.1)
-        r = random.uniform(-0.1,0.1)
+        alpha = random.uniform(-0.1,0.1)
+        beta = random.uniform(-0.1,0.1)
+        b = random.uniform(-0.05,0.05)
+        g = random.uniform(-0.05,0.05)
+        r = random.uniform(-0.05,0.05)
     img = (1+alpha)*img+255.0*beta
     bgr = [b*255.0,g*255.0,r*255.0]
     for i in range(3): img[:,:,i]=img[:,:,i]+bgr[i]
@@ -98,14 +117,6 @@ def makedataset(target_image,orgin_image):
     img[0:256,256:512] = orgin_image[0:256,int(w/2-256/2):int(w/2+256/2)]
     return img
 
-def image2folat(img,ch):
-    size=img.shape[0]
-    if ch == 1:
-        img = (img[:,:,0].reshape(1,size,size)/255.0).astype(np.float32)
-    else:
-        img = (img.transpose((2, 0, 1))/255.0).astype(np.float32)
-    return img
-
 def spiltimage(img,size = 128):
     h, w = img.shape[:2]
     # size = min(h,w)
@@ -133,6 +144,34 @@
     result_img = cv2.add(new_img1,new_img2)
     return result_img
 
+def block_dct_and_idct(g,QQF):
+    # DCT -> quantize -> dequantize -> inverse DCT, as in JPEG compression
+    T = cv2.dct(g)
+    IT = np.round(cv2.idct(np.round(np.round(16.0*T/QQF)*QQF/16)))
+    return IT
+
+def image_dct_and_idct(I,QF):
+    h,w = I.shape
+    QQF = DCT_Q*QF
+    for i in range(int(h/8)):
+        for j in range(int(w/8)):
+            I[i*8:(i+1)*8,j*8:(j+1)*8] = block_dct_and_idct(I[i*8:(i+1)*8,j*8:(j+1)*8],QQF)
+    return I
+
+def dctblur(img,Q):
+    '''
+    Simulate JPEG-style compression artifacts. Q: 1~20, 1 -> highest quality.
+    '''
+    # only complete 8x8 blocks are transformed; border pixels pass through
+    img = img.astype(np.float32)
+    if img.ndim == 2:
+        img = image_dct_and_idct(img, Q)
+    if img.ndim == 3:
+        h,w,ch = img.shape
+        for i in range(ch):
+            img[:,:,i] = image_dct_and_idct(img[:,:,i], Q)
+    return (np.clip(img,0,255)).astype(np.uint8)
+
 def find_mostlikely_ROI(mask):
     contours,hierarchy=cv2.findContours(mask, cv2.RETR_LIST,cv2.CHAIN_APPROX_SIMPLE)
     if len(contours)>0:
@@ -199,8 +238,20 @@ def mask_area(mask):
     return area
 
-def replace_mosaic(img_origin,img_fake,x,y,size,no_father):
-    img_fake = resize(img_fake,size*2,interpolation=cv2.INTER_LANCZOS4)
+def Q_lapulase(resImg):
+    '''
+    Evaluate image sharpness (variance of the Laplacian)
+    score > 20   normal
+    score > 50   clear
+    '''
+    img2gray = cv2.cvtColor(resImg, cv2.COLOR_BGR2GRAY)
+    img2gray = resize(img2gray,512)
+    res = cv2.Laplacian(img2gray, cv2.CV_64F)
+    score = res.var()
+    return score
+
+def replace_mosaic(img_origin,img_fake,mask,x,y,size,no_father):
+    img_fake = cv2.resize(img_fake,(size*2,size*2),interpolation=cv2.INTER_LANCZOS4)
     if no_father:
         img_origin[y-size:y+size,x-size:x+size]=img_fake
         img_result = img_origin
     else:
         #eclosion
         eclosion_num = int(size/5)
         entad = int(eclosion_num/2+2)
-        mask = np.zeros(img_origin.shape, dtype='uint8')
-        mask = cv2.rectangle(mask,(x-size+entad,y-size+entad),(x+size-entad,y+size-entad),(255,255,255),-1)
+
+        # mask = np.zeros(img_origin.shape, dtype='uint8')
+        # mask = cv2.rectangle(mask,(x-size+entad,y-size+entad),(x+size-entad,y+size-entad),(255,255,255),-1)
+        mask = cv2.resize(mask,(img_origin.shape[1],img_origin.shape[0]))
+        mask = ch_one2three(mask)
+
         mask = (cv2.blur(mask, (eclosion_num, eclosion_num)))
-        mask = mask/255.0
+        mask_tmp = np.zeros_like(mask)
+        mask_tmp[y-size:y+size,x-size:x+size] = mask[y-size:y+size,x-size:x+size]  # fix edge overflow
+        mask = mask_tmp/255.0
 
         img_tmp = np.zeros(img_origin.shape)
         img_tmp[y-size:y+size,x-size:x+size]=img_fake
         img_result = img_origin.copy()
         img_result = (img_origin*(1-mask)+img_tmp*mask).astype('uint8')
+
     return img_result
\ No newline at end of file
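Q_lapulase scores sharpness as the variance of the Laplacian on a downscaled grayscale copy; a typical filtering pass over candidate frames might look like this (sketch; path hypothetical, thresholds taken from the docstring):

    import cv2
    from util import image_processing as impro

    frame = cv2.imread('frame_000001.jpg')
    if impro.Q_lapulase(frame) > 20:   # >20 normal, >50 clear
        pass  # sharp enough to keep for dataset building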
diff --git a/util/mosaic.py b/util/mosaic.py
index fbeed38..936227b 100755
--- a/util/mosaic.py
+++ b/util/mosaic.py
@@ -10,10 +10,19 @@ def addmosaic(img,mask,opt):
     elif opt.mosaic_size == 0:
         img = addmosaic_autosize(img, mask, opt.mosaic_mod)
     else:
-        img = addmosaic_normal(img,mask,opt.mosaic_size,opt.output_size,model = opt.mosaic_mod)
+        img = addmosaic_base(img,mask,opt.mosaic_size,opt.output_size,model = opt.mosaic_mod)
     return img
 
-def addmosaic_normal(img,mask,n,out_size = 0,model = 'squa_avg',rect_rat = 1.6):
+def addmosaic_base(img,mask,n,out_size = 0,model = 'squa_avg',rect_rat = 1.6,father=0):
+    '''
+    img: input image
+    mask: input mask
+    n: mosaic size
+    out_size: output size, 0 -> keep the original size
+    model: squa_avg | squa_mid | squa_random | squa_avg_circle_edge | rect_avg
+    rect_rat: if model==rect_avg, mosaic w/h = rect_rat
+    father: edge-feather kernel size, -1 -> no feathering, 0 -> auto
+    '''
     n = int(n)
     if out_size:
         img = resize(img,out_size)
@@ -44,9 +53,9 @@
             for j in range(int(w/n)):
                 img_mosaic[i*n:(i+1)*n,j*n:(j+1)*n,:]=img[i*n:(i+1)*n,j*n:(j+1)*n,:].mean(0).mean(0)
         mask = cv2.threshold(mask,127,255,cv2.THRESH_BINARY)[1]
-        mask = ch_one2three(mask)
-        mask_inv = cv2.bitwise_not(mask)
-        imgroi1 = cv2.bitwise_and(mask,img_mosaic)
+        _mask = ch_one2three(mask)
+        mask_inv = cv2.bitwise_not(_mask)
+        imgroi1 = cv2.bitwise_and(_mask,img_mosaic)
         imgroi2 = cv2.bitwise_and(mask_inv,img)
         img_mosaic = cv2.add(imgroi1,imgroi2)
 
@@ -58,12 +67,21 @@
                 if mask[int(i*n_h+n_h/2),int(j*n_w+n_w/2)] == 255:
                     img_mosaic[i*n_h:(i+1)*n_h,j*n_w:(j+1)*n_w,:]=img[i*n_h:(i+1)*n_h,j*n_w:(j+1)*n_w,:].mean(0).mean(0)
 
+    # feather the mosaic edge so it blends into the original image
+    if father != -1:
+        if father==0:
+            mask = (cv2.blur(mask, (n, n)))
+        else:
+            mask = (cv2.blur(mask, (father, father)))
+        mask = ch_one2three(mask)/255.0
+        img_mosaic = (img*(1-mask)+img_mosaic*mask).astype('uint8')
+
     return img_mosaic
 
 def get_autosize(img,mask,area_type = 'normal'):
     h,w = img.shape[:2]
-    mask = cv2.resize(mask,(w,h))
-    alpha = np.min((w,h))/512
+    size = np.min([h,w])
+    mask = resize(mask,size)
+    alpha = size/512
     try:
         if area_type == 'normal':
             area = mask_area(mask)
         elif area_type == 'bounding':
             w,h = cv2.boundingRect(mask)[2:]
             area = w*h
     except:
         area = 0
     area = area/(alpha*alpha)
     if area>50000:
         ...
@@ -85,66 +103,32 @@ def get_autosize(img,mask,area_type = 'normal'):
         pass
     return size
 
-def addmosaic_autosize(img,mask,model,area_type = 'normal'):
-    h,w = img.shape[:2]
-    mask = cv2.resize(mask,(w,h))
-    alpha = np.min((w,h))/512
-    try:
-        if area_type == 'normal':
-            area = mask_area(mask)
-        elif area_type == 'bounding':
-            w,h = cv2.boundingRect(mask)[2:]
-            area = w*h
-    except:
-        area = 0
-    area = area/(alpha*alpha)
-    if area>50000:
-        img_mosaic = addmosaic_normal(img,mask,alpha*((area-50000)/50000+12),model = model)
-    elif 20000<area<=50000:
-        ...
-    if area>50000:
-        img_mosaic = random_mod(img,mask,alpha*random.uniform(8,30)) #16,30
-    elif 20000