diff --git a/.gitignore b/.gitignore
index c1a7e93d334778204b805743b7469df3d456667a..1635c5630c085ff819456c2f2a648852e5df5978 100644
--- a/.gitignore
+++ b/.gitignore
@@ -154,6 +154,7 @@ result/
/pretrained_models_old
/deepmosaic_window
/sftp-config.json
+/exe
#./make_datasets
/make_datasets/video
/make_datasets/tmp
diff --git a/README.md b/README.md
index 601f8380ee1b8fd7794292fc25a5324e65f01e3b..2b5aed7bed8bd9c0530ef54aded79300de3be336 100755
--- a/README.md
+++ b/README.md
@@ -6,25 +6,19 @@ This porject based on "semantic segmentation" and "Image-to-Image Translation".<
* [中文版README](./README_CN.md)
### More example
-
origin | auto add mosaic | auto clean mosaic
:-:|:-:|:-:
![image](./imgs/example/lena.jpg) | ![image](./imgs/example/lena_add.jpg) | ![image](./imgs/example/lena_clean.jpg)
![image](./imgs/example/youknow.png) | ![image](./imgs/example/youknow_add.png) | ![image](./imgs/example/youknow_clean.png)
-
* Compared with [DeepCreamPy](https://github.com/deeppomf/DeepCreamPy)
-
mosaic image | DeepCreamPy | ours
:-:|:-:|:-:
![image](./imgs/example/face_a_mosaic.jpg) | ![image](./imgs/example/a_dcp.png) | ![image](./imgs/example/face_a_clean.jpg)
![image](./imgs/example/face_b_mosaic.jpg) | ![image](./imgs/example/b_dcp.png) | ![image](./imgs/example/face_b_clean.jpg)
-
* Style Transfer
-
origin | to Van Gogh | to winter
:-:|:-:|:-:
![image](./imgs/example/SZU.jpg) | ![image](./imgs/example/SZU_vangogh.jpg) | ![image](./imgs/example/SZU_summer2winter.jpg)
-
An interesting example:[Ricardo Milos to cat](https://www.bilibili.com/video/BV1Q7411W7n6)
## Run DeepMosaics
@@ -33,6 +27,7 @@ You can either run DeepMosaics via pre-built binary package or from source.
### Pre-built binary package
For windows, we bulid a GUI version for easy test.
Download this version and pre-trained model via [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ)
+
* [[How to use]](./docs/exe_help.md)
![image](./imgs/GUI.png)
@@ -64,11 +59,11 @@ You can download pre_trained models and put them into './pretrained_models'.
[[Introduction to pre-trained models]](./docs/pre-trained_models_introduction.md)
#### Simple example
-* Add Mosaic (output video will save in './result')
+* Add Mosaic (output media will be saved in './result')
```bash
python3 deepmosaic.py --media_path ./imgs/ruoruo.jpg --model_path ./pretrained_models/mosaic/add_face.pth --use_gpu -1
```
-* Clean Mosaic (output video will save in './result')
+* Clean Mosaic (output media will be saved in './result')
```bash
python3 deepmosaic.py --media_path ./result/ruoruo_add.jpg --model_path ./pretrained_models/mosaic/clean_face_HD.pth --use_gpu -1
```
@@ -76,5 +71,9 @@ python3 deepmosaic.py --media_path ./result/ruoruo_add.jpg --model_path ./pretra
If you want to test other image or video, please refer to this file.
[[options_introduction.md]](./docs/options_introduction.md)
+## Training with your own dataset
+If you want to train with your own dataset, please refer to [training_with_your_own_dataset.md](./docs/training_with_your_own_dataset.md)
+
## Acknowledgments
-This code borrows heavily from [[pytorch-CycleGAN-and-pix2pix]](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix) [[Pytorch-UNet]](https://github.com/milesial/Pytorch-UNet)[[pix2pixHD]](https://github.com/NVIDIA/pix2pixHD).
+This code borrows heavily from [[pytorch-CycleGAN-and-pix2pix]](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix) [[Pytorch-UNet]](https://github.com/milesial/Pytorch-UNet) [[pix2pixHD]](https://github.com/NVIDIA/pix2pixHD) [[BiSeNet]](https://github.com/ooooverflow/BiSeNet).
+
diff --git a/README_CN.md b/README_CN.md
index 94a2e50d2cd67fce6c5eea82123169231cc0bd88..fac53c0e2657e2cb61936e029e5f54c6bd01d02a 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -3,25 +3,19 @@
这是一个通过深度学习自动的为图片/视频添加马赛克,或消除马赛克的项目.
它基于“语义分割”以及“图像翻译”.
### 更多例子
-
原始 | 自动打码 | 自动去码
:-:|:-:|:-:
![image](./imgs/example/lena.jpg) | ![image](./imgs/example/lena_add.jpg) | ![image](./imgs/example/lena_clean.jpg)
![image](./imgs/example/youknow.png) | ![image](./imgs/example/youknow_add.png) | ![image](./imgs/example/youknow_clean.png)
-
* 与 [DeepCreamPy](https://github.com/deeppomf/DeepCreamPy)相比较
-
马赛克图片 | DeepCreamPy | ours
:-:|:-:|:-:
![image](./imgs/example/face_a_mosaic.jpg) | ![image](./imgs/example/a_dcp.png) | ![image](./imgs/example/face_a_clean.jpg)
![image](./imgs/example/face_b_mosaic.jpg) | ![image](./imgs/example/b_dcp.png) | ![image](./imgs/example/face_b_clean.jpg)
-
* 风格转换
-
原始 | 梵高风格 | 转化为冬天
:-:|:-:|:-:
![image](./imgs/example/SZU.jpg) | ![image](./imgs/example/SZU_vangogh.jpg) | ![image](./imgs/example/SZU_summer2winter.jpg)
-
一个有意思的尝试:[香蕉君♂猫](https://www.bilibili.com/video/BV1Q7411W7n6)
## 如何运行
@@ -74,5 +68,9 @@ python3 deepmosaic.py --media_path ./result/ruoruo_add.jpg --model_path ./pretra
如果想要测试其他的图片或视频,请参照以下文件输入参数.
[[options_introduction_CN.md]](./docs/options_introduction_CN.md)
+## 使用自己的数据训练模型
+如果需要使用自己的数据训练模型,请参照 [training_with_your_own_dataset.md](./docs/training_with_your_own_dataset.md)
+
## 鸣谢
-代码大量的参考了以下项目:[[pytorch-CycleGAN-and-pix2pix]](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix) [[Pytorch-UNet]](https://github.com/milesial/Pytorch-UNet)[[pix2pixHD]](https://github.com/NVIDIA/pix2pixHD).
\ No newline at end of file
+代码大量的参考了以下项目:[[pytorch-CycleGAN-and-pix2pix]](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix) [[Pytorch-UNet]](https://github.com/milesial/Pytorch-UNet) [[pix2pixHD]](https://github.com/NVIDIA/pix2pixHD) [[BiSeNet]](https://github.com/ooooverflow/BiSeNet).
+
diff --git a/cores/core.py b/cores/core.py
index 22a408a0cc6dec1663f83d4eff8ebe2e242356a8..d48ae5156681b668c7df7c9fdf480be1a5b324e9 100644
--- a/cores/core.py
+++ b/cores/core.py
@@ -38,7 +38,7 @@ def addmosaic_video(opt,netS):
positions = []
for i,imagepath in enumerate(imagepaths,1):
img = impro.imread(os.path.join('./tmp/video2image',imagepath))
- mask,x,y,area = runmodel.get_ROI_position(img,netS,opt)
+ mask,x,y,size,area = runmodel.get_ROI_position(img,netS,opt)
positions.append([x,y,area])
cv2.imwrite(os.path.join('./tmp/ROI_mask',imagepath),mask)
print('\r','Find ROI location:'+str(i)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=35),end='')
@@ -110,7 +110,7 @@ def cleanmosaic_img(opt,netG,netM):
print('Clean Mosaic:',path)
img_origin = impro.imread(path)
x,y,size,mask = runmodel.get_mosaic_position(img_origin,netM,opt)
- #cv2.imwrite('./mask/'+os.path.basename(path), mask)
+ cv2.imwrite('./mask/'+os.path.basename(path), mask)
img_result = img_origin.copy()
if size != 0 :
img_mosaic = img_origin[y-size:y+size,x-size:x+size]
@@ -118,7 +118,7 @@ def cleanmosaic_img(opt,netG,netM):
img_fake = runmodel.traditional_cleaner(img_mosaic,opt)
else:
img_fake = runmodel.run_pix2pix(img_mosaic,netG,opt)
- img_result = impro.replace_mosaic(img_origin,img_fake,x,y,size,opt.no_feather)
+ img_result = impro.replace_mosaic(img_origin,img_fake,mask,x,y,size,opt.no_feather)
else:
print('Do not find mosaic')
impro.imwrite(os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_clean.jpg'),img_result)
@@ -126,7 +126,7 @@ def cleanmosaic_img(opt,netG,netM):
def cleanmosaic_video_byframe(opt,netG,netM):
path = opt.media_path
fps,imagepaths = video_init(opt,path)[:2]
- positions = get_mosaic_positions(opt,netM,imagepaths,savemask=False)
+ positions = get_mosaic_positions(opt,netM,imagepaths,savemask=True)
# clean mosaic
for i,imagepath in enumerate(imagepaths,0):
x,y,size = positions[i][0],positions[i][1],positions[i][2]
@@ -138,7 +138,8 @@ def cleanmosaic_video_byframe(opt,netG,netM):
img_fake = runmodel.traditional_cleaner(img_mosaic,opt)
else:
img_fake = runmodel.run_pix2pix(img_mosaic,netG,opt)
- img_result = impro.replace_mosaic(img_origin,img_fake,x,y,size,opt.no_feather)
+ mask = cv2.imread(os.path.join('./tmp/mosaic_mask',imagepath),0)
+ img_result = impro.replace_mosaic(img_origin,img_fake,mask,x,y,size,opt.no_feather)
cv2.imwrite(os.path.join('./tmp/replace_mosaic',imagepath),img_result)
print('\r','Clean Mosaic:'+str(i+1)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=35),end='')
print()
@@ -178,13 +179,13 @@ def cleanmosaic_video_fusion(opt,netG,netM):
mosaic_input = np.zeros((INPUT_SIZE,INPUT_SIZE,3*N+1), dtype='uint8')
mosaic_input[:,:,0:N*3] = impro.resize(img_pool[y-size:y+size,x-size:x+size,:], INPUT_SIZE)
- mask = impro.resize(mask,np.min(img_origin.shape[:2]))[y-size:y+size,x-size:x+size]
- mosaic_input[:,:,-1] = impro.resize(mask, INPUT_SIZE)
+ mask_input = impro.resize(mask,np.min(img_origin.shape[:2]))[y-size:y+size,x-size:x+size]
+ mosaic_input[:,:,-1] = impro.resize(mask_input, INPUT_SIZE)
mosaic_input = data.im2tensor(mosaic_input,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False,is0_1 = False)
unmosaic_pred = netG(mosaic_input)
img_fake = data.tensor2im(unmosaic_pred,rgb2bgr = False ,is0_1 = False)
- img_result = impro.replace_mosaic(img_origin,img_fake,x,y,size,opt.no_feather)
+ img_result = impro.replace_mosaic(img_origin,img_fake,mask,x,y,size,opt.no_feather)
cv2.imwrite(os.path.join('./tmp/replace_mosaic',imagepath),img_result)
print('\r','Clean Mosaic:'+str(i+1)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=35),end='')
print()
diff --git a/cores/options.py b/cores/options.py
index b3cb051608ad3fcf7e2e2786e2647e7cef8f01d3..d0a4a778dc3398f83ee7b805f1d72aaaea769682 100644
--- a/cores/options.py
+++ b/cores/options.py
@@ -16,17 +16,17 @@ class Options():
self.parser.add_argument('--mode', type=str, default='auto',help='Program running mode. auto | add | clean | style')
self.parser.add_argument('--model_path', type=str, default='./pretrained_models/mosaic/add_face.pth',help='pretrained model path')
self.parser.add_argument('--result_dir', type=str, default='./result',help='output media will be saved here')
- self.parser.add_argument('--tempimage_type', type=str, default='png',help='type of temp image, png | jpg, png is better but occupy more storage space')
+ self.parser.add_argument('--tempimage_type', type=str, default='jpg',help='type of temp image, png | jpg, png is better but occupy more storage space')
self.parser.add_argument('--netG', type=str, default='auto',
help='select model to use for netG(Clean mosaic and Transfer style) -> auto | unet_128 | unet_256 | resnet_9blocks | HD | video')
self.parser.add_argument('--fps', type=int, default=0,help='read and output fps, if 0-> origin')
self.parser.add_argument('--output_size', type=int, default=0,help='size of output media, if 0 -> origin')
-
+ self.parser.add_argument('--mask_threshold', type=int, default=64,help='threshold of recognize clean or add mosaic position 0~255')
+
#AddMosaic
self.parser.add_argument('--mosaic_mod', type=str, default='squa_avg',help='type of mosaic -> squa_avg | squa_random | squa_avg_circle_edge | rect_avg | random')
self.parser.add_argument('--mosaic_size', type=int, default=0,help='mosaic size,if 0 auto size')
self.parser.add_argument('--mask_extend', type=int, default=10,help='extend mosaic area')
- self.parser.add_argument('--mask_threshold', type=int, default=64,help='threshold of recognize mosaic position 0~255')
#CleanMosaic
self.parser.add_argument('--mosaic_position_model_path', type=str, default='auto',help='name of model use to find mosaic position')
diff --git a/deepmosaic.py b/deepmosaic.py
index 58a6031b86742ce458ecab61012a2c42a58fbef9..a92f9416dfe8261e822aeed9ee7151b39d44dd48 100644
--- a/deepmosaic.py
+++ b/deepmosaic.py
@@ -15,7 +15,7 @@ def main():
else:
files = [opt.media_path]
if opt.mode == 'add':
- netS = loadmodel.unet(opt)
+ netS = loadmodel.bisenet(opt,'roi')
for file in files:
opt.media_path = file
if util.is_img(file):
@@ -26,7 +26,7 @@ def main():
print('This type of file is not supported')
elif opt.mode == 'clean':
- netM = loadmodel.unet_clean(opt)
+ netM = loadmodel.bisenet(opt,'mosaic')
if opt.traditional:
netG = None
elif opt.netG == 'video':
diff --git a/docs/Release_notes.txt b/docs/Release_notes.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b62014f75e02e1def47f34eafcc912fc180ae07c
--- /dev/null
+++ b/docs/Release_notes.txt
@@ -0,0 +1,23 @@
+DeepMosaics V0.3.0
+Core program built with windows10_1703_x86_64
+ + python 3.6.8
+ + pyinstaller 3.5
+GUI built with C#
+For more detail, please view on github: https://github.com/HypoX64/DeepMosaics
+
+Releases History
+ V0.3.0
+ 1. Support BiSeNet(Better recognition of mosaics).
+ 2. New videoHD model.
+ 3. Better feathering method.
+ V0.2.0
+ 1. Add video model.
+ 2. Chinese characters are now supported in input paths
+ 3. Support style transfer
+ 4. Support fps limit
+ V0.1.2
+ 1. Support pix2pixHD model
+ V0.1.1
+ 1. Validate the input path; illegal paths are rejected
+ V0.1.0
+ 1. Initial release.
\ No newline at end of file
diff --git a/docs/exe_help.md b/docs/exe_help.md
index 3f9f2428051ebaca9209937f206491fb64e78cef..98ee6a221e8707b1a61687598571ccf6a67ae527 100644
--- a/docs/exe_help.md
+++ b/docs/exe_help.md
@@ -1,92 +1,92 @@
-## DeepMosaics.exe Instructions
-[[中文版]](./exe_help_CN.md)
-This is a GUI version compiled in Windows.
-Download this version and pre-trained model via [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ)
-Attentions:
-
- - Require Windows_x86_64, Windows10 is better.
- - Different pre-trained models are suitable for different effects.
- - Run time depends on computer performance.
- - If output video cannot be played, you can try with [potplayer](https://daumpotplayer.com/download/).
- - GUI version update slower than source.
-### How to use
-* step 1: Choose image or video.
-* step 2: Choose model(Different pre-trained models are suitable for different effects)
-* step3: Run program and wait.
-* step4: Cheek reult in './result'.
-
-### Introduction to pre-trained models
-* Mosaic
-
-| Name | Description |
-| :------------------------------: | :---------------------------------------------------------: |
-| add_face.pth | Add mosaic to all faces in images/videos. |
-| clean_face_HD.pth | Clean mosaic to all faces in images/video.<br>(RAM > 8GB). |
-| add_youknow.pth | Add mosaic to all (FBI Warning) in images/videos. |
-| clean_youknow_resnet_9blocks.pth | Clean mosaic to all (FBI Warning) in images/videos. |
-| clean_youknow_video.pth | Clean mosaic to all (FBI Warning) in videos. |
-| clean_youknow_video_HD.pth | Clean mosaic to all (FBI Warning) in videos.<br>(RAM > 8GB) |
-
-* Style Transfer
-
-| Name | Description |
-| :---------------------: | :-------------------------------------------------------: |
-| style_apple2orange.pth | Convert apples to oranges. |
-| style_orange2apple.pth | Convert oranges to apples |
-| style_summer2winter.pth | Convert summer to winter. |
-| style_winter2summer.pth | Convert winter to summer. |
-| style_cezanne.pth | Convert photos/video to Paul Cézanne style. |
-| style_monet.pth | Convert photos/video to Claude Monet style. |
-| style_ukiyoe.pth | Convert photos/video to Ukiyoe style. |
-| style_vangogh.pth | Convert photos/video to Van Gogh style. |
-### Annotation
-![image](../imgs/GUI_Instructions.jpg)
-* 1. Choose image or video.
-* 2. Choose model(Different pre-trained models are suitable for different effects).
-* 3. Program running mode. (auto | add | clean | style)
-* 4. Use GPU to run deep learning model. (The current version does not support gpu, if you need to use gpu please run source).
-* 5. Limit the fps of the output video(0->original fps).
-* 6. More options.
-* 7. More options can be input.
-* 8. Run program.
-* 9. Open help file.
-* 10. Sponsor our project.
-* 11. Version information.
-* 12. Open the URL on github.
-
-### Introduction to options
-If you need more effects, use '--option your-parameters' to enter what you need.
-* Base
-
-| Option | Description | Default |
-| :----------: | :----------------------------------------: | :-------------------------------------: |
-| --use_gpu | if -1, do not use gpu | 0 |
-| --media_path | your videos or images path | ./imgs/ruoruo.jpg |
-| --mode | program running mode(auto/clean/add/style) | 'auto' |
-| --model_path | pretrained model path | ./pretrained_models/mosaic/add_face.pth |
-| --result_dir | output media will be saved here | ./result |
-| --fps | read and output fps, if 0-> origin | 0 |
-
-* AddMosaic
-
-| Option | Description | Default |
-| :--------------: | :----------------------------------------------------------: | :------: |
-| --mosaic_mod | type of mosaic -> squa_avg/ squa_random/ squa_avg_circle_edge/ rect_avg/random | squa_avg |
-| --mosaic_size | mosaic size,if 0 -> auto size | 0 |
-| --mask_extend | extend mosaic area | 10 |
-| --mask_threshold | threshold of recognize mosaic position 0~255 | 64 |
-
-* CleanMosaic
-
-| Option | Description | Default |
-| :-----------: | :----------------------------------------------------------: | :-----: |
-| --traditional | if specified, use traditional image processing methods to clean mosaic | |
-| --tr_blur | ksize of blur when using traditional method, it will affect final quality | 10 |
-| --tr_down | downsample when using traditional method,it will affect final quality | 10 |
-| --medfilt_num | medfilt window of mosaic movement in the video | 11 |
-
-* Style Transfer
-
-| Option | Description | Default |
-| :-----------: | :----------------------------------: | :-----: |
+## DeepMosaics.exe Instructions
+[[中文版]](./exe_help_CN.md)
+This is a GUI version compiled in Windows.
+Download this version and pre-trained model via [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ)
+Notes:
+
+ - Require Windows_x86_64, Windows10 is better.
+ - Different pre-trained models are suitable for different effects.
+ - Run time depends on computer performance.
+ - If output video cannot be played, you can try with [potplayer](https://daumpotplayer.com/download/).
+ - The GUI version is updated less frequently than the source code.
+### How to use
+* step 1: Choose image or video.
+* step 2: Choose model(Different pre-trained models are suitable for different effects)
+* step 3: Run the program and wait.
+* step 4: Check the result in './result'.
+
+### Introduction to pre-trained models
+* Mosaic
+
+| Name | Description |
+| :------------------------------: | :---------------------------------------------------------: |
+| add_face.pth | Add mosaic to all faces in images/videos. |
+| clean_face_HD.pth | Clean mosaic to all faces in images/video.<br>(RAM > 8GB). |
+| add_youknow.pth | Add mosaic to all (FBI Warning) in images/videos. |
+| clean_youknow_resnet_9blocks.pth | Clean mosaic to all (FBI Warning) in images/videos. |
+| clean_youknow_video.pth | Clean mosaic to all (FBI Warning) in videos. |
+| clean_youknow_video_HD.pth | Clean mosaic to all (FBI Warning) in videos.<br>(RAM > 8GB) |
+
+* Style Transfer
+
+| Name | Description |
+| :---------------------: | :-------------------------------------------------------: |
+| style_apple2orange.pth | Convert apples to oranges. |
+| style_orange2apple.pth | Convert oranges to apples |
+| style_summer2winter.pth | Convert summer to winter. |
+| style_winter2summer.pth | Convert winter to summer. |
+| style_cezanne.pth | Convert photos/video to Paul Cézanne style. |
+| style_monet.pth | Convert photos/video to Claude Monet style. |
+| style_ukiyoe.pth | Convert photos/video to Ukiyoe style. |
+| style_vangogh.pth | Convert photos/video to Van Gogh style. |
+### Annotation
+![image](../imgs/GUI_Instructions.jpg)
+* 1. Choose image or video.
+* 2. Choose model(Different pre-trained models are suitable for different effects).
+* 3. Program running mode. (auto | add | clean | style)
+* 4. Use GPU to run deep learning model. (The current version does not support gpu, if you need to use gpu please run source).
+* 5. Limit the fps of the output video(0->original fps).
+* 6. More options.
+* 7. More options can be input.
+* 8. Run program.
+* 9. Open help file.
+* 10. Sponsor our project.
+* 11. Version information.
+* 12. Open the URL on github.
+
+### Introduction to options
+If you need more effects, use '--option your-parameters' to enter what you need.
+* Base
+
+| Option | Description | Default |
+| :----------: | :----------------------------------------: | :-------------------------------------: |
+| --use_gpu | if -1, do not use gpu | 0 |
+| --media_path | your videos or images path | ./imgs/ruoruo.jpg |
+| --mode | program running mode(auto/clean/add/style) | 'auto' |
+| --model_path | pretrained model path | ./pretrained_models/mosaic/add_face.pth |
+| --result_dir | output media will be saved here | ./result |
+| --fps | read and output fps, if 0-> origin | 0 |
+
+* AddMosaic
+
+| Option | Description | Default |
+| :--------------: | :----------------------------------------------------------: | :------: |
+| --mosaic_mod | type of mosaic -> squa_avg/ squa_random/ squa_avg_circle_edge/ rect_avg/random | squa_avg |
+| --mosaic_size | mosaic size,if 0 -> auto size | 0 |
+| --mask_extend | extend mosaic area | 10 |
+| --mask_threshold | threshold for recognizing the mosaic position, 0~255 | 64 |
+
+* CleanMosaic
+
+| Option | Description | Default |
+| :-----------: | :----------------------------------------------------------: | :-----: |
+| --traditional | if specified, use traditional image processing methods to clean mosaic | |
+| --tr_blur | ksize of blur when using traditional method, it will affect final quality | 10 |
+| --tr_down | downsample when using traditional method,it will affect final quality | 10 |
+| --medfilt_num | medfilt window of mosaic movement in the video | 11 |
+
+* Style Transfer
+
+| Option | Description | Default |
+| :-----------: | :----------------------------------: | :-----: |
| --output_size | size of output media, if 0 -> origin | 512 |
\ No newline at end of file
diff --git a/docs/exe_help_CN.md b/docs/exe_help_CN.md
index a160c724cb9d0555b0f2dd1499ff2dc96b33c062..a38b6e55c10276f5bcfd240c093b64d3e64ad9aa 100644
--- a/docs/exe_help_CN.md
+++ b/docs/exe_help_CN.md
@@ -1,93 +1,93 @@
-## DeepMosaics.exe 使用说明
-下载程序以及预训练模型 [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ)
-注意事项:
-
- - 程序的运行要求在64位Windows操作系统,我仅在Windows10运行过,其他版本暂未经过测试
- - 请根据需求选择合适的预训练模型进行测试
- - 运行时间取决于电脑性能,对于视频文件,我们建议使用源码以及GPU运行
- - 如果输出的视频无法播放,这边建议您尝试[potplayer](https://daumpotplayer.com/download/).
- - 相比于源码,该版本的更新将会延后.
-
-### 如何使用
-* step 1: 选择需要处理的图片或视频
-* step 2: 选择预训练模型(不同的预训练模型有不同的效果)
-* step3: 运行程序并等待
-* step4: 查看结果(储存在result文件夹下)
-
-## 预训练模型说明
-当前的预训练模型分为两类——添加/移除马赛克以及风格转换.
-
-* 马赛克
-
-| 文件名 | 描述 |
-| :------------------------------: | :-------------------------------------------: |
-| add_face.pth | 对图片或视频中的脸部打码 |
-| clean_face_HD.pth | 对图片或视频中的脸部去码<br>(要求内存 > 8GB). |
-| add_youknow.pth | 对图片或视频中的十八禁内容打码 |
-| clean_youknow_resnet_9blocks.pth | 对图片或视频中的十八禁内容去码 |
-| clean_youknow_video.pth | 对视频中的十八禁内容去码 |
-| clean_youknow_video_HD.pth | 对视频中的十八禁内容去码<br>(要求内存 > 8GB) |
-
-* 风格转换
-
-| 文件名 | 描述 |
-| :---------------------: | :-------------------------------------------------------: |
-| style_apple2orange.pth | 苹果变橙子 |
-| style_orange2apple.pth | 橙子变苹果 |
-| style_summer2winter.pth | 夏天变冬天 |
-| style_winter2summer.pth | 冬天变夏天 |
-| style_cezanne.pth | 转化为Paul Cézanne 的绘画风格 |
-| style_monet.pth | 转化为Claude Monet的绘画风格 |
-| style_ukiyoe.pth | 转化为Ukiyoe的绘画风格 |
-| style_vangogh.pth | 转化为Van Gogh的绘画风格 |
-
-### GUI界面注释
-![image](../imgs/GUI_Instructions.jpg)
-* 1. 选择需要处理的图片或视频
-* 2. 选择预训练模型
-* 3. 程序运行模式 (auto | add | clean | style)
-* 4. 使用GPU (该版本目前不支持GPU,若需要使用GPU请使用源码运行).
-* 5. 限制输出的视频帧率(0->原始帧率).
-* 6. 更多的选项以及参数
-* 7. 自行输入更多参数,详见下文
-* 8. 运行
-* 9. 打开帮助文件
-* 10. 支持我们
-* 11. 版本信息
-* 12. 打开项目的github页面
-
-### 参数说明
-如果需要更多的效果, 请按照 '--option your-parameters' 输入所需要的参数
-* 基本
-
-| 选项 | 描述 | 默认 |
-| :----------: | :------------------------: | :-------------------------------------: |
-| --use_gpu | if -1, do not use gpu | 0 |
-| --media_path | 需要处理的视频或者照片的路径 | ./imgs/ruoruo.jpg |
-| --mode | 运行模式(auto/clean/add/style) | 'auto' |
-| --model_path | 预训练模型的路径 | ./pretrained_models/mosaic/add_face.pth |
-| --result_dir | 保存路径 | ./result |
-| --fps | 限制视频输出的fps,0则为默认 | 0 |
-* 添加马赛克
-
-| 选项 | 描述 | 默认 |
-| :----------: | :------------------------: | :-------------------------------------: |
-| --mosaic_mod | 马赛克类型 -> squa_avg/ squa_random/ squa_avg_circle_edge/ rect_avg/random | squa_avg |
-| --mosaic_size | 马赛克大小,0则为自动 | 0 |
-| --mask_extend | 拓展马赛克区域 | 10 |
-| --mask_threshold | 马赛克区域识别阈值 0~255 | 64 |
-
-* 去除马赛克
-
-| 选项 | 描述 | 默认 |
-| :----------: | :------------------------: | :-------------------------------------: |
-| --traditional | 如果输入这个参数则使用传统方法清除马赛克 | |
-| --tr_blur | 传统方法模糊尺寸 | 10 |
-| --tr_down | 传统方法下采样尺寸 | 10 |
-| --medfilt_num | medfilt window of mosaic movement in the video | 11 |
-
-* 风格转换
-
-| 选项 | 描述 | 默认 |
-| :----------: | :------------------------: | :-------------------------------------: |
+## DeepMosaics.exe 使用说明
+下载程序以及预训练模型 [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ)
+注意事项:
+
+ - 程序的运行要求在64位Windows操作系统,我仅在Windows10运行过,其他版本暂未经过测试
+ - 请根据需求选择合适的预训练模型进行测试
+ - 运行时间取决于电脑性能,对于视频文件,我们建议使用源码以及GPU运行
+ - 如果输出的视频无法播放,这边建议您尝试[potplayer](https://daumpotplayer.com/download/).
+ - 相比于源码,该版本的更新将会延后.
+
+### 如何使用
+* step 1: 选择需要处理的图片或视频
+* step 2: 选择预训练模型(不同的预训练模型有不同的效果)
+* step3: 运行程序并等待
+* step4: 查看结果(储存在result文件夹下)
+
+## 预训练模型说明
+当前的预训练模型分为两类——添加/移除马赛克以及风格转换.
+
+* 马赛克
+
+| 文件名 | 描述 |
+| :------------------------------: | :-------------------------------------------: |
+| add_face.pth | 对图片或视频中的脸部打码 |
+| clean_face_HD.pth | 对图片或视频中的脸部去码<br>(要求内存 > 8GB). |
+| add_youknow.pth | 对图片或视频中的十八禁内容打码 |
+| clean_youknow_resnet_9blocks.pth | 对图片或视频中的十八禁内容去码 |
+| clean_youknow_video.pth | 对视频中的十八禁内容去码 |
+| clean_youknow_video_HD.pth | 对视频中的十八禁内容去码<br>(要求内存 > 8GB) |
+
+* 风格转换
+
+| 文件名 | 描述 |
+| :---------------------: | :-------------------------------------------------------: |
+| style_apple2orange.pth | 苹果变橙子 |
+| style_orange2apple.pth | 橙子变苹果 |
+| style_summer2winter.pth | 夏天变冬天 |
+| style_winter2summer.pth | 冬天变夏天 |
+| style_cezanne.pth | 转化为Paul Cézanne 的绘画风格 |
+| style_monet.pth | 转化为Claude Monet的绘画风格 |
+| style_ukiyoe.pth | 转化为Ukiyoe的绘画风格 |
+| style_vangogh.pth | 转化为Van Gogh的绘画风格 |
+
+### GUI界面注释
+![image](../imgs/GUI_Instructions.jpg)
+* 1. 选择需要处理的图片或视频
+* 2. 选择预训练模型
+* 3. 程序运行模式 (auto | add | clean | style)
+* 4. 使用GPU (该版本目前不支持GPU,若需要使用GPU请使用源码运行).
+* 5. 限制输出的视频帧率(0->原始帧率).
+* 6. 更多的选项以及参数
+* 7. 自行输入更多参数,详见下文
+* 8. 运行
+* 9. 打开帮助文件
+* 10. 支持我们
+* 11. 版本信息
+* 12. 打开项目的github页面
+
+### 参数说明
+如果需要更多的效果, 请按照 '--option your-parameters' 输入所需要的参数
+* 基本
+
+| 选项 | 描述 | 默认 |
+| :----------: | :------------------------: | :-------------------------------------: |
+| --use_gpu | if -1, do not use gpu | 0 |
+| --media_path | 需要处理的视频或者照片的路径 | ./imgs/ruoruo.jpg |
+| --mode | 运行模式(auto/clean/add/style) | 'auto' |
+| --model_path | 预训练模型的路径 | ./pretrained_models/mosaic/add_face.pth |
+| --result_dir | 保存路径 | ./result |
+| --fps | 限制视频输出的fps,0则为默认 | 0 |
+* 添加马赛克
+
+| 选项 | 描述 | 默认 |
+| :----------: | :------------------------: | :-------------------------------------: |
+| --mosaic_mod | 马赛克类型 -> squa_avg/ squa_random/ squa_avg_circle_edge/ rect_avg/random | squa_avg |
+| --mosaic_size | 马赛克大小,0则为自动 | 0 |
+| --mask_extend | 拓展马赛克区域 | 10 |
+| --mask_threshold | 马赛克区域识别阈值 0~255 | 64 |
+
+* 去除马赛克
+
+| 选项 | 描述 | 默认 |
+| :----------: | :------------------------: | :-------------------------------------: |
+| --traditional | 如果输入这个参数则使用传统方法清除马赛克 | |
+| --tr_blur | 传统方法模糊尺寸 | 10 |
+| --tr_down | 传统方法下采样尺寸 | 10 |
+| --medfilt_num | medfilt window of mosaic movement in the video | 11 |
+
+* 风格转换
+
+| 选项 | 描述 | 默认 |
+| :----------: | :------------------------: | :-------------------------------------: |
| --output_size | 输出媒体的尺寸,如果是0则为原始尺寸 |512|
\ No newline at end of file
diff --git a/docs/how_to_train.md b/docs/how_to_train.md
new file mode 100644
index 0000000000000000000000000000000000000000..ec947736200d2022200b2a75faba8427c908d6cc
--- /dev/null
+++ b/docs/how_to_train.md
@@ -0,0 +1 @@
+### make datasets
diff --git a/docs/options_introduction.md b/docs/options_introduction.md
index 410d136937dc94188c8cad1de5cc0c915212110e..95ccfc059361dc2464790d59cc2d0f85c3717ea2 100644
--- a/docs/options_introduction.md
+++ b/docs/options_introduction.md
@@ -1,37 +1,37 @@
-## Introduction to options
-If you need more effects, use '--option your-parameters' to enter what you need.
-
-### Base
-
-| Option | Description | Default |
-| :----------: | :------------------------: | :-------------------------------------: |
-| --use_gpu | if -1, do not use gpu | 0 |
-| --media_path | your videos or images path | ./imgs/ruoruo.jpg |
-| --mode | program running mode(auto/clean/add/style) | 'auto' |
-| --model_path | pretrained model path | ./pretrained_models/mosaic/add_face.pth |
-| --result_dir | output media will be saved here| ./result |
-| --fps | read and output fps, if 0-> origin | 0 |
-
-### AddMosaic
-
-| Option | Description | Default |
-| :----------: | :------------------------: | :-------------------------------------: |
-| --mosaic_mod | type of mosaic -> squa_avg/ squa_random/ squa_avg_circle_edge/ rect_avg/random | squa_avg |
-| --mosaic_size | mosaic size,if 0 -> auto size | 0 |
-| --mask_extend | extend mosaic area | 10 |
-| --mask_threshold | threshold of recognize mosaic position 0~255 | 64 |
-
-### CleanMosaic
-
-| Option | Description | Default |
-| :----------: | :------------------------: | :-------------------------------------: |
-| --traditional | if specified, use traditional image processing methods to clean mosaic | |
-| --tr_blur | ksize of blur when using traditional method, it will affect final quality | 10 |
-| --tr_down | downsample when using traditional method,it will affect final quality | 10 |
-| --medfilt_num | medfilt window of mosaic movement in the video | 11 |
-
-### Style Transfer
-
-| Option | Description | Default |
-| :----------: | :------------------------: | :-------------------------------------: |
+## Introduction to options
+If you need more effects, use '--option your-parameters' to enter what you need.
+
+### Base
+
+| Option | Description | Default |
+| :----------: | :------------------------: | :-------------------------------------: |
+| --use_gpu | if -1, do not use gpu | 0 |
+| --media_path | your videos or images path | ./imgs/ruoruo.jpg |
+| --mode | program running mode(auto/clean/add/style) | 'auto' |
+| --model_path | pretrained model path | ./pretrained_models/mosaic/add_face.pth |
+| --result_dir | output media will be saved here| ./result |
+| --fps | read and output fps, if 0-> origin | 0 |
+
+### AddMosaic
+
+| Option | Description | Default |
+| :----------: | :------------------------: | :-------------------------------------: |
+| --mosaic_mod | type of mosaic -> squa_avg/ squa_random/ squa_avg_circle_edge/ rect_avg/random | squa_avg |
+| --mosaic_size | mosaic size,if 0 -> auto size | 0 |
+| --mask_extend | extend mosaic area | 10 |
+| --mask_threshold | threshold for recognizing the mosaic position, 0~255 | 64 |
+
+### CleanMosaic
+
+| Option | Description | Default |
+| :----------: | :------------------------: | :-------------------------------------: |
+| --traditional | if specified, use traditional image processing methods to clean mosaic | |
+| --tr_blur | ksize of blur when using traditional method, it will affect final quality | 10 |
+| --tr_down | downsample when using traditional method,it will affect final quality | 10 |
+| --medfilt_num | medfilt window of mosaic movement in the video | 11 |
+
+### Style Transfer
+
+| Option | Description | Default |
+| :----------: | :------------------------: | :-------------------------------------: |
| --output_size | size of output media, if 0 -> origin |512|
\ No newline at end of file
diff --git a/docs/options_introduction_CN.md b/docs/options_introduction_CN.md
index b71e0790c21fb739d635532544bbb46cea505d45..7695740b1e77948d7c36daf53d00446b72c6c943 100644
--- a/docs/options_introduction_CN.md
+++ b/docs/options_introduction_CN.md
@@ -1,37 +1,37 @@
-## 参数说明
-如果需要更多的效果, 请按照 '--option your-parameters' 输入所需要的参数
-
-### 基本
-
-| 选项 | 描述 | 默认 |
-| :----------: | :------------------------: | :-------------------------------------: |
-| --use_gpu | if -1, do not use gpu | 0 |
-| --media_path | 需要处理的视频或者照片的路径 | ./imgs/ruoruo.jpg |
-| --mode | 运行模式(auto/clean/add/style) | 'auto' |
-| --model_path | 预训练模型的路径 | ./pretrained_models/mosaic/add_face.pth |
-| --result_dir | 保存路径 | ./result |
-| --fps | 限制视频输出的fps,0则为默认 | 0 |
-
-### 添加马赛克
-
-| 选项 | 描述 | 默认 |
-| :----------: | :------------------------: | :-------------------------------------: |
-| --mosaic_mod | 马赛克类型 -> squa_avg/ squa_random/ squa_avg_circle_edge/ rect_avg/random | squa_avg |
-| --mosaic_size | 马赛克大小,0则为自动 | 0 |
-| --mask_extend | 拓展马赛克区域 | 10 |
-| --mask_threshold | 马赛克区域识别阈值 0~255 | 64 |
-
-### 去除马赛克
-
-| 选项 | 描述 | 默认 |
-| :----------: | :------------------------: | :-------------------------------------: |
-| --traditional | 如果输入这个参数则使用传统方法清除马赛克 | |
-| --tr_blur | 传统方法模糊尺寸 | 10 |
-| --tr_down | 传统方法下采样尺寸 | 10 |
-| --medfilt_num | medfilt window of mosaic movement in the video | 11 |
-
-### 风格转换
-
-| 选项 | 描述 | 默认 |
-| :----------: | :------------------------: | :-------------------------------------: |
+## 参数说明
+如果需要更多的效果, 请按照 '--option your-parameters' 输入所需要的参数
+
+### 基本
+
+| 选项 | 描述 | 默认 |
+| :----------: | :------------------------: | :-------------------------------------: |
+| --use_gpu | if -1, do not use gpu | 0 |
+| --media_path | 需要处理的视频或者照片的路径 | ./imgs/ruoruo.jpg |
+| --mode | 运行模式(auto/clean/add/style) | 'auto' |
+| --model_path | 预训练模型的路径 | ./pretrained_models/mosaic/add_face.pth |
+| --result_dir | 保存路径 | ./result |
+| --fps | 限制视频输出的fps,0则为默认 | 0 |
+
+### 添加马赛克
+
+| 选项 | 描述 | 默认 |
+| :----------: | :------------------------: | :-------------------------------------: |
+| --mosaic_mod | 马赛克类型 -> squa_avg/ squa_random/ squa_avg_circle_edge/ rect_avg/random | squa_avg |
+| --mosaic_size | 马赛克大小,0则为自动 | 0 |
+| --mask_extend | 拓展马赛克区域 | 10 |
+| --mask_threshold | 马赛克区域识别阈值 0~255 | 64 |
+
+### 去除马赛克
+
+| 选项 | 描述 | 默认 |
+| :----------: | :------------------------: | :-------------------------------------: |
+| --traditional | 如果输入这个参数则使用传统方法清除马赛克 | |
+| --tr_blur | 传统方法模糊尺寸 | 10 |
+| --tr_down | 传统方法下采样尺寸 | 10 |
+| --medfilt_num | medfilt window of mosaic movement in the video | 11 |
+
+### 风格转换
+
+| 选项 | 描述 | 默认 |
+| :----------: | :------------------------: | :-------------------------------------: |
| --output_size | 输出媒体的尺寸,如果是0则为原始尺寸 |512|
\ No newline at end of file
diff --git a/docs/pre-trained_models_introduction.md b/docs/pre-trained_models_introduction.md
index 042857365b50c77b6c749b906591c65bdbf4955d..2c4a3f347c37b5694fbe34ab48c08d8fda57bb03 100644
--- a/docs/pre-trained_models_introduction.md
+++ b/docs/pre-trained_models_introduction.md
@@ -1,28 +1,28 @@
-## Introduction to pre-trained models
-The current pre-trained models are divided into two categories(Add/Clean mosaic and StyleTransfer).
-Download pre-trained model via [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ)
-
-### Mosaic
-
-| Name | Description |
-| :------------------------------: | :---------------------------------------------------------: |
-| add_face.pth | Add mosaic to all faces in images/videos. |
-| clean_face_HD.pth | Clean mosaic to all faces in images/video.<br>(RAM > 8GB). |
-| add_youknow.pth | Add mosaic to all (FBI Warning) in images/videos. |
-| clean_youknow_resnet_9blocks.pth | Clean mosaic to all (FBI Warning) in images/videos. |
-| clean_youknow_video.pth | Clean mosaic to all (FBI Warning) in videos. |
-| clean_youknow_video_HD.pth | Clean mosaic to all (FBI Warning) in videos.<br>(RAM > 8GB) |
-
-### Style Transfer
-
-| Name | Description |
-| :---------------------: | :-------------------------------------------------------: |
-| style_apple2orange.pth | Convert apples to oranges. |
-| style_orange2apple.pth | Convert oranges to apples |
-| style_summer2winter.pth | Convert summer to winter. |
-| style_winter2summer.pth | Convert winter to summer. |
-| style_cezanne.pth | Convert photos/video to Paul Cézanne style. |
-| style_monet.pth | Convert photos/video to Claude Monet style. |
-| style_ukiyoe.pth | Convert photos/video to Ukiyoe style. |
-| style_vangogh.pth | Convert photos/video to Van Gogh style. |
-
+## Introduction to pre-trained models
+The current pre-trained models are divided into two categories(Add/Clean mosaic and StyleTransfer).
+Download pre-trained model via [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ)
+
+### Mosaic
+
+| Name | Description |
+| :------------------------------: | :-----------------------------------------------------: |
+| add_face.pth | Add mosaic to faces in images/videos. |
+| clean_face_HD.pth | Clean mosaic to faces in images/video.<br>(RAM > 8GB). |
+| add_youknow.pth | Add mosaic to ... in images/videos. |
+| clean_youknow_resnet_9blocks.pth | Clean mosaic to ... in images/videos. |
+| clean_youknow_video.pth | Clean mosaic to ... in videos. |
+| clean_youknow_video_HD.pth | Clean mosaic to ... in videos.<br>(RAM > 8GB) |
+
+### Style Transfer
+
+| Name | Description |
+| :---------------------: | :-------------------------------------------------------: |
+| style_apple2orange.pth | Convert apples to oranges. |
+| style_orange2apple.pth | Convert oranges to apples |
+| style_summer2winter.pth | Convert summer to winter. |
+| style_winter2summer.pth | Convert winter to summer. |
+| style_cezanne.pth | Convert photos/video to Paul Cézanne style. |
+| style_monet.pth | Convert photos/video to Claude Monet style. |
+| style_ukiyoe.pth | Convert photos/video to Ukiyoe style. |
+| style_vangogh.pth | Convert photos/video to Van Gogh style. |
+
diff --git a/docs/pre-trained_models_introduction_CN.md b/docs/pre-trained_models_introduction_CN.md
index fa60d1ea07464241d29fd8732ce53b398839f8e7..915639136f268071b471768b12313e47794e4a30 100644
--- a/docs/pre-trained_models_introduction_CN.md
+++ b/docs/pre-trained_models_introduction_CN.md
@@ -1,28 +1,28 @@
-## 预训练模型说明
-当前的预训练模型分为两类——添加/移除马赛克以及风格转换.
-可以通过以下方式下载预训练模型 [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ)
-
-### 马赛克
-
-| 文件名 | 描述 |
-| :------------------------------: | :-------------------------------------------: |
-| add_face.pth | 对图片或视频中的脸部打码 |
-| clean_face_HD.pth | 对图片或视频中的脸部去码<br>(要求内存 > 8GB). |
-| add_youknow.pth | 对图片或视频中的十八禁内容打码 |
-| clean_youknow_resnet_9blocks.pth | 对图片或视频中的十八禁内容去码 |
-| clean_youknow_video.pth | 对视频中的十八禁内容去码 |
-| clean_youknow_video_HD.pth | 对视频中的十八禁内容去码<br>(要求内存 > 8GB) |
-
-### 风格转换
-
-| 文件名 | 描述 |
-| :---------------------: | :-------------------------------------------------------: |
-| style_apple2orange.pth | 苹果变橙子 |
-| style_orange2apple.pth | 橙子变苹果 |
-| style_summer2winter.pth | 夏天变冬天 |
-| style_winter2summer.pth | 冬天变夏天 |
-| style_cezanne.pth | 转化为Paul Cézanne 的绘画风格 |
-| style_monet.pth | 转化为Claude Monet的绘画风格 |
-| style_ukiyoe.pth | 转化为Ukiyoe的绘画风格 |
-| style_vangogh.pth | 转化为Van Gogh的绘画风格 |
-
+## 预训练模型说明
+当前的预训练模型分为两类——添加/移除马赛克以及风格转换.
+可以通过以下方式下载预训练模型 [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ)
+
+### 马赛克
+
+| 文件名 | 描述 |
+| :------------------------------: | :-------------------------------------------: |
+| add_face.pth | 对图片或视频中的脸部打码 |
+| clean_face_HD.pth | 对图片或视频中的脸部去码<br>(要求内存 > 8GB). |
+| add_youknow.pth | 对图片或视频中的...内容打码 |
+| clean_youknow_resnet_9blocks.pth | 对图片或视频中的...内容去码 |
+| clean_youknow_video.pth | 对视频中的...内容去码 |
+| clean_youknow_video_HD.pth | 对视频中的...内容去码<br>(要求内存 > 8GB) |
+
+### 风格转换
+
+| 文件名 | 描述 |
+| :---------------------: | :-------------------------------------------------------: |
+| style_apple2orange.pth | 苹果变橙子 |
+| style_orange2apple.pth | 橙子变苹果 |
+| style_summer2winter.pth | 夏天变冬天 |
+| style_winter2summer.pth | 冬天变夏天 |
+| style_cezanne.pth | 转化为Paul Cézanne 的绘画风格 |
+| style_monet.pth | 转化为Claude Monet的绘画风格 |
+| style_ukiyoe.pth | 转化为Ukiyoe的绘画风格 |
+| style_vangogh.pth | 转化为Van Gogh的绘画风格 |
+
diff --git a/docs/training_with_your_own_dataset.md b/docs/training_with_your_own_dataset.md
new file mode 100644
index 0000000000000000000000000000000000000000..cc052896d90fd767b82bbcc13954a8178a510fd0
--- /dev/null
+++ b/docs/training_with_your_own_dataset.md
@@ -0,0 +1,73 @@
+# Training with your own dataset
+Training with your own dataset requires a GPU with at least 6 GB of memory (GTX 1060 or better).
+We will use "face" as an example. If you don't have any pictures, you can download [CelebA](http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html) or [WIDER](http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/WiderFace_Results.html).
+
+## Getting Started
+#### Prerequisites
+ - Linux, Mac OS, Windows
+ - Python 3.6+
+ - [ffmpeg 3.4.6](http://ffmpeg.org/)
+ - [Pytorch 1.0+](https://pytorch.org/)
+ - NVIDIA GPU(with more than 6G memory) + CUDA CuDNN
+#### Dependencies
+This code depends on opencv-python, torchvision and matplotlib, all available via pip install.
+#### Clone this repo
+```bash
+git clone https://github.com/HypoX64/DeepMosaics
+cd DeepMosaics
+```
+## Make training datasets
+```bash
+cd make_datasets
+```
+### Add mosaic dataset
+Please generate masks for the images to which you want to add mosaic (the number of images should be above 1000). Then put the images in ```face/origin_image``` and the masks in ```face/mask```.
+* You can use ```draw_mask.py``` to generate them.
+```bash
+python draw_mask.py --datadir 'dir for your pictures' --savedir ../datasets/draw/face
+#Press the left mouse button to draw the mask. Press 's' to save the mask, 'a' to reduce the brush size, 'd' to increase the brush size, 'w' to skip the current image without saving.
+```
+* If you want to get images from videos, you can use ```get_image_from_video.py```
+```bash
+python get_image_from_video.py --datadir 'dir for your videos' --savedir ../datasets/video2image --fps 1
+```
+### Clean mosaic dataset
+We provide several methods for generating clean mosaic datasets. However, for better results, we recommend training an addmosaic model on a small dataset first and then using it to automatically generate datasets from a larger one. (Recommended: Method 2 (for images) & Method 4 (for videos))
+* Method 1: Use drawn masks to make pix2pix(HD) datasets (requires ```origin_image``` and ```mask```)
+```bash
+python make_pix2pix_dataset.py --datadir ../datasets/draw/face --hd --outsize 512 --fold 1 --name face --savedir ../datasets/pix2pix/face --mod drawn --minsize 128 --square
+```
+* Method 2: Use addmosaic model to make pix2pix(HD) datasets(Require addmosaic pre-trained model)
+```bash
+python make_pix2pix_dataset.py --datadir 'dir for your pictures' --hd --outsize 512 --fold 1 --name face --savedir ../datasets/pix2pix/face --mod network --model_path ../pretrained_models/mosaic/add_face.pth --minsize 128 --square --mask_threshold 128
+```
+* Method 3: Use Irregular Masks to make pix2pix(HD) datasets(Require [Irregular Masks](https://nv-adlr.github.io/publication/partialconv-inpainting))
+```bash
+python make_pix2pix_dataset.py --datadir 'dir for your pictures' --hd --outsize 512 --fold 1 --name face --savedir ../datasets/pix2pix/face --mod irregular --irrholedir ../datasets/Irregular_Holes_mask --square
+```
+* Method 4: Use addmosaic model to make video datasets(Require addmosaic pre-trained model. This is better for processing video mosaics)
+```bash
+python make_video_dataset.py --datadir 'dir for your videos' --model_path ../pretrained_models/mosaic/add_face.pth --mask_threshold 96 --savedir ../datasets/video/face
+```
+## Training
+### Add
+```bash
+cd train/add
+python train.py --gpu_id 0 --dataset ../../datasets/draw/face --savename face --loadsize 512 --finesize 360 --batchsize 16
+```
+### Clean
+* For image datasets(generated by ```make_pix2pix_dataset.py```)
+We use [pix2pix](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix) or [pix2pixHD](https://github.com/NVIDIA/pix2pixHD) to train the model. Here we take pix2pixHD as an example.
+```bash
+git clone https://github.com/NVIDIA/pix2pixHD
+cd pix2pixHD
+pip install dominate
+python train.py --name face --resize_or_crop resize_and_crop --loadSize 563 --fineSize 512 --label_nc 0 --no_instance --dataroot ../datasets/pix2pix/face
+```
+* For video datasets(generated by ```make_video_dataset.py```)
+```bash
+cd train/clean
+python train.py --dataset ../../datasets/video/face --savename face --savefreq 100000 --gan --hd --lr 0.0002 --lambda_gan 1 --gpu_id 0 --perload_num 8
+```
+## Testing
+Put the saved network into ```./pretrained_models/mosaic/``` and rename it to ```add_face.pth```, ```clean_face_HD.pth``` or ```clean_face_video_HD.pth```.
diff --git a/make_datasets/csv/video_used_time.csv b/make_datasets/csv/video_used_time.csv
deleted file mode 100644
index 3250579f7b2c815a08548f3f8967a6efadb2dfc9..0000000000000000000000000000000000000000
--- a/make_datasets/csv/video_used_time.csv
+++ /dev/null
@@ -1,40 +0,0 @@
-010412_249-1pon-whole1_hd.avi,00:12:00,00:13:33,00:14:26,00:15:06,00:19:35,00:24:30,00:25:53,00:29:29,00:29:55,00:30:30,00:31:43,00:32:54,00:33:39,00:35:55,00:38:30,00:38:49,00:39:47,00:41:15,00:42:35,00:43:15,00:43:50,00:45:30,00:46:33,00:47:35,00:49:10,00:49:20,00:51:04,00:51:20,00:53:10,00:55:05
-011013_511-1pon-whole1_hd.avi,00:16:09,00:16:43,00:19:12,00:19:54,00:24:52,00:26:23,00:29:20,00:31:40,00:32:16,00:36:45,00:37:15,00:37:35,00:38:00,00:38:40,00:41:40,00:46:09,00:57:50,00:58:10
-012514_744-1pon-whole1_hd.mp4,00:08:12,00:12:00,00:12:30,00:17:40,00:19:35,00:20:50,00:21:50,00:24:35,00:29:10,00:30:25,00:33:10,00:39:35,00:40:35,00:42:25,00:42:35,00:57:05,00:58:25,00:59:15
-020916_242-1pon-1080p.mp4,00:13:35,00:15:10,00:18:20,00:26:50,00:31:25,00:33:15,00:34:55,00:37:15,00:38:25,00:39:35,00:41:05,00:41:55,00:42:10,00:43:10,00:43:20,00:45:15,00:45:20,00:46:10,00:47:50,00:49:10,00:50:00,00:50:20,00:52:10,00:56:55,00:57:05,00:57:35,00:59:15,00:59:30
-031516_262-1pon-1080p.mp4,00:09:30,00:13:00,00:13:50,00:14:50,00:16:00,00:20:55,00:25:50,00:26:35,00:30:30,00:32:40,00:38:20,00:38:30,00:39:55,00:42:10,00:43:45,00:45:40,00:46:20,00:47:50,00:48:05,00:49:50,00:51:45,00:51:50,00:57:00
-031716_001-1pon-1080p.mp4,00:02:30,00:02:40,00:02:55,00:04:00,00:04:20,00:05:40,00:06:05,00:06:50,00:08:10,00:08:20,00:08:30,00:08:47,00:10:00,00:10:05,00:10:20,00:10:30,00:11:50,00:12:00,00:12:35,00:13:20,00:14:20,00:15:35
-032113_554-1pon-whole1_hd.avi,00:13:20,00:21:20,00:23:15,00:23:35,00:24:00,00:25:10,0:25:30,00:25:50,00:26:35,00:26:50,00:31:40,00:35:15,00:35:25,00:37:10,00:45:35,00:46:05,00:48:00,00:49:50,00:50:30,00:51:50,00:52:30,00:52:40,00:52:50,00:58:20,00:58:30,00:59:30,00:59:45,01:01:45,01:02:00,01:03:50,01:04:05,01:04:20,01:04:30,01:05:35,01:07:40
-032313_556-1pon-whole1_hd.avi,00:04:05,00:05:00,00:06:40,00:06:50,00:07:50,00:09:10,00:10:30,00:13:15,00:16:05,00:17:35,00:18:20,00:20:25,00:20:30,00:22:30,00:26:50,00:27:30,00:35:30,00:42:40,00:44:09,00:50:00,00:52:50,00:53:40,00:54:15,00:58:00,00:58:25,01:04:05,01:05:05,01:06:15,01:06:50,01:07:51,01:08:10
-032715_001-1pon-1080p.mp4,00:09:20,00:10:35,00:10:45,00:13:25,00:21:20,00:24:50,00:28:10,00:29:26,00:29:52,00:30:55,00:31:10,00:31:55,00:32:20,00:32:40,00:33:10,00:34:30,00:35:40,00:35:50,00:48:30,00:48:50,00:49:45,00:50:15,00:53:55,00:57:13,00:57:20,00:59:00,00:59:55
-032715_004-1pon-1080p.mp4,00:22:30,00:22:55,00:24:44,00:26:15,00:28:00,00:28:40,00:30:40,00:35:40,00:38:20,00:38:50,00:39:50,00:41:30,00:42:10,00:42:30,00:43:40,00:44:05,00:44:35,00:45:17,00:45:36,00:46:23,00:46:55,00:47:20,00:47:40,00:48:05,00:48:30,00:50:50,00:52:00,00:53:30,00:53:45,00:54:25,00:54:45,00:57:40,00:58:00,00:58:40,00:58:50
-040111_063-1pon-whole1_hd.avi,00:08:25,00:08:45,00:09:00,00:10:55,00:16:40,00:17:05,00:17:35,00:19:10,00:27:00,00:28:05,00:29:05,00:31:40,00:36:00,00:37:50,00:45:30,00:46:15,00:47:45,00:50:15,00:52:50,00:53:47,00:53:58,00:55:05,00:56:15,00:58:40,00:59:00,00:59:20,00:59:45
-040814_786-1pon-whole1_hd.avi,00:04:40,00:05:00,00:06:50,00:10:20,00:21:00,00:23:35,00:24:10,00:26:40,00:28:35,00:29:15,00:29:20,00:31:15,00:32:50,00:36:10,00:39:40,00:42:00,00:42:50,00:44:00,00:44:15,00:44:36,00:45:00,00:45:20,00:47:20,00:48:10,00:48:30,00:53:50,00:54:43,00:55:20,00:59:15,00:59:30
-050915_077-1pon-1080p,00:11:00,00:12:30,00:19:20,00:19:50,00:21:00,00:22:00,00:23:40,00:24:30,00:28:20,00:33:50,00:36:00,00:37:30,00:38:50,00:39:30,00:41:50,00:44:20,00:48:45,00:49:25,00:50:45,00:51:00,00:53:05,00:54:00,00:54:27,00:57:30,00:59:10,01:00:30,01:04:10,01:04:20,01:04:30,01:04:50,01:05:20
-052215_084-1pon-1080p.mp4,00:26:50,00:27:15,00:30:20,00:33:20,00:34:00,00:37:00,00:41:00,00:43:00,00:44:30,00:47:40,00:50:35,00:50:40,00:51:40,00:55:20,00:55:50,00:55:55,00:56:20,00:57:30,00:57:40,00:59:10,00:59:15,01:00:05
-062015_101-1pon-1080p.mp4,00:11:00,00:12:47,00:13:10,00:14:20,00:15:20,00:16:20,00:17:10,00:17:25,00:19:45,00:21:05,00:23:40,00:27:40,00:28:10,00:37:15,00:41:30,00:43:20,00:44:25,00:46:51,00:47:20,00:49:00,00:50:40,00:51:50,00:52:50,00:55:00,00:56:20,00:58:10,00:59:00,10:00:00
-062715_105-1pon-1080p.mp4,00:11:30,00:11:55,00:12:00,00:12:30,00:13:45,00:16:50,00:18:25,00:19:20,00:20:40,00:25:15,00:36:20,00:36:40,00:37:25,00:39:05,00:39:50,00:40:55,00:41:55,00:45:40,00:43:30,00:44:15,00:45:30,00:47:40,00:50:05,00:50:10,00:50:20,00:50:30,00:55:10,00:56:35,00:58:40,01:00:15,01:05:05,01:05:15,01:05:30,01:05:50
-1pondo_070315_108_1080p.mp4,00:11:10,00:11:50,00:13:50,00:14:20,00:14:35,00:15:50,00:17:20,00:18:35,00:20:45,00:24:35,00:25:05,00:29:15,00:30:40,00:31:55,00:35:20,00:42:55,00:43:05,00:46:15,00:48:00,00:51:45,00:52:33,00:54:20,00:59:25,00:59:40,01:00:05
-071114_842-1pon-whole1_hd.mp4,00:09:50,00:11:25,00:16:35,00:18:20,00:22:10,00:25:25,00:26:35,00:33:50,00:35:40,00:43:10
-071715_116-1pon-1080p.mp4,00:10:50,00:11:30,00:12:50,00:15:10,00:16:45,00:17:05,00:25:20,00:26:45,00:28:30,00:30:20,00:32:55,00:34:30,00:37:40,00:38:40,00:40:20,00:41:20,00:44:10,00:47:15,00:55:00,00:59:40,00:59:50
-071815_117-1pon-1080p.mp4,00:14:50,00:15:10,00:18:05,00:14:50,00:25:55,00:26:25,00:32:45,00:33:40,00:43:15,00:45:05,00:45:45,00:48:40,00:48:50,00:55:45,10:00:20,01:00:35,01:01:00,01:01:10
-080815_130-1pon-1080p,00:14:50,00:17:15,00:17:20,00:23:55,00:25:30,00:25:55,00:28:20,00:28:30,00:30:10,00:31:00,00:33:25,00:33:35,00:33:45,00:33:50,00:39:25,00:39:50,00:40:25,00:44:05,00:45:00,00:45:40,00:45:50,00:46:55,00:49:15,00:49:25,00:46:40,00:50:10,00:50:15,00:51:25,00:51:50,00:53:14,00:53:20,00:54:15,00:56:15,00:56:25,00:56:45,00:57:45,00:57:30,00:58:00,00:56:45,00:56:55,01:00:00,01:00:05,01:00:25,01:00:30
-081514_863-1pon-whole1_hd.avi,00:10:30,00:26:00,00:30:00,00:38:21,00:40:15,00:40:30,00:49:10,00:50:05,00:57:10,00:59:00
-090614_877-1pon-whole1_hd.mp4,00:04:45,00:05:15,00:12:25,00:12:40,00:15:00,00:15:15,00:16:25,00:20:50,00:21:45,00:26:10,00:33:35,00:35:55,00:37:50,00:37:55,00:38:12,00:39:55,00:41:50,00:44:27,00:44:37,00:46:30,00:47:35,00:47:40,00:48:20,00:59:50
-091215_152-1pon-1080p.mp4,00:05:30,00:06:10,00:06:20,00:08:15,00:10:10,00:11:15,00:12:15,00:12:55,0:15:15,00:15:35,00:18:00,00:24:45,00:25:45,00:33:45,00:35:32,00:37:35,00:37:55,00:38:50,00:42:15,00:45:00,00:47:55,00:48:20,00:48:35,00:48:42,00:49:43,00:50:15,00:51:10,00:55:35,00:57:00,00:57:55,01:03:30,01:05:00
-092813_670-1pon-whole1_hd.avi,00:16:32,00:19:00,00:22:10,00:23:20,00:23:40,00:30:20,00:32:00,00:35:00,00:36:50,00:41:40,00:44:50,00:52:45,00:54:00
-103015_180-1pon-1080p.mp4,00:24:50,00:31:25,00:41:20,00:48:10,00:48:50,00:49:20,00:50:15,00:52:45,00:53:30,01:02:40,01:03:35,01:09:50,01:15:05,01:16:50
-110615_185-1pon-1080p.mp4,00:15:00,00:15:40,00:34:15,00:34:50,00:35:30,00:37:05,00:39:35,00:40:30,00:41:40,00:47:35,00:50:15,00:51:01,00:51:35,00:54:15,00:55:40,00:55:50,00:57:20,00:59:35,01:00:00,01:00:25
-120310_979-1pon-whole1_hd.avi,00:15:10,00:14:25,00:14:30,00:14:50,00:15:45,00:16:35,00:16:55,00:17:25,00:19:25,00:20:45,00:27:05,00:30:17,00:32:00,00:33:50,00:35:45,00:38:55,00:40:25,00:40:40,00:41:10,00:42:50,00:44:35,00:45:15,00:46:15,00:48:00,00:49:10,00:50:10,00:54:00,00:55:23,00:55:30,00:55:50
-021315-806-carib-1080p.mp4,00:13:30,00:15:20,00:17:40,00:21:50,00:22:25,00:24:35,00:28:50,00:28:52,00:31:00,00:37:25,00:37:35,00:38:20,00:38:45,00:43:30,00:48:35,00:51:30,00:51:50,00:52:19,00:56:20,00:58:35
-021715-809-carib-1080p.mp4,00:17:30,00:20:35,00:21:00,00:22:00,00:23:55,00:24:15,00:28:40,00:37:20,00:39:05,00:40:05,00:40:50,00:42:45,00:45:00,00:46:40,00:48:00,00:48:20,00:51:30,00:52:10,00:53:35,00:54:10,00:54:20,00:56:45,00:56:55,00:59:10,00:59:35,00:59:55
-022715-817-carib-1080p.mp4,00:57:52,00:08:50,00:10:00,00:12:50,00:14:05,00:18:25,00:20:45,00:20:57,00:22:15,00:23:30,00:23:55,00:24:18,00:24:50,00:25:25,00:26:30,00:26:55,00:28:50,00:31:55,00:34:00,00:34:35,00:42:45,00:44:33
-030914-558-carib-high_1.mp4,00:10:45,00:12:45,00:14:40,00:16:33,00:19:40,00:21:35,00:21:55,00:23:05,00:26:15,00:27:30,00:29:55,00:31:10,00:31:40,00:36:40,00:41:40,00:42:40,00:44:50,00:49:50,00:52:25,00:53:50,00:54:30,00:55:20,00:55:10,00:57:05,00:57:25,00:59:05,01:00:15,01:02:11,01:03:55,01:05:10
-031815-830-carib-1080p.mp4,00:13:15,00:13:25,00:13:55,00:14:40,00:15:40,00:17:30,00:18:20,00:19:10,00:21:00,00:22:10,00:22:25,00:23:25,00:27:10,00:28:33,00:35:05,00:35:40,00:37:50,00:38:00,00:39:35,00:41:35,00:42:40,00:47:40,00:50:33,00:55:50,01:02:10,01:05:20,01:05:30
-032016-121-carib-1080p.mp4,00:27:20,00:28:40,00:28:55,00:30:35,00:36:10,00:39:10,00:40:30,00:43:00,00:46:05,00:50:00,00:56:05,00:56:20,00:59:20
-032913-301-carib-whole_hd1.wmv,00:06:00,00:09:40,00:11:00,00:13:00,00:15:05,00:16:40,00:18:05,00:20:00,00:39:31,00:34:35,00:44:50,00:47:25,00:49:50,00:51:20,00:54:58,00:56:55,00:59:50,01:00:50
-032914-571-carib-high_1.mp4,00:13:30,00:13:55,00:16:40,00:15:25,00:20:40,00:26:45,00:32:05,00:33:15,00:36:40,00:38:55,00:39:00,00:39:25,00:47:30,00:49:20
-042514-588-carib-high_1.mp4,00:10:30,00:11:15,00:19:15,00:20:00,00:20:30,00:22:05,00:22:45,00:22:53,00:24:15,00:30:50,00:32:25,00:34:15,00:34:45,00:34:55,0:36:05,00:37:20,00:37:40,00:38:30,00:39:35,00:41:00,00:43:30,00:43:40
-052315-884-carib-1080p.mp4,00:09:35,00:14:10,00:14:30,00:14:40,00:17:10,00:17:50,00:19:00,00:20:20,01:21:55,00:22:40,00:23:05,00:24:00,00:26:00,00:27:15,00:30:25,00:32:50,00:37:55,0:39:35,00:40:10,00:41:40,00:43:15,00:43:40,00:47:55,00:49:30,00:49:55,00:58:55,01:00:40
-053114-612-carib-high_1.mp4,00:08:35,00:13:35,00:15:25,00:16:40,00:20:35,00:22:25,00:26:10,00:29:10,00:32:55,00:34:10,00:37:05,00:37:40,00:39:40,00:40:52,00:42:08,00:42:15
-062615-908-carib-1080p.mp4,00:13:45,00:14:40,00:15:45,00:16:11,00:17:00,00:22:10,00:23:40,00:26:10,00:27:15,00:27:50,00:31:30,00:35:00,00:40:20,00:43:10,00:44:35,00:47:17,00:50:25,00:51:15,00:52:20,00:54:10,00:55:30,01:00:20
\ No newline at end of file
diff --git a/make_datasets/draw_mask.py b/make_datasets/draw_mask.py
index 75a6950c73f95da84900436365d94f696f4206d7..cb66ed9f3e1c6d2191c9a54561a24cdfbff925b5 100644
--- a/make_datasets/draw_mask.py
+++ b/make_datasets/draw_mask.py
@@ -6,18 +6,25 @@ import random
import sys
sys.path.append("..")
+from cores import Options
from util import util
from util import image_processing as impro
-image_dir = './datasets_img/v2im'
-mask_dir = './datasets_img/v2im_mask'
-util.makedirs(mask_dir)
-files = os.listdir(image_dir)
-files_new =files.copy()
-print('find image:',len(files))
-masks = os.listdir(mask_dir)
-print('mask:',len(masks))
+opt = Options()
+opt.parser.add_argument('--datadir',type=str,default=' ', help='your images dir')
+opt.parser.add_argument('--savedir',type=str,default='../datasets/draw/face', help='')
+opt = opt.getparse()
+
+mask_savedir = os.path.join(opt.savedir,'mask')
+img_savedir = os.path.join(opt.savedir,'origin_image')
+util.makedirs(mask_savedir)
+util.makedirs(img_savedir)
+
+filepaths = util.Traversal(opt.datadir)
+filepaths = util.is_imgs(filepaths)
+random.shuffle(filepaths)
+print('find image:',len(filepaths))
# mouse callback function
drawing = False # true if mouse is pressed
@@ -32,68 +39,58 @@ def draw_circle(event,x,y,flags,param):
elif event == cv2.EVENT_MOUSEMOVE:
if drawing == True:
- cv2.circle(img,(x,y),brushsize,(0,255,0),-1)
+ cv2.circle(img_drawn,(x,y),brushsize,(0,255,0),-1)
elif event == cv2.EVENT_LBUTTONUP:
drawing = False
- cv2.circle(img,(x,y),brushsize,(0,255,0),-1)
+ cv2.circle(img_drawn,(x,y),brushsize,(0,255,0),-1)
-def makemask(img):
+def makemask(img_drawn):
# starttime = datetime.datetime.now()
- mask = np.zeros(img.shape, np.uint8)
- for row in range(img.shape[0]):
- for col in range(img.shape[1]):
- # if (img[row,col,:] == [0,255,0]).all(): #too slow
- if img[row,col,0] == 0:
- if img[row,col,1] == 255:
- if img[row,col,2] == 0:
+ mask = np.zeros(img_drawn.shape, np.uint8)
+ for row in range(img_drawn.shape[0]):
+ for col in range(img_drawn.shape[1]):
+ # if (img_drawn[row,col,:] == [0,255,0]).all(): #too slow
+ if img_drawn[row,col,0] == 0:
+ if img_drawn[row,col,1] == 255:
+ if img_drawn[row,col,2] == 0:
mask[row,col,:] = [255,255,255]
- # endtime = datetime.datetime.now()
- # print('Cost time:',(endtime-starttime))
return mask
-
-for i in range(len(masks)):
- masks[i]=masks[i].replace('.png','.jpg')
-for file in files:
- if file in masks:
- files_new.remove(file)
-files = files_new
-# files = list(set(files)) #Distinct
-print('remain:',len(files))
-random.shuffle(files)
-# files.sort()
cnt = 0
+for file in filepaths:
+ try:
+ cnt += 1
+ img = impro.imread(file,loadsize=512)
+ img_drawn = img.copy()
+ cv2.namedWindow('image')
+ cv2.setMouseCallback('image',draw_circle) #MouseCallback
+ while(1):
-for file in files:
- cnt += 1
- img = cv2.imread(os.path.join(image_dir,file))
- img = impro.resize(img,512)
- cv2.namedWindow('image')
- cv2.setMouseCallback('image',draw_circle) #MouseCallback
- while(1):
-
- cv2.imshow('image',img)
- k = cv2.waitKey(1) & 0xFF
- if k == ord(' '):
- img = impro.resize(img,256)
- mask = makemask(img)
- cv2.imwrite(os.path.join(mask_dir,os.path.splitext(file)[0]+'.png'),mask)
- print(os.path.join(mask_dir,os.path.splitext(file)[0]+'.png'))
- # cv2.destroyAllWindows()
- print('remain:',len(files)-cnt)
- brushsize = 20
- break
- elif k == ord('a'):
- brushsize -= 5
- if brushsize<5:
- brushsize = 5
- print('brushsize:',brushsize)
- elif k == ord('d'):
- brushsize += 5
- print('brushsize:',brushsize)
- elif k == ord('w'):
- print('remain:',len(files)-cnt)
- break
+ cv2.imshow('image',img_drawn)
+ k = cv2.waitKey(1) & 0xFF
+ if k == ord('s'):
+
+ img_drawn = impro.resize(img_drawn,256)
+ mask = makemask(img_drawn)
+ cv2.imwrite(os.path.join(mask_savedir,os.path.splitext(os.path.basename(file))[0]+'.png'),mask)
+ cv2.imwrite(os.path.join(img_savedir,os.path.basename(file)),img)
+ print('Saved:',os.path.join(mask_savedir,os.path.splitext(os.path.basename(file))[0]+'.png'),mask)
+ # cv2.destroyAllWindows()
+ print('remain:',len(filepaths)-cnt)
+ brushsize = 20
+ break
+ elif k == ord('a'):
+ brushsize -= 5
+ if brushsize<5:
+ brushsize = 5
+ print('brushsize:',brushsize)
+ elif k == ord('d'):
+ brushsize += 5
+ print('brushsize:',brushsize)
+ elif k == ord('w'):
+ print('remain:',len(filepaths)-cnt)
+ break
+ except Exception as e:
+ print(file,e)
-# cv2.destroyAllWindows()
\ No newline at end of file
diff --git a/make_datasets/get_image_from_video.py b/make_datasets/get_image_from_video.py
index fffcde5a9f9fab6def0989bf465f67c8b26b8075..b8dedbc3c61fde96d153a4b9b15325d4cdf4d5cf 100644
--- a/make_datasets/get_image_from_video.py
+++ b/make_datasets/get_image_from_video.py
@@ -1,19 +1,17 @@
import os
-import numpy as np
-import cv2
-import random
-import csv
-
import sys
sys.path.append("..")
+from cores import Options
from util import util,ffmpeg
-from util import image_processing as impro
-files = util.Traversal('./videos')
+opt = Options()
+opt.parser.add_argument('--datadir',type=str,default='', help='your video dir')
+opt.parser.add_argument('--savedir',type=str,default='../datasets/video2image', help='')
+opt = opt.getparse()
+
+files = util.Traversal(opt.datadir)
videos = util.is_videos(files)
-output_dir = './datasets_img/v2im'
-util.makedirs(output_dir)
-FPS = 1
-util.makedirs(output_dir)
+
+util.makedirs(opt.savedir)
for video in videos:
- ffmpeg.continuous_screenshot(video, output_dir, FPS)
\ No newline at end of file
+ ffmpeg.continuous_screenshot(video, opt.savedir, opt.fps)
\ No newline at end of file
diff --git a/make_datasets/make_pix2pix_dataset.py b/make_datasets/make_pix2pix_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..68c39c80772d313bc16661e30e78cf206ddaa80d
--- /dev/null
+++ b/make_datasets/make_pix2pix_dataset.py
@@ -0,0 +1,190 @@
+import os
+import random
+import sys
+import datetime
+import time
+import shutil
+import threading
+import warnings
+warnings.filterwarnings(action='ignore')
+
+import numpy as np
+import cv2
+
+sys.path.append("..")
+from models import runmodel,loadmodel
+import util.image_processing as impro
+from util import util,mosaic,data
+from cores import Options
+
+
+opt = Options()
+opt.parser.add_argument('--datadir',type=str,default='../datasets/draw/face', help='')
+opt.parser.add_argument('--savedir',type=str,default='../datasets/pix2pix/face', help='')
+opt.parser.add_argument('--name',type=str,default='', help='save name')
+opt.parser.add_argument('--mod',type=str,default='drawn', help='drawn | network | irregular | drawn,irregular | network,irregular')
+opt.parser.add_argument('--square', action='store_true', help='if specified, crop to square')
+opt.parser.add_argument('--irrholedir',type=str,default='../datasets/Irregular_Holes_mask', help='')
+opt.parser.add_argument('--hd', action='store_true', help='if false make dataset for pix2pix, if Ture for pix2pix_HD')
+opt.parser.add_argument('--savemask', action='store_true', help='if specified,save mask')
+opt.parser.add_argument('--outsize', type=int ,default= 512,help='')
+opt.parser.add_argument('--fold', type=int ,default= 1,help='')
+opt.parser.add_argument('--start', type=int ,default= 0,help='')
+opt.parser.add_argument('--minsize', type=int ,default= 128,help='when [square], minimal roi size')
+opt.parser.add_argument('--quality', type=int ,default= 40,help='when [square], minimal quality')
+
+opt = opt.getparse()
+
+util.makedirs(opt.savedir)
+util.writelog(os.path.join(opt.savedir,'opt.txt'),
+ str(time.asctime(time.localtime(time.time())))+'\n'+util.opt2str(opt))
+opt.mod = (opt.mod).split(',')
+
+#save dir
+if opt.hd:
+ train_A_path = os.path.join(opt.savedir,'train_A')
+ train_B_path = os.path.join(opt.savedir,'train_B')
+ util.makedirs(train_A_path)
+ util.makedirs(train_B_path)
+else:
+ train_path = os.path.join(opt.savedir,'train')
+ util.makedirs(train_path)
+if opt.savemask:
+ mask_save_path = os.path.join(opt.savedir,'mask')
+ util.makedirs(mask_save_path)
+
+#read dir
+if 'drawn' in opt.mod:
+ imgpaths = util.Traversal(os.path.join(opt.datadir,'origin_image'))
+ imgpaths.sort()
+ maskpaths = util.Traversal(os.path.join(opt.datadir,'mask'))
+ maskpaths.sort()
+if 'network' in opt.mod or 'irregular' in opt.mod:
+ imgpaths = util.Traversal(opt.datadir)
+ random.shuffle (imgpaths)
+if 'irregular' in opt.mod:
+ irrpaths = util.Traversal(opt.irrholedir)
+
+
+#def network
+if 'network' in opt.mod:
+ net = loadmodel.bisenet(opt,'roi')
+
+
+# def checksaveimage(opt,img,mask):
+
+# #check
+# saveflag = True
+# x,y,size,area = impro.boundingSquare(mask, random.uniform(1.4,1.6))
+# if area < 1000:
+# saveflag = False
+# else:
+# if opt.square:
+# if size < opt.minsize:
+# saveflag = False
+# else:
+# img = impro.resize(img[y-size:y+size,x-size:x+size],opt.outsize,interpolation=cv2.INTER_CUBIC)
+# mask = impro.resize(mask[y-size:y+size,x-size:x+size],opt.outsize,interpolation=cv2.INTER_CUBIC)
+# if impro.Q_lapulase(img) opt.minmaskarea and size>opt.minsize and impro.Q_lapulase(img)>opt.quality:
+ cnt +=1
+ if cnt == opt.time:
+ # print(second)
+ timestamps.append(util.second2stamp(cut_point*opt.interval))
+ util.writelog(os.path.join(opt.savedir,'opt.txt'),videopath+'\n'+str(timestamps))
+ #print(timestamps)
+
+ # util.clean_tempfiles()
+ # fps,endtime,height,width = ffmpeg.get_video_infos(videopath)
+ # # print(fps,endtime,height,width)
+ # ffmpeg.continuous_screenshot(videopath, './tmp/video2image', 1)
+
+ # # find where to cut
+ # print('Find where to cut...')
+ # timestamps=[]
+ # imagepaths = util.Traversal('./tmp/video2image')
+ # for second in range(int(endtime)):
+ # if second%opt.interval==0:
+ # cnt = 0
+ # for i in range(opt.time):
+ # img = impro.imread(imagepaths[second+i])
+ # mask = runmodel.get_ROI_position(img,net,opt)[0]
+ # if not opt.all_mosaic_area:
+ # mask = impro.find_mostlikely_ROI(mask)
+ # if impro.mask_area(mask) > opt.minmaskarea and impro.Q_lapulase(img)>opt.quality:
+ # # print(impro.mask_area(mask))
+ # cnt +=1
+ # if cnt == opt.time:
+ # # print(second)
+ # timestamps.append(util.second2stamp(second))
+
+ #generate datasets
+ print('Generate datasets...')
+ for timestamp in timestamps:
+ savecnt = '%05d' % result_cnt
+ origindir = os.path.join(opt.savedir,savecnt,'origin_image')
+ maskdir = os.path.join(opt.savedir,savecnt,'mask')
+ util.makedirs(origindir)
+ util.makedirs(maskdir)
+
+ util.clean_tempfiles()
+ ffmpeg.video2image(videopath, './tmp/video2image/%05d.'+opt.tempimage_type,
+ start_time = timestamp,last_time = util.second2stamp(opt.time))
+
+ endtime = datetime.datetime.now()
+ print(str(video_cnt)+'/'+str(len(videopaths))+' ',
+ util.get_bar(100*video_cnt/len(videopaths),35),'',
+ util.second2stamp((endtime-starttime).seconds)+'/'+util.second2stamp((endtime-starttime).seconds/video_cnt*len(videopaths)))
+
+ imagepaths = util.Traversal('./tmp/video2image')
+ imagepaths = sorted(imagepaths)
+ imgs=[];masks=[]
+ mask_flag = False
+
+ for imagepath in imagepaths:
+ img = impro.imread(imagepath)
+ mask = runmodel.get_ROI_position(img,net,opt,keepsize=True)[0]
+ imgs.append(img)
+ masks.append(mask)
+ if not mask_flag:
+ mask_avg = mask.astype(np.float64)
+ mask_flag = True
+ else:
+ mask_avg += mask.astype(np.float64)
+
+ mask_avg = np.clip(mask_avg/len(imagepaths),0,255).astype('uint8')
+ mask_avg = impro.mask_threshold(mask_avg,20,64)
+ if not opt.all_mosaic_area:
+ mask_avg = impro.find_mostlikely_ROI(mask_avg)
+ x,y,size,area = impro.boundingSquare(mask_avg,Ex_mul=random.uniform(1.1,1.5))
+
+ for i in range(len(imagepaths)):
+ img = impro.resize(imgs[i][y-size:y+size,x-size:x+size],opt.outsize,interpolation=cv2.INTER_CUBIC)
+ mask = impro.resize(masks[i][y-size:y+size,x-size:x+size],opt.outsize,interpolation=cv2.INTER_CUBIC)
+ impro.imwrite(os.path.join(origindir,'%05d'%(i+1)+'.jpg'), img)
+ impro.imwrite(os.path.join(maskdir,'%05d'%(i+1)+'.png'), mask)
+
+ result_cnt+=1
+
+ except Exception as e:
+ video_cnt +=1
+ util.writelog(os.path.join(opt.savedir,'opt.txt'),
+ videopath+'\n'+str(result_cnt)+'\n'+str(e))
+ video_cnt +=1
diff --git a/make_datasets/use_addmosaic_model_make_dataset.py b/make_datasets/use_addmosaic_model_make_dataset.py
deleted file mode 100644
index 7c04dc0d669969bab847d928a917f3371a4b721a..0000000000000000000000000000000000000000
--- a/make_datasets/use_addmosaic_model_make_dataset.py
+++ /dev/null
@@ -1,73 +0,0 @@
-import sys
-import os
-import random
-import datetime
-
-import numpy as np
-import cv2
-
-import torch
-import torch.backends.cudnn as cudnn
-import torch.nn as nn
-from torch import optim
-
-from unet import UNet
-from mosaic import random_mosaic
-import image_processing as impro
-
-
-
-def runmodel(img,net):
- img=impro.image2folat(img,3)
- img=img.reshape(1,3,128,128)
- img = torch.from_numpy(img)
- img=img.cuda()
- pred = net(img)
- pred = (pred.cpu().detach().numpy()*255)
- pred = pred.reshape(128,128).astype('uint8')
- return pred
-
-
-
-dir_img = './origin_image/'
-dir_mosaic = './mosaic/'
-dir_mask = './mask/'
-dir_dataset = './dataset/'
-dir_checkpoint = 'checkpoints/'
-
-net = UNet(n_channels = 3, n_classes = 1)
-net.load_state_dict(torch.load(dir_checkpoint+'mosaic_position.pth'))
-net.cuda()
-net.eval()
-# cudnn.benchmark = True
-files = os.listdir(dir_mosaic)
-
-for i,file in enumerate(files,1):
- orgin_image = cv2.imread(dir_img+file)
- mosaic_image = cv2.imread(dir_mosaic+file)
- img = impro.resize(mosaic_image,128)
- img1,img2 = impro.spiltimage(img)
- mask1 =runmodel(img1,net)
- mask2 =runmodel(img2,net)
- mask = impro.mergeimage(mask1,mask2,img)
-
- # test_mask = mask.copy()
-
- mask = impro.mask_threshold(mask,blur=5,threshold=128)
- if impro.mask_area(mask) > 1:
- h,w = orgin_image.shape[:2]
- mosaic_image = cv2.resize(mosaic_image,(w,h))
- # test_mask = cv2.resize(test_mask,(w,h))
- # test_mask = impro.ch_one2three(test_mask)
-
- x,y,size,area = impro.boundingSquare(mask,Ex_mul=1.5)
- rat = min(orgin_image.shape[:2])/128.0
- x,y,size = int(rat*x),int(rat*y),int(rat*size)
- orgin_crop = orgin_image[y-size:y+size,x-size:x+size]
- mosaic_crop = mosaic_image[y-size:y+size,x-size:x+size]
- # mosaic_crop = test_mask[y-size:y+size,x-size:x+size]
-
- result = impro.makedataset(mosaic_crop,orgin_crop)
- cv2.imwrite(dir_dataset+file,result)
- if i%1000==0:
- print(i,'image finished.')
diff --git a/make_datasets/use_addmosaic_model_make_video_dataset.py b/make_datasets/use_addmosaic_model_make_video_dataset.py
deleted file mode 100644
index c972fed8427ae38437e7ddd730f04bd385e0461e..0000000000000000000000000000000000000000
--- a/make_datasets/use_addmosaic_model_make_video_dataset.py
+++ /dev/null
@@ -1,86 +0,0 @@
-import os
-import numpy as np
-import cv2
-import random
-
-import sys
-sys.path.append("..")
-from models import runmodel,loadmodel
-from util import mosaic,util,ffmpeg,filt
-from util import image_processing as impro
-from cores import options
-
-opt = options.Options().getparse()
-util.file_init(opt)
-
-videos = os.listdir('./video')
-videos.sort()
-opt.model_path = '../pretrained_models/add_youknow_128.pth'
-opt.use_gpu = True
-Ex = 1.4
-Area_Type = 'normal'
-suffix = ''
-
-net = loadmodel.unet(opt)
-for i,path in enumerate(videos,0):
- try:
- path = os.path.join('./video',path)
- util.clean_tempfiles()
- ffmpeg.video2voice(path,'./tmp/voice_tmp.mp3')
- ffmpeg.video2image(path,'./tmp/video2image/output_%05d.'+opt.tempimage_type)
- imagepaths=os.listdir('./tmp/video2image')
- imagepaths.sort()
-
- # get position
- positions = []
- img_ori_example = impro.imread(os.path.join('./tmp/video2image',imagepaths[0]))
- mask_avg = np.zeros((impro.resize(img_ori_example, 128)).shape[:2])
- for imagepath in imagepaths:
- imagepath = os.path.join('./tmp/video2image',imagepath)
- #print('Find ROI location:',imagepath)
- img = impro.imread(imagepath)
- x,y,size,mask = runmodel.get_mosaic_position(img,net,opt,threshold = 80)
- cv2.imwrite(os.path.join('./tmp/ROI_mask',
- os.path.basename(imagepath)),mask)
- positions.append([x,y,size])
- mask_avg = mask_avg + mask
- #print('Optimize ROI locations...')
- mask_index = filt.position_medfilt(np.array(positions), 13)
-
- mask = np.clip(mask_avg/len(imagepaths),0,255).astype('uint8')
- mask = impro.mask_threshold(mask,20,32)
- x,y,size,area = impro.boundingSquare(mask,Ex_mul=Ex)
- rat = min(img_ori_example.shape[:2])/128.0
- x,y,size = int(rat*x),int(rat*y),int(rat*size)
- cv2.imwrite(os.path.join('./tmp/ROI_mask_check',
- 'test_show.png'),mask)
- if size !=0 :
- mask_path = './dataset/'+os.path.splitext(os.path.basename(path))[0]+suffix+'/mask'
- ori_path = './dataset/'+os.path.splitext(os.path.basename(path))[0]+suffix+'/ori'
- mosaic_path = './dataset/'+os.path.splitext(os.path.basename(path))[0]+suffix+'/mosaic'
- os.makedirs('./dataset/'+os.path.splitext(os.path.basename(path))[0]+suffix)
- os.makedirs(mask_path)
- os.makedirs(ori_path)
- os.makedirs(mosaic_path)
- #print('Add mosaic to images...')
- mosaic_size = mosaic.get_autosize(img_ori_example,mask,area_type = Area_Type)*random.uniform(1,2)
- models = ['squa_avg','rect_avg','squa_mid']
- mosaic_type = random.randint(0,len(models)-1)
- rect_rat = random.uniform(1.2,1.6)
- for i in range(len(imagepaths)):
- mask = impro.imread(os.path.join('./tmp/ROI_mask',imagepaths[mask_index[i]]),mod = 'gray')
- img_ori = impro.imread(os.path.join('./tmp/video2image',imagepaths[i]))
- img_mosaic = mosaic.addmosaic_normal(img_ori,mask,mosaic_size,model = models[mosaic_type],rect_rat=rect_rat)
- mask = impro.resize(mask, min(img_ori.shape[:2]))
-
- img_ori_crop = impro.resize(img_ori[y-size:y+size,x-size:x+size],256)
- img_mosaic_crop = impro.resize(img_mosaic[y-size:y+size,x-size:x+size],256)
- mask_crop = impro.resize(mask[y-size:y+size,x-size:x+size],256)
-
- cv2.imwrite(os.path.join(ori_path,os.path.basename(imagepaths[i])),img_ori_crop)
- cv2.imwrite(os.path.join(mosaic_path,os.path.basename(imagepaths[i])),img_mosaic_crop)
- cv2.imwrite(os.path.join(mask_path,os.path.basename(imagepaths[i])),mask_crop)
- except Exception as e:
- print(e)
-
- print(util.get_bar(100*i/len(videos),num=50))
\ No newline at end of file
diff --git a/make_datasets/use_drawn_mask_make_dataset.py b/make_datasets/use_drawn_mask_make_dataset.py
deleted file mode 100644
index 56dfe54d3072c9a2a6a31183cce6fa38f2db6ff6..0000000000000000000000000000000000000000
--- a/make_datasets/use_drawn_mask_make_dataset.py
+++ /dev/null
@@ -1,68 +0,0 @@
-import numpy as np
-import cv2
-import os
-from torchvision import transforms
-from PIL import Image
-import random
-import sys
-sys.path.append("..")
-import util.image_processing as impro
-from util import util,mosaic
-import datetime
-import shutil
-
-mask_dir = '/media/hypo/Project/MyProject/DeepMosaics/DeepMosaics/train/add/datasets/av/mask'
-img_dir ='/media/hypo/Project/MyProject/DeepMosaics/DeepMosaics/train/add/datasets/av/origin_image'
-output_dir = './datasets_img'
-util.makedirs(output_dir)
-HD = True # if false make dataset for pix2pix, if Ture for pix2pix_HD
-MASK = True # if True, output mask,too
-OUT_SIZE = 256
-FOLD_NUM = 2
-Bounding = False
-
-if HD:
- train_A_path = os.path.join(output_dir,'train_A')
- train_B_path = os.path.join(output_dir,'train_B')
- util.makedirs(train_A_path)
- util.makedirs(train_B_path)
-else:
- train_path = os.path.join(output_dir,'train')
- util.makedirs(train_path)
-if MASK:
- mask_path = os.path.join(output_dir,'mask')
- util.makedirs(mask_path)
-
-mask_names = os.listdir(mask_dir)
-img_names = os.listdir(img_dir)
-mask_names.sort()
-img_names.sort()
-print('Find images:',len(img_names))
-
-cnt = 0
-for fold in range(FOLD_NUM):
- for img_name,mask_name in zip(img_names,mask_names):
- try:
- img = impro.imread(os.path.join(img_dir,img_name))
- mask = impro.imread(os.path.join(mask_dir,mask_name),'gray')
- mask = impro.resize_like(mask, img)
- x,y,size,area = impro.boundingSquare(mask, 1.5)
- if area > 100:
- if Bounding:
- img = impro.resize(img[y-size:y+size,x-size:x+size],OUT_SIZE)
- mask = impro.resize(mask[y-size:y+size,x-size:x+size],OUT_SIZE)
- img_mosaic = mosaic.addmosaic_random(img, mask)
-
- if HD:
- cv2.imwrite(os.path.join(train_A_path,'%05d' % cnt+'.jpg'), img_mosaic)
- cv2.imwrite(os.path.join(train_B_path,'%05d' % cnt+'.jpg'), img)
- else:
- merge_img = impro.makedataset(img_mosaic, img)
- cv2.imwrite(os.path.join(train_path,'%05d' % cnt+'.jpg'), merge_img)
- if MASK:
- cv2.imwrite(os.path.join(mask_path,'%05d' % cnt+'.png'), mask)
- print("Processing:",img_name," ","Remain:",len(img_names)*FOLD_NUM-cnt)
-
- except Exception as e:
- print(img_name,e)
- cnt += 1
diff --git a/make_datasets/use_irregular_holes_make_dataset.py b/make_datasets/use_irregular_holes_make_dataset.py
deleted file mode 100644
index 4f9557fb51817d6ca58ecdd88b17325b5f8ba86f..0000000000000000000000000000000000000000
--- a/make_datasets/use_irregular_holes_make_dataset.py
+++ /dev/null
@@ -1,93 +0,0 @@
-import numpy as np
-import cv2
-import os
-from torchvision import transforms
-from PIL import Image
-import random
-import sys
-sys.path.append("..")
-import util.image_processing as impro
-from util import util,mosaic
-import datetime
-
-ir_mask_path = './Irregular_Holes_mask'
-img_dir ='/media/hypo/Hypoyun/Datasets/other/face512'
-MOD = 'mosaic' #HD | pix2pix | mosaic
-MASK = False # if True, output mask,too
-BOUNDING = True # if true the mosaic size will be more big
-suffix = '_1'
-output_dir = os.path.join('./datasets_img',MOD)
-util.makedirs(output_dir)
-
-if MOD == 'HD':
- train_A_path = os.path.join(output_dir,'train_A')
- train_B_path = os.path.join(output_dir,'train_B')
- util.makedirs(train_A_path)
- util.makedirs(train_B_path)
-elif MOD == 'pix2pix':
- train_path = os.path.join(output_dir,'train')
- util.makedirs(train_path)
-elif MOD == 'mosaic':
- ori_path = os.path.join(output_dir,'ori')
- mosaic_path = os.path.join(output_dir,'mosaic')
- mask_path = os.path.join(output_dir,'mask')
- util.makedirs(ori_path)
- util.makedirs(mosaic_path)
- util.makedirs(mask_path)
-if MASK:
- mask_path = os.path.join(output_dir,'mask')
- util.makedirs(mask_path)
-
-transform_mask = transforms.Compose([
- transforms.RandomResizedCrop(size=512, scale=(0.5,1)),
- transforms.RandomHorizontalFlip(),
- ])
-
-transform_img = transforms.Compose([
-
- transforms.Resize(512),
- transforms.RandomCrop(512)
- ])
-
-mask_names = os.listdir(ir_mask_path)
-img_paths = util.Traversal(img_dir)
-img_paths = util.is_imgs(img_paths)
-print('Find images:',len(img_paths))
-
-for i,img_path in enumerate(img_paths,1):
- try:
- img = Image.open(img_path)
- img = transform_img(img)
- img = np.array(img)
- img = img[...,::-1]
-
- if BOUNDING:
- mosaic_area = 0
- while mosaic_area < 16384:
- mask = Image.open(os.path.join(ir_mask_path,random.choices(mask_names)[0]))
- mask = transform_mask(mask)
- mask = np.array(mask)
- mosaic_area = impro.mask_area(mask)
- mosaic_img = mosaic.addmosaic_random(img, mask,'bounding')
- else:
- mask = Image.open(os.path.join(ir_mask_path,random.choices(mask_names)[0]))
- mask = transform_mask(mask)
- mask = np.array(mask)
- mosaic_img = mosaic.addmosaic_random(img, mask)
-
- if MOD == 'HD':#[128:384,128:384,:] --->256
- cv2.imwrite(os.path.join(train_A_path,'%05d' % i+suffix+'.jpg'), mosaic_img)
- cv2.imwrite(os.path.join(train_B_path,'%05d' % i+suffix+'.jpg'), img)
- if MASK:
- cv2.imwrite(os.path.join(mask_path,'%05d' % i+suffix+'.png'), mask)
- elif MOD == 'pix2pix':
- merge_img = impro.makedataset(mosaic_img, img)
- cv2.imwrite(os.path.join(train_path,'%05d' % i+suffix+'.jpg'), merge_img)
- elif MOD == 'mosaic':
- cv2.imwrite(os.path.join(mosaic_path,'%05d' % i+suffix+'.jpg'), mosaic_img)
- cv2.imwrite(os.path.join(ori_path,'%05d' % i+suffix+'.jpg'), img)
- cv2.imwrite(os.path.join(mask_path,'%05d' % i+suffix+'.png'), mask)
-
- print('\r','Proc/all:'+str(i)+'/'+str(len(img_paths)),util.get_bar(100*i/len(img_paths),num=40),end='')
- except Exception as e:
- print(img_path,e)
diff --git a/models/BiSeNet_model.py b/models/BiSeNet_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..b58ea5be70ac3634b8a8429597a885259b61ae9a
--- /dev/null
+++ b/models/BiSeNet_model.py
@@ -0,0 +1,264 @@
+# This code is cloned from https://github.com/ooooverflow/BiSeNet
+import torch.nn as nn
+import torch
+import torch.nn.functional as F
+from . import components
+import warnings
+warnings.filterwarnings(action='ignore')
+
+def flatten(tensor):
+    """Move the channel axis to the front and collapse every other axis.
+
+    Shape change: (N, C, ...) -> (C, N * ...), e.g.
+    (N, C, D, H, W) -> (C, N * D * H * W).
+    """
+    num_channels = tensor.size(1)
+    # Swapping axes 0 and 1 is the same permutation as (1, 0, 2, 3, ...).
+    channel_first = tensor.transpose(0, 1)
+    # The transposed view is not contiguous; copy before reshaping with view().
+    return channel_first.contiguous().view(num_channels, -1)
+
+
+class DiceLoss(nn.Module):
+    """Soft overlap loss between softmax probabilities and a same-shaped target.
+
+    For each channel the ratio sum(p * t) / sum(p + t) is taken over all
+    samples and positions; the loss is one minus the mean of those ratios.
+    Note there is no factor of 2 in the numerator.
+    """
+
+    def __init__(self):
+        super().__init__()
+        # Stored on the module but not used by forward().
+        self.epsilon = 1e-5
+
+    def forward(self, output, target):
+        assert output.size() == target.size(), "'input' and 'target' must have the same shape"
+        # Softmax over the channel axis, then one row per channel for both tensors.
+        probs = flatten(F.softmax(output, dim=1))
+        truth = flatten(target)
+        overlap = (probs * truth).sum(-1)
+        total = (probs + truth).sum(-1)
+        return 1 - torch.mean(overlap / total)
+
+class resnet18(torch.nn.Module):
+    """ResNet-18 context-path backbone.
+
+    forward() returns the layer3 features, the layer4 features and a globally
+    average-pooled "tail" of the layer4 features.
+    """
+
+    def __init__(self, pretrained=True):
+        super().__init__()
+        backbone = components.resnet18(pretrained=pretrained)
+        self.features = backbone
+        # Re-expose the stem and the four stages under their own attribute names.
+        self.conv1 = backbone.conv1
+        self.bn1 = backbone.bn1
+        self.relu = backbone.relu
+        self.maxpool1 = backbone.maxpool
+        self.layer1 = backbone.layer1
+        self.layer2 = backbone.layer2
+        self.layer3 = backbone.layer3
+        self.layer4 = backbone.layer4
+
+    def forward(self, input):
+        stem = self.maxpool1(self.relu(self.bn1(self.conv1(input))))
+        # layer1 -> 1 / 4, layer2 -> 1 / 8, layer3 -> 1 / 16 of the input size
+        feature3 = self.layer3(self.layer2(self.layer1(stem)))
+        feature4 = self.layer4(feature3)  # 1 / 32
+        # global average pooling (width first, then height) to build tail
+        tail = feature4.mean(3, keepdim=True).mean(2, keepdim=True)
+        return feature3, feature4, tail
+
+
+class resnet101(torch.nn.Module):
+    """ResNet-101 context-path backbone.
+
+    forward() returns the layer3 features, the layer4 features and a globally
+    average-pooled "tail" of the layer4 features.
+    """
+
+    def __init__(self, pretrained=True):
+        super().__init__()
+        backbone = components.resnet101(pretrained=pretrained)
+        self.features = backbone
+        # Re-expose the stem and the four stages under their own attribute names.
+        self.conv1 = backbone.conv1
+        self.bn1 = backbone.bn1
+        self.relu = backbone.relu
+        self.maxpool1 = backbone.maxpool
+        self.layer1 = backbone.layer1
+        self.layer2 = backbone.layer2
+        self.layer3 = backbone.layer3
+        self.layer4 = backbone.layer4
+
+    def forward(self, input):
+        stem = self.maxpool1(self.relu(self.bn1(self.conv1(input))))
+        # layer1 -> 1 / 4, layer2 -> 1 / 8, layer3 -> 1 / 16 of the input size
+        feature3 = self.layer3(self.layer2(self.layer1(stem)))
+        feature4 = self.layer4(feature3)  # 1 / 32
+        # global average pooling (width first, then height) to build tail
+        tail = feature4.mean(3, keepdim=True).mean(2, keepdim=True)
+        return feature3, feature4, tail
+
+def build_contextpath(name,pretrained):
+    """Build the context-path backbone selected by ``name``.
+
+    Args:
+        name: 'resnet18' or 'resnet101'.
+        pretrained: forwarded to the backbone constructor.
+
+    Raises:
+        KeyError: if ``name`` is not a supported backbone.
+    """
+    # Map names to classes, not instances: a dict of already-constructed
+    # networks would build BOTH backbones (and, with pretrained=True, load
+    # both checkpoints) on every call just to return one of them.
+    backbones = {
+        'resnet18': resnet18,
+        'resnet101': resnet101,
+    }
+    return backbones[name](pretrained=pretrained)
+
+class ConvBlock(torch.nn.Module):
+    """Bias-free Conv2d followed by BatchNorm2d and ReLU."""
+
+    def __init__(self, in_channels, out_channels, kernel_size=3, stride=2,padding=1):
+        super().__init__()
+        conv_kwargs = dict(kernel_size=kernel_size, stride=stride, padding=padding, bias=False)
+        self.conv1 = nn.Conv2d(in_channels, out_channels, **conv_kwargs)
+        self.bn = nn.BatchNorm2d(out_channels)
+        self.relu = nn.ReLU()
+
+    def forward(self, input):
+        normalized = self.bn(self.conv1(input))
+        return self.relu(normalized)
+
+class Spatial_path(torch.nn.Module):
+    """Three stacked ConvBlocks widening the channels 3 -> 64 -> 128 -> 256.
+
+    Every block is built with ConvBlock's default arguments.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.convblock1 = ConvBlock(in_channels=3, out_channels=64)
+        self.convblock2 = ConvBlock(in_channels=64, out_channels=128)
+        self.convblock3 = ConvBlock(in_channels=128, out_channels=256)
+
+    def forward(self, input):
+        x = input
+        # Apply the three blocks in order.
+        for stage in (self.convblock1, self.convblock2, self.convblock3):
+            x = stage(x)
+        return x
+
+class AttentionRefinementModule(torch.nn.Module):
+    """Re-weight each channel of a feature map by a gate computed from its global average."""
+
+    def __init__(self, in_channels, out_channels):
+        super().__init__()
+        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)
+        # Registered (so its parameters appear in the state dict) but not applied in forward().
+        self.bn = nn.BatchNorm2d(out_channels)
+        self.sigmoid = nn.Sigmoid()
+        self.in_channels = in_channels
+        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
+
+    def forward(self, input):
+        # global average pooling: one value per channel
+        pooled = self.avgpool(input)
+        assert self.in_channels == pooled.size(1), 'in_channels and out_channels should all be {}'.format(pooled.size(1))
+        gate = self.sigmoid(self.conv(pooled))
+        # The gate is multiplied into the input, so its channel count must match the input's.
+        return torch.mul(input, gate)
+
+class FeatureFusionModule(torch.nn.Module):
+    """Fuse two feature maps: concatenate, project, then apply channel attention with a residual add."""
+
+    def __init__(self, num_classes, in_channels):
+        super().__init__()
+        # in_channels is the channel count of the two inputs once concatenated, e.g.
+        #   resnet101: 3328 = 256 (spatial path) + 1024 + 2048 (context path)
+        #   resnet18:  1024 = 256 (spatial path) + 256 + 512 (context path)
+        self.in_channels = in_channels
+
+        self.convblock = ConvBlock(in_channels=self.in_channels, out_channels=num_classes, stride=1)
+        self.conv1 = nn.Conv2d(num_classes, num_classes, kernel_size=1)
+        self.relu = nn.ReLU()
+        self.conv2 = nn.Conv2d(num_classes, num_classes, kernel_size=1)
+        self.sigmoid = nn.Sigmoid()
+        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
+
+    def forward(self, input_1, input_2):
+        stacked = torch.cat((input_1, input_2), dim=1)
+        assert self.in_channels == stacked.size(1), 'in_channels of ConvBlock should be {}'.format(stacked.size(1))
+        feature = self.convblock(stacked)
+
+        # Channel attention computed from the globally pooled fused feature.
+        attention = self.avgpool(feature)
+        attention = self.relu(self.conv1(attention))
+        attention = self.sigmoid(self.conv2(attention))
+        # feature * attention + feature
+        return torch.add(torch.mul(feature, attention), feature)
+
+class BiSeNet(torch.nn.Module):
+    """Two-branch segmentation network: a spatial path fused with a ResNet context path.
+
+    Args:
+        num_classes: number of output channels.
+        context_path: backbone name, 'resnet18' or 'resnet101'.
+        train_flag: forwarded (as a bool) to the backbone as its ``pretrained`` flag.
+
+    In training mode forward() returns ``(result, aux1, aux2)``; otherwise it
+    returns only ``result``. Every output is passed through a sigmoid.
+    """
+
+    def __init__(self, num_classes, context_path, train_flag=True):
+        super().__init__()
+        # build spatial path (the attribute keeps its existing spelling because
+        # state-dict keys are derived from attribute names)
+        self.saptial_path = Spatial_path()
+        self.sigmoid = nn.Sigmoid()
+        # build context path
+        self.context_path = build_contextpath(name=context_path, pretrained=bool(train_flag))
+
+        # channel widths of the two context features (layer3, layer4) per backbone
+        widths = {'resnet101': (1024, 2048), 'resnet18': (256, 512)}.get(context_path)
+        if widths is None:
+            print('Error: unspport context_path network \n')
+        else:
+            mid_ch, deep_ch = widths
+            # build attention refinement modules
+            self.attention_refinement_module1 = AttentionRefinementModule(mid_ch, mid_ch)
+            self.attention_refinement_module2 = AttentionRefinementModule(deep_ch, deep_ch)
+            # supervision block
+            self.supervision1 = nn.Conv2d(in_channels=mid_ch, out_channels=num_classes, kernel_size=1)
+            self.supervision2 = nn.Conv2d(in_channels=deep_ch, out_channels=num_classes, kernel_size=1)
+            # build feature fusion module: 256 spatial-path channels plus both
+            # context features (3328 for resnet101, 1024 for resnet18)
+            self.feature_fusion_module = FeatureFusionModule(num_classes, 256 + mid_ch + deep_ch)
+
+        # build final convolution
+        self.conv = nn.Conv2d(in_channels=num_classes, out_channels=num_classes, kernel_size=1)
+
+        self.init_weight()
+
+        # every sub-module except the context-path backbone
+        self.mul_lr = [
+            self.saptial_path,
+            self.attention_refinement_module1,
+            self.attention_refinement_module2,
+            self.supervision1,
+            self.supervision2,
+            self.feature_fusion_module,
+            self.conv,
+        ]
+
+    def init_weight(self):
+        """Re-initialise Conv2d / BatchNorm2d layers, leaving the context path untouched."""
+        for name, m in self.named_modules():
+            if 'context_path' in name:
+                continue
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')
+            elif isinstance(m, nn.BatchNorm2d):
+                m.eps = 1e-5
+                m.momentum = 0.1
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+
+    def forward(self, input):
+        # output of spatial path
+        sx = self.saptial_path(input)
+        spatial_size = sx.size()[-2:]
+
+        # output of context path
+        cx1, cx2, tail = self.context_path(input)
+        cx1 = self.attention_refinement_module1(cx1)
+        cx2 = self.attention_refinement_module2(cx2)
+        cx2 = torch.mul(cx2, tail)
+        # upsample both context features to the spatial path's resolution
+        cx1 = F.interpolate(cx1, size=spatial_size, mode='bilinear')
+        cx2 = F.interpolate(cx2, size=spatial_size, mode='bilinear')
+        cx = torch.cat((cx1, cx2), dim=1)
+
+        # output of feature fusion module
+        result = self.feature_fusion_module(sx, cx)
+
+        # upsampling
+        result = F.interpolate(result, scale_factor=8, mode='bilinear')
+        result = self.sigmoid(self.conv(result))
+
+        if not self.training:
+            return result
+
+        # auxiliary supervision heads are only evaluated in training mode
+        input_size = input.size()[-2:]
+        cx1_sup = F.interpolate(self.supervision1(cx1), size=input_size, mode='bilinear')
+        cx2_sup = F.interpolate(self.supervision2(cx2), size=input_size, mode='bilinear')
+        return result, self.sigmoid(cx1_sup), self.sigmoid(cx2_sup)
\ No newline at end of file
diff --git a/models/__init__.py b/models/__init__.py
index 54739ba61e657ebed8aa91832b2f8f20accbfdab..8b137891791fe96927ad78e64b0aad7bded08bdc 100755
--- a/models/__init__.py
+++ b/models/__init__.py
@@ -1,2 +1 @@
-from .pix2pix_model import *
-from .unet_model import UNet
+
diff --git a/models/components.py b/models/components.py
new file mode 100644
index 0000000000000000000000000000000000000000..59cb3333a3f0f59e3fe95e0cb5a009a3ce4c9b80
--- /dev/null
+++ b/models/components.py
@@ -0,0 +1,234 @@
+import torch.nn as nn
+import torch.utils.model_zoo as model_zoo
+
+
+__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
+ 'resnet152']
+
+
+model_urls = {
+ 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
+ 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
+ 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
+ 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
+ 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
+}
+
+
+def conv3x3(in_planes, out_planes, stride=1):
+    """Return a bias-free 3x3 Conv2d with padding 1."""
+    return nn.Conv2d(
+        in_planes,
+        out_planes,
+        kernel_size=3,
+        stride=stride,
+        padding=1,
+        bias=False,
+    )
+
+
+def conv1x1(in_planes, out_planes, stride=1):
+    """Return a bias-free 1x1 Conv2d."""
+    return nn.Conv2d(
+        in_planes,
+        out_planes,
+        kernel_size=1,
+        stride=stride,
+        bias=False,
+    )
+
+
+class BasicBlock(nn.Module):
+    """Residual block of two 3x3 conv + norm layers with an optional projected shortcut."""
+    expansion = 1
+
+    def __init__(self, inplanes, planes, stride=1, downsample=None, norm_layer=None):
+        super(BasicBlock, self).__init__()
+        norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
+        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
+        self.conv1 = conv3x3(inplanes, planes, stride)
+        self.bn1 = norm_layer(planes)
+        self.relu = nn.ReLU(inplace=True)
+        self.conv2 = conv3x3(planes, planes)
+        self.bn2 = norm_layer(planes)
+        self.downsample = downsample
+        self.stride = stride
+
+    def forward(self, x):
+        out = self.relu(self.bn1(self.conv1(x)))
+        out = self.bn2(self.conv2(out))
+
+        # The shortcut is the input itself unless a downsample module was supplied.
+        shortcut = x if self.downsample is None else self.downsample(x)
+
+        out += shortcut
+        return self.relu(out)
+
+
+class Bottleneck(nn.Module):
+    """Residual block: 1x1 conv, 3x3 conv, then a 1x1 conv widening to planes * expansion."""
+    expansion = 4
+
+    def __init__(self, inplanes, planes, stride=1, downsample=None, norm_layer=None):
+        super(Bottleneck, self).__init__()
+        norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
+        expanded = planes * self.expansion
+        # Both self.conv2 and self.downsample layers downsample the input when stride != 1
+        self.conv1 = conv1x1(inplanes, planes)
+        self.bn1 = norm_layer(planes)
+        self.conv2 = conv3x3(planes, planes, stride)
+        self.bn2 = norm_layer(planes)
+        self.conv3 = conv1x1(planes, expanded)
+        self.bn3 = norm_layer(expanded)
+        self.relu = nn.ReLU(inplace=True)
+        self.downsample = downsample
+        self.stride = stride
+
+    def forward(self, x):
+        out = self.relu(self.bn1(self.conv1(x)))
+        out = self.relu(self.bn2(self.conv2(out)))
+        out = self.bn3(self.conv3(out))
+
+        # The shortcut is the input itself unless a downsample module was supplied.
+        shortcut = x if self.downsample is None else self.downsample(x)
+
+        out += shortcut
+        return self.relu(out)
+
+
+class ResNet(nn.Module):
+    """ResNet classifier: 7x7 stem conv, max-pool, four residual stages, global pooling, linear head.
+
+    Args:
+        block: residual block class (BasicBlock or Bottleneck).
+        layers: number of blocks in each of the four stages.
+        num_classes: output size of the final linear layer.
+        zero_init_residual: if true, zero the weight of the last norm layer in every block.
+        norm_layer: normalisation layer factory; defaults to nn.BatchNorm2d.
+    """
+
+    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False, norm_layer=None):
+        super(ResNet, self).__init__()
+        norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
+        # Input width of the next stage; _make_layer() updates it as stages are built.
+        self.inplanes = 64
+        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
+                               bias=False)
+        self.bn1 = norm_layer(64)
+        self.relu = nn.ReLU(inplace=True)
+        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+        # (planes, stride) of layer1..layer4
+        stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
+        for index, (planes, stride) in enumerate(stage_specs):
+            stage = self._make_layer(block, planes, layers[index], stride=stride, norm_layer=norm_layer)
+            setattr(self, 'layer%d' % (index + 1), stage)
+        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
+        self.fc = nn.Linear(512 * block.expansion, num_classes)
+
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
+            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+
+        # Zero-initialize the last BN in each residual branch,
+        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
+        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
+        if zero_init_residual:
+            for m in self.modules():
+                if isinstance(m, Bottleneck):
+                    nn.init.constant_(m.bn3.weight, 0)
+                elif isinstance(m, BasicBlock):
+                    nn.init.constant_(m.bn2.weight, 0)
+
+    def _make_layer(self, block, planes, blocks, stride=1, norm_layer=None):
+        """Build one stage of ``blocks`` residual blocks; only the first uses ``stride``."""
+        norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
+        out_planes = planes * block.expansion
+        downsample = None
+        # A projection shortcut is needed whenever the first block changes resolution or width.
+        if stride != 1 or self.inplanes != out_planes:
+            downsample = nn.Sequential(
+                conv1x1(self.inplanes, out_planes, stride),
+                norm_layer(out_planes),
+            )
+
+        stage = [block(self.inplanes, planes, stride, downsample, norm_layer)]
+        self.inplanes = out_planes
+        stage.extend(block(self.inplanes, planes, norm_layer=norm_layer) for _ in range(1, blocks))
+
+        return nn.Sequential(*stage)
+
+    def forward(self, x):
+        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))
+
+        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
+            x = stage(x)
+
+        x = self.avgpool(x)
+        # Flatten everything but the batch axis before the linear head.
+        x = x.view(x.size(0), -1)
+        return self.fc(x)
+
+
+def resnet18(pretrained=False, **kwargs):
+    """Build a ResNet-18 (BasicBlock, stage depths [2, 2, 2, 2]).
+
+    Args:
+        pretrained (bool): If True, load the weights published at model_urls['resnet18']
+        **kwargs: forwarded to ResNet
+    """
+    net = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
+    if not pretrained:
+        return net
+    state_dict = model_zoo.load_url(model_urls['resnet18'])
+    net.load_state_dict(state_dict)
+    return net
+
+
+def resnet34(pretrained=False, **kwargs):
+    """Build a ResNet-34 (BasicBlock, stage depths [3, 4, 6, 3]).
+
+    Args:
+        pretrained (bool): If True, load the weights published at model_urls['resnet34']
+        **kwargs: forwarded to ResNet
+    """
+    net = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
+    if not pretrained:
+        return net
+    state_dict = model_zoo.load_url(model_urls['resnet34'])
+    net.load_state_dict(state_dict)
+    return net
+
+
+def resnet50(pretrained=False, **kwargs):
+    """Build a ResNet-50 (Bottleneck, stage depths [3, 4, 6, 3]).
+
+    Args:
+        pretrained (bool): If True, load the weights published at model_urls['resnet50']
+        **kwargs: forwarded to ResNet
+    """
+    net = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
+    if not pretrained:
+        return net
+    state_dict = model_zoo.load_url(model_urls['resnet50'])
+    net.load_state_dict(state_dict)
+    return net
+
+
+def resnet101(pretrained=False, **kwargs):
+    """Build a ResNet-101 (Bottleneck, stage depths [3, 4, 23, 3]).
+
+    Args:
+        pretrained (bool): If True, load the weights published at model_urls['resnet101']
+        **kwargs: forwarded to ResNet
+    """
+    net = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
+    if not pretrained:
+        return net
+    state_dict = model_zoo.load_url(model_urls['resnet101'])
+    net.load_state_dict(state_dict)
+    return net
+
+
+def resnet152(pretrained=False, **kwargs):
+    """Build a ResNet-152 (Bottleneck, stage depths [3, 8, 36, 3]).
+
+    Args:
+        pretrained (bool): If True, load the weights published at model_urls['resnet152']
+        **kwargs: forwarded to ResNet
+    """
+    net = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
+    if not pretrained:
+        return net
+    state_dict = model_zoo.load_url(model_urls['resnet152'])
+    net.load_state_dict(state_dict)
+    return net
\ No newline at end of file
diff --git a/models/loadmodel.py b/models/loadmodel.py
index 00607d7970add0e373be78279c091e31a45fcd34..eedd34e3e227f06a44d2b5265e69f161f7bd4041 100755
--- a/models/loadmodel.py
+++ b/models/loadmodel.py
@@ -4,6 +4,7 @@ from .pix2pixHD_model import define_G as define_G_HD
from .unet_model import UNet
from .video_model import MosaicNet
from .videoHD_model import MosaicNet as MosaicNet_HD
+from .BiSeNet_model import BiSeNet
def show_paramsnumber(net,netname='net'):
parameters = sum(param.numel() for param in net.parameters())
@@ -75,21 +76,35 @@ def video(opt):
netG.cuda()
return netG
-
-def unet_clean(opt):
- net = UNet(n_channels = 3, n_classes = 1)
+def bisenet(opt,type='roi'):
+ '''
+ Build a BiSeNet segmentation network (1 class, resnet18 context path,
+ train_flag=False), load its weights, switch it to eval mode and move it
+ to the GPU when opt.use_gpu is set.
+
+ type: roi or mosaic
+ 'roi' loads the weights from opt.model_path
+ 'mosaic' loads the weights from opt.mosaic_position_model_path
+ Any other value raises ValueError; previously the network was returned
+ without any weights loaded.
+ '''
+ net = BiSeNet(num_classes=1, context_path='resnet18',train_flag=False)
show_paramsnumber(net,'segment')
- net.load_state_dict(torch.load(opt.mosaic_position_model_path))
+ if type == 'roi':
+ net.load_state_dict(torch.load(opt.model_path))
+ elif type == 'mosaic':
+ net.load_state_dict(torch.load(opt.mosaic_position_model_path))
+ else:
+ # fail loudly instead of handing back a randomly initialised network
+ raise ValueError("type must be 'roi' or 'mosaic', got "+repr(type))
net.eval()
if opt.use_gpu:
net.cuda()
return net
-def unet(opt):
- net = UNet(n_channels = 3, n_classes = 1)
- show_paramsnumber(net,'segment')
- net.load_state_dict(torch.load(opt.model_path))
- net.eval()
- if opt.use_gpu:
- net.cuda()
- return net
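+# Legacy UNet loaders, kept commented out for reference; segmentation models are now loaded by bisenet().
+# def unet_clean(opt):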
+# net = UNet(n_channels = 3, n_classes = 1)
+# show_paramsnumber(net,'segment')
+# net.load_state_dict(torch.load(opt.mosaic_position_model_path))
+# net.eval()
+# if opt.use_gpu:
+# net.cuda()
+# return net
+
+# def unet(opt):
+# net = UNet(n_channels = 3, n_classes = 1)
+# show_paramsnumber(net,'segment')
+# net.load_state_dict(torch.load(opt.model_path))
+# net.eval()
+# if opt.use_gpu:
+# net.cuda()
+# return net
diff --git a/models/runmodel.py b/models/runmodel.py
index 11b14dd2878c69b59c7460b72201b7c8108bcdb7..2ff8414b0cbf9255415fe76747fc9a42eb579a3b 100755
--- a/models/runmodel.py
+++ b/models/runmodel.py
@@ -7,7 +7,7 @@ from util import data
import torch
import numpy as np
-def run_unet(img,net,size = 224,use_gpu = True):
+def run_segment(img,net,size = 360,use_gpu = True):
img = impro.resize(img,size)
img = data.im2tensor(img,use_gpu = use_gpu, bgr2rgb = False,use_transform = False , is0_1 = True)
mask = net(img)
@@ -60,18 +60,26 @@ def run_styletransfer(opt, net, img):
img = data.tensor2im(img)
return img
-def get_ROI_position(img,net,opt):
- mask = run_unet(img,net,size=224,use_gpu = opt.use_gpu)
+def get_ROI_position(img,net,opt,keepsize=True):
+ '''
+ Segment img with net and locate the bounding square of the resulting mask.
+ keepsize: when True the thresholded mask is passed through
+ impro.resize_like(mask, img) before the square is computed
+ (presumably resizing it to the size of img - TODO confirm).
+ return: mask,x,y,halfsize,area
+ NOTE: the tuple now has five elements (halfsize was added), so callers
+ unpacking four values must be updated.
+ '''
+ # segmentation runs at size=360
+ mask = run_segment(img,net,size=360,use_gpu = opt.use_gpu)
mask = impro.mask_threshold(mask,opt.mask_extend,opt.mask_threshold)
+ if keepsize:
+ mask = impro.resize_like(mask, img)
x,y,halfsize,area = impro.boundingSquare(mask, 1)
- return mask,x,y,area
+ return mask,x,y,halfsize,area
-def get_mosaic_position(img_origin,net_mosaic_pos,opt,threshold = 128 ):
- mask = run_unet(img_origin,net_mosaic_pos,size=224,use_gpu = opt.use_gpu)
- mask = impro.mask_threshold(mask,30,threshold)
+def get_mosaic_position(img_origin,net_mosaic_pos,opt):
+ '''
+ Locate the mosaic area of img_origin with the segmentation net.
+ The mask is computed at size=360, thresholded with opt.mask_threshold and,
+ unless opt.all_mosaic_area is set, reduced by impro.find_mostlikely_ROI.
+ return: x,y,size,mask
+ x,y,size are scaled to img_origin and clipped to its bounds;
+ mask is returned as computed at segmentation size (it is not rescaled here).
+ NOTE: the former 'threshold' parameter was removed; opt.mask_threshold is used.
+ '''
+ h,w = img_origin.shape[:2]
+ mask = run_segment(img_origin,net_mosaic_pos,size=360,use_gpu = opt.use_gpu)
+ # mask_1 = mask.copy()
+ # the ex_mun argument is derived from the image: 1/20 of its shorter side
+ mask = impro.mask_threshold(mask,ex_mun=int(min(h,w)/20),threshold=opt.mask_threshold)
if not opt.all_mosaic_area:
mask = impro.find_mostlikely_ROI(mask)
x,y,size,area = impro.boundingSquare(mask,Ex_mul=opt.ex_mult)
- rat = min(img_origin.shape[:2])/224.0
+ #Location fix: scale from the 360 segmentation size back to the original image
+ # assumes run_segment resizes the shorter side to 360 - TODO confirm against impro.resize
+ rat = min(h,w)/360.0
x,y,size = int(rat*x),int(rat*y),int(rat*size)
+ # keep the square inside the image
+ x,y = np.clip(x, 0, w),np.clip(y, 0, h)
+ size = np.clip(size, 0, min(w-x,h-y))
+ # print(x,y,size)
return x,y,size,mask
\ No newline at end of file
diff --git a/models/unet_model.py b/models/unet_model.py
index de16f646f3b40d2a7a39e5a008974ac9c0aa2dd0..5dce46e0ef4ec3ee90a9472f70aac9085d9b1d48 100755
--- a/models/unet_model.py
+++ b/models/unet_model.py
@@ -1,10 +1,101 @@
# This code clone from https://github.com/milesial/Pytorch-UNet
# LICENSE file : https://github.com/milesial/Pytorch-UNet/blob/master/LICENSE
-# full assembly of the sub-parts to form the complete net
-
+import torch
+import torch.nn as nn
import torch.nn.functional as F
-from .unet_parts import *
+
+class double_conv(nn.Module):
+ '''(conv => BN => ReLU) * 2
+
+ Both convolutions use a 3x3 kernel with padding=1; the first maps
+ in_ch -> out_ch channels, the second out_ch -> out_ch.
+ '''
+ def __init__(self, in_ch, out_ch):
+ super(double_conv, self).__init__()
+ self.conv = nn.Sequential(
+ nn.Conv2d(in_ch, out_ch, 3, padding=1),
+ nn.BatchNorm2d(out_ch),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(out_ch, out_ch, 3, padding=1),
+ nn.BatchNorm2d(out_ch),
+ nn.ReLU(inplace=True)
+ )
+
+ def forward(self, x):
+ # apply the sequential conv/BN/ReLU stack
+ x = self.conv(x)
+ return x
+
+
+class inconv(nn.Module):
+ '''Input stage: a single double_conv mapping in_ch -> out_ch channels.'''
+ def __init__(self, in_ch, out_ch):
+ super(inconv, self).__init__()
+ self.conv = double_conv(in_ch, out_ch)
+
+ def forward(self, x):
+ x = self.conv(x)
+ return x
+
+
+class down(nn.Module):
+ '''Downscaling stage: MaxPool2d(2) followed by double_conv(in_ch, out_ch).'''
+ def __init__(self, in_ch, out_ch):
+ super(down, self).__init__()
+ self.mpconv = nn.Sequential(
+ nn.MaxPool2d(2),
+ double_conv(in_ch, out_ch)
+ )
+
+ def forward(self, x):
+ x = self.mpconv(x)
+ return x
+
+class Upsample(nn.Module):
+ '''Module wrapper around F.interpolate (bilinear, align_corners=True) with a fixed scale_factor.'''
+ def __init__(self, scale_factor):
+ super(Upsample, self).__init__()
+ self.scale_factor = scale_factor
+ def forward(self, x):
+ return F.interpolate(x, scale_factor=self.scale_factor,mode='bilinear', align_corners=True)
+
+
+class up(nn.Module):
+ '''Upscaling stage: upsample x1, pad it to the size of x2, concatenate
+ [x2, x1] along dim 1 and apply double_conv(in_ch, out_ch).
+
+ bilinear: True uses Upsample(scale_factor=2); False uses a
+ ConvTranspose2d(in_ch//2, in_ch//2, 2, stride=2).
+ '''
+ def __init__(self, in_ch, out_ch, bilinear=True):
+ super(up, self).__init__()
+
+ # would be a nice idea if the upsampling could be learned too,
+ # but my machine does not have enough memory to handle all those weights
+ if bilinear:
+ self.up = Upsample(scale_factor=2)
+ else:
+ self.up = nn.ConvTranspose2d(in_ch//2, in_ch//2, 2, stride=2)
+
+ self.conv = double_conv(in_ch, out_ch)
+
+ def forward(self, x1, x2):
+ # x1: tensor to upsample, x2: tensor it is concatenated with
+ x1 = self.up(x1)
+
+ # sizes are read from dims 2 and 3 (height and width of an NCHW tensor)
+ diffY = x2.size()[2] - x1.size()[2]
+ diffX = x2.size()[3] - x1.size()[3]
+
+ # pad x1 (left, right, top, bottom) so that it matches x2 in dims 2 and 3
+ x1 = F.pad(x1, (diffX // 2, diffX - diffX//2,
+ diffY // 2, diffY - diffY//2))
+
+ # for padding issues, see
+ # https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a
+ # https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd
+
+ x = torch.cat([x2, x1], dim=1)
+ x = self.conv(x)
+ return x
+
+
+class outconv(nn.Module):
+ '''Output stage: 1x1 convolution in_ch -> out_ch followed by a Sigmoid.'''
+ def __init__(self, in_ch, out_ch):
+ super(outconv, self).__init__()
+ self.conv = nn.Sequential(
+ nn.Conv2d(in_ch, out_ch, 1),
+ nn.Sigmoid()
+ )
+
+ def forward(self, x):
+ x = self.conv(x)
+ return x
class UNet(nn.Module):
def __init__(self, n_channels, n_classes):
diff --git a/models/unet_parts.py b/models/unet_parts.py
deleted file mode 100755
index 2d93833babf7f16e753571517c7e3925f7d80b0d..0000000000000000000000000000000000000000
--- a/models/unet_parts.py
+++ /dev/null
@@ -1,102 +0,0 @@
-# This code clone from https://github.com/milesial/Pytorch-UNet
-# LICENSE file : https://github.com/milesial/Pytorch-UNet/blob/master/LICENSE
-
-# sub-parts of the U-Net model
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-
-class double_conv(nn.Module):
- '''(conv => BN => ReLU) * 2'''
- def __init__(self, in_ch, out_ch):
- super(double_conv, self).__init__()
- self.conv = nn.Sequential(
- nn.Conv2d(in_ch, out_ch, 3, padding=1),
- nn.BatchNorm2d(out_ch),
- nn.ReLU(inplace=True),
- nn.Conv2d(out_ch, out_ch, 3, padding=1),
- nn.BatchNorm2d(out_ch),
- nn.ReLU(inplace=True)
- )
-
- def forward(self, x):
- x = self.conv(x)
- return x
-
-
-class inconv(nn.Module):
- def __init__(self, in_ch, out_ch):
- super(inconv, self).__init__()
- self.conv = double_conv(in_ch, out_ch)
-
- def forward(self, x):
- x = self.conv(x)
- return x
-
-
-class down(nn.Module):
- def __init__(self, in_ch, out_ch):
- super(down, self).__init__()
- self.mpconv = nn.Sequential(
- nn.MaxPool2d(2),
- double_conv(in_ch, out_ch)
- )
-
- def forward(self, x):
- x = self.mpconv(x)
- return x
-
-class Upsample(nn.Module):
- def __init__(self, scale_factor):
- super(Upsample, self).__init__()
- self.scale_factor = scale_factor
- def forward(self, x):
- return F.interpolate(x, scale_factor=self.scale_factor,mode='bilinear', align_corners=True)
-
-
-class up(nn.Module):
- def __init__(self, in_ch, out_ch, bilinear=True):
- super(up, self).__init__()
-
- # would be a nice idea if the upsampling could be learned too,
- # but my machine do not have enough memory to handle all those weights
- if bilinear:
- self.up = Upsample(scale_factor=2)
- else:
- self.up = nn.ConvTranspose2d(in_ch//2, in_ch//2, 2, stride=2)
-
- self.conv = double_conv(in_ch, out_ch)
-
- def forward(self, x1, x2):
- x1 = self.up(x1)
-
- # input is CHW
- diffY = x2.size()[2] - x1.size()[2]
- diffX = x2.size()[3] - x1.size()[3]
-
- x1 = F.pad(x1, (diffX // 2, diffX - diffX//2,
- diffY // 2, diffY - diffY//2))
-
- # for padding issues, see
- # https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a
- # https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd
-
- x = torch.cat([x2, x1], dim=1)
- x = self.conv(x)
- return x
-
-
-class outconv(nn.Module):
- def __init__(self, in_ch, out_ch):
- super(outconv, self).__init__()
- self.conv = nn.Sequential(
- nn.Conv2d(in_ch, out_ch, 1),
- nn.Sigmoid()
- )
-
-
-
- def forward(self, x):
- x = self.conv(x)
- return x
diff --git a/models/videoHD_model.py b/models/videoHD_model.py
index 9f214c50daf398b55184c759a59d64cd8dd7bb0e..20e901f2c199e82a9e540b8067fe4917df338bd3 100644
--- a/models/videoHD_model.py
+++ b/models/videoHD_model.py
@@ -15,7 +15,7 @@ class encoder_2d(nn.Module):
### downsample
for i in range(n_downsampling):
mult = 2**i
- model += [nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2, padding=1),
+ model += [nn.ReflectionPad2d(1),nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2, padding=0),
norm_layer(ngf * mult * 2), activation]
self.model = nn.Sequential(*model)
@@ -39,16 +39,6 @@ class decoder_2d(nn.Module):
### upsample
for i in range(n_downsampling):
mult = 2**(n_downsampling - i)
- # if i%2 ==0:
- # model += [ nn.Upsample(scale_factor = 2, mode='nearest'),
- # nn.ReflectionPad2d(1),
- # nn.Conv2d(ngf * mult, int(ngf * mult / 2),kernel_size=3, stride=1, padding=0),
- # norm_layer(int(ngf * mult / 2)),
- # nn.ReLU(True)]
- # else:
-
- # model += [nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2), kernel_size=3, stride=2, padding=1, output_padding=1),
- # norm_layer(int(ngf * mult / 2)), activation]
# model += [ nn.Upsample(scale_factor = 2, mode='nearest'),
# nn.ReflectionPad2d(1),
diff --git a/models/video_model.py b/models/video_model.py
index 5c15726c76b432f9d7b1e83c2045c5f28afb8a84..4a095c6a577176ac10a1318adf8a71278c234c89 100644
--- a/models/video_model.py
+++ b/models/video_model.py
@@ -1,7 +1,6 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
-from .unet_parts import *
from .pix2pix_model import *
diff --git a/train/add/train.py b/train/add/train.py
index 4d57814c33668e13d2977ce85b744fe3fc868d30..bb8e953dc9572d912e01185c61b08223af9b8f92 100644
--- a/train/add/train.py
+++ b/train/add/train.py
@@ -2,8 +2,10 @@ import sys
import os
import random
import datetime
+import time
import numpy as np
+from matplotlib import pyplot as plt
import cv2
import torch
@@ -11,137 +13,144 @@ import torch.backends.cudnn as cudnn
import torch.nn as nn
from torch import optim
-import sys
sys.path.append("..")
sys.path.append("../..")
+from cores import Options
from util import mosaic,util,ffmpeg,filt,data
from util import image_processing as impro
-from models import unet_model
-from matplotlib import pyplot as plt
-import torch.backends.cudnn as cudnn
-
-LR = 0.0002
-EPOCHS = 100
-BATCHSIZE = 16
-LOADSIZE = 256
-FINESIZE = 224
-CONTINUE = True
-use_gpu = True
-SAVE_FRE = 1
-MAX_LOAD = 30000
-
-
-
-dir_img = './datasets/face/origin_image/'
-dir_mask = './datasets/face/mask/'
-dir_checkpoint = 'checkpoints/face/'
-
+from models import unet_model,BiSeNet_model
+
+
+'''
+--------------------------Get options--------------------------
+'''
+opt = Options()
+opt.parser.add_argument('--gpu_id',type=int,default=0, help='')
+opt.parser.add_argument('--lr',type=float,default=0.001, help='')
+opt.parser.add_argument('--finesize',type=int,default=360, help='')
+opt.parser.add_argument('--loadsize',type=int,default=400, help='')
+opt.parser.add_argument('--batchsize',type=int,default=8, help='')
+opt.parser.add_argument('--model',type=str,default='BiSeNet', help='BiSeNet or UNet')
+
+opt.parser.add_argument('--maxepoch',type=int,default=100, help='')
+opt.parser.add_argument('--savefreq',type=int,default=5, help='')
+opt.parser.add_argument('--maxload',type=int,default=1000000, help='')
+opt.parser.add_argument('--continuetrain', action='store_true', help='')
+opt.parser.add_argument('--startepoch',type=int,default=0, help='')
+opt.parser.add_argument('--dataset',type=str,default='./datasets/face/', help='')
+opt.parser.add_argument('--savename',type=str,default='face', help='')
+
+
+'''
+--------------------------Init--------------------------
+'''
+opt = opt.getparse()
+dir_img = os.path.join(opt.dataset,'origin_image')
+dir_mask = os.path.join(opt.dataset,'mask')
+dir_checkpoint = os.path.join('checkpoints/',opt.savename)
+util.makedirs(dir_checkpoint)
+util.writelog(os.path.join(dir_checkpoint,'loss.txt'),
+ str(time.asctime(time.localtime(time.time())))+'\n'+util.opt2str(opt))
+torch.cuda.set_device(opt.gpu_id)
def Totensor(img,use_gpu=True):
+ # Convert a numpy array to a float torch tensor, moved to the GPU when opt.use_gpu is set.
+ # NOTE(review): the use_gpu parameter is no longer read - the module-level
+ # opt.use_gpu decides; 'size' below is assigned but never used.
size=img.shape[0]
img = torch.from_numpy(img).float()
- if use_gpu:
+ if opt.use_gpu:
img = img.cuda()
return img
-
-def Toinputshape(imgs,masks,finesize,test_flag = False):
- batchsize = len(imgs)
- result_imgs=[];result_masks=[]
- for i in range(batchsize):
- # print(imgs[i].shape,masks[i].shape)
- img,mask = data.random_transform_image(imgs[i], masks[i], finesize, test_flag)
- # print(img.shape,mask.shape)
- mask = (mask.reshape(1,finesize,finesize)/255.0)
- img = (img.transpose((2, 0, 1))/255.0)
- result_imgs.append(img)
- result_masks.append(mask)
- result_imgs = np.array(result_imgs)
- result_masks = np.array(result_masks)
- return result_imgs,result_masks
-
-def batch_generator(images,masks,batchsize):
- dataset_images = []
- dataset_masks = []
-
- for i in range(int(len(images)/batchsize)):
- dataset_images.append(images[i*batchsize:(i+1)*batchsize])
- dataset_masks.append(masks[i*batchsize:(i+1)*batchsize])
- if len(images)%batchsize != 0:
- dataset_images.append(images[len(images)-len(images)%batchsize:])
- dataset_masks.append(masks[len(images)-len(images)%batchsize:])
-
- return dataset_images,dataset_masks
-
-def loadimage(dir_img,dir_mask,loadsize,eval_p):
- t1 = datetime.datetime.now()
- imgnames = os.listdir(dir_img)
- # imgnames = imgnames[:100]
- random.shuffle(imgnames)
- imgnames = imgnames[:MAX_LOAD]
- print('load images:',len(imgnames))
- imgnames = (f[:-4] for f in imgnames)
- images = []
- masks = []
- for imgname in imgnames:
- img = impro.imread(dir_img+imgname+'.jpg')
- mask = impro.imread(dir_mask+imgname+'.png',mod = 'gray')
- img = impro.resize(img,loadsize)
- mask = impro.resize(mask,loadsize)
- images.append(img)
- masks.append(mask)
- train_images,train_masks = images[0:int(len(masks)*(1-eval_p))],masks[0:int(len(masks)*(1-eval_p))]
- eval_images,eval_masks = images[int(len(masks)*(1-eval_p)):len(masks)],masks[int(len(masks)*(1-eval_p)):len(masks)]
- t2 = datetime.datetime.now()
- print('load data cost time:',(t2 - t1).seconds,'s')
- return train_images,train_masks,eval_images,eval_masks
-
-
-util.makedirs(dir_checkpoint)
-print('loading data......')
-train_images,train_masks,eval_images,eval_masks = loadimage(dir_img,dir_mask,LOADSIZE,0.2)
-dataset_eval_images,dataset_eval_masks = batch_generator(eval_images,eval_masks,BATCHSIZE)
-dataset_train_images,dataset_train_masks = batch_generator(train_images,train_masks,BATCHSIZE)
-
-
-net = unet_model.UNet(n_channels = 3, n_classes = 1)
-
-
-if CONTINUE:
+def loadimage(imagepaths,maskpaths,opt,test_flag = False):
+ '''
+ Load one batch of images and masks from disk and return them as tensors.
+ imagepaths,maskpaths: paths of the batch, paired by index
+ opt: provides loadsize, finesize and use_gpu
+ test_flag: forwarded to data.random_transform_image
+ return: images (batchsize,3,finesize,finesize), masks (batchsize,1,finesize,finesize),
+ both scaled by 1/255 and converted with Totensor
+ '''
+ batchsize = len(imagepaths)
+ images = np.zeros((batchsize,3,opt.finesize,opt.finesize), dtype=np.float32)
+ masks = np.zeros((batchsize,1,opt.finesize,opt.finesize), dtype=np.float32)
+ for i in range(len(imagepaths)):
+ # read and resize to loadsize, then transform down to finesize
+ img = impro.resize(impro.imread(imagepaths[i]),opt.loadsize)
+ mask = impro.resize(impro.imread(maskpaths[i],mod = 'gray'),opt.loadsize)
+ img,mask = data.random_transform_image(img, mask, opt.finesize, test_flag)
+ # HWC -> CHW
+ images[i] = (img.transpose((2, 0, 1))/255.0)
+ masks[i] = (mask.reshape(1,1,opt.finesize,opt.finesize)/255.0)
+ images = Totensor(images,opt.use_gpu)
+ masks = Totensor(masks,opt.use_gpu)
+
+ return images,masks
+
+
+'''
+--------------------------checking dataset--------------------------
+'''
+print('checking dataset...')
+imagepaths = sorted(util.Traversal(dir_img))[:opt.maxload]
+maskpaths = sorted(util.Traversal(dir_mask))[:opt.maxload]
+# validate before shuffling: with different counts the image/mask pairs cannot line up
+if len(imagepaths) != len(maskpaths) :
+ print('dataset error!')
+ # non-zero exit status so that a calling script can detect the failure
+ sys.exit(1)
+# both lists are shuffled with the same permutation, so pairs stay aligned
+data.shuffledata(imagepaths, maskpaths)
+img_num = len(imagepaths)
+print('find images:',img_num)
+# first 80% of the paths are used for training, the remaining 20% for evaluation
+imagepaths_train = (imagepaths[0:int(img_num*0.8)]).copy()
+maskpaths_train = (maskpaths[0:int(img_num*0.8)]).copy()
+imagepaths_eval = (imagepaths[int(img_num*0.8):]).copy()
+maskpaths_eval = (maskpaths[int(img_num*0.8):]).copy()
+
+'''
+--------------------------def network--------------------------
+'''
+if opt.model =='UNet':
+ net = unet_model.UNet(n_channels = 3, n_classes = 1)
+elif opt.model =='BiSeNet':
+ net = BiSeNet_model.BiSeNet(num_classes=1, context_path='resnet18')
+
+# Resume support: fall back to fresh weights when no last.pth checkpoint exists.
+if opt.continuetrain:
if not os.path.isfile(os.path.join(dir_checkpoint,'last.pth')):
- CONTINUE = False
+ opt.continuetrain = False
print('can not load last.pth, training on init weight.')
-if CONTINUE:
- net.load_state_dict(torch.load(dir_checkpoint+'last.pth'))
-if use_gpu:
+if opt.continuetrain:
+ net.load_state_dict(torch.load(os.path.join(dir_checkpoint,'last.pth')))
+ # epoch_log.txt holds the number of the last finished epoch;
+ # the context manager closes the file even if int() fails
+ with open(os.path.join(dir_checkpoint,'epoch_log.txt'),'r') as f:
+ opt.startepoch = int(f.read())
+if opt.use_gpu:
net.cuda()
cudnn.benchmark = True
+optimizer = torch.optim.Adam(net.parameters(), lr=opt.lr)
-optimizer = torch.optim.Adam(net.parameters(), lr=LR, betas=(0.9, 0.999))
-
-criterion = nn.BCELoss()
-# criterion = nn.L1Loss()
+if opt.model =='UNet':
+ criterion = nn.BCELoss()
+elif opt.model =='BiSeNet':
+ criterion = nn.BCELoss()
+ # criterion = BiSeNet_model.DiceLoss()
+'''
+--------------------------train--------------------------
+'''
+loss_plot = {'train':[],'eval':[]}
print('begin training......')
-for epoch in range(EPOCHS):
- random_save = random.randint(0, len(dataset_train_images))
+for epoch in range(opt.startepoch,opt.maxepoch):
+ random_save = random.randint(0, int(img_num*0.8/opt.batchsize))
+ data.shuffledata(imagepaths_train, maskpaths_train)
starttime = datetime.datetime.now()
- print('Epoch {}/{}.'.format(epoch + 1, EPOCHS))
+ util.writelog(os.path.join(dir_checkpoint,'loss.txt'),'Epoch {}/{}.'.format(epoch + 1, opt.maxepoch),True)
net.train()
- if use_gpu:
+ if opt.use_gpu:
net.cuda()
epoch_loss = 0
- for i,(img,mask) in enumerate(zip(dataset_train_images,dataset_train_masks)):
- # print(epoch,i,img.shape,mask.shape)
- img,mask = Toinputshape(img, mask, FINESIZE)
- img = Totensor(img,use_gpu)
- mask = Totensor(mask,use_gpu)
+ for i in range(int(img_num*0.8/opt.batchsize)):
+ img,mask = loadimage(imagepaths_train[i*opt.batchsize:(i+1)*opt.batchsize], maskpaths_train[i*opt.batchsize:(i+1)*opt.batchsize], opt)
- mask_pred = net(img)
- loss = criterion(mask_pred, mask)
- epoch_loss += loss.item()
+ if opt.model =='UNet':
+ mask_pred = net(img)
+ loss = criterion(mask_pred, mask)
+ epoch_loss += loss.item()
+ elif opt.model =='BiSeNet':
+ mask_pred, mask_pred_sup1, mask_pred_sup2 = net(img)
+ loss1 = criterion(mask_pred, mask)
+ loss2 = criterion(mask_pred_sup1, mask)
+ loss3 = criterion(mask_pred_sup2, mask)
+ loss = loss1 + loss2 + loss3
+ epoch_loss += loss1.item()
optimizer.zero_grad()
loss.backward()
@@ -151,30 +160,47 @@ for epoch in range(EPOCHS):
data.showresult(img,mask,mask_pred,os.path.join(dir_checkpoint,'result.png'),True)
if i == random_save:
data.showresult(img,mask,mask_pred,os.path.join(dir_checkpoint,'epoch_'+str(epoch+1)+'.png'),True)
+ epoch_loss = epoch_loss/int(img_num*0.8/opt.batchsize)
+ loss_plot['train'].append(epoch_loss)
- # torch.cuda.empty_cache()
- # # net.eval()
+ #val
epoch_loss_eval = 0
with torch.no_grad():
- #net.eval()
- for i,(img,mask) in enumerate(zip(dataset_eval_images,dataset_eval_masks)):
- # print(epoch,i,img.shape,mask.shape)
- img,mask = Toinputshape(img, mask, FINESIZE,test_flag=True)
- img = Totensor(img,use_gpu)
- mask = Totensor(mask,use_gpu)
- mask_pred = net(img)
- loss = criterion(mask_pred, mask)
+ # net.eval()
+ for i in range(int(img_num*0.2/opt.batchsize)):
+ img,mask = loadimage(imagepaths_eval[i*opt.batchsize:(i+1)*opt.batchsize], maskpaths_eval[i*opt.batchsize:(i+1)*opt.batchsize], opt,test_flag=True)
+ if opt.model =='UNet':
+ mask_pred = net(img)
+ elif opt.model =='BiSeNet':
+ mask_pred, _, _ = net(img)
+ # mask_pred = net(img)
+ loss= criterion(mask_pred, mask)
epoch_loss_eval += loss.item()
+ epoch_loss_eval = epoch_loss_eval/int(img_num*0.2/opt.batchsize)
+ loss_plot['eval'].append(epoch_loss_eval)
# torch.cuda.empty_cache()
+ #savelog
endtime = datetime.datetime.now()
- print('--- Epoch train_loss: {0:.6f} eval_loss: {1:.6f} Cost time: {2:} s'.format(
- epoch_loss/len(dataset_train_images),
- epoch_loss_eval/len(dataset_eval_images),
- (endtime - starttime).seconds)),
- torch.save(net.cpu().state_dict(),dir_checkpoint+'last.pth')
-
- if (epoch+1)%SAVE_FRE == 0:
- torch.save(net.cpu().state_dict(),dir_checkpoint+'epoch'+str(epoch+1)+'.pth')
-
+ util.writelog(os.path.join(dir_checkpoint,'loss.txt'),
+ '--- Epoch train_loss: {0:.6f} eval_loss: {1:.6f} Cost time: {2:} s'.format(
+ epoch_loss,
+ epoch_loss_eval,
+ (endtime - starttime).seconds),
+ True)
+ #plot
+ plt.plot(np.linspace(opt.startepoch+1,epoch+1,epoch+1-opt.startepoch),loss_plot['train'],label='train')
+ plt.plot(np.linspace(opt.startepoch+1,epoch+1,epoch+1-opt.startepoch),loss_plot['eval'],label='eval')
+ plt.xlabel('Epoch')
+ plt.ylabel('BCELoss')
+ plt.legend(loc=1)
+ plt.savefig(os.path.join(dir_checkpoint,'loss.jpg'))
+ plt.close()
+ #save network
+ torch.save(net.cpu().state_dict(),os.path.join(dir_checkpoint,'last.pth'))
+ f = open(os.path.join(dir_checkpoint,'epoch_log.txt'),'w+')
+ f.write(str(epoch+1))
+ f.close()
+ if (epoch+1)%opt.savefreq == 0:
+ torch.save(net.cpu().state_dict(),os.path.join(dir_checkpoint,'epoch'+str(epoch+1)+'.pth'))
print('network saved.')
diff --git a/train/clean/train.py b/train/clean/train.py
index 102d08aaa635ac21ecde069eaf2a6ca822732ae7..6ec5e881aeeaa6f18ace9c0433c1a858cb54bafc 100644
--- a/train/clean/train.py
+++ b/train/clean/train.py
@@ -21,6 +21,7 @@ import torch.backends.cudnn as cudnn
'''
opt = Options()
+opt.parser.add_argument('--gpu_id',type=int,default=0, help='')
opt.parser.add_argument('--N',type=int,default=25, help='')
opt.parser.add_argument('--lr',type=float,default=0.0002, help='')
opt.parser.add_argument('--beta1',type=float,default=0.5, help='')
@@ -32,14 +33,15 @@ opt.parser.add_argument('--lambda_gan',type=float,default=1, help='')
opt.parser.add_argument('--finesize',type=int,default=256, help='')
opt.parser.add_argument('--loadsize',type=int,default=286, help='')
opt.parser.add_argument('--batchsize',type=int,default=1, help='')
-opt.parser.add_argument('--perload_num',type=int,default=16, help='')
+opt.parser.add_argument('--perload_num',type=int,default=16, help='number of images pool')
opt.parser.add_argument('--norm',type=str,default='instance', help='')
+opt.parser.add_argument('--dataset',type=str,default='./datasets/face/', help='')
opt.parser.add_argument('--maxiter',type=int,default=10000000, help='')
opt.parser.add_argument('--savefreq',type=int,default=10000, help='')
opt.parser.add_argument('--startiter',type=int,default=0, help='')
opt.parser.add_argument('--continuetrain', action='store_true', help='')
-opt.parser.add_argument('--savename',type=str,default='MosaicNet', help='')
+opt.parser.add_argument('--savename',type=str,default='face', help='')
'''
@@ -50,19 +52,27 @@ dir_checkpoint = os.path.join('checkpoints/',opt.savename)
util.makedirs(dir_checkpoint)
util.writelog(os.path.join(dir_checkpoint,'loss.txt'),
str(time.asctime(time.localtime(time.time())))+'\n'+util.opt2str(opt))
+torch.cuda.set_device(opt.gpu_id)
N = opt.N
loss_sum = [0.,0.,0.,0.]
loss_plot = [[],[]]
item_plot = []
-videos = os.listdir('./dataset')
-videos.sort()
-lengths = []
-print('check dataset...')
-for video in videos:
- video_images = os.listdir('./dataset/'+video+'/ori')
- lengths.append(len(video_images))
+# list video dir
+# videonames: sub-directories of opt.dataset that contain an 'origin_image' folder
+# lengths: number of files in each of those folders, same order as videonames
+videonames = os.listdir(opt.dataset)
+videonames.sort()
+lengths = [];tmp = []
+print('Check dataset...')
+for video in videonames:
+ # skip opt.txt and any other entry that is not a video folder,
+ # instead of failing in os.listdir on a stray file
+ if os.path.isdir(os.path.join(opt.dataset,video,'origin_image')):
+ video_images = os.listdir(os.path.join(opt.dataset,video,'origin_image'))
+ lengths.append(len(video_images))
+ tmp.append(video)
+videonames = tmp
+video_num = len(videonames)
+#def network
+print('Init network...')
if opt.hd:
netG = videoHD_model.MosaicNet(3*N+1, 3, norm=opt.norm)
else:
@@ -71,7 +81,8 @@ loadmodel.show_paramsnumber(netG,'netG')
if opt.gan:
if opt.hd:
- netD = pix2pixHD_model.define_D(6, 64, 3, norm = opt.norm, use_sigmoid=False, num_D=2)
+ #netD = pix2pixHD_model.define_D(6, 64, 3, norm = opt.norm, use_sigmoid=False, num_D=1)
+ netD = pix2pixHD_model.define_D(6, 64, 3, norm = opt.norm, use_sigmoid=False, num_D=2,getIntermFeat=True)
else:
netD = pix2pix_model.define_D(3*2, 64, 'basic', norm = opt.norm)
netD.train()
@@ -106,36 +117,38 @@ if opt.use_gpu:
cudnn.benchmark = True
'''
---------------------------preload data--------------------------
+--------------------------preload data & data pool--------------------------
'''
-def loaddata():
- video_index = random.randint(0,len(videos)-1)
- video = videos[video_index]
+def loaddata(video_index):
+ '''
+ Build one training sample from the video videonames[video_index].
+ A random centre frame is chosen; N frames around it are mosaicked on the
+ fly with one shared set of random mosaic parameters and stacked as
+ channels 0..3N-1 of input_img, the centre frame's mask is the last channel.
+ ground_true is the unmodified centre frame.
+ return: input_img,ground_true after random_transform_video and im2tensor
+ '''
+
+ videoname = videonames[video_index]
+ # NOTE(review): random.randint raises ValueError when the video has too few
+ # frames for the N-frame window (lower bound greater than upper bound)
img_index = random.randint(int(N/2)+1,lengths[video_index]- int(N/2)-1)
+
input_img = np.zeros((opt.loadsize,opt.loadsize,3*N+1), dtype='uint8')
+ # this frame
+ this_mask = impro.imread(os.path.join(opt.dataset,videoname,'mask','%05d'%(img_index)+'.png'),'gray',loadsize=opt.loadsize)
+ input_img[:,:,-1] = this_mask
+ #print(os.path.join(opt.dataset,videoname,'origin_image','%05d'%(img_index)+'.jpg'))
+ ground_true = impro.imread(os.path.join(opt.dataset,videoname,'origin_image','%05d'%(img_index)+'.jpg'),loadsize=opt.loadsize)
+ # one parameter set per sample, reused for every frame in the loop below
+ mosaic_size,mod,rect_rat,father = mosaic.get_random_parameter(ground_true,this_mask)
+ # merge other frame
for i in range(0,N):
-
- img = cv2.imread('./dataset/'+video+'/mosaic/output_'+'%05d'%(img_index+i-int(N/2))+'.png')
- img = impro.resize(img,opt.loadsize)
- input_img[:,:,i*3:(i+1)*3] = img
- mask = cv2.imread('./dataset/'+video+'/mask/output_'+'%05d'%(img_index)+'.png',0)
- mask = impro.resize(mask,opt.loadsize)
- mask = impro.mask_threshold(mask,15,128)
- input_img[:,:,-1] = mask
-
- ground_true = cv2.imread('./dataset/'+video+'/ori/output_'+'%05d'%(img_index)+'.png')
- ground_true = impro.resize(ground_true,opt.loadsize)
-
+ # frame i of the window is frame img_index+i-int(N/2) of the video
+ img = impro.imread(os.path.join(opt.dataset,videoname,'origin_image','%05d'%(img_index+i-int(N/2))+'.jpg'),loadsize=opt.loadsize)
+ mask = impro.imread(os.path.join(opt.dataset,videoname,'mask','%05d'%(img_index+i-int(N/2))+'.png'),'gray',loadsize=opt.loadsize)
+ img_mosaic = mosaic.addmosaic_base(img, mask, mosaic_size,model = mod,rect_rat=rect_rat,father=father)
+ input_img[:,:,i*3:(i+1)*3] = img_mosaic
+ # to tensor
input_img,ground_true = data.random_transform_video(input_img,ground_true,opt.finesize,N)
input_img = data.im2tensor(input_img,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False,is0_1=False)
ground_true = data.im2tensor(ground_true,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False,is0_1=False)
return input_img,ground_true
-print('preloading data, please wait 5s...')
+print('Preloading data, please wait...')
if opt.perload_num <= opt.batchsize:
opt.perload_num = opt.batchsize*2
+#data pool
input_imgs = torch.rand(opt.perload_num,N*3+1,opt.finesize,opt.finesize).cuda()
ground_trues = torch.rand(opt.perload_num,3,opt.finesize,opt.finesize).cuda()
load_cnt = 0
@@ -144,14 +157,15 @@ def preload():
global load_cnt
while 1:
try:
+ video_index = random.randint(0,video_num-1)
ran = random.randint(0, opt.perload_num-1)
- input_imgs[ran],ground_trues[ran] = loaddata()
+ input_imgs[ran],ground_trues[ran] = loaddata(video_index)
load_cnt += 1
# time.sleep(0.1)
except Exception as e:
print("error:",e)
import threading
-t = threading.Thread(target=preload,args=()) #t为新创建的线程
+t = threading.Thread(target=preload,args=())
t.daemon = True
t.start()
time_start=time.time()
diff --git a/util/data.py b/util/data.py
index 567c397cf7b804cca388a74c93e9e7014aff4b44..1ffb0e1376aa21cf1ed178db7ac0585cb7bfe1bd 100755
--- a/util/data.py
+++ b/util/data.py
@@ -3,7 +3,7 @@ import numpy as np
import torch
import torchvision.transforms as transforms
import cv2
-from .image_processing import color_adjust
+from .image_processing import color_adjust,dctblur
transform = transforms.Compose([
transforms.ToTensor(),
@@ -61,6 +61,11 @@ def im2tensor(image_numpy, imtype=np.uint8, gray=False,bgr2rgb = True, reshape =
image_tensor = image_tensor.cuda()
return image_tensor
+def shuffledata(data,target):
+ '''
+ Shuffle data and target in place using the same random state for both.
+ The numpy RNG state is saved before the first shuffle and restored before
+ the second, so sequences of equal length receive the same permutation.
+ Nothing is returned.
+ '''
+ state = np.random.get_state()
+ np.random.shuffle(data)
+ # rewind the generator so that target is permuted identically
+ np.random.set_state(state)
+ np.random.shuffle(target)
def random_transform_video(src,target,finesize,N):
@@ -78,8 +83,8 @@ def random_transform_video(src,target,finesize,N):
target = target[:,::-1,:]
#random color
- alpha = random.uniform(-0.3,0.3)
- beta = random.uniform(-0.2,0.2)
+ alpha = random.uniform(-0.1,0.1)
+ beta = random.uniform(-0.1,0.1)
b = random.uniform(-0.05,0.05)
g = random.uniform(-0.05,0.05)
r = random.uniform(-0.05,0.05)
@@ -87,39 +92,54 @@ def random_transform_video(src,target,finesize,N):
src[:,:,i*3:(i+1)*3] = color_adjust(src[:,:,i*3:(i+1)*3],alpha,beta,b,g,r)
target = color_adjust(target,alpha,beta,b,g,r)
- # random_num = 15
- # bright = random.randint(-random_num*2,random_num*2)
- # for i in range(N*3): src[:,:,i]=np.clip(src[:,:,i].astype('int')+bright,0,255).astype('uint8')
- # for i in range(3): target[:,:,i]=np.clip(target[:,:,i].astype('int')+bright,0,255).astype('uint8')
-
- return src,target
+ #random blur
+ if random.random()<0.5:
+ interpolations = [cv2.INTER_LINEAR,cv2.INTER_CUBIC,cv2.INTER_LANCZOS4]
+ size_ran = random.uniform(0.7,1.5)
+ interpolation_up = interpolations[random.randint(0,2)]
+ interpolation_down =interpolations[random.randint(0,2)]
+ tmp = cv2.resize(src[:,:,:3*N], (int(finesize*size_ran),int(finesize*size_ran)),interpolation=interpolation_up)
+ src[:,:,:3*N] = cv2.resize(tmp, (finesize,finesize),interpolation=interpolation_down)
-def random_transform_image(img,mask,finesize,test_flag = False):
+ tmp = cv2.resize(target, (int(finesize*size_ran),int(finesize*size_ran)),interpolation=interpolation_up)
+ target = cv2.resize(tmp, (finesize,finesize),interpolation=interpolation_down)
- # randomsize = int(finesize*(1.2+0.2*random.random())+2)
+ return src,target
+def random_transform_single(img,out_shape):
+ out_h,out_w = out_shape
+ img = cv2.resize(img,(int(out_w*random.uniform(1.1, 1.5)),int(out_h*random.uniform(1.1, 1.5))))
h,w = img.shape[:2]
- loadsize = min((h,w))
- a = (float(h)/float(w))*random.uniform(0.9, 1.1)
-
- if h0.5:
- size_ran = random.uniform(0.5,1.5)
- img = cv2.resize(img, (int(finesize*size_ran),int(finesize*size_ran)))
- img = cv2.resize(img, (finesize,finesize))
- #img = cv2.blur(img, (random.randint(1,3), random.randint(1,3)))
+ if random.random()<0.5:
+ img = dctblur(img,random.randint(1,15))
+
+ # interpolations = [cv2.INTER_LINEAR,cv2.INTER_CUBIC,cv2.INTER_LANCZOS4]
+ # size_ran = random.uniform(0.7,1.5)
+ # img = cv2.resize(img, (int(finesize*size_ran),int(finesize*size_ran)),interpolation=interpolations[random.randint(0,2)])
+ # img = cv2.resize(img, (finesize,finesize),interpolation=interpolations[random.randint(0,2)])
+
+ #check shape
+ if img.shape[0]!= finesize or img.shape[1]!= finesize or mask.shape[0]!= finesize or mask.shape[1]!= finesize:
+ img = cv2.resize(img,(finesize,finesize))
+ mask = cv2.resize(mask,(finesize,finesize))
+ print('warning! shape error.')
return img,mask
def showresult(img1,img2,img3,name,is0_1 = False):
diff --git a/util/ffmpeg.py b/util/ffmpeg.py
index f91f888aab84f285487f6033e6b98ee0366a6ea1..8baca401a7af3a4027b761547f3c6d57666b77c4 100755
--- a/util/ffmpeg.py
+++ b/util/ffmpeg.py
@@ -2,11 +2,18 @@ import os,json
# ffmpeg 3.4.6
-def video2image(videopath,imagepath,fps=0):
- if fps == 0:
- os.system('ffmpeg -i "'+videopath+'" -f image2 '+imagepath)
+def video2image(videopath,imagepath,fps=0,start_time=0,last_time=0):
+ if start_time == 0:
+ if fps == 0:
+ os.system('ffmpeg -i "'+videopath+'" -f image2 '+'-q:v -0 '+imagepath)
+ else:
+ os.system('ffmpeg -i "'+videopath+'" -r '+str(fps)+' -f image2 '+'-q:v -0 '+imagepath)
else:
- os.system('ffmpeg -i "'+videopath+'" -r '+str(fps)+' -f image2 '+imagepath)
+ if fps == 0:
+ os.system('ffmpeg -ss '+start_time+' -t '+last_time+' -i "'+videopath+'" -f image2 '+'-q:v -0 '+imagepath)
+ else:
+ os.system('ffmpeg -ss '+start_time+' -t '+last_time+' -i "'+videopath+'" -r '+str(fps)+' -f image2 '+'-q:v -0 '+imagepath)
+
def video2voice(videopath,voicepath):
os.system('ffmpeg -i "'+videopath+'" -f mp3 '+voicepath)
@@ -53,4 +60,4 @@ def continuous_screenshot(videopath,savedir,fps):
fps: save how many images per second
'''
videoname = os.path.splitext(os.path.basename(videopath))[0]
- os.system('ffmpeg -i "'+videopath+'" -vf fps='+str(fps)+' '+savedir+'/'+videoname+'_%05d.jpg')
+ os.system('ffmpeg -i "'+videopath+'" -vf fps='+str(fps)+' -q:v -0 '+savedir+'/'+videoname+'_%06d.jpg')
diff --git a/util/image_processing.py b/util/image_processing.py
index 8c6455f79831a2eba224ec0bf34351810eae3d75..4ba9c78b085c8c41391ff053122287a438f1b0fa 100755
--- a/util/image_processing.py
+++ b/util/image_processing.py
@@ -3,13 +3,24 @@ import numpy as np
import random
import platform
+
system_type = 'Linux'
if 'Windows' in platform.platform():
system_type = 'Windows'
-def imread(file_path,mod = 'normal'):
+DCT_Q = np.array([[8,16,19,22,26,27,29,34],
+ [16,16,22,24,27,29,34,37],
+ [19,22,26,27,29,34,34,38],
+ [22,22,26,27,29,34,37,40],
+ [22,26,27,29,32,35,40,48],
+ [26,27,29,32,35,40,48,58],
+ [26,27,29,34,38,46,56,59],
+ [27,29,35,38,46,56,69,83]])
+
+def imread(file_path,mod = 'normal',loadsize = 0):
'''
- mod = 'normal' | 'gray' | 'all'
+ mod: 'normal' | 'gray' | 'all'
+ loadsize: 0->original
'''
if system_type == 'Linux':
if mod == 'normal':
@@ -26,6 +37,9 @@ def imread(file_path,mod = 'normal'):
img = cv2.imdecode(np.fromfile(file_path,dtype=np.uint8),0)
else:
img = cv2.imdecode(np.fromfile(file_path,dtype=np.uint8),-1)
+
+ if loadsize != 0:
+ img = resize(img, loadsize, interpolation=cv2.INTER_CUBIC)
return img
@@ -40,6 +54,13 @@ def imwrite(file_path,img):
cv2.imencode('.jpg', img)[1].tofile(file_path)
def resize(img,size,interpolation=cv2.INTER_LINEAR):
+ '''
+ cv2.INTER_NEAREST 最邻近插值点法
+ cv2.INTER_LINEAR 双线性插值法
+ cv2.INTER_AREA 邻域像素再取样插补
+ cv2.INTER_CUBIC 双立方插补,4*4大小的补点
+ cv2.INTER_LANCZOS4 8x8像素邻域的Lanczos插值
+ '''
h, w = img.shape[:2]
if np.min((w,h)) ==size:
return img
@@ -55,8 +76,6 @@ def resize_like(img,img_like):
return img
def ch_one2three(img):
- #zeros = np.zeros(img.shape[:2], dtype = "uint8")
- # ret,thresh = cv2.threshold(img,127,255,cv2.THRESH_BINARY)
res = cv2.merge([img, img, img])
return res
@@ -78,11 +97,11 @@ def color_adjust(img,alpha=1,beta=0,b=0,g=0,r=0,ran = False):
'''
img = img.astype('float')
if ran:
- alpha = random.uniform(-0.2,0.2)
- beta = random.uniform(-0.2,0.2)
- b = random.uniform(-0.1,0.1)
- g = random.uniform(-0.1,0.1)
- r = random.uniform(-0.1,0.1)
+ alpha = random.uniform(-0.1,0.1)
+ beta = random.uniform(-0.1,0.1)
+ b = random.uniform(-0.05,0.05)
+ g = random.uniform(-0.05,0.05)
+ r = random.uniform(-0.05,0.05)
img = (1+alpha)*img+255.0*beta
bgr = [b*255.0,g*255.0,r*255.0]
for i in range(3): img[:,:,i]=img[:,:,i]+bgr[i]
@@ -98,14 +117,6 @@ def makedataset(target_image,orgin_image):
img[0:256,256:512] = orgin_image[0:256,int(w/2-256/2):int(w/2+256/2)]
return img
-def image2folat(img,ch):
- size=img.shape[0]
- if ch == 1:
- img = (img[:,:,0].reshape(1,size,size)/255.0).astype(np.float32)
- else:
- img = (img.transpose((2, 0, 1))/255.0).astype(np.float32)
- return img
-
def spiltimage(img,size = 128):
h, w = img.shape[:2]
# size = min(h,w)
@@ -133,6 +144,34 @@ def mergeimage(img1,img2,orgin_image,size = 128):
result_img = cv2.add(new_img1,new_img2)
return result_img
+def block_dct_and_idct(g,QQF):
+ T = cv2.dct(g)
+ IT = np.round(cv2.idct(np.round(np.round(16.0*T/QQF)*QQF/16)))
+ return IT
+
+def image_dct_and_idct(I,QF):
+ h,w = I.shape
+ QQF = DCT_Q*QF
+ for i in range(int(h/8)):
+ for j in range(int(w/8)):
+ I[i*8:(i+1)*8,j*8:(j+1)*8] = block_dct_and_idct(I[i*8:(i+1)*8,j*8:(j+1)*8],QQF)
+ return I
+
+def dctblur(img,Q):
+ '''
+ Q: 1~20, 1->best
+ '''
+ h,w = img.shape[:2]
+ img[:8*int(h/8),:8*int(w/8)]
+ img = img.astype(np.float32)
+ if img.ndim == 2:
+ img = image_dct_and_idct(img, Q)
+ if img.ndim == 3:
+ h,w,ch = img.shape
+ for i in range(ch):
+ img[:,:,i] = image_dct_and_idct(img[:,:,i], Q)
+ return (np.clip(img,0,255)).astype(np.uint8)
+
def find_mostlikely_ROI(mask):
contours,hierarchy=cv2.findContours(mask, cv2.RETR_LIST,cv2.CHAIN_APPROX_SIMPLE)
if len(contours)>0:
@@ -199,8 +238,20 @@ def mask_area(mask):
return area
-def replace_mosaic(img_origin,img_fake,x,y,size,no_father):
- img_fake = resize(img_fake,size*2,interpolation=cv2.INTER_LANCZOS4)
+def Q_lapulase(resImg):
+ '''
+ Evaluate image quality
+ score > 20 normal
+ score > 50 clear
+ '''
+ img2gray = cv2.cvtColor(resImg, cv2.COLOR_BGR2GRAY)
+ img2gray = resize(img2gray,512)
+ res = cv2.Laplacian(img2gray, cv2.CV_64F)
+ score = res.var()
+ return score
+
+def replace_mosaic(img_origin,img_fake,mask,x,y,size,no_father):
+ img_fake = cv2.resize(img_fake,(size*2,size*2),interpolation=cv2.INTER_LANCZOS4)
if no_father:
img_origin[y-size:y+size,x-size:x+size]=img_fake
img_result = img_origin
@@ -212,13 +263,20 @@ def replace_mosaic(img_origin,img_fake,x,y,size,no_father):
#eclosion
eclosion_num = int(size/5)
entad = int(eclosion_num/2+2)
- mask = np.zeros(img_origin.shape, dtype='uint8')
- mask = cv2.rectangle(mask,(x-size+entad,y-size+entad),(x+size-entad,y+size-entad),(255,255,255),-1)
+
+ # mask = np.zeros(img_origin.shape, dtype='uint8')
+ # mask = cv2.rectangle(mask,(x-size+entad,y-size+entad),(x+size-entad,y+size-entad),(255,255,255),-1)
+ mask = cv2.resize(mask,(img_origin.shape[1],img_origin.shape[0]))
+ mask = ch_one2three(mask)
+
mask = (cv2.blur(mask, (eclosion_num, eclosion_num)))
- mask = mask/255.0
+ mask_tmp = np.zeros_like(mask)
+ mask_tmp[y-size:y+size,x-size:x+size] = mask[y-size:y+size,x-size:x+size]# Fix edge overflow
+ mask = mask_tmp/255.0
img_tmp = np.zeros(img_origin.shape)
img_tmp[y-size:y+size,x-size:x+size]=img_fake
img_result = img_origin.copy()
img_result = (img_origin*(1-mask)+img_tmp*mask).astype('uint8')
+
return img_result
\ No newline at end of file
diff --git a/util/mosaic.py b/util/mosaic.py
index fbeed3804085350351932f1c8e410bc96948b7b8..936227b39430b49bf90013d6157d86d7de01ec6f 100755
--- a/util/mosaic.py
+++ b/util/mosaic.py
@@ -10,10 +10,19 @@ def addmosaic(img,mask,opt):
elif opt.mosaic_size == 0:
img = addmosaic_autosize(img, mask, opt.mosaic_mod)
else:
- img = addmosaic_normal(img,mask,opt.mosaic_size,opt.output_size,model = opt.mosaic_mod)
+ img = addmosaic_base(img,mask,opt.mosaic_size,opt.output_size,model = opt.mosaic_mod)
return img
-def addmosaic_normal(img,mask,n,out_size = 0,model = 'squa_avg',rect_rat = 1.6):
+def addmosaic_base(img,mask,n,out_size = 0,model = 'squa_avg',rect_rat = 1.6,father=0):
+ '''
+ img: input image
+ mask: input mask
+ n: mosaic size
+ out_size: output size 0->original
+ model : squa_avg squa_mid squa_random squa_avg_circle_edge rect_avg
+ rect_rat: if model==rect_avg , mosaic w/h=rect_rat
+ father : father size, -1->no 0->auto
+ '''
n = int(n)
if out_size:
img = resize(img,out_size)
@@ -44,9 +53,9 @@ def addmosaic_normal(img,mask,n,out_size = 0,model = 'squa_avg',rect_rat = 1.6):
for j in range(int(w/n)):
img_mosaic[i*n:(i+1)*n,j*n:(j+1)*n,:]=img[i*n:(i+1)*n,j*n:(j+1)*n,:].mean(0).mean(0)
mask = cv2.threshold(mask,127,255,cv2.THRESH_BINARY)[1]
- mask = ch_one2three(mask)
- mask_inv = cv2.bitwise_not(mask)
- imgroi1 = cv2.bitwise_and(mask,img_mosaic)
+ _mask = ch_one2three(mask)
+ mask_inv = cv2.bitwise_not(_mask)
+ imgroi1 = cv2.bitwise_and(_mask,img_mosaic)
imgroi2 = cv2.bitwise_and(mask_inv,img)
img_mosaic = cv2.add(imgroi1,imgroi2)
@@ -58,12 +67,21 @@ def addmosaic_normal(img,mask,n,out_size = 0,model = 'squa_avg',rect_rat = 1.6):
if mask[int(i*n_h+n_h/2),int(j*n_w+n_w/2)] == 255:
img_mosaic[i*n_h:(i+1)*n_h,j*n_w:(j+1)*n_w,:]=img[i*n_h:(i+1)*n_h,j*n_w:(j+1)*n_w,:].mean(0).mean(0)
+ if father != -1:
+ if father==0:
+ mask = (cv2.blur(mask, (n, n)))
+ else:
+ mask = (cv2.blur(mask, (father, father)))
+ mask = ch_one2three(mask)/255.0
+ img_mosaic = (img*(1-mask)+img_mosaic*mask).astype('uint8')
+
return img_mosaic
def get_autosize(img,mask,area_type = 'normal'):
h,w = img.shape[:2]
- mask = cv2.resize(mask,(w,h))
- alpha = np.min((w,h))/512
+ size = np.min([h,w])
+ mask = resize(mask,size)
+ alpha = size/512
try:
if area_type == 'normal':
area = mask_area(mask)
@@ -85,66 +103,32 @@ def get_autosize(img,mask,area_type = 'normal'):
pass
return size
-def addmosaic_autosize(img,mask,model,area_type = 'normal'):
- h,w = img.shape[:2]
- mask = cv2.resize(mask,(w,h))
- alpha = np.min((w,h))/512
- try:
- if area_type == 'normal':
- area = mask_area(mask)
- elif area_type == 'bounding':
- w,h = cv2.boundingRect(mask)[2:]
- area = w*h
- except:
- area = 0
- area = area/(alpha*alpha)
- if area>50000:
- img_mosaic = addmosaic_normal(img,mask,alpha*((area-50000)/50000+12),model = model)
- elif 2000050000:
- img_mosaic = random_mod(img,mask,alpha*random.uniform(8,30)) #16,30
- elif 20000