Optimized ROI locations in video

d790fbdb · hypox64 · e6744942 · d790fbdb · d790fbdb · d790fbdb
11 changed files
--- a/README.md
+++ b/README.md
@@ -19,9 +19,8 @@ Download this version via [[Google Drive]](https://drive.google.com/open?id=1LTE

 Attentions:<br>
 - Require Windows_x86_64, Windows10 is better.<br>
- File path cannot contain spaces (" ").<br>
 - Run time depends on computer performance.<br>
- If output video cannot be played, you can try it with [potplayer](https://daumpotplayer.com/download/).
+- If the output video cannot be played, try playing it with [potplayer](https://daumpotplayer.com/download/).

 ### Run from source
 #### Prerequisites

--- a/clean_cache.py
+++ b/clean_cache.py
@@ -18,6 +18,7 @@ def Traversal(filedir):
            dir_list.append(os.path.join(root,dir))
            Traversal(dir)
    return file_list,dir_list
+
 def is_img(path):
    ext = os.path.splitext(path)[1]
    ext = ext.lower()
@@ -38,13 +39,12 @@ file_list,dir_list = Traversal('./')
 for file in file_list:
    if ('tmp' in file) | ('pth' in file)|('pycache' in file) | is_video(file) | is_img(file):
        if os.path.exists(file):
-            os.remove(file)
-            print('remove file:',file)
+            if 'imgs' not in file:
+                os.remove(file)
+                print('remove file:',file)

 for dir in dir_list:
    if ('tmp'in dir)|('pycache'in dir):
        if os.path.exists(dir):
            shutil.rmtree(dir)
-
-            # os.rmdir(dir)
            print('remove dir:',dir)
\ No newline at end of file
--- a/deepmosaic.py
+++ b/deepmosaic.py
@@ -7,7 +7,7 @@ import cv2
 import torch

 from models import runmodel,loadmodel
-from util import mosaic,util,ffmpeg
+from util import mosaic,util,ffmpeg,filt
 from util import image_processing as impro
 from options import Options

@@ -31,17 +31,34 @@ if opt.mode == 'add':
        ffmpeg.video2image(path,'./tmp/video2image/output_%05d.'+opt.tempimage_type)
        imagepaths=os.listdir('./tmp/video2image')
        imagepaths.sort()
+
+        # get position
+        positions = []
        for imagepath in imagepaths:
            imagepath = os.path.join('./tmp/video2image',imagepath)
-            print('Add Mosaic:',imagepath)
+            print('Find ROI location:',imagepath)
            img = impro.imread(imagepath)
-            img = runmodel.add_mosaic_to_image(img,net,opt)
+            mask,x,y,area = runmodel.get_ROI_position(img,net,opt)
+            positions.append([x,y,area])      
+            cv2.imwrite(os.path.join('./tmp/ROI_mask',
+                                      os.path.basename(imagepath)),mask)
+        print('Optimized ROI locations...')
+        mask_index = filt.position_medfilt(np.array(positions), 7)
+
+        # add mosaic
+        print('Add mosaic to images...')
+        for i in range(len(imagepaths)):
+            mask_path = os.path.join('./tmp/ROI_mask',imagepaths[mask_index[i]])
+            mask = impro.imread(mask_path)
+            img = impro.imread(os.path.join('./tmp/video2image',imagepaths[i]))
+            img = mosaic.addmosaic(img, mask, opt)
            cv2.imwrite(os.path.join('./tmp/addmosaic_image',
-                                        os.path.basename(imagepath)),img)
+                                        os.path.basename(imagepaths[i])),img)
+
        ffmpeg.image2video( fps,
                            './tmp/addmosaic_image/output_%05d.'+opt.tempimage_type,
                            './tmp/voice_tmp.mp3',
-                             os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_AddMosaic.mp4'))
+                             os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_add.mp4'))

 elif opt.mode == 'clean':
    netG = loadmodel.pix2pix(opt)
@@ -66,16 +83,18 @@ elif opt.mode == 'clean':
        positions = []
        imagepaths=os.listdir('./tmp/video2image')
        imagepaths.sort()
+
+        # get position
        for imagepath in imagepaths:
            imagepath=os.path.join('./tmp/video2image',imagepath)
            img_origin = impro.imread(imagepath)
            x,y,size = runmodel.get_mosaic_position(img_origin,net_mosaic_pos,opt)
            positions.append([x,y,size])
            print('Find Positions:',imagepath)
-        
        positions =np.array(positions)
-        for i in range(3):positions[:,i] = impro.medfilt(positions[:,i],opt.medfilt_num)
+        for i in range(3):positions[:,i] = filt.medfilt(positions[:,i],opt.medfilt_num)

+        # clean mosaic
        for i,imagepath in enumerate(imagepaths,0):
            imagepath=os.path.join('./tmp/video2image',imagepath)
            x,y,size = positions[i][0],positions[i][1],positions[i][2]
@@ -90,6 +109,6 @@ elif opt.mode == 'clean':
        ffmpeg.image2video( fps,
                    './tmp/replace_mosaic/output_%05d.'+opt.tempimage_type,
                    './tmp/voice_tmp.mp3',
-                     os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_CleanMosaic.mp4'))                      
+                     os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_clean.mp4'))                      

 util.clean_tempfiles(tmp_init = False)
\ No newline at end of file
--- a/models/loadmodel.py
+++ b/models/loadmodel.py
@@ -3,7 +3,7 @@ from .pix2pix_model import *
 from .unet_model import UNet

 def pix2pix(opt):
-    print(opt.model_path,opt.netG)
+    # print(opt.model_path,opt.netG)
    netG = define_G(3, 3, 64, opt.netG, norm='batch',use_dropout=True, init_type='normal', gpu_ids=[])

    netG.load_state_dict(torch.load(opt.model_path))

--- a/models/runmodel.py
+++ b/models/runmodel.py
@@ -32,13 +32,11 @@ def run_pix2pix(img,net,size = 128,use_gpu = True):
    img_fake = data.tensor2im(img_fake)
    return img_fake

-
-#find mosaic position in image and add mosaic to this image
-def add_mosaic_to_image(img,net,opt):
+def get_ROI_position(img,net,opt):
    mask = run_unet_rectim(img,net,use_gpu = opt.use_gpu)
    mask = impro.mask_threshold(mask,opt.mask_extend,opt.mask_threshold)
-    img = mosaic.addmosaic(img,mask,opt.mosaic_size,opt.output_size,model = opt.mosaic_mod)
-    return img
+    x,y,halfsize,area = impro.boundingSquare(mask, 1)
+    return mask,x,y,area


 def get_mosaic_position(img_origin,net_mosaic_pos,opt):

--- a/options.py
+++ b/options.py
@@ -17,9 +17,9 @@ class Options():
        self.parser.add_argument('--tempimage_type', type=str, default='png',help='type of temp image, png | jpg, png is better but occupy more storage space')

        #AddMosaic
-        self.parser.add_argument('--mosaic_mod', type=str, default='squa_avg',help='type of mosaic -> squa_avg | squa_random | squa_avg_circle_edge | rect_avg')
-        self.parser.add_argument('--mosaic_size', type=int, default=30,help='mosaic size')
-        self.parser.add_argument('--mask_extend', type=int, default=20,help='more mosaic area')
+        self.parser.add_argument('--mosaic_mod', type=str, default='squa_avg',help='type of mosaic -> squa_avg | squa_random | squa_avg_circle_edge | rect_avg | random')
+        self.parser.add_argument('--mosaic_size', type=int, default=0,help='mosaic size,if 0 auto size')
+        self.parser.add_argument('--mask_extend', type=int, default=10,help='more mosaic area')
        self.parser.add_argument('--mask_threshold', type=int, default=64,help='threshold of recognize mosaic position 0~255')
        self.parser.add_argument('--output_size', type=int, default=0,help='size of output file,if 0 -> origin')
        
@@ -45,6 +45,6 @@ class Options():
        if self.opt.mosaic_position_model_path == 'auto':
            _path = os.path.join(os.path.split(self.opt.model_path)[0],'mosaic_position.pth')
            self.opt.mosaic_position_model_path = _path
-            print(self.opt.mosaic_position_model_path)
+            # print(self.opt.mosaic_position_model_path)

        return self.opt
\ No newline at end of file
--- a/util/ffmpeg.py
+++ b/util/ffmpeg.py
 import os,json

 def video2image(videopath,imagepath):
-    os.system('ffmpeg -i '+videopath+' -f image2 '+imagepath)
+    os.system('ffmpeg -i "'+videopath+'" -f image2 '+imagepath)

 def video2voice(videopath,voicepath):
    os.system('ffmpeg -i '+videopath+' -f mp3 '+voicepath)
@@ -12,11 +12,11 @@ def image2video(fps,imagepath,voicepath,videopath):
    os.system('ffmpeg -i ./tmp/video_tmp.mp4 -i '+voicepath+' -vcodec copy -acodec copy '+videopath)

 def get_video_infos(videopath):
-    cmd_str =  'ffprobe -v quiet -print_format json -show_format -show_streams -i "' +  videopath  + '"'  
+    cmd_str =  'ffprobe -v quiet -print_format json -show_format -show_streams -i "' + videopath + '"'  
    out_string = os.popen(cmd_str).read()
    infos = json.loads(out_string)
    fps = eval(infos['streams'][0]['avg_frame_rate'])
-    endtime = float(infos['streams'][0]['duration'])
+    endtime = float(infos['format']['duration'])
    width = int(infos['streams'][0]['width'])
    height = int(infos['streams'][0]['height'])
    return fps,endtime,width,height

--- a/util/filt.py
+++ b/util/filt.py
+import numpy as np
+
+def less_zero(arr,num = 7):
+    """Fill runs of zeros in ``arr`` from the non-zero samples around them.
+
+    ``arr`` is modified in place and also returned. Alongside it an index
+    array is returned: ``index[k]`` is the position whose value was copied
+    into ``arr[k]`` (``k`` itself for entries that were left untouched).
+
+    A run of at most ``2*num`` zeros is split in the middle: the first part
+    takes the value found 3 positions before the run starts, the second
+    part the value found 2 positions after the first non-zero entry that
+    follows the run. For longer runs only the first ``num`` and the last
+    ``num`` entries are filled; the middle of the run stays zero.
+
+    The first two and last two entries are never examined, and a run is
+    only filled once a non-zero entry follows it inside the scanned range.
+    NOTE(review): zeros presumably mark frames without a detection --
+    confirm against the caller.
+    """
+    index = np.arange(len(arr), dtype='int')
+    cnt = 0  # length of the zero run that ends just before position i
+    for i in range(2,len(arr)-2):
+        if arr[i] == 0:
+            cnt += 1
+            continue
+        if cnt == 0:
+            continue
+        # Source positions on either side of the run. The left one must be
+        # clamped: for a run starting at position 2 it would be -1, which
+        # numpy reads as the LAST element rather than an earlier one.
+        left = max(i-cnt-1-2, 0)
+        right = i+2
+        if cnt <= num*2:
+            mid = round(i-cnt/2)
+            arr[i-cnt:mid] = arr[left]
+            arr[mid:i] = arr[right]
+            index[i-cnt:mid] = left
+            index[mid:i] = right
+        else:
+            arr[i-cnt:i-cnt+num] = arr[left]
+            arr[i-num:i] = arr[right]
+            index[i-cnt:i-cnt+num] = left
+            index[i-num:i] = right
+        cnt = 0
+    return arr,index
+
+def medfilt(data,window):
+    """Median-filter a 1-D sequence with a sliding window of odd length.
+
+    The input is zero-padded by ``(window-1)/2`` samples on both sides, so
+    the output has the same length as ``data``; values near either end are
+    therefore pulled towards zero. The result array uses the type of
+    ``data[0]`` as its dtype. Empty input is returned as an empty array.
+
+    Raises ValueError if ``window`` is even or negative.
+    """
+    if window%2 == 0 or window < 0:
+        # Raise instead of print + exit(0): exit(0) reports success to the
+        # shell, and the check rejects even windows, so "odd" is required.
+        raise ValueError('the medfilt window must be a positive odd number')
+    if len(data) == 0:
+        return np.asarray(data).copy()
+    pad = (window-1)//2
+    dtype = type(data[0])
+    pad_data = np.zeros(len(data)+window-1, dtype = dtype)
+    pad_data[pad:pad+len(data)] = data
+    result = np.zeros(len(data), dtype = dtype)
+    for i in range(len(data)):
+        result[i] = np.median(pad_data[i:i+window])
+    return result
+
+def position_medfilt(positions,window):
+    """Choose, for every frame, the frame whose ROI mask should be used.
+
+    ``positions`` is indexed as ``positions[:,0]`` (x), ``positions[:,1]``
+    (y) and ``positions[:,2]`` (area). Each column is first passed through
+    ``less_zero`` (which edits it in place) and then median-filtered. A
+    frame counts as stable when its x and y are within roughly 20% and its
+    area within roughly 40% of the filtered values. Runs of unstable frames
+    borrow the mask index of the stable frames on either side: the first
+    half of the run from the frame before it, the second half from the
+    frame after it.
+
+    Returns the array of mask indices, one entry per frame.
+    """
+    xs,mask_index = less_zero(positions[:,0],window)
+    ys = less_zero(positions[:,1],window)[0]
+    areas = less_zero(positions[:,2],window)[0]
+    xs_med = medfilt(xs, window)
+    ys_med = medfilt(ys, window)
+    areas_med = medfilt(areas, window)
+
+    run = 0  # length of the current run of unstable frames
+    for i in range(1,len(xs)):
+        # The +1 in each denominator avoids dividing by a zero position.
+        stable = (0.8<xs_med[i]/(xs[i]+1)<1.2
+                  and 0.8<ys_med[i]/(ys[i]+1)<1.2
+                  and 0.6<areas_med[i]/(areas[i]+1)<1.4)
+        if not stable:
+            # Provisionally reuse the previous frame's mask.
+            mask_index[i] = mask_index[i-1]
+            run += 1
+            continue
+        if run != 0:
+            # A stable frame closes the run: re-point its second half at
+            # this frame; the first half keeps the mask from before the run.
+            half = round(i-run/2)
+            mask_index[i-run:half] = mask_index[i-run]
+            mask_index[half:i] = mask_index[i]
+            run = 0
+    return mask_index
+
+# def main():
+#     import matplotlib.pyplot as plt
+#     positions = np.load('../test_pos.npy')
+#     positions_new = np.load('../test_pos.npy')
+#     print(positions.shape)
+#     mask_index = position_medfilt(positions.copy(), 7)
+#     x = positions_new[2]
+
+#     x_new = []
+#     for i in range(len(x)):
+#         x_new.append(x[mask_index[i]])
+
+#     plt.subplot(211)
+#     plt.plot(x)
+#     plt.subplot(212)
+#     plt.plot(x_new)
+#     plt.show()
+
+# if __name__ == '__main__':
+#     main()
\ No newline at end of file
--- a/util/image_processing.py
+++ b/util/image_processing.py
@@ -17,19 +17,6 @@ def resize(img,size):
        res = cv2.resize(img,(size, int(size*h/w)))
    return res

-
-def medfilt(data,window):
-    if window%2 == 0 or window < 0:
-        print('Error: the medfilt window must be even number')
-        exit(0)
-    pad = int((window-1)/2)
-    pad_data = np.zeros(len(data)+window-1, dtype = type(data[0]))
-    result = np.zeros(len(data),dtype = type(data[0]))
-    pad_data[pad:pad+len(data)]=data[:]
-    for i in range(len(data)):
-        result[i] = np.median(pad_data[i:i+window])
-    return result
-
 def ch_one2three(img):
    #zeros = np.zeros(img.shape[:2], dtype = "uint8")
    # ret,thresh = cv2.threshold(img,127,255,cv2.THRESH_BINARY)

--- a/util/mosaic.py
+++ b/util/mosaic.py
@@ -4,8 +4,16 @@ import os
 import random
 from .image_processing import resize,ch_one2three,mask_area

+def addmosaic(img,mask,opt):
+    # Entry point for adding a mosaic to img inside mask; opt selects the variant.
+    if opt.mosaic_mod == 'random':
+        return addmosaic_random(img,mask)
+    # mosaic_size == 0 requests a block size derived from the mask area.
+    if opt.mosaic_size == 0:
+        return addmosaic_autosize(img, mask, opt.mosaic_mod)
+    return addmosaic_normal(img,mask,opt.mosaic_size,opt.output_size,model = opt.mosaic_mod)

-def addmosaic(img,mask,n,out_size = 0,model = 'squa_avg'):
+def addmosaic_normal(img,mask,n,out_size = 0,model = 'squa_avg'):
    n = int(n)
    if out_size:
        img = resize(img,out_size)      
@@ -47,19 +55,28 @@ def addmosaic(img,mask,n,out_size = 0,model = 'squa_avg'):
    
    return img_mosaic

-def random_mosaic_mod(img,mask,n):
-    ran=random.random()
-    if ran < 0.1:
-        img = addmosaic(img,mask,n,model = 'squa_random')
-    if 0.1 <= ran < 0.3:
-        img = addmosaic(img,mask,n,model = 'squa_avg')
-    elif 0.3 <= ran <0.5:
-        img = addmosaic(img,mask,n,model = 'squa_avg_circle_edge')
+def addmosaic_autosize(img,mask,model):
+    h,w = img.shape[:2]
+    mask = cv2.resize(mask,(w,h))
+    alpha = np.min((w,h))/512
+    try:
+        area = mask_area(mask)
+    except:
+        area = 0
+    area = area/(alpha*alpha)
+    if area>50000:
+        img_mosaic = addmosaic_normal(img,mask,alpha*((area-50000)/50000+16),model = model)
+    elif 20000<area<=50000:
+        img_mosaic = addmosaic_normal(img,mask,alpha*((area-20000)/30000+12),model = model)
+    elif 5000<area<=20000:
+        img_mosaic = addmosaic_normal(img,mask,alpha*((area-5000)/20000+8),model = model)
+    elif 0<=area<=5000:
+        img_mosaic = addmosaic_normal(img,mask,alpha*((area-0)/5000+4),model = model)
    else:
-        img = addmosaic(img,mask,n,model = 'rect_avg')
-    return img
+        pass
+    return img_mosaic

-def random_mosaic(img,mask):
+def addmosaic_random(img,mask):
    # img = resize(img,512)
    h,w = img.shape[:2]
    mask = cv2.resize(mask,(w,h))
@@ -71,13 +88,25 @@ def random_mosaic(img,mask):
        area = 0
    area = area/(alpha*alpha)
    if area>50000:
-        img_mosaic = random_mosaic_mod(img,mask,alpha*random.uniform(16,28))
+        img_mosaic = random_mod(img,mask,alpha*random.uniform(16,28))
    elif 20000<area<=50000:
-        img_mosaic = random_mosaic_mod(img,mask,alpha*random.uniform(12,20))
+        img_mosaic = random_mod(img,mask,alpha*random.uniform(12,20))
    elif 5000<area<=20000:
-        img_mosaic = random_mosaic_mod(img,mask,alpha*random.uniform(8,15))
+        img_mosaic = random_mod(img,mask,alpha*random.uniform(8,15))
    elif 0<=area<=5000:
-        img_mosaic = random_mosaic_mod(img,mask,alpha*random.uniform(4,10))
+        img_mosaic = random_mod(img,mask,alpha*random.uniform(4,10))
    else:
        pass
    return img_mosaic
+
+def random_mod(img,mask,n):  # mosaic img inside mask with ONE randomly drawn style, block size n
+    ran=random.random()
+    if ran < 0.1:  # 10%: squa_random
+        img = addmosaic_normal(img,mask,n,model = 'squa_random')
+    elif 0.1 <= ran < 0.3:  # 20%; must be elif, a plain `if` lets ran < 0.1 also hit the else branch
+        img = addmosaic_normal(img,mask,n,model = 'squa_avg')
+    elif 0.3 <= ran <0.5:  # 20%: squa_avg_circle_edge
+        img = addmosaic_normal(img,mask,n,model = 'squa_avg_circle_edge')
+    else:  # remaining 50%: rect_avg
+        img = addmosaic_normal(img,mask,n,model = 'rect_avg')
+    return img
\ No newline at end of file
--- a/util/util.py
+++ b/util/util.py
@@ -29,7 +29,6 @@ def  writelog(path,log):
    f = open(path,'a+')
    f.write(log+'\n')

-
 def clean_tempfiles(tmp_init=True):
    if os.path.isdir('./tmp'):   
        shutil.rmtree('./tmp')
@@ -39,6 +38,7 @@ def clean_tempfiles(tmp_init=True):
        os.makedirs('./tmp/addmosaic_image')
        os.makedirs('./tmp/mosaic_crop')
        os.makedirs('./tmp/replace_mosaic')
+        os.makedirs('./tmp/ROI_mask')

 def file_init(opt):
    if not os.path.isdir(opt.result_dir):