iMet-get-aspect-ratio

From: https://www.kaggle.com/seefun/imet-get-aspect-ratio

Author: seefun

In [1]:
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np 
import pandas as pd 
import os
import PIL

from PIL import ImageOps
from tqdm import tqdm

# Register tqdm with pandas so that `progress_apply` (used by the image-size
# scans further down) is available and shows a progress bar.
tqdm.pandas()
In [2]:
# Load the training table; its `id` column is used later to locate each image file.
df_train = pd.read_csv('../input/train.csv')
print('Train.csv loaded!')
Train.csv loaded!
In [3]:
def get_img_size(img_id, path):
    """Return the size of the PNG image that belongs to ``img_id``.

    Args:
        img_id: Image identifier; the file opened is ``f'{path}{img_id}.png'``.
        path: Prefix placed directly in front of the file name. It is
            concatenated as-is, so a directory must already end with a
            path separator (e.g. ``'../input/train/'``).

    Returns:
        The ``size`` attribute of the opened PIL image, which callers in this
        notebook unpack as ``(width, height)``.
    """
    # Image.open is lazy and keeps the underlying file open. This function is
    # called once per image for the whole dataset, so close the handle
    # deterministically instead of leaving it to the garbage collector.
    with PIL.Image.open(f'{path}{img_id}.png') as img:
        return img.size
In [4]:
# Directory prefix of the training images; get_img_size concatenates it
# directly with the file name, hence the trailing slash.
train_path = '../input/train/'

# Look up the size of every training image (with a tqdm progress bar).
df_train['shape'] = df_train['id'].progress_apply(
    lambda img_id: get_img_size(img_id, train_path)
)

# Split each size tuple into its two components.
df_train['width'] = [size[0] for size in (tuple(pair) for pair in ((w, h) for w, h in df_train['shape'].values))]
df_train['height'] = [size[1] for size in (tuple(pair) for pair in ((w, h) for w, h in df_train['shape'].values))]

# Aspect ratio is defined here as height divided by width.
df_train['aspect_ratio'] = df_train['height'] / df_train['width']

# Standardise each feature: subtract its mean, divide by its np.std.
df_train['std_width'] = (
    (df_train['width'] - np.mean(df_train['width'])) / np.std(df_train['width'])
)
df_train['std_height'] = (
    (df_train['height'] - np.mean(df_train['height'])) / np.std(df_train['height'])
)
df_train['std_aspect_ratio'] = (
    (df_train['aspect_ratio'] - np.mean(df_train['aspect_ratio']))
    / np.std(df_train['aspect_ratio'])
)

# Persist the enriched table without the index column.
df_train.to_csv('train_with_wh.csv', index=False)
100%|██████████| 109237/109237 [04:55<00:00, 369.88it/s]
In [5]:
# Preview the first rows, including the newly added size and aspect-ratio columns.
df_train.head()
Out[5]:
id attribute_ids shape width height aspect_ratio std_width std_height std_aspect_ratio
0 1000483014d91860 147 616 813 (339, 300) 339 300 0.884956 -0.179729 -0.582173 -0.462639
1 1000fe2e667721fe 51 616 734 813 (423, 300) 423 300 0.709220 0.337904 -0.582173 -0.796301
2 1001614cb89646ee 776 (365, 300) 365 300 0.821918 -0.019509 -0.582173 -0.582326
3 10041eb49b297c08 51 671 698 813 1092 (300, 358) 300 358 1.193333 -0.420058 -0.133053 0.122864
4 100501c227f8beea 13 404 492 903 1093 (300, 528) 300 528 1.760000 -0.420058 1.183333 1.198770
In [6]:
# The sample submission supplies the `id` values used below to locate the test images.
df_test = pd.read_csv('../input/sample_submission.csv')
In [7]:
# Directory prefix of the test images; get_img_size concatenates it
# directly with the file name, hence the trailing slash.
test_path = '../input/test/'

# Look up the size of every test image (with a tqdm progress bar).
df_test['shape'] = df_test['id'].progress_apply(
    lambda img_id: get_img_size(img_id, test_path)
)

# Split each size tuple into its two components.
df_test['width'] = [size[0] for size in (tuple(pair) for pair in ((w, h) for w, h in df_test['shape'].values))]
df_test['height'] = [size[1] for size in (tuple(pair) for pair in ((w, h) for w, h in df_test['shape'].values))]

# Aspect ratio is defined here as height divided by width.
df_test['aspect_ratio'] = df_test['height'] / df_test['width']

# Standardise each feature with the test set's own statistics:
# subtract its mean, divide by its np.std.
df_test['std_width'] = (
    (df_test['width'] - np.mean(df_test['width'])) / np.std(df_test['width'])
)
df_test['std_height'] = (
    (df_test['height'] - np.mean(df_test['height'])) / np.std(df_test['height'])
)
df_test['std_aspect_ratio'] = (
    (df_test['aspect_ratio'] - np.mean(df_test['aspect_ratio']))
    / np.std(df_test['aspect_ratio'])
)

# Persist the enriched table without the index column.
df_test.to_csv('test_with_wh.csv', index=False)
100%|██████████| 7443/7443 [00:17<00:00, 414.74it/s]
In [8]:
# Preview the first rows, including the newly added size and aspect-ratio columns.
df_test.head()
Out[8]:
id attribute_ids shape width height aspect_ratio std_width std_height std_aspect_ratio
0 10023b2cc4ed5f68 0 1 2 (300, 533) 300 533 1.776667 -0.410398 1.348411 1.321055
1 100fbe75ed8fd887 0 1 2 (781, 300) 781 300 0.384123 2.499366 -0.602236 -1.480735
2 101b627524a04f19 0 1 2 (399, 300) 399 300 0.751880 0.188493 -0.602236 -0.740811
3 10234480c41284c6 0 1 2 (369, 300) 369 300 0.813008 0.007011 -0.602236 -0.617821
4 1023b0e2636dcea8 0 1 2 (398, 300) 398 300 0.753769 0.182444 -0.602236 -0.737010