# From: https://www.kaggle.com/seefun/imet-get-aspect-ratio
# Author: seefun
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import PIL
from PIL import ImageOps
from tqdm import tqdm
tqdm.pandas()
# Read the training table from ../input and report on stdout once it is loaded.
df_train = pd.read_csv(filepath_or_buffer='../input/train.csv')
print('Train.csv loaded!')
def get_img_size(img_id, path):
    """Return the size of the PNG image stored at ``{path}{img_id}.png``.

    Args:
        img_id: Image identifier, i.e. the file name without the ``.png``
            suffix.
        path: Directory prefix. It is concatenated with ``img_id`` as-is, so
            it must already end with a path separator.

    Returns:
        The ``size`` attribute of the opened PIL image; callers in this file
        unpack it as ``(width, height)``.
    """
    # Image.open keeps the underlying file open until the image is closed.
    # This function is called once per row, so release each handle right
    # away instead of leaving it to the garbage collector.
    with PIL.Image.open(f'{path}{img_id}.png') as img:
        return img.size
def _add_size_features(df, img_dir):
    """Add image-size feature columns to ``df`` in place.

    For every value in ``df['id']`` the matching image under ``img_dir`` is
    measured with ``get_img_size``. The following columns are then added, in
    this order: ``shape``, ``width``, ``height``, ``aspect_ratio``
    (height / width), ``std_width``, ``std_height``, ``std_aspect_ratio``.

    Args:
        df: DataFrame with an ``id`` column. ``progress_apply`` must be
            available on its Series (registered by ``tqdm.pandas()``).
        img_dir: Directory prefix passed through to ``get_img_size``.
    """
    df['shape'] = df['id'].progress_apply(
        lambda img_id: get_img_size(img_id, img_dir)
    )
    df['width'] = [width for width, _ in df['shape'].values]
    df['height'] = [height for _, height in df['shape'].values]
    df['aspect_ratio'] = df['height'] / df['width']
    # Standardise each feature using the mean and std of this table only,
    # so train and test are each scaled by their own statistics.
    for col in ('width', 'height', 'aspect_ratio'):
        df[f'std_{col}'] = (df[col] - np.mean(df[col])) / np.std(df[col])


# Training images: measure, derive features, and persist alongside the labels.
train_path = '../input/train/'
_add_size_features(df_train, train_path)
df_train.to_csv('train_with_wh.csv', index=False)
df_train.head()

# Test images: the ids come from the sample submission file.
df_test = pd.read_csv('../input/sample_submission.csv')
test_path = '../input/test/'
_add_size_features(df_test, test_path)
df_test.to_csv('test_with_wh.csv', index=False)
df_test.head()