# From: https://www.kaggle.com/digitalvirtuoso/targeted-visual-image-exploration-eda
# Author: Adam H
# I created this to visually explore all images carrying targeted labels.
# I thought others might find it useful as well. I hope this helps with your EDA and future Kernels!
# In this example I use label ids 11, 36, and 81.
# You can choose as many or as few label ids as you'd like to visually explore.
# Note: The selection variable is a list of strings, as follows: sel_ids = ['string', 'string', 'etc...']
# Load Libraries / Global Definitions
import pandas as pd
import re
import pylab as plt
import cv2
def find_match(text: str, search: str) -> bool:
    r"""Return True if ``search`` occurs in ``text`` as a whole word.

    ``search`` is wrapped in ``\b`` word boundaries and matched
    case-insensitively, so ``'36'`` matches ``'11 36 81'`` but not ``'361'``.

    Args:
        text: The string to scan (e.g. a space-separated list of label ids).
        search: The word to look for. It is inserted into the regular
            expression unescaped, so regex metacharacters in it are
            interpreted as regex syntax.

    Returns:
        True when at least one whole-word match exists, otherwise False.
    """
    # re.search stops at the first hit; there is no need to collect every
    # match just to check that one exists.
    pattern = r'\b' + search + r'\b'
    return re.search(pattern, text, flags=re.IGNORECASE) is not None
# Load train and label data
train = pd.read_csv("../input/train.csv")
labels = pd.read_csv("../input/labels.csv")

# Choose target label ids to visually explore
sel_ids = ['81', '11', '36']

# Find all train samples labeled with your chosen targets.
# One filtered frame is built per selected id and the frames are concatenated
# in sel_ids order, so rows stay grouped by label and keep their original
# index labels. A sample whose attribute_ids contain several selected ids
# appears once per matching id.
# A single pd.concat replaces row-by-row DataFrame.append, which copied the
# whole frame on every call and no longer exists in pandas >= 2.0.
matched_frames = []
for sel_id in sel_ids:
    is_match = train['attribute_ids'].map(
        lambda ids, target=sel_id: find_match(ids, target)
    ).astype(bool)
    matched_frames.append(train[is_match])

# pd.concat raises on an empty list, so fall back to an empty slice of train
# (same columns, zero rows) when no ids were selected.
aggregate_images = pd.concat(matched_frames) if matched_frames else train.iloc[0:0]
aggregate_images.head()  # Taking a peek at our aggregated image train data containing selected label ids
# The MAX_PLOTS variable sets the maximum number of images you wish to see.
MAX_PLOTS = 100
column = 3  # Adjust the maximum amount of columns you want to display

# Only the first MAX_PLOTS aggregated images are drawn.
plot_ids = aggregate_images['id'].values[:MAX_PLOTS]

# Ceiling division: exactly enough rows to hold every image that will be
# drawn, without a spare empty row when the count is a multiple of `column`.
row = -(-len(plot_ids) // column)

count = 1  # 1-based index of the next subplot slot
plt.figure(figsize=[30, 20], edgecolor='k')
for image_name in plot_ids:
    image = cv2.imread("../input/train/%s.png" % image_name)
    if image is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # read; report it and move on rather than crashing in imshow.
        print('Could not read image: ', image_name)
        continue
    plt.subplot(row, column, count)
    # OpenCV loads pixels in BGR order while matplotlib expects RGB.
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    count += 1
plt.show()  # Plot all figures of chosen targets
print('Images Printed: ', count-1)