From: https://www.kaggle.com/hsinwenchang/keras-data-augmentation-visualize
Author: Beans
Score: 0.96392
- Model: vgg16
Apply transfer learning from a pretrained model using Keras. A frozen VGG16 base is followed by a Flatten layer and two fully connected layers with 1024 and 1010 units: the 1024-unit layer uses a ReLU activation and is followed by a Dropout layer with a rate of 50%, and the 1010-unit output layer produces class probabilities over the 1010 categories via a softmax activation.
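As a quick reference for the shapes involved (a minimal check, not part of the original kernel): VGG16 downsamples its input by a factor of 32, so 128x128 images give a 4x4x512 feature map and the Flatten layer feeds 4*4*512 = 8192 values into the 1024-unit layer.
# Illustrative sanity check of the feature size the classifier head receives.
from keras.applications import VGG16
_base = VGG16(weights='imagenet', include_top=False, input_shape=(128, 128, 3))
print(_base.output_shape)  # (None, 4, 4, 512) -> 8192 features after Flatten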
import os
import cv2
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import json
from keras.models import Sequential
from keras.layers import Dense, Flatten, Activation, Dropout, Conv2D,MaxPooling2D
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.applications import VGG16
from keras.applications import ResNet50
from keras import optimizers
ann_file = '../input/train2019.json'
with open(ann_file) as data_file:
    train_anns = json.load(data_file)
train_anns_df = pd.DataFrame(train_anns['annotations'])[['image_id','category_id']]
train_img_df = pd.DataFrame(train_anns['images'])[['id', 'file_name']].rename(columns={'id':'image_id'})
df_train_file_cat = pd.merge(train_img_df, train_anns_df, on='image_id')
df_train_file_cat['category_id']=df_train_file_cat['category_id'].astype(str)
df_train_file_cat.head()
len(df_train_file_cat['category_id'].unique())
# Example of images for category_id = 400
img_names = df_train_file_cat[df_train_file_cat['category_id']=='400']['file_name'][:30]
plt.figure(figsize=[15,15])
i = 1
for img_name in img_names:
    img = cv2.imread("../input/train_val2019/%s" % img_name)[..., [2, 1, 0]]  # BGR -> RGB
    plt.subplot(6, 5, i)
    plt.imshow(img)
    i += 1
plt.show()
valid_ann_file = '../input/val2019.json'
with open(valid_ann_file) as data_file:
    valid_anns = json.load(data_file)
valid_anns_df = pd.DataFrame(valid_anns['annotations'])[['image_id','category_id']]
valid_anns_df.head()
valid_img_df = pd.DataFrame(valid_anns['images'])[['id', 'file_name']].rename(columns={'id':'image_id'})
valid_img_df.head()
df_valid_file_cat = pd.merge(valid_img_df, valid_anns_df, on='image_id')
df_valid_file_cat['category_id']=df_valid_file_cat['category_id'].astype(str)
df_valid_file_cat.head()
nb_classes = 1010
batch_size = 128
img_size = 128
nb_epochs = 10
%%time
train_datagen = ImageDataGenerator(rescale=1./255,
                                   rotation_range=45,
                                   width_shift_range=.15,
                                   height_shift_range=.15,
                                   horizontal_flip=True,
                                   zoom_range=0.5)
train_generator = train_datagen.flow_from_dataframe(
    dataframe=df_train_file_cat,
    directory="../input/train_val2019",
    x_col="file_name",
    y_col="category_id",
    batch_size=batch_size,
    shuffle=True,
    class_mode="sparse",
    target_size=(img_size, img_size))
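A quick sanity check (illustrative, not in the original kernel) that class_mode="sparse" yields numeric category labels, which is what the sparse_categorical_crossentropy loss used later expects.
# Illustrative: the generator yields (image batch, label batch) with
# sparse category indices rather than one-hot vectors.
x_batch, y_batch = next(train_generator)
print(x_batch.shape)  # (batch_size, img_size, img_size, 3)
print(y_batch[:5])    # category indices, one per image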
# udacity_intro_to_tensorflow_for_deep_learning/l05c04_exercise_flowers_with_data_augmentation_solution.ipynb#scrollTo=jqb9OGoVKIOi
# Plots images in a grid with 1 row and 5 columns, one image per column.
def plotImages(images_arr):
    fig, axes = plt.subplots(1, 5, figsize=(20, 20))
    axes = axes.flatten()
    for img, ax in zip(images_arr, axes):
        ax.imshow(img)
    plt.tight_layout()
    plt.show()
# Indexing the generator repeatedly re-applies random augmentation to the same first image.
augmented_images = [train_generator[0][0][0] for i in range(5)]
plotImages(augmented_images)
%%time
test_datagen = ImageDataGenerator(rescale=1./255)
valid_generator = test_datagen.flow_from_dataframe(
    dataframe=df_valid_file_cat,
    directory="../input/train_val2019",
    x_col="file_name",
    y_col="category_id",
    batch_size=batch_size,
    class_mode="sparse",
    target_size=(img_size, img_size))
vgg16_net = VGG16(weights='imagenet',
                  include_top=False,
                  input_shape=(img_size, img_size, 3))
vgg16_net.trainable = False
# ResNet50 is loaded here as well, but only the VGG16 base is used in the model below.
resnet = ResNet50(include_top=False, weights='imagenet',
                  input_shape=(img_size, img_size, 3))
resnet.trainable = False
model = Sequential()
model.add(vgg16_net)
model.add(Flatten())
model.add(Dense(1024))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(nb_classes, activation='softmax'))
model.compile(optimizers.RMSprop(lr=0.0001, decay=1e-6), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
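Optionally (not in the original kernel), inspect the layer shapes and parameter counts; because the VGG16 base is frozen, only the new Flatten/Dense head contributes trainable parameters.
# Illustrative: print the model structure and trainable vs. non-trainable parameter counts.
model.summary()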
%%time
history = model.fit_generator(generator=train_generator,
                              steps_per_epoch=500,
                              validation_data=valid_generator,
                              validation_steps=100,
                              epochs=nb_epochs,
                              verbose=0)
with open('history.json', 'w') as f:
    json.dump(history.history, f)
history_df = pd.DataFrame(history.history)
history_df[['loss', 'val_loss']].plot()
history_df[['acc', 'val_acc']].plot()
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs_range = range(nb_epochs)
plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()
test_ann_file = '../input/test2019.json'
with open(test_ann_file) as data_file:
    test_anns = json.load(data_file)
test_img_df = pd.DataFrame(test_anns['images'])[['id', 'file_name']].rename(columns={'id':'image_id'})
test_img_df.head()
%%time
test_datagen = ImageDataGenerator(rescale=1./255.)
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_img_df,
    directory="../input/test2019",
    x_col="file_name",
    target_size=(img_size, img_size),
    batch_size=1,
    shuffle=False,
    class_mode=None)
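Since batch_size=1 and shuffle=False, each generator step yields exactly one test image in file order, so the steps argument below covers the whole test set once. A quick illustrative check (not in the original kernel):
# Illustrative: one step per test image when batch_size=1.
print(len(test_generator), len(test_generator.filenames))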
%%time
test_generator.reset()
# With batch_size=1, steps equals the number of test images, so every file is predicted once.
predict = model.predict_generator(test_generator, steps=len(test_generator.filenames), verbose=1)
len(predict)
predicted_class_indices=np.argmax(predict,axis=1)
labels = (train_generator.class_indices)
labels = dict((v,k) for k,v in labels.items())
predictions = [labels[k] for k in predicted_class_indices]
sam_sub_df = pd.read_csv('../input/kaggle_sample_submission.csv')
sam_sub_df.head()
filenames=test_generator.filenames
results = pd.DataFrame({"file_name": filenames,
                        "predicted": predictions})
df_res = pd.merge(test_img_df, results, on='file_name')[['image_id', 'predicted']] \
    .rename(columns={'image_id': 'id'})
df_res.head()
df_res.to_csv("submission.csv",index=False)
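A small final check (illustrative, not part of the original kernel) that the written file matches the sample submission's layout, with one row per test image.
# Illustrative: compare columns against the sample submission and row count against the test set.
print(list(sam_sub_df.columns), list(df_res.columns), len(df_res) == len(test_img_df))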