From: https://www.kaggle.com/hsinwenchang/keras-mobilenet-data-augmentation-visualize
Author: Beans
Score: 0.97509
- Model: MobileNet
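Apply transfer learning from a pretrained model using Keras, with MobileNet as the architecture. (Note: the code below builds MobileNet with `weights=None`, so no pretrained weights are actually loaded.)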
import os
import cv2
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import json
from keras.models import Sequential
from keras.layers import Dense, Flatten, Activation, Dropout, Conv2D,MaxPooling2D
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.applications import MobileNet
from keras import optimizers
# Load the training annotation file and build one table that maps every
# image file name to its category.
ann_file = '../input/train2019.json'
with open(ann_file) as data_file:
    train_anns = json.load(data_file)

# 'annotations' carries (image_id, category_id); 'images' carries (id, file_name).
train_anns_df = pd.DataFrame(train_anns['annotations'])[['image_id', 'category_id']]
train_img_df = (
    pd.DataFrame(train_anns['images'])[['id', 'file_name']]
    .rename(columns={'id': 'image_id'})
)

# Join the two tables on image_id.
df_train_file_cat = train_img_df.merge(train_anns_df, on='image_id')
# Labels are stored as strings (presumably because flow_from_dataframe
# expects string class labels -- confirm against the installed Keras).
df_train_file_cat['category_id'] = df_train_file_cat['category_id'].astype(str)
df_train_file_cat.head()
# Number of distinct categories present in the training table.
df_train_file_cat['category_id'].nunique()
# Example of images for category_id = 400: show up to 30 of them in a 6x5 grid.
img_names = df_train_file_cat[df_train_file_cat['category_id'] == '400']['file_name'][:30]
plt.figure(figsize=[15, 15])
for i, img_name in enumerate(img_names, start=1):
    img_path = "../input/train_val2019/%s" % img_name
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports a missing/undecodable file by returning None
        # instead of raising; fail with the offending path rather than an
        # opaque "NoneType is not subscriptable" error.
        raise IOError("Could not read image: %s" % img_path)
    plt.subplot(6, 5, i)
    # OpenCV loads channels as BGR; reverse the last axis to RGB for matplotlib.
    plt.imshow(img[..., [2, 1, 0]])
plt.show()
# Load the validation annotation file and build the file_name -> category
# table, mirroring what is done for the training annotations.
valid_ann_file = '../input/val2019.json'
with open(valid_ann_file) as data_file:
    valid_anns = json.load(data_file)

# One row per annotation: (image_id, category_id).
valid_anns_df = pd.DataFrame(valid_anns['annotations'])[['image_id', 'category_id']]
valid_anns_df.head()

# One row per image: (image_id, file_name).
valid_img_df = (
    pd.DataFrame(valid_anns['images'])[['id', 'file_name']]
    .rename(columns={'id': 'image_id'})
)
valid_img_df.head()

# Join on image_id and store the labels as strings, like the training table.
df_valid_file_cat = valid_img_df.merge(valid_anns_df, on='image_id')
df_valid_file_cat['category_id'] = df_valid_file_cat['category_id'].astype(str)
df_valid_file_cat.head()
# Training hyper-parameters shared by the generators and the model below.
img_size = 128      # images are resized to img_size x img_size
batch_size = 128    # batch size of the train / validation generators
nb_epochs = 10      # number of epochs passed to fit_generator
nb_classes = 1010   # size of the model's classification output
%%time
# Augmentation settings for the training images: pixel rescaling plus random
# rotation, shifts, horizontal flips and zoom.
augmentation = dict(
    rescale=1./255,
    rotation_range=45,
    width_shift_range=.15,
    height_shift_range=.15,
    horizontal_flip=True,
    zoom_range=0.5,
)
train_datagen = ImageDataGenerator(**augmentation)

# Stream shuffled training batches straight from the annotation dataframe;
# labels are emitted in "sparse" class mode.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=df_train_file_cat,
    directory="../input/train_val2019",
    x_col="file_name",
    y_col="category_id",
    batch_size=batch_size,
    shuffle=True,
    class_mode="sparse",
    target_size=(img_size, img_size),
)
# udacity_intro_to_tensorflow_for_deep_learning/l05c04_exercise_flowers_with_data_augmentation_solution.ipynb#scrollTo=jqb9OGoVKIOi
def plotImages(images_arr, n_cols=5):
    """Plot images side by side in a grid with one row.

    Args:
        images_arr: Iterable of images accepted by ``Axes.imshow``. Only the
            first ``n_cols`` images are drawn; surplus images are ignored.
        n_cols: Number of columns in the grid. Defaults to 5, the layout the
            function originally hard-coded.
    """
    # squeeze=False always yields a 2-D array of axes, so flatten() keeps
    # working even when n_cols == 1.
    _, axes = plt.subplots(1, n_cols, figsize=(20, 20), squeeze=False)
    for img, ax in zip(images_arr, axes.flatten()):
        ax.imshow(img)
    plt.tight_layout()
    plt.show()
# Take the first image of batch 0 five times. Each access re-reads the batch
# through the generator, so presumably a fresh random augmentation is applied
# every time -- confirm against the generator implementation.
augmented_images = [train_generator[0][0][0] for _ in range(5)]
plotImages(augmented_images)
%%time
# Validation images are only rescaled -- no augmentation.
test_datagen = ImageDataGenerator(rescale=1./255)

# Class names ordered by the integer index the training generator assigned.
# Passing them explicitly keeps the sparse validation labels on the same
# label -> index mapping as training, instead of relying on the validation
# frame happening to contain exactly the same set of categories.
train_classes = sorted(train_generator.class_indices,
                       key=train_generator.class_indices.get)

valid_generator = test_datagen.flow_from_dataframe(
    dataframe=df_valid_file_cat,
    directory="../input/train_val2019",
    x_col="file_name",
    y_col="category_id",
    classes=train_classes,
    batch_size=batch_size,
    class_mode="sparse",
    target_size=(img_size, img_size))
# Build a MobileNet classifier with nb_classes outputs. weights=None means no
# pretrained weights are loaded.
model = MobileNet(input_shape=(img_size, img_size, 3), alpha=1., weights=None, classes=nb_classes)
# Use the canonical RMSprop class rather than the lowercase `rmsprop` alias.
# The sparse loss matches the generators' class_mode="sparse" integer labels.
model.compile(optimizer=optimizers.RMSprop(lr=0.0001, decay=1e-6),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line.
model.summary()
%%time
# Train for nb_epochs epochs of 500 training batches each, validating on 100
# batches after every epoch; verbose=0 silences the per-epoch progress output.
history = model.fit_generator(generator=train_generator,
                              steps_per_epoch=500,
                              validation_data=valid_generator,
                              validation_steps=100,
                              epochs=nb_epochs,
                              verbose=0)

# Persist the per-epoch metrics. The values may be NumPy scalars (for example
# float32), which the json module cannot serialise, so cast each one to a
# builtin float first.
serializable_history = {
    name: [float(value) for value in values]
    for name, values in history.history.items()
}
with open('history.json', 'w') as f:
    json.dump(serializable_history, f)
# Plot the training curves recorded by fit_generator.
history_df = pd.DataFrame(history.history)

# Older Keras releases log accuracy as 'acc'/'val_acc', newer ones as
# 'accuracy'/'val_accuracy'; use whichever key is present.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
val_acc_key = 'val_' + acc_key

history_df[['loss', 'val_loss']].plot()
history_df[[acc_key, val_acc_key]].plot()

acc = history.history[acc_key]
val_acc = history.history[val_acc_key]
loss = history.history['loss']
val_loss = history.history['val_loss']
# Derive the x-axis from what was actually recorded, so the plot still lines
# up if training ran for fewer epochs than nb_epochs.
epochs_range = range(len(acc))

plt.figure(figsize=(8, 8))

# Left panel: accuracy.
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

# Right panel: loss.
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()
# Load the test metadata file; only its 'images' section is used here.
test_ann_file = '../input/test2019.json'
with open(test_ann_file) as data_file:
    test_anns = json.load(data_file)

# One row per test image: (image_id, file_name).
test_img_df = (
    pd.DataFrame(test_anns['images'])[['id', 'file_name']]
    .rename(columns={'id': 'image_id'})
)
test_img_df.head()
%%time
# Test images are only rescaled -- no augmentation.
test_datagen = ImageDataGenerator(rescale=1./255.)

# Unshuffled, label-free generator (class_mode=None) that yields one image
# per batch, so predictions can be matched to test_generator.filenames.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_img_df,
    directory="../input/test2019",
    x_col="file_name",
    target_size=(img_size, img_size),
    batch_size=1,
    shuffle=False,
    class_mode=None,
)
%%time
# Rewind the generator, then run one prediction step per test file (the
# generator uses batch_size=1).
test_generator.reset()
predict = model.predict_generator(
    test_generator,
    steps=len(test_generator.filenames),
    verbose=1,
)
len(predict)

# Highest-scoring class index for each image.
predicted_class_indices = predict.argmax(axis=1)

# Invert the training generator's name -> index mapping so that predicted
# indices can be translated back to category_id strings.
labels = {index: name for name, index in train_generator.class_indices.items()}
predictions = [labels[class_index] for class_index in predicted_class_indices]
# Peek at the sample submission file for reference.
sam_sub_df = pd.read_csv('../input/kaggle_sample_submission.csv')
sam_sub_df.head()

# Pair each test file name with its predicted category.
filenames = test_generator.filenames
results = pd.DataFrame({"file_name": filenames, "predicted": predictions})

# Map file names back to image ids and keep the (id, predicted) columns.
df_res = (
    test_img_df.merge(results, on='file_name')[['image_id', 'predicted']]
    .rename(columns={'image_id': 'id'})
)
df_res.head()
df_res.to_csv("submission.csv", index=False)