iMet densenet and two weighted outputs model

From: https://www.kaggle.com/hengzheng/imet-densenet-and-two-weighted-outputs-model

Author: Heng Zheng

Score: 0.47

imports

In [1]:
import os
import re

import warnings
warnings.filterwarnings("ignore")

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

import cv2

import numpy as np
import pandas as pd

from keras.models import *
from keras.layers import *
from keras.optimizers import *
from keras.utils import *
from keras.callbacks import *

from keras import backend as K
from keras.applications.densenet import DenseNet121, preprocess_input
from keras.preprocessing.image import ImageDataGenerator

from sklearn.model_selection import train_test_split
from sklearn.metrics import fbeta_score

from tqdm import tqdm
Using TensorFlow backend.

load data

In [2]:
train_images = os.listdir("../input/imet-2019-fgvc6/train/")
test_images = os.listdir("../input/imet-2019-fgvc6/test/")

print("number of train images: ", len(train_images))
print("number of test  images: ", len(test_images))
number of train images:  109237
number of test  images:  7443
In [3]:
train = pd.read_csv("../input/imet-2019-fgvc6/train.csv")
train.head()
Out[3]:
id attribute_ids
0 1000483014d91860 147 616 813
1 1000fe2e667721fe 51 616 734 813
2 1001614cb89646ee 776
3 10041eb49b297c08 51 671 698 813 1092
4 100501c227f8beea 13 404 492 903 1093
In [4]:
labels = pd.read_csv("../input/imet-2019-fgvc6/labels.csv")
labels.head()
Out[4]:
attribute_id attribute_name
0 0 culture::abruzzi
1 1 culture::achaemenid
2 2 culture::aegean
3 3 culture::afghan
4 4 culture::after british
In [5]:
labels.tail()
Out[5]:
attribute_id attribute_name
1098 1098 tag::writing implements
1099 1099 tag::writing systems
1100 1100 tag::zeus
1101 1101 tag::zigzag pattern
1102 1102 tag::zodiac
In [6]:
cultures = [x for x in labels.attribute_name.values if x.startswith("culture")]
tags = [x for x in labels.attribute_name.values if x.startswith("tag")]
In [7]:
len(cultures), len(tags)
Out[7]:
(398, 705)
In [8]:
def split_culture_tag(x):
    cultures_ = list()
    tags_ = list()
    for i in x.split(" "):
        if int(i) <= len(cultures):
            cultures_.append(i)
        else:
            tags_.append(str(int(i) - len(cultures)))
    if not cultures_:
        cultures_.append(str(len(cultures)))
    if not tags_:
        tags_.append(str(len(tags)))
    return " ".join(cultures_), " ".join(tags_)
In [9]:
culture_ids = list()
tag_ids = list()

for v in tqdm(train.attribute_ids.values):
    c, t = split_culture_tag(v)
    culture_ids.append(c)
    tag_ids.append(t)
100%|██████████| 109237/109237 [00:00<00:00, 274954.67it/s]
In [10]:
train["culture_ids"] = culture_ids
train["tag_ids"] = tag_ids

train.head()
Out[10]:
id attribute_ids culture_ids tag_ids
0 1000483014d91860 147 616 813 147 218 415
1 1000fe2e667721fe 51 616 734 813 51 218 336 415
2 1001614cb89646ee 776 398 378
3 10041eb49b297c08 51 671 698 813 1092 51 273 300 415 694
4 100501c227f8beea 13 404 492 903 1093 13 6 94 505 695
In [11]:
num_classes_c = len(cultures) + 1
num_classes_t = len(tags) + 1

print(num_classes_c, num_classes_t)
399 706
In [12]:
labels_map = {v:i for i, v in zip(labels.attribute_id.values, labels.attribute_name.values)}
labels_map_rev = {i:v for i, v in zip(labels.attribute_id.values, labels.attribute_name.values)}

num_classes = len(labels_map)
print("{} categories".format(num_classes))
1103 categories
In [13]:
submission = pd.read_csv("../input/imet-2019-fgvc6/sample_submission.csv")
submission.head()
Out[13]:
id attribute_ids
0 10023b2cc4ed5f68 0 1 2
1 100fbe75ed8fd887 0 1 2
2 101b627524a04f19 0 1 2
3 10234480c41284c6 0 1 2
4 1023b0e2636dcea8 0 1 2

prepare X and y

In [14]:
def obtain_y_c(ids):
    y = np.zeros(num_classes_c)
    for idx in ids.split(" "):
        y[int(idx)] = 1
    return y

def obtain_y_t(ids):
    y = np.zeros(num_classes_t)
    for idx in ids.split(" "):
        y[int(idx)] = 1
    return y
In [15]:
paths = ["../input/imet-2019-fgvc6/train/{}.png".format(x) for x in train.id.values]

targets_c = np.array([obtain_y_c(y) for y in train.culture_ids.values])
targets_t = np.array([obtain_y_t(y) for y in train.tag_ids.values])

image generator

In [16]:
class ImageGenerator(Sequence):
    
    def __init__(self, paths, targets_c, targets_t, batch_size, shape, augment=False):
        self.paths = paths
        self.targets_c = targets_c
        self.targets_t = targets_t
        self.batch_size = batch_size
        self.shape = shape
        self.augment = augment
        
    def __len__(self):
        return int(np.ceil(len(self.paths) / float(self.batch_size)))
    
    def __getitem__(self, idx):
        batch_paths = self.paths[idx * self.batch_size : (idx + 1) * self.batch_size]
        x = np.zeros((len(batch_paths), self.shape[0], self.shape[1], self.shape[2]), dtype=np.float32)
        y = np.zeros((self.batch_size, num_classes, 1))
        for i, path in enumerate(batch_paths):
            x[i] = self.__load_image(path)
        y_c = self.targets_c[idx * self.batch_size : (idx + 1) * self.batch_size]
        y_t = self.targets_t[idx * self.batch_size : (idx + 1) * self.batch_size]
        return x, [y_c, y_t]
    
    def __iter__(self):
        for item in (self[i] for i in range(len(self))):
            yield item
            
    def __load_image(self, path):
        image = cv2.imread(path)
        image = cv2.resize(image, (self.shape[0], self.shape[1]))
        image = preprocess_input(image)
        if self.augment:
            seq = iaa.Sequential([
                iaa.OneOf([
                    iaa.Fliplr(0.5),
                    iaa.Flipud(0.5),
                    iaa.CropAndPad(percent=(-0.25, 0.25)),
                    iaa.Crop(percent=(0, 0.1)),
                    iaa.Sometimes(0.5,
                        iaa.GaussianBlur(sigma=(0, 0.5))
                    ),
                    iaa.Affine(
                        scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
                        translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
                        rotate=(-180, 180),
                        shear=(-8, 8)
                    )
                ])
            ], random_order=True)
            image = seq.augment_image(image)
        return image

train test split

In [17]:
batch_size = 64

train_paths, val_paths, train_targets_c, val_targets_c, train_targets_t, val_targets_t = train_test_split(paths, 
                                                                      targets_c,
                                                                      targets_t,
                                                                      test_size=0.1, 
                                                                      random_state=1029)

train_gen = ImageGenerator(train_paths, train_targets_c, train_targets_t, batch_size=batch_size, shape=(224,224,3), augment=False)
val_gen = ImageGenerator(val_paths, val_targets_c, val_targets_t, batch_size=batch_size, shape=(224,224,3), augment=False)

build model

In [18]:
inp = Input((224, 224, 3))
backbone = DenseNet121(input_tensor=inp,
                       weights="../input/densenet-keras/DenseNet-BC-121-32-no-top.h5",
                       include_top=False)
x = backbone.output
x = GlobalAveragePooling2D()(x)

y_c = Dense(1024, activation="relu")(x)
y_c = Dropout(0.5)(y_c)
y_c = Dense(num_classes_c, activation="sigmoid", name="cultures_out")(y_c)

y_t = Dense(2048, activation="relu")(x)
y_t = Dropout(0.5)(y_t)
y_t = Dense(num_classes_t, activation="sigmoid", name="tags_out")(y_t)


model = Model(inp, [y_c, y_t])
WARNING:tensorflow:From /opt/conda/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
WARNING:tensorflow:From /opt/conda/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:3445: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
In [19]:
 losses = {
     "cultures_out": 'binary_crossentropy',
     "tags_out": 'binary_crossentropy'
 }
    
loss_weights = {
    "cultures_out": 1.0,
    "tags_out": 4.0
}

f_score for Keras

In [20]:
def f_score(y_true, y_pred, threshold=0.1, beta=2):
    tp = tp_score(y_true, y_pred, threshold)
    fp = fp_score(y_true, y_pred, threshold)
    fn = fn_score(y_true, y_pred, threshold)
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    return (1+beta**2) * ((precision * recall) / ((beta**2)*precision + recall))


def tp_score(y_true, y_pred, threshold=0.1):
    tp_3d = K.concatenate(
        [
            K.cast(K.expand_dims(K.flatten(y_true)), 'bool'),
            K.cast(K.expand_dims(K.flatten(K.greater(y_pred, K.constant(threshold)))), 'bool'),
            K.cast(K.ones_like(K.expand_dims(K.flatten(y_pred))), 'bool')
        ], axis=1
    )
    tp = K.sum(K.cast(K.all(tp_3d, axis=1), 'int32'))
    return tp


def fp_score(y_true, y_pred, threshold=0.1):
    fp_3d = K.concatenate(
        [
            K.cast(K.expand_dims(K.flatten(K.abs(y_true - K.ones_like(y_true)))), 'bool'),
            K.cast(K.expand_dims(K.flatten(K.greater(y_pred, K.constant(threshold)))), 'bool'),
            K.cast(K.ones_like(K.expand_dims(K.flatten(y_pred))), 'bool')
        ], axis=-1
    )
    fp = K.sum(K.cast(K.all(fp_3d, axis=1), 'int32'))
    return fp


def fn_score(y_true, y_pred, threshold=0.1):
    fn_3d = K.concatenate(
        [
            K.cast(K.expand_dims(K.flatten(y_true)), 'bool'),
            K.cast(K.expand_dims(K.flatten(K.abs(K.cast(K.greater(y_pred, K.constant(threshold)), 'float') - K.ones_like(y_pred)))), 'bool'),
            K.cast(K.ones_like(K.expand_dims(K.flatten(y_pred))), 'bool')
        ], axis=1
    )
    fn = K.sum(K.cast(K.all(fn_3d, axis=1), 'int32'))
    return fn


def precision_score(y_true, y_pred, threshold=0.1):
    tp = tp_score(y_true, y_pred, threshold)
    fp = fp_score(y_true, y_pred, threshold)
    return tp / (tp + fp)


def recall_score(y_true, y_pred, threshold=0.1):
    tp = tp_score(y_true, y_pred, threshold)
    fn = fn_score(y_true, y_pred, threshold)
    return tp / (tp + fn)
In [21]:
checkpoint = ModelCheckpoint('model.h5', 
                             monitor='val_tags_out_f_score', 
                             verbose=1, 
                             save_best_only=True, 
                             mode='max', 
                             save_weights_only=True)

reduce_lr = ReduceLROnPlateau(monitor='val_tags_out_f_score', factor=0.2,
                              patience=1, verbose=1, mode='max',
                              min_delta=0.0001, cooldown=2, min_lr=1e-7)

early_stop = EarlyStopping(monitor="val_tags_out_f_score", mode="max", patience=5)
In [22]:
model.compile(
    loss=losses,
    loss_weights=loss_weights,
    optimizer=Adam(1e-03),
    metrics=['acc', f_score])
In [23]:
history = model.fit_generator(generator=train_gen, 
                              steps_per_epoch=len(train_gen), 
                              validation_data=val_gen, 
                              validation_steps=len(val_gen),
                              epochs=20,
                              callbacks=[checkpoint, reduce_lr, early_stop])
WARNING:tensorflow:From /opt/conda/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
Epoch 1/20
1537/1537 [==============================] - 1024s 666ms/step - loss: 0.0877 - cultures_out_loss: 0.0143 - tags_out_loss: 0.0183 - cultures_out_acc: 0.9966 - cultures_out_f_score: 0.2232 - tags_out_acc: 0.9964 - tags_out_f_score: 0.1559 - val_loss: 0.1777 - val_cultures_out_loss: 0.0317 - val_tags_out_loss: 0.0365 - val_cultures_out_acc: 0.9973 - val_cultures_out_f_score: 0.1417 - val_tags_out_acc: 0.9969 - val_tags_out_f_score: 0.0930

Epoch 00001: val_tags_out_f_score improved from -inf to 0.09304, saving model to model.h5
Epoch 2/20
1537/1537 [==============================] - 970s 631ms/step - loss: 0.0756 - cultures_out_loss: 0.0115 - tags_out_loss: 0.0160 - cultures_out_acc: 0.9972 - cultures_out_f_score: 0.2876 - tags_out_acc: 0.9969 - tags_out_f_score: 0.1898 - val_loss: 0.0742 - val_cultures_out_loss: 0.0111 - val_tags_out_loss: 0.0158 - val_cultures_out_acc: 0.9973 - val_cultures_out_f_score: 0.2968 - val_tags_out_acc: 0.9969 - val_tags_out_f_score: 0.2109

Epoch 00002: val_tags_out_f_score improved from 0.09304 to 0.21087, saving model to model.h5
Epoch 3/20
1331/1537 [========================>.....] - ETA: 1:59 - loss: 0.0734 - cultures_out_loss: 0.0112 - tags_out_loss: 0.0156 - cultures_out_acc: 0.9973 - cultures_out_f_score: 0.3067 - tags_out_acc: 0.9969 - tags_out_f_score: 0.2094
In [24]:
plt.rcParams['figure.figsize'] = (6,6)

c_fscore = history.history['cultures_out_f_score']
val_c_fscore = history.history['val_cultures_out_f_score']
t_fscore = history.history['tags_out_f_score']
val_t_fscore = history.history['val_tags_out_f_score']

epochs = range(1, len(c_fscore) + 1)

plt.title('Training and validation culture f2 score')
plt.plot(epochs, c_fscore, 'red', label='Training f_score')
plt.plot(epochs, val_c_fscore, 'blue', label='Validation f_score')
plt.legend()

plt.title('Training and validation tag f2 score')
plt.plot(epochs, t_fscore, 'red', label='Training f_score')
plt.plot(epochs, val_t_fscore, 'blue', label='Validation f_score')
plt.legend()

plt.show()
In [25]:
model.load_weights("./model.h5")

prediction

test image generator

In [26]:
class TestImageGenerator(Sequence):
    
    def __init__(self, paths, batch_size, shape):
        self.paths = paths
        self.batch_size = batch_size
        self.shape = shape
        
    def __len__(self):
        return int(np.ceil(len(self.paths) / float(self.batch_size)))
    
    def __getitem__(self, idx):
        batch_paths = self.paths[idx * self.batch_size : (idx + 1) * self.batch_size]
        x = np.zeros((len(batch_paths), self.shape[0], self.shape[1], self.shape[2]), dtype=np.float32)
        for i, path in enumerate(batch_paths):
            x[i] = self.__load_image(path)
        return x
    
    def __iter__(self):
        for item in (self[i] for i in range(len(self))):
            yield item
            
    def __load_image(self, path):
        image = cv2.imread(path)
        image = cv2.resize(image, (self.shape[0], self.shape[1]))
        image = preprocess_input(image)
        return image

do prediction

In [27]:
test_paths = ["../input/imet-2019-fgvc6/test/{}.png".format(x) for x in submission.id.values]
test_gen = TestImageGenerator(test_paths, batch_size=batch_size, shape=(224,224,3))

predicts = model.predict_generator(test_gen, verbose=1)
117/117 [==============================] - 58s 495ms/step
In [28]:
predicts[0].shape, predicts[1].shape
Out[28]:
((7443, 399), (7443, 706))
In [29]:
val_predicts = model.predict_generator(val_gen, verbose=1)
171/171 [==============================] - 74s 430ms/step
In [30]:
best_threshold_c = 0.
best_score_c = 0.

for threshold in tqdm(np.arange(0, 0.5, 0.01)):
    f2_score = fbeta_score(val_targets_c, np.array(val_predicts[0]) > threshold, beta=2, average='samples')
    if f2_score > best_score_c:
        best_score_c = f2_score
        best_threshold_c = threshold
100%|██████████| 50/50 [00:11<00:00,  4.59it/s]
In [31]:
best_threshold_t = 0.
best_score_t = 0.

for threshold in tqdm(np.arange(0, 0.5, 0.01)):
    f2_score = fbeta_score(val_targets_t, np.array(val_predicts[1]) > threshold, beta=2, average='samples')
    if f2_score > best_score_t:
        best_score_t = f2_score
        best_threshold_t = threshold
100%|██████████| 50/50 [00:20<00:00,  2.22it/s]
In [32]:
print("culture classifier: best threshold: {} best score: {}".format(best_threshold_c, best_score_c))
print("tag     classifier: best threshold: {} best score: {}".format(best_threshold_t, best_score_t))
culture classifier: best threshold: 0.08 best score: 0.5638752202268228
tag     classifier: best threshold: 0.07 best score: 0.4070017101431007
In [33]:
def classifier(probs, th_c, th_t):
    c = list()
    
    # culture classifier
    a = np.array(probs[0] > th_c, dtype=np.int8)
    b = np.where(a == 1)[0]
    for idx in b.tolist():
        if idx != len(cultures):
            c.append(str(idx))
            
    # tag classifier
    a = np.array(probs[1] > th_t, dtype=np.int8)
    b = np.where(a == 1)[0]
    for idx in b.tolist():
        if idx != len(cultures) + len(tags):
            c.append(str(idx + len(cultures)))

    return " ".join(c)
In [34]:
predictions = list()

for probs in tqdm(zip(predicts[0], predicts[1])):
    predictions.append(classifier(probs, best_threshold_c, best_threshold_t))
7443it [00:00, 51031.74it/s]
In [35]:
len(predictions)
Out[35]:
7443
In [36]:
n = 6

img = cv2.imread(test_paths[n])
plt.imshow(img)

a = np.array(predicts[0][n]>best_score_c, dtype=np.int8)
b = np.where(a==1)[0]
for idx in b.tolist():
    if idx != len(cultures):
        print(labels_map_rev[idx])
    
a = np.array(predicts[1][n]>best_score_t, dtype=np.int8)
b = np.where(a==1)[0]
for idx in b.tolist():
    if idx != len(cultures) + len(tags):
        print(labels_map_rev[idx + len(cultures)])
culture::american
tag::men

submission

In [37]:
submission["attribute_ids"] = np.array(predictions)
submission.head()
Out[37]:
id attribute_ids
0 10023b2cc4ed5f68 99 121 304 369 587 766 1039 1059
1 100fbe75ed8fd887 79 188 1039
2 101b627524a04f19 79 121 369 482 497 498 728 813 961 1092
3 10234480c41284c6 13 51 111 147 480 483 501 738 776 813 1046
4 1023b0e2636dcea8 51 147 156 189 477 489 584 612 671 733 780 813...
In [38]:
submission.to_csv('submission.csv', index=False)
In [39]:
submission.shape
Out[39]:
(7443, 2)
In [40]:
!head submission.csv
id,attribute_ids
10023b2cc4ed5f68,99 121 304 369 587 766 1039 1059
100fbe75ed8fd887,79 188 1039
101b627524a04f19,79 121 369 482 497 498 728 813 961 1092
10234480c41284c6,13 51 111 147 480 483 501 738 776 813 1046
1023b0e2636dcea8,51 147 156 189 477 489 584 612 671 733 780 813 1046 1059 1092
1039cd6cf85845c,13 405 896 903 1092
103a5b3f83fbe88,13 194 670 734 744 756 813 1092
10413aaae8d6a9a2,51 147 813 1046 1092
10423822b93a65ab,51 111 147 189 813
In [41]:
submission_df = submission.copy()
submission_df.n_cate = submission.attribute_ids.apply(lambda x: len(x.split(" ")))
_ = submission_df.n_cate.value_counts().sort_index().plot.bar()