iMet First Try

From: https://www.kaggle.com/hengzheng/imet-first-try

Author: Heng Zheng

Score: 0.4

imports

In [1]:
import os
import re

import warnings
warnings.filterwarnings("ignore")

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

import cv2

import numpy as np
import pandas as pd

from keras.models import *
from keras.layers import *
from keras.optimizers import *
from keras.utils import *
from keras.callbacks import *

from keras import backend as K
from keras.applications.densenet import DenseNet121, preprocess_input
from keras.preprocessing.image import ImageDataGenerator

from sklearn.model_selection import train_test_split
from sklearn.metrics import fbeta_score

from tqdm import tqdm
Using TensorFlow backend.

load data

In [2]:
train_images = os.listdir("../input/imet-2019-fgvc6/train/")
test_images = os.listdir("../input/imet-2019-fgvc6/test/")

print("number of train images: ", len(train_images))
print("number of test  images: ", len(test_images))
number of train images:  109237
number of test  images:  7443
In [3]:
train = pd.read_csv("../input/imet-2019-fgvc6/train.csv")
train.head()
Out[3]:
id attribute_ids
0 1000483014d91860 147 616 813
1 1000fe2e667721fe 51 616 734 813
2 1001614cb89646ee 776
3 10041eb49b297c08 51 671 698 813 1092
4 100501c227f8beea 13 404 492 903 1093
In [4]:
labels = pd.read_csv("../input/imet-2019-fgvc6/labels.csv")
labels.head()
Out[4]:
attribute_id attribute_name
0 0 culture::abruzzi
1 1 culture::achaemenid
2 2 culture::aegean
3 3 culture::afghan
4 4 culture::after british
In [5]:
labels_map = {v:i for i, v in zip(labels.attribute_id.values, labels.attribute_name.values)}
labels_map_rev = {i:v for i, v in zip(labels.attribute_id.values, labels.attribute_name.values)}

num_classes = len(labels_map)
print("{} categories".format(num_classes))
1103 categories
In [6]:
submission = pd.read_csv("../input/imet-2019-fgvc6/sample_submission.csv")
submission.head()
Out[6]:
id attribute_ids
0 10023b2cc4ed5f68 0 1 2
1 100fbe75ed8fd887 0 1 2
2 101b627524a04f19 0 1 2
3 10234480c41284c6 0 1 2
4 1023b0e2636dcea8 0 1 2

EDA

In [7]:
def ids_to_lables(attribute_id):
    return "\n".join([labels_map_rev[int(i)] for i in attribute_id.split(" ")])
In [8]:
train["labels"] = train.attribute_ids.apply(lambda x: ids_to_lables(x))
train.head()
Out[8]:
id attribute_ids labels
0 1000483014d91860 147 616 813 culture::french\ntag::dogs\ntag::men
1 1000fe2e667721fe 51 616 734 813 culture::british\ntag::dogs\ntag::horses\ntag:...
2 1001614cb89646ee 776 tag::landscapes
3 10041eb49b297c08 51 671 698 813 1092 culture::british\ntag::flowers\ntag::girls\nta...
4 100501c227f8beea 13 404 492 903 1093 culture::american\ntag::actors\ntag::boys\ntag...
In [9]:
train["n_cate"] = train.attribute_ids.apply(lambda x: len(x.split(" ")))
train.head()
Out[9]:
id attribute_ids labels n_cate
0 1000483014d91860 147 616 813 culture::french\ntag::dogs\ntag::men 3
1 1000fe2e667721fe 51 616 734 813 culture::british\ntag::dogs\ntag::horses\ntag:... 4
2 1001614cb89646ee 776 tag::landscapes 1
3 10041eb49b297c08 51 671 698 813 1092 culture::british\ntag::flowers\ntag::girls\nta... 5
4 100501c227f8beea 13 404 492 903 1093 culture::american\ntag::actors\ntag::boys\ntag... 5
In [10]:
# TODO: maybe multi cultures here.

def get_culture(x):
    try: 
        return re.search(r"culture::(\w+)", x).group(1)
    except:
        return "none"

train["culture"] = train.labels.apply(lambda x: get_culture(x))
train.head()
Out[10]:
id attribute_ids labels n_cate culture
0 1000483014d91860 147 616 813 culture::french\ntag::dogs\ntag::men 3 french
1 1000fe2e667721fe 51 616 734 813 culture::british\ntag::dogs\ntag::horses\ntag:... 4 british
2 1001614cb89646ee 776 tag::landscapes 1 none
3 10041eb49b297c08 51 671 698 813 1092 culture::british\ntag::flowers\ntag::girls\nta... 5 british
4 100501c227f8beea 13 404 492 903 1093 culture::american\ntag::actors\ntag::boys\ntag... 5 american
In [11]:
def get_num_tag(x):
    return len(re.findall(r"tag::(\w+)", x))

train["n_tag"] = train.labels.apply(lambda x: get_num_tag(x))
train.head()
Out[11]:
id attribute_ids labels n_cate culture n_tag
0 1000483014d91860 147 616 813 culture::french\ntag::dogs\ntag::men 3 french 2
1 1000fe2e667721fe 51 616 734 813 culture::british\ntag::dogs\ntag::horses\ntag:... 4 british 3
2 1001614cb89646ee 776 tag::landscapes 1 none 1
3 10041eb49b297c08 51 671 698 813 1092 culture::british\ntag::flowers\ntag::girls\nta... 5 british 4
4 100501c227f8beea 13 404 492 903 1093 culture::american\ntag::actors\ntag::boys\ntag... 5 american 4
In [12]:
num_not_culture = train[train.culture == "none"].shape[0]

print("{} ({:.2f}%) not have a culture categroy".format(num_not_culture, 
                                                        num_not_culture *100 / train.shape[0]))
11872 (10.87%) not have a culture categroy
In [13]:
num_not_tag = train[train.n_tag == 0].shape[0]

print("{} ({:.2f}%) not have a tag categroy".format(num_not_tag, 
                                                    num_not_tag *100 / train.shape[0]))
220 (0.20%) not have a tag categroy
In [14]:
_ = train.n_cate.value_counts().sort_index().plot.bar()
In [15]:
_ = train.n_tag.value_counts().sort_index().plot.bar()
In [16]:
_ = train.culture.value_counts()[:10].sort_index().plot.bar()
In [17]:
def show_images(n_to_show, is_train=True):
    img_dir = "../input/imet-2019-fgvc6/train/" if is_train else "../input/imet-2019-fgvc6/test/"
    plt.figure(figsize=(16,16))
    images = os.listdir(img_dir)[:n_to_show]
    for i in range(n_to_show):
        img = mpimg.imread(img_dir + images[i])
        plt.subplot(n_to_show/2+1, 2, i+1)
        if is_train:
            plt.title(train[train.id == images[i].split(".")[0]].labels.values[0])
        plt.imshow(img)
        plt.axis('off')
In [18]:
show_images(6)
In [19]:
show_images(6, is_train=False)

prepare X and y

In [20]:
def obtain_y(ids):
    y = np.zeros(num_classes)
    for idx in ids.split(" "):
        y[int(idx)] = 1
    return y
In [21]:
paths = ["../input/imet-2019-fgvc6/train/{}.png".format(x) for x in train.id.values]
targets = np.array([obtain_y(y) for y in train.attribute_ids.values])

image generator

In [22]:
class ImageGenerator(Sequence):
    
    def __init__(self, paths, targets, batch_size, shape, augment=False):
        self.paths = paths
        self.targets = targets
        self.batch_size = batch_size
        self.shape = shape
        self.augment = augment
        
    def __len__(self):
        return int(np.ceil(len(self.paths) / float(self.batch_size)))
    
    def __getitem__(self, idx):
        batch_paths = self.paths[idx * self.batch_size : (idx + 1) * self.batch_size]
        x = np.zeros((len(batch_paths), self.shape[0], self.shape[1], self.shape[2]), dtype=np.float32)
        y = np.zeros((self.batch_size, num_classes, 1))
        for i, path in enumerate(batch_paths):
            x[i] = self.__load_image(path)
        y = self.targets[idx * self.batch_size : (idx + 1) * self.batch_size]
        return x, y
    
    def __iter__(self):
        for item in (self[i] for i in range(len(self))):
            yield item
            
    def __load_image(self, path):
        image = cv2.imread(path)
        image = cv2.resize(image, (self.shape[0], self.shape[1]))
        image = preprocess_input(image)
        if self.augment:
            seq = iaa.Sequential([
                iaa.OneOf([
                    iaa.Fliplr(0.5),
                    iaa.Flipud(0.5),
                    iaa.CropAndPad(percent=(-0.25, 0.25)),
                    iaa.Crop(percent=(0, 0.1)),
                    iaa.Sometimes(0.5,
                        iaa.GaussianBlur(sigma=(0, 0.5))
                    ),
                    iaa.Affine(
                        scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
                        translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
                        rotate=(-180, 180),
                        shear=(-8, 8)
                    )
                ])
            ], random_order=True)
            image = seq.augment_image(image)
        return image

train test split

In [23]:
batch_size = 64

train_paths, val_paths, train_targets, val_targets = train_test_split(paths, 
                                                                      targets,
                                                                      test_size=0.1, 
                                                                      random_state=1029)

train_gen = ImageGenerator(train_paths, train_targets, batch_size=batch_size, shape=(224,224,3), augment=False)
val_gen = ImageGenerator(val_paths, val_targets, batch_size=batch_size, shape=(224,224,3), augment=False)

build model

In [24]:
inp = Input((224, 224, 3))
backbone = DenseNet121(input_tensor=inp,
                       weights="../input/densenet-keras/DenseNet-BC-121-32-no-top.h5",
                       include_top=False)
x = backbone.output
x = GlobalAveragePooling2D()(x)
x = Dense(2048, activation="relu")(x)
x = Dropout(0.5)(x)
outp = Dense(num_classes, activation="sigmoid")(x)

model = Model(inp, outp)
WARNING:tensorflow:From /opt/conda/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
WARNING:tensorflow:From /opt/conda/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:3445: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.

f_score for Keras

In [25]:
def f_score(y_true, y_pred, threshold=0.1, beta=2):
    tp = tp_score(y_true, y_pred, threshold)
    fp = fp_score(y_true, y_pred, threshold)
    fn = fn_score(y_true, y_pred, threshold)
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    return (1+beta**2) * ((precision * recall) / ((beta**2)*precision + recall))


def tp_score(y_true, y_pred, threshold=0.1):
    tp_3d = K.concatenate(
        [
            K.cast(K.expand_dims(K.flatten(y_true)), 'bool'),
            K.cast(K.expand_dims(K.flatten(K.greater(y_pred, K.constant(threshold)))), 'bool'),
            K.cast(K.ones_like(K.expand_dims(K.flatten(y_pred))), 'bool')
        ], axis=1
    )
    tp = K.sum(K.cast(K.all(tp_3d, axis=1), 'int32'))
    return tp


def fp_score(y_true, y_pred, threshold=0.1):
    fp_3d = K.concatenate(
        [
            K.cast(K.expand_dims(K.flatten(K.abs(y_true - K.ones_like(y_true)))), 'bool'),
            K.cast(K.expand_dims(K.flatten(K.greater(y_pred, K.constant(threshold)))), 'bool'),
            K.cast(K.ones_like(K.expand_dims(K.flatten(y_pred))), 'bool')
        ], axis=-1
    )
    fp = K.sum(K.cast(K.all(fp_3d, axis=1), 'int32'))
    return fp


def fn_score(y_true, y_pred, threshold=0.1):
    fn_3d = K.concatenate(
        [
            K.cast(K.expand_dims(K.flatten(y_true)), 'bool'),
            K.cast(K.expand_dims(K.flatten(K.abs(K.cast(K.greater(y_pred, K.constant(threshold)), 'float') - K.ones_like(y_pred)))), 'bool'),
            K.cast(K.ones_like(K.expand_dims(K.flatten(y_pred))), 'bool')
        ], axis=1
    )
    fn = K.sum(K.cast(K.all(fn_3d, axis=1), 'int32'))
    return fn


def precision_score(y_true, y_pred, threshold=0.1):
    tp = tp_score(y_true, y_pred, threshold)
    fp = fp_score(y_true, y_pred, threshold)
    return tp / (tp + fp)


def recall_score(y_true, y_pred, threshold=0.1):
    tp = tp_score(y_true, y_pred, threshold)
    fn = fn_score(y_true, y_pred, threshold)
    return tp / (tp + fn)
In [26]:
checkpoint = ModelCheckpoint('model.h5', 
                             monitor='val_f_score', 
                             verbose=1, 
                             save_best_only=True, 
                             mode='max', 
                             save_weights_only=True)

reduce_lr = ReduceLROnPlateau(monitor='val_f_score', factor=0.2,
                              patience=1, verbose=1, mode='max',
                              min_delta=0.0001, cooldown=2, min_lr=1e-7)

early_stop = EarlyStopping(monitor="val_f_score", mode="max", patience=5)
In [27]:
model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(1e-03),
    metrics=['acc', f_score])
In [28]:
history = model.fit_generator(generator=train_gen, 
                              steps_per_epoch=len(train_gen), 
                              validation_data=val_gen, 
                              validation_steps=len(val_gen),
                              epochs=15,
                              callbacks=[checkpoint, reduce_lr, early_stop])
WARNING:tensorflow:From /opt/conda/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
Epoch 1/15
1537/1537 [==============================] - 1093s 711ms/step - loss: 0.0164 - acc: 0.9967 - f_score: nan - val_loss: 0.0140 - val_acc: 0.9971 - val_f_score: 0.2055

Epoch 00001: val_f_score improved from -inf to 0.20554, saving model to model.h5
Epoch 2/15
1537/1537 [==============================] - 1062s 691ms/step - loss: 0.0140 - acc: 0.9971 - f_score: 0.2247 - val_loss: 0.0139 - val_acc: 0.9971 - val_f_score: 0.2226

Epoch 00002: val_f_score improved from 0.20554 to 0.22258, saving model to model.h5
Epoch 3/15
1537/1537 [==============================] - 1059s 689ms/step - loss: 0.0135 - acc: 0.9972 - f_score: 0.2496 - val_loss: 0.0131 - val_acc: 0.9972 - val_f_score: 0.2753

Epoch 00003: val_f_score improved from 0.22258 to 0.27526, saving model to model.h5
Epoch 4/15
1537/1537 [==============================] - 1051s 684ms/step - loss: 0.0129 - acc: 0.9972 - f_score: 0.2817 - val_loss: 0.0133 - val_acc: 0.9972 - val_f_score: 0.2852

Epoch 00004: val_f_score improved from 0.27526 to 0.28519, saving model to model.h5
Epoch 5/15
1537/1537 [==============================] - 1047s 681ms/step - loss: 0.0123 - acc: 0.9972 - f_score: 0.3133 - val_loss: 0.0121 - val_acc: 0.9972 - val_f_score: 0.3385

Epoch 00005: val_f_score improved from 0.28519 to 0.33846, saving model to model.h5
Epoch 6/15
1537/1537 [==============================] - 1013s 659ms/step - loss: 0.0119 - acc: 0.9972 - f_score: 0.3381 - val_loss: 0.0120 - val_acc: 0.9972 - val_f_score: 0.3491

Epoch 00006: val_f_score improved from 0.33846 to 0.34914, saving model to model.h5
Epoch 7/15
1537/1537 [==============================] - 985s 641ms/step - loss: 0.0115 - acc: 0.9973 - f_score: 0.3603 - val_loss: 0.0115 - val_acc: 0.9973 - val_f_score: 0.3753

Epoch 00007: val_f_score improved from 0.34914 to 0.37526, saving model to model.h5
Epoch 8/15
 620/1537 [===========>..................] - ETA: 8:49 - loss: 0.0112 - acc: 0.9973 - f_score: 0.3747
In [29]:
plt.rcParams['figure.figsize'] = (6,6)

fscore = history.history['f_score']
val_fscore = history.history['val_f_score']
epochs = range(1, len(fscore) + 1)

plt.title('Training and validation accuracy')
plt.plot(epochs, fscore, 'red', label='Training f_score')
plt.plot(epochs, val_fscore, 'blue', label='Validation f_score')
plt.legend()

plt.show()
In [30]:
model.load_weights("./model.h5")

prediction

test image generator

In [31]:
class TestImageGenerator(Sequence):
    
    def __init__(self, paths, batch_size, shape):
        self.paths = paths
        self.targets = targets
        self.batch_size = batch_size
        self.shape = shape
        
    def __len__(self):
        return int(np.ceil(len(self.paths) / float(self.batch_size)))
    
    def __getitem__(self, idx):
        batch_paths = self.paths[idx * self.batch_size : (idx + 1) * self.batch_size]
        x = np.zeros((len(batch_paths), self.shape[0], self.shape[1], self.shape[2]), dtype=np.float32)
        for i, path in enumerate(batch_paths):
            x[i] = self.__load_image(path)
        return x
    
    def __iter__(self):
        for item in (self[i] for i in range(len(self))):
            yield item
            
    def __load_image(self, path):
        image = cv2.imread(path)
        image = cv2.resize(image, (self.shape[0], self.shape[1]))
        image = preprocess_input(image)
        return image

do prediction

In [32]:
test_paths = ["../input/imet-2019-fgvc6/test/{}.png".format(x) for x in submission.id.values]
test_gen = TestImageGenerator(test_paths, batch_size=batch_size, shape=(224,224,3))

predicts = model.predict_generator(test_gen, verbose=1)
117/117 [==============================] - 60s 509ms/step

check our prediction

In [33]:
n = 6
threshold = 0.15

img = cv2.imread(test_paths[n])
plt.imshow(img)

a = np.array(predicts[n]>threshold, dtype=np.int8)
b = np.where(a==1)[0]
for idx in b.tolist():
    print(labels_map_rev[idx])
culture::japan
tag::horse riding
tag::men
tag::women
In [34]:
train.n_tag.describe()
Out[34]:
count    109237.000000
mean          2.187794
std           1.213623
min           0.000000
25%           1.000000
50%           2.000000
75%           3.000000
max           9.000000
Name: n_tag, dtype: float64
In [35]:
def classifier(probs):
    
    culture = None
    tags = None
    arr = probs.argsort()
    
    culture_threshold = 0.1
    tag_max_threshold = 0.55
    
    n_min_tag = 1
    n_max_tag = 3
    
    # first: find culture category by sorting probs
    
    for idx in arr[::-1]:
        if labels_map_rev[idx].startswith("culture") and probs[idx] > culture_threshold:
            culture = str(idx)
            break           # TODO: maybe multi culture here.
    
    # second: find tags by different threshold
    for threshold in np.arange(0.05, tag_max_threshold, 0.05):
        n = 0                # stores len(tags)
        tags_list = list()   # stores tags
        
        a = np.array(probs > threshold, dtype=np.int8)
        b = np.where(a == 1)[0]
        for idx in b.tolist():
            if labels_map_rev[idx].startswith("tag"):
                n += 1
                tags_list.append(str(idx))
        if n >= n_min_tag and n <= n_max_tag:
            tags = tags_list
            break
    
    # finally packs our answer
    answer = list()
    if culture:
        answer.append(culture)
    if tags:
        for t in tags:
            answer.append(t)
            
    return " ".join(answer)
In [36]:
predictions = list()

for probs in tqdm(predicts):
    predictions.append(classifier(probs))
100%|██████████| 7443/7443 [00:01<00:00, 4622.43it/s]
In [37]:
submission["attribute_ids"] = np.array(predictions)
submission.head()
Out[37]:
id attribute_ids
0 10023b2cc4ed5f68 369 587 766 1059
1 100fbe75ed8fd887 188 1039 1085
2 101b627524a04f19 369 498 728 813
3 10234480c41284c6 147 776 813 1046
4 1023b0e2636dcea8 147 671 813 1092
In [38]:
submission_df = submission.copy()
submission_df.n_cate = submission.attribute_ids.apply(lambda x: len(x.split(" ")))
_ = submission_df.n_cate.value_counts().sort_index().plot.bar()

submission

In [39]:
submission.to_csv('submission.csv', index=False)
In [40]:
submission.shape
Out[40]:
(7443, 2)
In [41]:
!head submission.csv
id,attribute_ids
10023b2cc4ed5f68,369 587 766 1059
100fbe75ed8fd887,188 1039 1085
101b627524a04f19,369 498 728 813
10234480c41284c6,147 776 813 1046
1023b0e2636dcea8,147 671 813 1092
1039cd6cf85845c,13 405 896 1092
103a5b3f83fbe88,194 733 813 1092
10413aaae8d6a9a2,147 813 1046 1092
10423822b93a65ab,147