Basic Fast AI Size 320 - Resnet152

From: https://www.kaggle.com/rftexas/basic-fast-ai-size-320-resnet152

Author: PAB97

Score: 0.246

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# Listing that directory shows which datasets are attached to this notebook.

import os
print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.
['resnet152', 'imet-2019-fgvc6']
In [2]:
from fastai.vision import *
from fastai.metrics import fbeta
In [3]:
from torch.utils import model_zoo

# Stage the ResNet-152 weights from the attached dataset into ./models,
# which is where the load_url override defined below looks for them.
Path('models').mkdir(exist_ok=True)
!cp '../input/resnet152/resnet152.pth' 'models/'

def load_url(*args, **kwargs):
    """Offline stand-in for ``model_zoo.load_url`` that reads local weights.

    Positional arguments (normally the download URL) are ignored; the
    checkpoint is always read from ``models/resnet152.pth`` relative to the
    current working directory.

    Args:
        *args: Ignored.
        **kwargs: Only ``map_location`` is honoured and forwarded to
            ``torch.load``; every other keyword is ignored.

    Returns:
        Whatever ``torch.load`` deserialises from the weights file.

    Raises:
        FileNotFoundError: If ``models/resnet152.pth`` does not exist.
    """
    weights_path = Path('models') / 'resnet152.pth'
    if not weights_path.is_file():
        # Name the missing path so the failure is actionable.
        raise FileNotFoundError(
            f"Pretrained weights not found at '{weights_path}'; "
            "copy resnet152.pth into models/ first"
        )
    # map_location defaults to None, which is torch.load's own default.
    return torch.load(weights_path, map_location=kwargs.get('map_location'))


# Route every model_zoo download through the local loader.
model_zoo.load_url = load_url
In [4]:
# Root of the competition data; train.csv, sample_submission.csv and the
# test folder are all read relative to this path below.
path = Path("../input/imet-2019-fgvc6")
In [5]:
# Number of entries in the test folder.
len((path/"test").ls())
Out[5]:
7443
In [6]:
# Batch size handed to .databunch(bs=bs) when the data is assembled.
bs = 32
In [7]:
# NOTE: nrows=4000 means only the first 4000 rows of train.csv are used.
train_df = pd.read_csv(path/"train.csv", nrows=4000)
# The sample submission supplies the test ids; its attribute_ids column is
# overwritten with predictions at the end of the notebook.
test_df = pd.read_csv(path/"sample_submission.csv")
In [8]:
# Augmentation settings passed to fastai's get_transforms. The extra
# symmetric_warp is created with zero magnitude and p=0.
tfms = get_transforms(
    do_flip=True,
    flip_vert=True,
    max_rotate=90.0,
    max_zoom=1.5,
    max_lighting=0.2,
    xtra_tfms=[
        (symmetric_warp(magnitude=(-0, 0), p=0)),
    ],
)
In [9]:
# Image lists for the two splits: ids come from the 'id' column and map to
# '<folder>/<id>.png' under `path`.
train = ImageList.from_df(train_df, path=path, cols='id', folder='train', suffix='.png')
test = ImageList.from_df(test_df, path=path, cols='id', folder='test', suffix='.png')

# Hold out a random 10% (seed 42) for validation, read space-delimited
# multi-label targets, attach the test list, then transform, batch and normalize.
# NOTE(review): the notebook title says "Size 320" but size=250 is used here.
data = (
    train
    .split_by_rand_pct(0.1, seed=42)
    .label_from_df(cols='attribute_ids', label_delim=' ')
    .add_test(test)
    .transform(tfms, size=250)
    .databunch(bs=bs)
    .normalize(imagenet_stats)
)
In [10]:
# Display a sample batch from the assembled data.
data.show_batch()

Building a model

In [11]:
class FocalLoss(nn.Module):
    """Focal loss for multi-label targets, computed directly from logits.

    The element-wise binary cross-entropy (in its numerically stable form) is
    scaled by ``exp(gamma * logsigmoid(-x * (2t - 1)))``, summed over dim 1
    and then averaged.
    """

    def __init__(self, gamma=2):
        super().__init__()
        self.gamma = gamma

    def forward(self, input, target):
        """Return the scalar focal loss of ``input`` logits against ``target``.

        Raises:
            ValueError: if ``target`` and ``input`` differ in size.
        """
        if target.size() != input.size():
            raise ValueError("Target size ({}) must be the same as input size ({})"
                             .format(target.size(), input.size()))

        # Stable BCE-with-logits: shift by max(-x, 0) before exponentiating.
        shift = (-input).clamp(min=0)
        log_term = ((-shift).exp() + (-input - shift).exp()).log()
        bce = input - input * target + shift + log_term

        # Modulating factor: targets are mapped from {0, 1} to {-1, +1} so the
        # log-sigmoid is taken of the logit signed against the target.
        signed_target = target * 2.0 - 1.0
        log_weight = F.logsigmoid(-input * signed_target)
        weighted = (log_weight * self.gamma).exp() * bce

        per_sample = weighted.sum(dim=1)
        return per_sample.mean()
In [12]:
# ResNet-152 learner using FocalLoss as the criterion and fbeta as the metric.
learn = cnn_learner(data, models.resnet152, metrics=[fbeta], loss_func=FocalLoss(), model_dir="/tmp/models/")
# Freeze before the first training stage; learn.unfreeze() is called later.
learn.freeze()
In [13]:
# Run the learning-rate finder on the frozen model and plot what it recorded.
learn.lr_find()
learn.recorder.plot()
LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.
In [14]:
# Stage 1: 10 one-cycle epochs on the frozen model.
# NOTE(review): in the logged run valid_loss spikes to 22.35 at epoch 4 and
# fbeta falls from 0.224 to 0.134 before recovering — the learning-rate range
# may be too high; confirm against the LR-finder plot.
learn.fit_one_cycle(10, slice(1e-2, 1e-1), wd=0.1)
epoch train_loss valid_loss fbeta time
0 20.372589 6.125741 0.190544 01:08
1 6.764709 4.699189 0.223991 01:06
2 6.633374 6.480094 0.144954 01:06
3 6.983151 6.353492 0.139518 01:04
4 6.815980 22.351923 0.133974 01:05
5 6.497707 5.644416 0.150666 01:05
6 5.837685 5.015705 0.200896 01:05
7 5.270652 4.759688 0.210985 01:05
8 4.862199 4.527580 0.214397 01:03
9 4.664834 4.428251 0.229239 01:06
In [15]:
# Unfreeze the model ahead of the second training stage.
learn.unfreeze()
In [16]:
# Re-run the learning-rate finder now that the model is unfrozen.
learn.lr_find()
learn.recorder.plot()
LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.
In [17]:
# Stage 2: 10 one-cycle epochs on the unfrozen model.
# NOTE(review): in the logged run valid_loss stays within 4.42-4.43 and fbeta
# around 0.23 for all 10 epochs, so this stage barely changes the model —
# the 1e-6 learning rate is probably too small; confirm.
learn.fit_one_cycle(10, slice(1e-6), wd=0.1)
epoch train_loss valid_loss fbeta time
0 4.527359 4.430551 0.227902 01:12
1 4.566643 4.426271 0.230025 01:12
2 4.549228 4.432484 0.228966 01:12
3 4.559623 4.427565 0.231223 01:13
4 4.563221 4.426656 0.230305 01:11
5 4.571761 4.429310 0.228804 01:12
6 4.563755 4.424096 0.229292 01:12
7 4.563304 4.431148 0.228271 01:13
8 4.546081 4.426924 0.230728 01:12
9 4.539570 4.422722 0.228978 01:12
In [18]:
# Plot the losses captured by the learner's recorder.
learn.recorder.plot_losses()

Inference

In [19]:
def find_best_fixed_threshold(preds, targs, do_plot=True):
    """Grid-search a single decision threshold that maximises ``fbeta``.

    Args:
        preds: Model outputs, forwarded unchanged to ``fbeta``.
        targs: Ground-truth targets, forwarded unchanged to ``fbeta``.
        do_plot: When true, plot score against threshold and mark the best one.

    Returns:
        The threshold from ``np.arange(0, 0.5, 0.01)`` with the highest score
        (the first such threshold on ties, as chosen by ``argmax``).
    """
    score = []
    thrs = np.arange(0, 0.5, 0.01)
    for thr in progress_bar(thrs):
        # Score the arguments passed in, so the result never depends on a
        # notebook-level global such as `valid_preds`.
        score.append(fbeta(preds, targs, thresh=thr))
    score = np.array(score)
    pm = score.argmax()
    best_thr, best_score = thrs[pm], score[pm].item()
    print(f'thr={best_thr:.3f}', f'F2={best_score:.3f}')
    if do_plot:
        plt.plot(thrs, score)
        plt.vlines(x=best_thr, ymin=score.min(), ymax=score.max())
        plt.text(best_thr+0.03, best_score-0.01, f'$F_{2}=${best_score:.3f}', fontsize=14);
        plt.show()
    return best_thr

# Two-column integer lookup built from the training labels' c2i mapping:
# column 0 holds the index, column 1 the class label cast to int.
# join_preds indexes rows of this table by position, which assumes row k
# holds index k — TODO confirm c2i iterates in index order.
i2c = np.array([[i, c] for c, i in learn.data.train_ds.y.c2i.items()]).astype(int) # indices to class number correspondence

def join_preds(preds, thr):
    """Turn raw predictions into space-separated class-label strings.

    ``preds[0]`` is passed through a sigmoid and compared with ``thr``; for
    every row, the positions above the threshold are looked up in column 1 of
    the module-level ``i2c`` table and joined with single spaces.

    Returns:
        A list with one string per row of ``preds[0]``.
    """
    active = (preds[0].sigmoid() > thr).long()
    joined = []
    for row in active:
        hit_positions = np.where(row == 1)[0]
        class_ids = i2c[hit_positions, 1].astype(str)
        joined.append(' '.join(class_ids))
    return joined
In [20]:
# Predictions on the validation split, unpacked positionally into the
# threshold search.
valid_preds = learn.get_preds(DatasetType.Valid)
best_thr = find_best_fixed_threshold(*valid_preds)
100.00% [50/50 00:00<00:00]
thr=0.240 F2=0.254
In [21]:
# Predict on the test set, then overwrite the submission frame's
# attribute_ids column with the labels selected at best_thr.
test_preds = learn.get_preds(ds_type=DatasetType.Test)
test_df.attribute_ids = join_preds(test_preds, best_thr)
In [22]:
# Preview the first rows of the submission frame.
test_df.head()
Out[22]:
id attribute_ids
0 10023b2cc4ed5f68 1059 121 13 147 194 369 813
1 100fbe75ed8fd887 1059 1092 121 13 194 369 51 79 813 896
2 101b627524a04f19 1059 1092 13 147 156 189 194 51 671 79 813
3 10234480c41284c6 1046 1092 13 147 156 189 259 501 51 744 813
4 1023b0e2636dcea8 1059 1092 121 13 147 156 161 188 189 194 369 4...
In [23]:
# Write the submission file to the working directory, omitting the index column.
test_df.to_csv('submission.csv', index=False)