iMet densenet/se_resnext101_32x4d

From: https://www.kaggle.com/autuanliuyc/imet-densenet-se-resnext101-32x4d

Author: 云不懂风吹

Score: 0.383

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.
['pytorch-model-zoo', 'imet-2019-fgvc6']
In [2]:
import fastai
from fastai.vision import *
fastai.__version__
Out[2]:
'1.0.51'
  • Model: densenet201/se_resnext101_32x4d
  • Loss: Focal loss
  • Metric: $F_{2}$ score
In [3]:
torch.cuda.is_available()
Out[3]:
True

数据

In [4]:
d_path = Path('../input/imet-2019-fgvc6')
m_path = Path('../input/pytorch-model-zoo')

训练数据

In [5]:
train_df = pd.read_csv(d_path/'train.csv')
train_df.head()
Out[5]:
id attribute_ids
0 1000483014d91860 147 616 813
1 1000fe2e667721fe 51 616 734 813
2 1001614cb89646ee 776
3 10041eb49b297c08 51 671 698 813 1092
4 100501c227f8beea 13 404 492 903 1093

标签

In [6]:
labels_df = pd.read_csv(d_path/'labels.csv')
labels_df.head()
Out[6]:
attribute_id attribute_name
0 0 culture::abruzzi
1 1 culture::achaemenid
2 2 culture::aegean
3 3 culture::afghan
4 4 culture::after british
In [7]:
test_df = pd.read_csv(d_path/'sample_submission.csv')
test_df.head()
Out[7]:
id attribute_ids
0 10023b2cc4ed5f68 0 1 2
1 100fbe75ed8fd887 0 1 2
2 101b627524a04f19 0 1 2
3 10234480c41284c6 0 1 2
4 1023b0e2636dcea8 0 1 2

数据块

In [8]:
# SIZE = 224
# BATCH = 64
In [9]:
tfms = get_transforms(max_lighting=0.1, max_zoom=1.05, max_warp=0.1, xtra_tfms=[(symmetric_warp(magnitude=(-0,0), p=0)),])
In [10]:
train = ImageList.from_df(train_df, path=d_path, cols='id', folder='train', suffix='.png') 
test = ImageList.from_df(test_df, path=d_path, cols='id', folder='test', suffix='.png') 
# data = (train.split_by_rand_pct(0.2, seed=1234)
#         .label_from_df(cols='attribute_ids', label_delim=' ')
#         .add_test(test)
#         .transform(tfms, size=SIZE, resize_method=ResizeMethod.PAD, padding_mode='border')
#         .databunch(path=Path('.'), bs=BATCH, device=torch.device('cuda:0')).normalize(imagenet_stats))
In [11]:
# data.show_batch(rows=3)

FocalLoss

In [12]:
# Source: https://www.kaggle.com/c/human-protein-atlas-image-classification/discussion/78109
class FocalLoss(nn.Module):
    """Focal loss computed on raw logits against 0/1 targets.

    The element-wise binary cross-entropy is scaled by a modulating factor
    ``exp(gamma * logsigmoid(-logit * (2 * target - 1)))``. For 2-D input the
    per-element losses are summed over dim 1 before the mean is taken;
    otherwise the mean is taken over all elements.

    Args:
        gamma: Exponent applied to the modulating factor (default 2).
    """

    def __init__(self, gamma=2):
        super().__init__()
        self.gamma = gamma

    def forward(self, logit, target):
        """Return the scalar focal loss for `logit` and same-shaped `target`."""
        target = target.float()

        # Shifting by max(-logit, 0) keeps both exponentials below from overflowing.
        shift = (-logit).clamp(min=0)
        log_term = ((-shift).exp() + (-logit - shift).exp()).log()
        # Binary cross-entropy with logits, written out by hand.
        bce = logit - logit * target + shift + log_term

        # (2 * target - 1) maps targets {0, 1} to signs {-1, +1}.
        signed_target = target * 2.0 - 1.0
        modulator = (F.logsigmoid(-logit * signed_target) * self.gamma).exp()

        focal = modulator * bce
        if focal.dim() == 2:
            focal = focal.sum(dim=1)
        return focal.mean()

学习器

In [13]:
from collections import OrderedDict
import math
import torch.nn as nn
from torch.utils import model_zoo

# Maps each architecture name to the checkpoint file under `m_path` that its
# builder function loads. The key is called 'url' but holds a local path.
pretrained_settings = {
    arch: {'url': m_path/weights_file}
    for arch, weights_file in (
        ('senet154', 'senet154-c7b49a05.pth'),
        ('se_resnet152', 'se_resnet152-d17c99b7.pth'),
        ('se_resnext101_32x4d', 'se_resnext101_32x4d-3b2fe3d8.pth'),
    )
}

class SEModule(nn.Module):
    """Squeeze-and-excitation gate that rescales each channel of its input.

    Args:
        channels: Number of input (and output) channels.
        reduction: Divisor for the hidden channel count (``channels // reduction``).
    """

    def __init__(self, channels, reduction):
        super().__init__()
        hidden = channels // reduction
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc1 = nn.Conv2d(channels, hidden, kernel_size=1, padding=0)
        self.relu = nn.ReLU(inplace=True)
        self.fc2 = nn.Conv2d(hidden, channels, kernel_size=1, padding=0)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return `x` multiplied by a per-channel gate in (0, 1)."""
        # Pool to 1x1, then squeeze -> ReLU -> expand -> sigmoid.
        gate = self.avg_pool(x)
        for stage in (self.fc1, self.relu, self.fc2, self.sigmoid):
            gate = stage(gate)
        return x * gate

class Bottleneck(nn.Module):
    """Shared forward pass for the SE bottleneck variants.

    Subclasses are expected to set ``conv1``/``bn1``, ``conv2``/``bn2``,
    ``conv3``/``bn3``, ``relu``, ``se_module`` and ``downsample`` (a module or
    ``None``) in their ``__init__``.
    """

    def forward(self, x):
        """Run the three conv/bn stages, apply the SE gate and add the shortcut."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # The shortcut is the raw input unless a projection was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)

        return self.relu(self.se_module(out) + shortcut)

class SEBottleneck(Bottleneck):
    """Bottleneck used by `senet154`.

    The 1x1 reduce conv outputs ``planes * 2`` channels; the grouped 3x3 conv
    (which carries the stride) and the final 1x1 conv output ``planes * 4``.
    """
    expansion = 4

    def __init__(self, inplanes, planes, groups, reduction, stride=1, downsample=None):
        super().__init__()
        mid_channels = planes * 2
        out_channels = planes * 4
        self.conv1 = nn.Conv2d(inplanes, mid_channels, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(mid_channels)
        self.conv2 = nn.Conv2d(mid_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1, groups=groups, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.conv3 = nn.Conv2d(out_channels, out_channels, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.se_module = SEModule(out_channels, reduction=reduction)
        self.downsample = downsample
        self.stride = stride


class SEResNetBottleneck(Bottleneck):
    """Bottleneck used by `se_resnet152`.

    Unlike the other variants, the stride is applied in the first 1x1 conv
    rather than in the 3x3 conv.
    """
    expansion = 4

    def __init__(self, inplanes, planes, groups, reduction, stride=1, downsample=None):
        super().__init__()
        out_channels = planes * 4
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False, stride=stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1,
                               groups=groups, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_channels, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.se_module = SEModule(out_channels, reduction=reduction)
        self.downsample = downsample
        self.stride = stride


class SEResNeXtBottleneck(Bottleneck):
    """Bottleneck used by `se_resnext101_32x4d`.

    The inner width is ``floor(planes * base_width / 64) * groups``; the
    grouped 3x3 conv carries the stride.
    """
    expansion = 4

    def __init__(self, inplanes, planes, groups, reduction, stride=1, downsample=None, base_width=4):
        super().__init__()
        per_group = math.floor(planes * (base_width / 64))
        width = per_group * groups
        out_channels = planes * 4
        self.conv1 = nn.Conv2d(inplanes, width, kernel_size=1, bias=False, stride=1)
        self.bn1 = nn.BatchNorm2d(width)
        self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride,
                               padding=1, groups=groups, bias=False)
        self.bn2 = nn.BatchNorm2d(width)
        self.conv3 = nn.Conv2d(width, out_channels, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.se_module = SEModule(out_channels, reduction=reduction)
        self.downsample = downsample
        self.stride = stride


class SENet(nn.Module):
    """Squeeze-and-Excitation network: stem, four residual stages, classifier.

    The model is ``layer0`` (stem), ``layer1`` .. ``layer4`` (built with 64,
    128, 256 and 512 base planes), global average pooling, optional dropout
    and the ``last_linear`` classifier.

    Args:
        block: Block class used in every stage. It is instantiated as
            ``block(inplanes, planes, groups, reduction, stride, downsample)``
            and must expose an ``expansion`` class attribute.
        layers: Four ints giving the number of blocks in layer1..layer4.
        groups: Forwarded to every block.
        reduction: Forwarded to every block.
        dropout_p: Dropout probability applied before the classifier;
            ``None`` disables dropout.
        inplanes: Number of channels produced by the stem.
        input_3x3: If true the stem is three 3x3 convolutions, otherwise a
            single 7x7 convolution.
        downsample_kernel_size: Kernel size of the shortcut projection used
            in layer2..layer4 (layer1 always uses 1).
        downsample_padding: Padding of that shortcut projection (layer1
            always uses 0).
        num_classes: Output size of ``last_linear``.
    """

    def __init__(self, block, layers, groups, reduction, dropout_p=0.2,
                 inplanes=128, input_3x3=True, downsample_kernel_size=3,
                 downsample_padding=1, num_classes=1000):
        super(SENet, self).__init__()
        # Running channel count; _make_layer updates it after every stage.
        self.inplanes = inplanes
        if input_3x3:
            layer0_modules = [
                ('conv1', nn.Conv2d(3, 64, 3, stride=2, padding=1, bias=False)),
                ('bn1', nn.BatchNorm2d(64)),
                ('relu1', nn.ReLU(inplace=True)),
                ('conv2', nn.Conv2d(64, 64, 3, stride=1, padding=1, bias=False)),
                ('bn2', nn.BatchNorm2d(64)),
                ('relu2', nn.ReLU(inplace=True)),
                ('conv3', nn.Conv2d(64, inplanes, 3, stride=1, padding=1, bias=False)),
                ('bn3', nn.BatchNorm2d(inplanes)),
                ('relu3', nn.ReLU(inplace=True)),
            ]
        else:
            layer0_modules = [
                ('conv1', nn.Conv2d(3, inplanes, kernel_size=7, stride=2, padding=3, bias=False)),
                ('bn1', nn.BatchNorm2d(inplanes)),
                ('relu1', nn.ReLU(inplace=True)),
            ]
        layer0_modules.append(('pool', nn.MaxPool2d(3, stride=2, ceil_mode=True)))
        self.layer0 = nn.Sequential(OrderedDict(layer0_modules))

        # layer1 keeps the resolution and always uses a 1x1 shortcut projection.
        self.layer1 = self._make_layer(
            block,
            planes=64,
            blocks=layers[0],
            groups=groups,
            reduction=reduction,
            downsample_kernel_size=1,
            downsample_padding=0
        )
        # layer2..layer4 each use stride 2 and the configurable shortcut projection.
        strided_stage = dict(
            stride=2,
            groups=groups,
            reduction=reduction,
            downsample_kernel_size=downsample_kernel_size,
            downsample_padding=downsample_padding
        )
        self.layer2 = self._make_layer(block, planes=128, blocks=layers[1], **strided_stage)
        self.layer3 = self._make_layer(block, planes=256, blocks=layers[2], **strided_stage)
        self.layer4 = self._make_layer(block, planes=512, blocks=layers[3], **strided_stage)

        # Adaptive pooling always yields a 1x1 map, so `last_linear` receives
        # 512 * expansion features for any input resolution. On a 7x7 feature
        # map it averages the same 49 values as the former AvgPool2d(7, stride=1),
        # which failed or mismatched the classifier for other input sizes.
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.dropout = nn.Dropout(dropout_p) if dropout_p is not None else None
        self.last_linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, blocks, groups, reduction, stride=1, downsample_kernel_size=1, downsample_padding=0):
        """Build one stage of `blocks` blocks and return it as ``nn.Sequential``.

        Only the first block receives `stride` and the shortcut projection;
        the remaining blocks use the block's defaults. ``self.inplanes`` is
        updated to ``planes * block.expansion`` for the next stage.
        """
        downsample = None
        # A projection is needed whenever the shortcut's shape would differ
        # from the block output (spatial stride or channel count change).
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=downsample_kernel_size, stride=stride, padding=downsample_padding, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, groups, reduction, stride, downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups, reduction))

        return nn.Sequential(*layers)

    def features(self, x):
        """Return the layer4 feature map for input batch `x`."""
        x = self.layer0(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        return x

    def logits(self, x):
        """Pool a feature map, apply optional dropout and classify it."""
        x = self.avg_pool(x)
        if self.dropout is not None:
            x = self.dropout(x)
        x = x.view(x.size(0), -1)
        x = self.last_linear(x)
        return x

    def forward(self, x):
        """Return class logits for input batch `x`."""
        x = self.features(x)
        x = self.logits(x)
        return x

def senet154(pretrained=False):
    """Build SENet-154 (SEBottleneck, stages [3, 8, 36, 3], 64 groups).

    When `pretrained` is true, the weights are loaded from the checkpoint
    listed under 'senet154' in `pretrained_settings`.
    """
    net = SENet(SEBottleneck, [3, 8, 36, 3], groups=64, reduction=16,
                dropout_p=0.2, num_classes=1000)
    if not pretrained:
        return net
    weights = torch.load(pretrained_settings['senet154']['url'])
    net.load_state_dict(weights)
    return net


def se_resnet152(pretrained=False):
    """Build SE-ResNet-152 (SEResNetBottleneck, stages [3, 8, 36, 3], 1 group).

    When `pretrained` is true, the weights are loaded from the checkpoint
    listed under 'se_resnet152' in `pretrained_settings`.
    """
    net = SENet(
        SEResNetBottleneck, [3, 8, 36, 3],
        groups=1, reduction=16,
        dropout_p=None, inplanes=64, input_3x3=False,
        downsample_kernel_size=1, downsample_padding=0,
        num_classes=1000,
    )
    if not pretrained:
        return net
    weights = torch.load(pretrained_settings['se_resnet152']['url'])
    net.load_state_dict(weights)
    return net


def se_resnext101_32x4d(pretrained=False):
    """Build SE-ResNeXt-101 32x4d (SEResNeXtBottleneck, stages [3, 4, 23, 3], 32 groups).

    When `pretrained` is true, the weights are loaded from the checkpoint
    listed under 'se_resnext101_32x4d' in `pretrained_settings`.
    """
    net = SENet(
        SEResNeXtBottleneck, [3, 4, 23, 3],
        groups=32, reduction=16,
        dropout_p=None, inplanes=64, input_3x3=False,
        downsample_kernel_size=1, downsample_padding=0,
        num_classes=1000,
    )
    if not pretrained:
        return net
    weights = torch.load(pretrained_settings['se_resnext101_32x4d']['url'])
    net.load_state_dict(weights)
    return net
In [14]:
# learn = cnn_learner(data, base_arch=models.densenet169, loss_func=FocalLoss(), metrics=fbeta).mixup()
# learn = cnn_learner(data, se_resnext101_32x4d, loss_func=FocalLoss(), metrics=fbeta)

训练模型

In [15]:
# learn.lr_find()
# learn.recorder.plot(suggestion=True)
In [16]:
# lr = 1e-2
In [17]:
# learn.fit_one_cycle(5, slice(lr))
In [18]:
# learn.unfreeze()
In [19]:
# learn.fit_one_cycle(3, slice(lr/1e4, lr/100))
In [20]:
# learn.save('s1')

更改图片大小继续训练

In [21]:
SIZE = 224
BATCH=32
In [22]:
# learn = None
# gc.collect()
# learn = cnn_learner(data, se_resnext101_32x4d, loss_func=FocalLoss(), metrics=fbeta).load('s1')
In [23]:
# Build the DataBunch used for training and inference: split `train` with a
# fixed seed (presumably a 20% random validation hold-out — confirm against the
# fastai docs), label from the space-delimited 'attribute_ids' column, attach
# `test` as the test set, apply `tfms` at SIZE, batch with BATCH and normalize
# with `imagenet_stats`.
# NOTE(review): the device is hard-coded to 'cuda:0', so this cell needs a GPU.
data1 = (train.split_by_rand_pct(0.2, seed=1234)
        .label_from_df(cols='attribute_ids', label_delim=' ')
        .add_test(test)
        .transform(tfms, size=SIZE)
        .databunch(path=Path('.'), bs=BATCH, device=torch.device('cuda:0')).normalize(imagenet_stats))
In [24]:
learn = cnn_learner(data1, se_resnext101_32x4d, loss_func=FocalLoss(), metrics=fbeta)
In [25]:
data1.show_batch(rows=3)
In [26]:
learn.data=data1
In [27]:
learn.freeze()
In [28]:
learn.lr_find()
learn.recorder.plot(suggestion=True)
LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.
Min numerical gradient: 2.29E-02
In [29]:
# Maximum learning rate passed to the fit_one_cycle calls below.
# NOTE(review): the LR finder output above reports "Min numerical gradient: 2.29E-02",
# about four orders of magnitude above this value, and the logged fbeta stays
# near 0.014 at this rate — confirm that 1e-6 is intended.
lr2 = 1e-6
In [30]:
learn.fit_one_cycle(5, slice(lr2))
20.00% [1/5 31:03<2:04:13]
epoch train_loss valid_loss fbeta time
0 441.142090 303.011688 0.014116 31:03

50.48% [1378/2730 13:13<12:58 435.5897]
In [31]:
learn.unfreeze()
In [32]:
learn.fit_one_cycle(3, slice(lr2/2.6**3, lr2/5))
33.33% [1/3 36:26<1:12:53]
epoch train_loss valid_loss fbeta time
0 415.335571 282.751648 0.014193 36:26

57.69% [1575/2730 18:32<13:36 413.3803]
In [33]:
learn.recorder.plot_losses()

预测

In [34]:
def find_best_fixed_threshold(preds, targs, do_plot=True):
    """Scan fixed thresholds and return the one with the highest `fbeta` score.

    Thresholds 0.00, 0.01, ..., 0.49 are each scored with
    ``fbeta(preds, targs, thresh=thr)``. The best threshold and its score are
    printed, and the score curve is plotted when `do_plot` is true.

    Args:
        preds: Model outputs, passed to `fbeta` unchanged.
        targs: Ground-truth targets, passed to `fbeta` unchanged.
        do_plot: Whether to draw the score-versus-threshold curve.

    Returns:
        The threshold (an element of the scanned grid) with the maximum score.
    """
    score = []
    thrs = np.arange(0, 0.5, 0.01)
    for thr in progress_bar(thrs):
        # Score the arguments that were passed in. The previous version read the
        # notebook-global `valid_preds` here and ignored `preds`/`targs`.
        score.append(fbeta(preds, targs, thresh=thr))
    score = np.array(score)
    pm = score.argmax()
    best_thr, best_score = thrs[pm], score[pm].item()
    print(f'thr={best_thr:.3f}', f'F2={best_score:.3f}')
    if do_plot:
        plt.plot(thrs, score)
        plt.vlines(x=best_thr, ymin=score.min(), ymax=score.max())
        # `{2}` inside the f-string evaluates to 2, so the label reads `$F_2=$...`.
        plt.text(best_thr+0.03, best_score-0.01, f'$F_{2}=${best_score:.3f}', fontsize=14);
        plt.show()
    return best_thr

# One [index, class number] row per entry of the training labels' c2i mapping;
# column 1 is used to turn a predicted index back into a class number.
i2c = np.array([(idx, cls) for cls, idx in learn.data.train_ds.y.c2i.items()]).astype(int)

def join_preds(preds, thr):
    """Turn predictions into space-separated class-number strings, one per sample.

    `preds[0]` is passed through a sigmoid and every position above `thr` is
    looked up in column 1 of the global `i2c` table.
    NOTE(review): this assumes row k of `i2c` describes prediction index k —
    confirm against how `c2i` is ordered.
    """
    selected = (preds[0].sigmoid() > thr).long()
    joined = []
    for row in selected:
        hit_idx = np.where(row == 1)[0]
        joined.append(' '.join(i2c[hit_idx, 1].astype(str)))
    return joined
In [35]:
valid_preds = learn.TTA(ds_type=DatasetType.Valid)
best_thr = find_best_fixed_threshold(*valid_preds)
75.00% [6/8 36:35<12:11]
28.11% [192/683 01:44<04:26]
In [36]:
test_preds = learn.TTA(ds_type=DatasetType.Test)
test_df.attribute_ids = join_preds(test_preds, best_thr)
In [37]:
test_df.to_csv('submission.csv', index=False)