From: https://www.kaggle.com/blondinka/predict-submit-seresnext101
Author: Blonde
Score: 0.631
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load
import argparse
from pathlib import Path
from typing import Callable, List
import numpy as np
import pandas as pd
import tqdm
from multiprocessing.pool import ThreadPool
import torch
from torch import nn, cuda
from torch.nn import functional as F
import torchvision.models as M
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import cv2
from PIL import Image
from torchvision.transforms import (
ToTensor, Normalize, Compose, Resize, CenterCrop, RandomCrop,
RandomHorizontalFlip, RandomGrayscale)
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory
import os
print(os.listdir("../input"))
ON_KAGGLE = True
N_CLASSES = 1103
DATA_ROOT = Path('../input/imet-2019-fgvc6' if ON_KAGGLE else '../data')
RUN_ROOT = '../input/seresnext101-folds/' if ON_KAGGLE else '../data/results/'
use_sample = False
use_cuda = cuda.is_available()
SIZE = 352
train_root = DATA_ROOT / 'train'
test_root = DATA_ROOT / 'test'
print('Files present in the run directory:', os.listdir(RUN_ROOT))
print('Files present in the data directory:', os.listdir(DATA_ROOT))
"""
SeResnet models
https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py
"""
from collections import OrderedDict
import math
import torch.nn as nn
from torch.utils import model_zoo
class SEModule(nn.Module):
def __init__(self, channels, reduction):
super(SEModule, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2d(1)
self.fc1 = nn.Conv2d(channels, channels // reduction, kernel_size=1,
padding=0)
self.relu = nn.ReLU(inplace=True)
self.fc2 = nn.Conv2d(channels // reduction, channels, kernel_size=1,
padding=0)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
module_input = x
x = self.avg_pool(x)
x = self.fc1(x)
x = self.relu(x)
x = self.fc2(x)
x = self.sigmoid(x)
return module_input * x
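# A minimal sanity check of SEModule (added here, not part of the original
# kernel): the block squeezes each feature map to 1x1, pushes it through a
# channels -> channels // reduction -> channels bottleneck, and rescales the
# input channel-wise, so the output shape matches the input.
_se_check = SEModule(channels=64, reduction=16)
_se_in = torch.randn(2, 64, 32, 32)
assert _se_check(_se_in).shape == _se_in.shape
del _se_check, _se_in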
class Bottleneck(nn.Module):
"""
Base class for bottlenecks that implements `forward()` method.
"""
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out = self.se_module(out) + residual
out = self.relu(out)
return out
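# Note (added comment): unlike a plain ResNet bottleneck, the SE variants below
# rescale the block output with `se_module` *before* adding the residual
# shortcut, i.e. out = relu(SE(F(x)) + x).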
class SEBottleneck(Bottleneck):
"""
Bottleneck for SENet154.
"""
expansion = 4
def __init__(self, inplanes, planes, groups, reduction, stride=1,
downsample=None):
super(SEBottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes * 2, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes * 2)
self.conv2 = nn.Conv2d(planes * 2, planes * 4, kernel_size=3,
stride=stride, padding=1, groups=groups,
bias=False)
self.bn2 = nn.BatchNorm2d(planes * 4)
self.conv3 = nn.Conv2d(planes * 4, planes * 4, kernel_size=1,
bias=False)
self.bn3 = nn.BatchNorm2d(planes * 4)
self.relu = nn.ReLU(inplace=True)
self.se_module = SEModule(planes * 4, reduction=reduction)
self.downsample = downsample
self.stride = stride
class SEResNetBottleneck(Bottleneck):
"""
ResNet bottleneck with a Squeeze-and-Excitation module. It follows Caffe
implementation and uses `stride=stride` in `conv1` and not in `conv2`
(the latter is used in the torchvision implementation of ResNet).
"""
expansion = 4
def __init__(self, inplanes, planes, groups, reduction, stride=1,
downsample=None):
super(SEResNetBottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False,
stride=stride)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1,
groups=groups, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes * 4)
self.relu = nn.ReLU(inplace=True)
self.se_module = SEModule(planes * 4, reduction=reduction)
self.downsample = downsample
self.stride = stride
class SEResNeXtBottleneck(Bottleneck):
"""
ResNeXt bottleneck type C with a Squeeze-and-Excitation module.
"""
expansion = 4
def __init__(self, inplanes, planes, groups, reduction, stride=1,
downsample=None, base_width=4):
super(SEResNeXtBottleneck, self).__init__()
width = math.floor(planes * (base_width / 64)) * groups
self.conv1 = nn.Conv2d(inplanes, width, kernel_size=1, bias=False,
stride=1)
self.bn1 = nn.BatchNorm2d(width)
self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride,
padding=1, groups=groups, bias=False)
self.bn2 = nn.BatchNorm2d(width)
self.conv3 = nn.Conv2d(width, planes * 4, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes * 4)
self.relu = nn.ReLU(inplace=True)
self.se_module = SEModule(planes * 4, reduction=reduction)
self.downsample = downsample
self.stride = stride
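# Quick arithmetic check (added, not in the original kernel) of the grouped
# convolution width above: with the se_resnext101 settings (base_width=4,
# groups=32), planes=64 gives width = floor(64 * 4/64) * 32 = 128 and
# planes=512 gives width = floor(512 * 4/64) * 32 = 1024.
assert math.floor(64 * (4 / 64)) * 32 == 128
assert math.floor(512 * (4 / 64)) * 32 == 1024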
class SENet(nn.Module):
def __init__(self, block, layers, groups, reduction, dropout_p=0.2,
inplanes=128, input_3x3=True, downsample_kernel_size=3,
downsample_padding=1, num_classes=1000):
"""
Parameters
----------
block (nn.Module): Bottleneck class.
- For SENet154: SEBottleneck
- For SE-ResNet models: SEResNetBottleneck
- For SE-ResNeXt models: SEResNeXtBottleneck
layers (list of ints): Number of residual blocks for 4 layers of the
network (layer1...layer4).
groups (int): Number of groups for the 3x3 convolution in each
bottleneck block.
- For SENet154: 64
- For SE-ResNet models: 1
- For SE-ResNeXt models: 32
reduction (int): Reduction ratio for Squeeze-and-Excitation modules.
- For all models: 16
dropout_p (float or None): Drop probability for the Dropout layer.
If `None` the Dropout layer is not used.
- For SENet154: 0.2
- For SE-ResNet models: None
- For SE-ResNeXt models: None
inplanes (int): Number of input channels for layer1.
- For SENet154: 128
- For SE-ResNet models: 64
- For SE-ResNeXt models: 64
input_3x3 (bool): If `True`, use three 3x3 convolutions instead of
a single 7x7 convolution in layer0.
- For SENet154: True
- For SE-ResNet models: False
- For SE-ResNeXt models: False
downsample_kernel_size (int): Kernel size for downsampling convolutions
in layer2, layer3 and layer4.
- For SENet154: 3
- For SE-ResNet models: 1
- For SE-ResNeXt models: 1
downsample_padding (int): Padding for downsampling convolutions in
layer2, layer3 and layer4.
- For SENet154: 1
- For SE-ResNet models: 0
- For SE-ResNeXt models: 0
num_classes (int): Number of outputs in `last_linear` layer.
- For all models: 1000
"""
super(SENet, self).__init__()
self.inplanes = inplanes
if input_3x3:
layer0_modules = [
('conv1', nn.Conv2d(3, 64, 3, stride=2, padding=1,
bias=False)),
('bn1', nn.BatchNorm2d(64)),
('relu1', nn.ReLU(inplace=True)),
('conv2', nn.Conv2d(64, 64, 3, stride=1, padding=1,
bias=False)),
('bn2', nn.BatchNorm2d(64)),
('relu2', nn.ReLU(inplace=True)),
('conv3', nn.Conv2d(64, inplanes, 3, stride=1, padding=1,
bias=False)),
('bn3', nn.BatchNorm2d(inplanes)),
('relu3', nn.ReLU(inplace=True)),
]
else:
layer0_modules = [
('conv1', nn.Conv2d(3, inplanes, kernel_size=7, stride=2,
padding=3, bias=False)),
('bn1', nn.BatchNorm2d(inplanes)),
('relu1', nn.ReLU(inplace=True)),
]
# To preserve compatibility with Caffe weights `ceil_mode=True`
# is used instead of `padding=1`.
layer0_modules.append(('pool', nn.MaxPool2d(3, stride=2,
ceil_mode=True)))
self.layer0 = nn.Sequential(OrderedDict(layer0_modules))
self.layer1 = self._make_layer(
block,
planes=64,
blocks=layers[0],
groups=groups,
reduction=reduction,
downsample_kernel_size=1,
downsample_padding=0
)
self.layer2 = self._make_layer(
block,
planes=128,
blocks=layers[1],
stride=2,
groups=groups,
reduction=reduction,
downsample_kernel_size=downsample_kernel_size,
downsample_padding=downsample_padding
)
self.layer3 = self._make_layer(
block,
planes=256,
blocks=layers[2],
stride=2,
groups=groups,
reduction=reduction,
downsample_kernel_size=downsample_kernel_size,
downsample_padding=downsample_padding
)
self.layer4 = self._make_layer(
block,
planes=512,
blocks=layers[3],
stride=2,
groups=groups,
reduction=reduction,
downsample_kernel_size=downsample_kernel_size,
downsample_padding=downsample_padding
)
#self.avg_pool = nn.AvgPool2d(7, stride=1)
self.avg_pool = nn.AdaptiveAvgPool2d(1)
self.dropout = nn.Dropout(dropout_p) if dropout_p is not None else None
self.last_linear = nn.Linear(512 * block.expansion, num_classes)
def _make_layer(self, block, planes, blocks, groups, reduction, stride=1,
downsample_kernel_size=1, downsample_padding=0):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=downsample_kernel_size, stride=stride,
padding=downsample_padding, bias=False),
nn.BatchNorm2d(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, groups, reduction, stride,
downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes, groups, reduction))
return nn.Sequential(*layers)
def features(self, x):
x = self.layer0(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
return x
def logits(self, x):
x = self.avg_pool(x)
if self.dropout is not None:
x = self.dropout(x)
x = x.view(x.size(0), -1)
x = self.last_linear(x)
return x
def forward(self, x):
x = self.features(x)
x = self.logits(x)
return x
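# For reference (added note): with block.expansion == 4 the stage outputs have
# 64*4=256, 128*4=512, 256*4=1024 and 512*4=2048 channels, which is why
# `last_linear` takes 512 * block.expansion = 2048 features after the global
# average pool.
assert 512 * SEResNeXtBottleneck.expansion == 2048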
def initialize_pretrained_model(model, num_classes, settings):
assert num_classes == settings['num_classes'], \
'num_classes should be {}, but is {}'.format(
settings['num_classes'], num_classes)
model.load_state_dict(model_zoo.load_url(settings['url']))
model.input_space = settings['input_space']
model.input_size = settings['input_size']
model.input_range = settings['input_range']
model.mean = settings['mean']
model.std = settings['std']
def se_resnext101(num_classes=1000, pretrained=None):
    # SE-ResNeXt-101 (32x4d). `pretrained` is accepted for API compatibility
    # but ignored: the fold checkpoints are loaded later via `load_model`.
    # dropout_p=0.5 is the author's choice; the stock SE-ResNeXt configuration
    # uses no dropout (see the SENet docstring above).
    model = SENet(SEResNeXtBottleneck, [3, 4, 23, 3], groups=32, reduction=16,
                  dropout_p=0.5, inplanes=64, input_3x3=False,
                  downsample_kernel_size=1, downsample_padding=0,
                  num_classes=num_classes)
    return model
model = se_resnext101()
model.last_linear = nn.Linear(model.last_linear.in_features, N_CLASSES)
all_params = list(model.parameters())
use_cuda = cuda.is_available()
if use_cuda:
model = model.cuda()
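# Optional sanity check of the replaced head (a sketch added here, not part of
# the original kernel): one forward pass should give N_CLASSES logits. The
# adaptive average pool makes the spatial size flexible, so a small dummy input
# keeps this cheap; the fold checkpoints loaded below overwrite any BatchNorm
# statistics this touches.
with torch.no_grad():
    _dummy = torch.randn(2, 3, 128, 128)
    if use_cuda:
        _dummy = _dummy.cuda()
    assert model(_dummy).shape == (2, N_CLASSES)
    del _dummy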
def load_model(model, root: str, fold: int, use_cuda: bool):
"""Loads model checkpoints
Choose evaluation mode
"""
best_model_path = root + 'best-metric_fold'+str(fold)+'.pt'
if use_cuda:
state_dict = torch.load(best_model_path)
else:
state_dict = torch.load(best_model_path, map_location='cpu')
    # restore the model's parameters from the checkpoint
model.load_state_dict(state_dict['model'])
print('Loaded model from epoch {epoch}, step {step:,}'.format(**state_dict))
model.eval()
# Test-time transform: resize to SIZE x SIZE plus a random horizontal flip;
# with tta=2 each image is predicted twice on randomly flipped copies and the
# results are averaged by id.
test_transform = Compose([
Resize((SIZE, SIZE)),
RandomHorizontalFlip(0.5),
# CenterCrop(SIZE),
# RandomCrop(SIZE),
# ColorJitter(brightness=(0.6, 1.4), contrast=0, saturation=0, hue=0),
# ColorJitter(brightness=0, contrast=(0.6, 1.4), saturation=0, hue=0),
# ColorJitter(brightness=0, contrast=0, saturation=(0.6, 1.4), hue=0),
# ColorJitter(brightness=0, contrast=0, saturation=0, hue=(-0.2, 0.2)),
# RandomGrayscale(p=0.2),
])
tensor_transform = Compose([
ToTensor(),
Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
def load_image(item, root: Path) -> Image.Image:
image = cv2.imread(str(root / f'{item.id}.png'))
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
return Image.fromarray(image)
def get_ids(root: Path) -> List[str]:
return sorted({p.name.split('_')[0] for p in root.glob('*.png')})
def mean_df(df: pd.DataFrame) -> pd.DataFrame:
return df.groupby(level=0).mean()
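# Illustration of mean_df (added example, not from the original kernel): rows
# that share an index value, e.g. the TTA copies or per-fold predictions of one
# image id, are averaged column-wise.
_demo = pd.DataFrame({'0': [0.25, 0.50]}, index=['some_id', 'some_id'])
assert np.isclose(mean_df(_demo).loc['some_id', '0'], 0.375)
del _demo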
"""
Datasets
"""
class TTADataset(Dataset):
def __init__(self, root: Path, df: pd.DataFrame,
image_transform: Callable, tta: int):
super().__init__()
self._root = root
self._df = df
self._image_transform = image_transform
self._tta = tta
def __len__(self):
return len(self._df) * self._tta
    def __getitem__(self, idx: int):
        # The dataset is `tta` copies of the dataframe laid end to end, so each
        # image is drawn `tta` times; its predictions are averaged later by
        # `mean_df`, which groups on the id index.
        item = self._df.iloc[idx % len(self._df)]
        image = load_image(item, self._root)
        width, height = image.size
        # Upscale small images before the transform. Note that `width` and
        # `height` are not refreshed between the two checks, so when both sides
        # are below 320 the second resize is computed from the original size.
        if height < 320:
            ratio = 400/height
            image = image.resize((int(width * ratio), int(height * ratio)), Image.ANTIALIAS)
        if width < 320:
            ratio = 400/width
            image = image.resize((int(width * ratio), int(height * ratio)), Image.ANTIALIAS)
image = self._image_transform(image)
image = tensor_transform(image)
return image, item.id
def predict(model, root: Path, predict_df: pd.DataFrame, save_root: str,
image_transform, batch_size: int, tta: int, workers: int, use_cuda: bool):
"""
Make and save preditions
"""
valid_loader = DataLoader(
TTADataset(root, predict_df, image_transform, tta),
shuffle=False,
batch_size=batch_size,
num_workers=workers,
)
print(f'{len(valid_loader.dataset):,} in valid')
all_outputs, all_ids = [], []
with torch.no_grad():
for inputs, ids in tqdm.tqdm(valid_loader, desc='Predict'):
if use_cuda:
inputs = inputs.cuda()
outputs = torch.sigmoid(model(inputs))
all_outputs.append(outputs.data.cpu().numpy())
all_ids.extend(ids)
# print(all_outputs)
df = pd.DataFrame(
data=np.concatenate(all_outputs),
index=all_ids,
columns=map(str, range(N_CLASSES)))
df = mean_df(df)
print('probs: ', df.head(10))
#df.to_hdf(save_root, 'prob', index_label='id')
#print(f'Saved predictions to {out_path}')
return df
def binarize_prediction(probabilities, threshold: float, argsorted=None,
min_labels=1, max_labels=10):
""" Return matrix of 0/1 predictions, same shape as probabilities.
"""
assert probabilities.shape[1] == N_CLASSES
if argsorted is None:
argsorted = probabilities.argsort(axis=1)
max_mask = _make_mask(argsorted, max_labels)
min_mask = _make_mask(argsorted, min_labels)
prob_mask = probabilities > threshold
return (max_mask & prob_mask) | min_mask
def _make_mask(argsorted, top_n: int):
mask = np.zeros_like(argsorted, dtype=np.uint8)
col_indices = argsorted[:, -top_n:].reshape(-1)
row_indices = [i // top_n for i in range(len(col_indices))]
mask[row_indices, col_indices] = 1
return mask
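# Small check of the thresholding logic (added, not in the original kernel):
# every row of the binarized matrix keeps at least its single best class
# (min_labels=1) and never more than its top-10 classes (max_labels=10),
# regardless of the threshold.
_toy_probs = np.random.rand(4, N_CLASSES)
_toy_bin = binarize_prediction(_toy_probs, threshold=0.9)
_counts = _toy_bin.sum(axis=1)
assert ((_counts >= 1) & (_counts <= 10)).all()
del _toy_probs, _toy_bin, _counts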
predict_kwargs = dict(
image_transform = test_transform,
batch_size=16,
tta=2,
workers=0,
use_cuda=use_cuda
)
ss = pd.read_csv(DATA_ROOT/'sample_submission.csv')
if use_sample:
ss = ss.head(100)
print(ss.head())
# Re-read the sample submission with `id` as the index; it is used only to
# reindex each fold's predictions into a consistent row order before averaging.
sample_submission = pd.read_csv(
    DATA_ROOT / 'sample_submission.csv', index_col='id')
def get_classes(item):
return ' '.join(cls for cls, is_present in item.items() if is_present)
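# Example of get_classes (added, not in the original kernel): a binarized row
# indexed by class id becomes the space-separated attribute_ids string that the
# submission file expects.
assert get_classes(pd.Series({'13': 1, '405': 0, '896': 1})) == '13 896'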
dfs = []
predictions = []
folds = [0, 11, 2, 3, 4]
for fold in folds:
load_model(model, RUN_ROOT, fold, use_cuda)
out_path='test_'+str(fold)+'.h5'
df = predict(model, test_root, ss, out_path, **predict_kwargs)
df = df.reindex(sample_submission.index)
dfs.append(df)
print(dfs)
df = pd.concat(dfs)
print(df.head())
# average 5 folds
df = mean_df(df)
df[:] = binarize_prediction(df.values, threshold=0.11)
df = df.apply(get_classes, axis=1)
df.name = 'attribute_ids'
df.to_csv('submission.csv', header=True)
print(df.head())
print('Done!')