From: https://www.kaggle.com/axel81/imet-fastai-starter-resnet152-focal-loss
Author: Ram Ramrakhya
Score: 0.569
Simple baseline for iMet Collection 2019 competition using fastai v1
What to try next?
Referenced from the original kernel here
import fastai
from fastai.vision import *
fastai.__version__
'1.0.51'
# Mini-batch size; passed as bs= when the databunch is built.
BATCH = 32
# Target image size; passed as size= to the transform step of the data pipeline.
SIZE = 250
def seed_everything(seed=42):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA
    devices), exports ``PYTHONHASHSEED`` and configures cuDNN so that it
    does not introduce run-to-run variation.

    Args:
        seed: Integer seed applied to every generator.
    """
    random.seed(seed)
    # Only influences Python processes started after this point; the hash
    # seed of the already-running interpreter cannot be changed.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every CUDA device rather than only the current one. This call is
    # silently ignored when CUDA is not available.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # cuDNN autotuning can pick different kernels on different runs, which
    # defeats the deterministic flag above, so switch it off explicitly.
    torch.backends.cudnn.benchmark = False
# Fix all seeds (default seed 42) before any data splitting or model creation.
seed_everything()
path = Path('../input/imet-2019-fgvc6/') # iMet data path
import os
# Show which input datasets are attached to the kernel (competition data and
# the model-weight datasets).
os.listdir('../input/')
['resnet15216epochs', 'imet-2019-fgvc6', 'resnet152', 'densenet201', 'densenet121', 'resnet101']
# Making pretrained weights work without needing to find the default filename
from torch.utils import model_zoo
# Local directory that holds the checkpoint; no error if it already exists.
Path('models').mkdir(exist_ok=True)
# Notebook shell escape (not plain Python): copy the previously trained
# stage-1 checkpoint to models/resnet152.pth, the file name that load_url()
# and learn.load('resnet152') refer to.
!cp '../input/resnet15216epochs/stage-1.pth' 'models/resnet152.pth'
def load_url(*args, **kwargs):
    """Stand-in for ``model_zoo.load_url`` that reads a local checkpoint.

    The requested URL and every other argument except ``map_location`` are
    ignored; the checkpoint is always read from ``models/resnet152.pth``
    relative to the current working directory.

    Args:
        *args: Accepted for signature compatibility and ignored.
        **kwargs: Only ``map_location`` is honoured; it is forwarded to
            ``torch.load`` (``None`` when not supplied, which is what the
            original call did implicitly).

    Returns:
        The object ``torch.load`` deserialises from the checkpoint file.

    Raises:
        FileNotFoundError: If ``models/resnet152.pth`` does not exist.
    """
    weights_path = Path('models') / 'resnet152.pth'
    if not weights_path.is_file():
        # Name the missing file so the failure is actionable.
        raise FileNotFoundError(f'Local weights file not found: {weights_path}')
    # NOTE: torch.load unpickles the file, so it must only ever point at a
    # trusted checkpoint such as the one copied in by this notebook.
    return torch.load(weights_path, map_location=kwargs.get('map_location'))
# Monkey-patch torch's model_zoo so that code calling model_zoo.load_url gets
# the local checkpoint from load_url() above, whatever URL it asks for.
model_zoo.load_url = load_url
# Load train dataframe
# One row per training image: `id` plus a space-separated `attribute_ids` string.
train_df = pd.read_csv(path/'train.csv')
train_df.head()
| | id | attribute_ids |
|---|---|---|
| 0 | 1000483014d91860 | 147 616 813 |
| 1 | 1000fe2e667721fe | 51 616 734 813 |
| 2 | 1001614cb89646ee | 776 |
| 3 | 10041eb49b297c08 | 51 671 698 813 1092 |
| 4 | 100501c227f8beea | 13 404 492 903 1093 |
# Load labels dataframe
# Lookup table from numeric `attribute_id` to its `attribute_name`.
labels_df = pd.read_csv(path/'labels.csv')
labels_df.head()
| | attribute_id | attribute_name |
|---|---|---|
| 0 | 0 | culture::abruzzi |
| 1 | 1 | culture::achaemenid |
| 2 | 2 | culture::aegean |
| 3 | 3 | culture::afghan |
| 4 | 4 | culture::after british |
# Load sample submission
# Its `id` column is reused below as the list of test images; the
# `attribute_ids` values are placeholders.
test_df = pd.read_csv(path/'sample_submission.csv')
test_df.head()
| | id | attribute_ids |
|---|---|---|
| 0 | 10023b2cc4ed5f68 | 0 1 2 |
| 1 | 100fbe75ed8fd887 | 0 1 2 |
| 2 | 101b627524a04f19 | 0 1 2 |
| 3 | 10234480c41284c6 | 0 1 2 |
| 4 | 1023b0e2636dcea8 | 0 1 2 |
# Augmentation policy: flips enabled with flip_vert=False, plus rotation,
# zoom, warp and lighting jitter.
# NOTE(review): max_rotate is 0.10; if fastai interprets this value in
# degrees, rotation augmentation is almost disabled -- confirm this is intended.
# NOTE(review): the extra symmetric_warp has magnitude (-0, 0) and p=0, so it
# looks like a no-op -- confirm before relying on it.
tfms = get_transforms(do_flip=True, flip_vert=False, max_rotate=0.10, max_zoom=1.5, max_warp=0.2, max_lighting=0.2,
xtra_tfms=[(symmetric_warp(magnitude=(-0,0), p=0)),])
# Build one ImageList per dataframe: file names come from the `id` column with
# a '.png' suffix, under the 'train' and 'test' folders respectively.
train, test = [ImageList.from_df(df, path=path, cols='id', folder=folder, suffix='.png')
for df, folder in zip([train_df, test_df], ['train', 'test'])]
# Data pipeline, in order: random 10% hold-out (seed=42), multi-label targets
# split from `attribute_ids` on spaces, test set attached, transforms applied
# at SIZE with PAD resizing and 'border' padding, batches of BATCH, and
# normalisation with imagenet_stats.
data = (train.split_by_rand_pct(0.1, seed=42)
.label_from_df(cols='attribute_ids', label_delim=' ')
.add_test(test)
.transform(tfms, size=SIZE, resize_method=ResizeMethod.PAD, padding_mode='border',)
.databunch(path=Path('.'), bs=BATCH).normalize(imagenet_stats))
For problems with high class imbalance, Focal Loss is usually a better choice than the usual Cross Entropy Loss.
# Source: https://www.kaggle.com/c/human-protein-atlas-image-classification/discussion/78109
class FocalLoss(nn.Module):
    """Focal loss computed from raw logits with binary (multi-label) targets.

    Every element contributes ``(1 - p_t) ** gamma * BCE(logit, target)``,
    where ``p_t`` is the predicted probability of the true outcome. Confident,
    correct predictions are therefore down-weighted relative to plain binary
    cross-entropy.

    Args:
        gamma: Focusing exponent; ``0`` gives plain binary cross-entropy.
    """

    def __init__(self, gamma=2):
        super().__init__()
        self.gamma = gamma

    def forward(self, logit, target):
        """Return the scalar focal loss for a batch.

        Args:
            logit: Tensor of raw (pre-sigmoid) scores.
            target: Tensor of 0/1 labels with exactly the same shape as
                ``logit``; it is cast to float internally.

        Returns:
            A scalar tensor. For 2-D input the per-element losses are summed
            over dim 1 and then averaged over dim 0; for any other rank they
            are averaged over all elements.

        Raises:
            ValueError: If ``logit`` and ``target`` differ in shape. Without
                this check a mismatched target would be broadcast silently
                and produce a meaningless loss.
        """
        if logit.size() != target.size():
            raise ValueError(
                'Target size ({}) must be the same as input size ({})'.format(
                    tuple(target.size()), tuple(logit.size())))
        target = target.float()
        # Numerically stable binary cross-entropy on logits: shifting by
        # max(-logit, 0) keeps both exponentials from overflowing.
        max_val = (-logit).clamp(min=0)
        bce = logit - logit * target + max_val + \
            ((-max_val).exp() + (-logit - max_val).exp()).log()
        # log(1 - p_t): (target * 2 - 1) maps labels {0, 1} to signs {-1, +1}.
        log_one_minus_pt = F.logsigmoid(-logit * (target * 2.0 - 1.0))
        # (1 - p_t) ** gamma, evaluated in log space.
        loss = (log_one_minus_pt * self.gamma).exp() * bce
        if loss.dim() == 2:
            # (rows, classes) input: total loss per row before averaging.
            loss = loss.sum(dim=1)
        return loss.mean()
# ResNet-152 learner optimised with FocalLoss and monitored with the fbeta
# metric. pretrained=False: no pretrained backbone weights are requested here;
# the weights come from the checkpoint loaded on the next line.
learn = cnn_learner(data, base_arch=models.resnet152, loss_func=FocalLoss(), metrics=fbeta, pretrained=False)
# Restore the checkpoint staged earlier as models/resnet152.pth
# (presumably resolved as <learn.path>/<model_dir>/resnet152.pth -- verify).
learn.load('resnet152')
/opt/conda/lib/python3.6/site-packages/torch/serialization.py:251: UserWarning: Couldn't retrieve source code for container of type FocalLoss. It won't be checked for correctness upon loading. "type " + obj.__name__ + ". It won't be checked "
Learner(data=ImageDataBunch; Train: LabelList (98314 items) x: ImageList Image (3, 250, 250),Image (3, 250, 250),Image (3, 250, 250),Image (3, 250, 250),Image (3, 250, 250) y: MultiCategoryList 147;616;813,51;616;734;813,776,51;671;698;813;1092,13;404;492;903;1093 Path: ../input/imet-2019-fgvc6; Valid: LabelList (10923 items) x: ImageList Image (3, 250, 250),Image (3, 250, 250),Image (3, 250, 250),Image (3, 250, 250),Image (3, 250, 250) y: MultiCategoryList 872,147;542;733;813;1092,51;393;584;671;746;784;954,13;813;896,498;637;704 Path: ../input/imet-2019-fgvc6; Test: LabelList (7443 items) x: ImageList Image (3, 250, 250),Image (3, 250, 250),Image (3, 250, 250),Image (3, 250, 250),Image (3, 250, 250) y: EmptyLabelList ,,,, Path: ../input/imet-2019-fgvc6, model=Sequential( (0): Sequential( (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): ReLU(inplace) (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False) (4): Sequential( (0): Bottleneck( (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) (downsample): Sequential( (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (1): Bottleneck( (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(64, 64, 
kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (2): Bottleneck( (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) ) (5): Sequential( (0): Bottleneck( (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) (downsample): Sequential( (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (1): Bottleneck( (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(128, 512, 
kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (2): Bottleneck( (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (3): Bottleneck( (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (4): Bottleneck( (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (5): Bottleneck( (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(128, 128, kernel_size=(3, 
3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (6): Bottleneck( (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (7): Bottleneck( (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) ) (6): Sequential( (0): Bottleneck( (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) (downsample): Sequential( (0): 
Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (1): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (2): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (3): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (4): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, 
kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (5): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (6): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (7): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (8): Bottleneck( (conv1): 
Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (9): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (10): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (11): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, 
kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (12): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (13): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (14): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (15): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, 
kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (16): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (17): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (18): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (19): Bottleneck( (conv1): 
Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (20): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (21): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (22): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, 
kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (23): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (24): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (25): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (26): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, 
kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (27): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (28): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (29): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (30): Bottleneck( (conv1): 
Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (31): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (32): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (33): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, 
kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (34): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (35): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) ) (7): Sequential( (0): Bottleneck( (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) (downsample): Sequential( (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) 
(1): Bottleneck( (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (2): Bottleneck( (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) ) ) (1): Sequential( (0): AdaptiveConcatPool2d( (ap): AdaptiveAvgPool2d(output_size=1) (mp): AdaptiveMaxPool2d(output_size=1) ) (1): Flatten() (2): BatchNorm1d(4096, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (3): Dropout(p=0.25) (4): Linear(in_features=4096, out_features=512, bias=True) (5): ReLU(inplace) (6): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (7): Dropout(p=0.5) (8): Linear(in_features=512, out_features=1103, bias=True) ) ), opt_func=functools.partial(<class 'torch.optim.adam.Adam'>, betas=(0.9, 0.99)), loss_func=FocalLoss(), metrics=[<function fbeta at 0x7fdb641ac048>], true_wd=True, bn_wd=True, wd=0.01, train_bn=True, path=PosixPath('.'), model_dir='models', callback_fns=[functools.partial(<class 'fastai.basic_train.Recorder'>, add_time=True)], callbacks=[], layer_groups=[Sequential( (0): Conv2d(3, 64, kernel_size=(7, 7), 
stride=(2, 2), padding=(3, 3), bias=False) (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): ReLU(inplace) (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False) (4): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (6): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (7): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (8): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (10): ReLU(inplace) (11): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (12): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (13): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (14): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (15): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (16): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (17): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (18): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (19): ReLU(inplace) (20): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (21): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (22): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (23): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (24): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (25): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (26): ReLU(inplace) (27): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (28): BatchNorm2d(128, eps=1e-05, momentum=0.1, 
affine=True, track_running_stats=True) (29): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (30): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (31): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (32): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (33): ReLU(inplace) (34): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False) (35): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (36): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (37): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (38): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (39): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (40): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (41): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (42): ReLU(inplace) (43): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (44): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (45): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (46): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (47): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (48): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (49): ReLU(inplace) (50): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (51): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (52): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (53): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (54): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (55): BatchNorm2d(512, eps=1e-05, 
momentum=0.1, affine=True, track_running_stats=True) (56): ReLU(inplace) (57): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (58): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (59): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (60): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (61): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (62): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (63): ReLU(inplace) (64): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (65): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (66): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (67): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (68): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (69): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (70): ReLU(inplace) (71): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (72): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (73): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (74): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (75): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (76): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (77): ReLU(inplace) (78): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (79): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (80): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (81): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (82): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (83): 
BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (84): ReLU(inplace) ), Sequential( (0): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (4): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (5): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (6): ReLU(inplace) (7): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False) (8): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (9): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (10): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (11): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (12): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (13): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (14): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (15): ReLU(inplace) (16): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (17): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (18): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (19): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (20): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (21): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (22): ReLU(inplace) (23): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (24): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (25): Conv2d(256, 256, kernel_size=(3, 3), 
stride=(1, 1), padding=(1, 1), bias=False) (26): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (27): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (28): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (29): ReLU(inplace) (30): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (31): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (32): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (33): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (34): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (35): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (36): ReLU(inplace) (37): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (38): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (39): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (40): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (41): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (42): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (43): ReLU(inplace) (44): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (45): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (46): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (47): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (48): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (49): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (50): ReLU(inplace) (51): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (52): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 
(53): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (54): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (55): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (56): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (57): ReLU(inplace) (58): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (59): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (60): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (61): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (62): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (63): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (64): ReLU(inplace) (65): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (66): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (67): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (68): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (69): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (70): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (71): ReLU(inplace) (72): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (73): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (74): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (75): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (76): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (77): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (78): ReLU(inplace) (79): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (80): BatchNorm2d(256, eps=1e-05, 
momentum=0.1, affine=True, track_running_stats=True) (81): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (82): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (83): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (84): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (85): ReLU(inplace) (86): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (87): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (88): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (89): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (90): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (91): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (92): ReLU(inplace) (93): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (94): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (95): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (96): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (97): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (98): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (99): ReLU(inplace) (100): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (101): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (102): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (103): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (104): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (105): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (106): ReLU(inplace) (107): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 
1), bias=False) (108): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (109): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (110): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (111): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (112): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (113): ReLU(inplace) (114): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (115): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (116): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (117): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (118): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (119): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (120): ReLU(inplace) (121): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (122): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (123): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (124): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (125): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (126): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (127): ReLU(inplace) (128): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (129): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (130): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (131): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (132): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (133): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (134): 
ReLU(inplace) (135): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (136): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (137): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (138): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (139): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (140): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (141): ReLU(inplace) (142): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (143): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (144): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (145): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (146): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (147): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (148): ReLU(inplace) (149): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (150): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (151): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (152): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (153): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (154): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (155): ReLU(inplace) (156): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (157): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (158): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (159): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (160): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (161): BatchNorm2d(1024, 
eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (162): ReLU(inplace) (163): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (164): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (165): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (166): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (167): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (168): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (169): ReLU(inplace) (170): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (171): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (172): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (173): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (174): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (175): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (176): ReLU(inplace) (177): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (178): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (179): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (180): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (181): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (182): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (183): ReLU(inplace) (184): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (185): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (186): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (187): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (188): Conv2d(256, 1024, 
kernel_size=(1, 1), stride=(1, 1), bias=False) (189): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (190): ReLU(inplace) (191): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (192): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (193): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (194): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (195): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (196): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (197): ReLU(inplace) (198): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (199): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (200): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (201): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (202): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (203): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (204): ReLU(inplace) (205): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (206): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (207): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (208): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (209): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (210): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (211): ReLU(inplace) (212): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (213): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (214): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (215): BatchNorm2d(256, eps=1e-05, 
momentum=0.1, affine=True, track_running_stats=True) (216): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (217): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (218): ReLU(inplace) (219): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (220): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (221): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (222): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (223): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (224): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (225): ReLU(inplace) (226): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (227): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (228): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (229): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (230): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (231): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (232): ReLU(inplace) (233): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (234): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (235): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (236): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (237): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (238): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (239): ReLU(inplace) (240): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (241): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (242): Conv2d(256, 256, kernel_size=(3, 3), 
stride=(1, 1), padding=(1, 1), bias=False) (243): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (244): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (245): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (246): ReLU(inplace) (247): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (248): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (249): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (250): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (251): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (252): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (253): ReLU(inplace) (254): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (255): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (256): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (257): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (258): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False) (259): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (260): ReLU(inplace) (261): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False) (262): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (263): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (264): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (265): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (266): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (267): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False) (268): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, 
track_running_stats=True) (269): ReLU(inplace) (270): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (271): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (272): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (273): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (274): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False) (275): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (276): ReLU(inplace) ), Sequential( (0): AdaptiveAvgPool2d(output_size=1) (1): AdaptiveMaxPool2d(output_size=1) (2): Flatten() (3): BatchNorm1d(4096, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (4): Dropout(p=0.25) (5): Linear(in_features=4096, out_features=512, bias=True) (6): ReLU(inplace) (7): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (8): Dropout(p=0.5) (9): Linear(in_features=512, out_features=1103, bias=True) )], add_time=True)
# Find a good learning rate: run the learner's LR finder, then plot what the
# recorder captured so a value can be picked by eye.
learn.lr_find()
learn.recorder.plot()
LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.
# Unfreeze the learner before fine-tuning.
learn.unfreeze()
# NOTE(review): `lr` is not referenced by the fit call below nor by any later
# line visible in this notebook; the rates actually passed are the slice bounds.
lr = 1e-2
# One-cycle training for 10 epochs. slice(1e-5, 1e-3) presumably spreads the
# learning rate across layer groups (discriminative LR) — confirm in fastai docs.
learn.fit_one_cycle(10, slice(1e-5, 1e-3))
epoch | train_loss | valid_loss | fbeta | time |
---|---|---|---|---|
0 | 2.663224 | 17.611235 | 0.505424 | 37:00 |
# Checkpoint the trained weights under the name 'stage-1' (return_path=True asks
# for the written path back), then export the learner for later inference.
learn.save('stage-1', return_path=True)
learn.export()
/opt/conda/lib/python3.6/site-packages/torch/serialization.py:251: UserWarning: Couldn't retrieve source code for container of type FocalLoss. It won't be checked for correctness upon loading. "type " + obj.__name__ + ". It won't be checked "
def find_best_fixed_threshold(preds, targs, do_plot=True):
    """Scan a fixed grid of thresholds and return the best-scoring one.

    Evaluates ``fbeta(preds, targs, thresh=thr)`` for every ``thr`` in
    ``np.arange(0, 0.5, 0.01)``, prints the best threshold with its score,
    and optionally plots score against threshold.

    Args:
        preds: Model outputs, forwarded unchanged to ``fbeta``.
        targs: Ground-truth targets, forwarded unchanged to ``fbeta``.
        do_plot: When true, draw the score curve with the best threshold
            marked by a vertical line and an annotation.

    Returns:
        The threshold from the scanned grid with the highest score
        (the first such threshold if several tie).
    """
    thrs = np.arange(0, 0.5, 0.01)
    # Score the arguments that were passed in, not the notebook-global
    # `valid_preds`, so the function is correct for any (preds, targs) pair.
    score = []
    for thr in progress_bar(thrs):
        score.append(fbeta(preds, targs, thresh=thr))
    score = np.array(score)
    pm = score.argmax()
    best_thr, best_score = thrs[pm], score[pm].item()
    print(f'thr={best_thr:.3f}', f'F2={best_score:.3f}')
    if do_plot:
        plt.plot(thrs, score)
        plt.vlines(x=best_thr, ymin=score.min(), ymax=score.max())
        # `{2}` inside the f-string renders as the literal subscript 2.
        plt.text(best_thr+0.03, best_score-0.01, f'$F_{2}=${best_score:.3f}', fontsize=14)
        plt.show()
    return best_thr
# Lookup table with one [i, c] row per entry of the training labels' c2i mapping,
# cast to int. join_preds indexes rows by predicted position and reads column 1.
# presumably c2i maps class label -> model output index; verify against fastai.
i2c = np.array([[i, c] for c, i in learn.data.train_ds.y.c2i.items()]).astype(int) # indices to class number correspondence
def join_preds(preds, thr):
    """Turn thresholded predictions into space-separated class-number strings.

    ``preds[0]`` is passed through a sigmoid and compared against ``thr``.
    For each row, the positions that pass are looked up in column 1 of the
    module-level ``i2c`` table and joined with single spaces.

    Returns:
        A list with one string per row of ``preds[0]``.
    """
    hits = (preds[0].sigmoid() > thr).long()
    labels = []
    for row in hits:
        picked = np.where(row == 1)[0]
        labels.append(' '.join(i2c[picked, 1].astype(str)))
    return labels
# Validation predictions
# get_preds on the validation set; the result is unpacked positionally below
# as the (preds, targs) arguments of find_best_fixed_threshold.
valid_preds = learn.get_preds(DatasetType.Valid)
# Threshold chosen on validation data; reused for the test predictions.
best_thr = find_best_fixed_threshold(*valid_preds)
thr=0.280 F2=0.565
# Test predictions
# Predict on the test set with test-time augmentation (learn.TTA).
test_preds = learn.TTA(ds_type=DatasetType.Test)
# Overwrite the sample submission's placeholder labels with the predicted
# attribute ids, thresholded at the value selected on validation.
test_df.attribute_ids = join_preds(test_preds, best_thr)
test_df.head()
id | attribute_ids | |
---|---|---|
0 | 10023b2cc4ed5f68 | 1059 369 587 766 |
1 | 100fbe75ed8fd887 | 1039 231 |
2 | 101b627524a04f19 | 420 624 784 79 835 |
3 | 10234480c41284c6 | 1046 13 147 480 483 725 738 776 813 830 923 |
4 | 1023b0e2636dcea8 | 1046 1092 147 156 227 322 584 671 813 954 |
# Write the submission file; index=False keeps the DataFrame index out of the CSV.
test_df.to_csv('submission.csv', index=False)