fast.ai ootb (cutout+efficientnet)

From: https://www.kaggle.com/khursani8/fast-ai-ootb-cutout-efficientnet

Author: khursani

Score: 0.38584

Try

  • Use mixup
  • fp_16
  • Oversampling
  • cutout
  • efficientnet b3
In [1]:
from fastai.vision import *
from fastai.metrics import *
PATH = Path('../input')
In [2]:
ann_file = '../input/train2019.json'
with open(ann_file) as data_file:
        train_anns = json.load(data_file)

train_anns_df = pd.DataFrame(train_anns['annotations'])[['image_id','category_id']]
train_img_df = pd.DataFrame(train_anns['images'])[['id', 'file_name']].rename(columns={'id':'image_id'})
df_train_file_cat = pd.merge(train_img_df, train_anns_df, on='image_id')
df_train_file_cat['category_id']=df_train_file_cat['category_id'].astype(str)
df_train_file_cat = df_train_file_cat.drop(['image_id'],axis=1)
df_train_file_cat.head()
Out[2]:
file_name category_id
0 train_val2019/Plants/400/d1322d13ccd856eb4236c... 400
1 train_val2019/Plants/570/15edbc1e2ef000d8ace48... 570
2 train_val2019/Reptiles/167/c87a32e8927cbf4f06d... 167
3 train_val2019/Birds/254/9fcdd1d37e96d8fd94dfdc... 254
4 train_val2019/Plants/739/ffa06f951e99de9d220ae... 739
In [3]:
%%time
# Try Oversampling

res = None
sample_to = df_train_file_cat.category_id.value_counts().max() # which is 500

for grp in df_train_file_cat.groupby('category_id'):
    n = grp[1].shape[0]
    additional_rows = grp[1].sample(0 if sample_to < n  else sample_to - n, replace=True)
    rows = pd.concat((grp[1], additional_rows))
    
    if res is None: res = rows
    else: res = pd.concat((res, rows))
CPU times: user 11.3 s, sys: 20 ms, total: 11.3 s
Wall time: 11.3 s
In [4]:
res.category_id.value_counts()[:10]
Out[4]:
298    500
879    500
42     500
161    500
268    500
819    500
452    500
511    500
195    500
593    500
Name: category_id, dtype: int64
In [5]:
test_ann_file = '../input/test2019.json'
with open(test_ann_file) as data_file:
        test_anns = json.load(data_file)
test_img_df = pd.DataFrame(test_anns['images'])[['file_name','id']].rename(columns={'id':'image_id'})
test_img_df.head()
Out[5]:
file_name image_id
0 test2019/e295f3c7046b1f1e80c0301401324aa9.jpg 268243
1 test2019/ad3dcbb6846ed0b4dab58d7b1a4210ba.jpg 268244
2 test2019/e697be8e296b4b140cff4f96f85c364f.jpg 268245
3 test2019/7e7ba55e6aa26ba99e814d63b15d0121.jpg 268246
4 test2019/6cb6372079d23702511c06923970f13f.jpg 268247
In [6]:
src = (
ImageList.from_df(df=res,path=PATH/"train_val2019")
    .use_partial_data(0.3)
    .split_by_rand_pct(0.1)
    .label_from_df()
    .add_test(ImageList.from_df(df=test_img_df,path=PATH/"test2019"))
)
In [7]:
data = (
    src
    .transform(get_transforms(),size=128)
    .databunch(bs=64*2)
    .normalize(imagenet_stats)
)
In [8]:
!pip install efficientnet_pytorch
Collecting efficientnet_pytorch
  Downloading https://files.pythonhosted.org/packages/06/ff/881afd965c46b11fc6f3c8316de9e08d37fc3b71056dbab861b76faee6ca/efficientnet_pytorch-0.1.0-py3-none-any.whl
Requirement already satisfied: torch in /opt/conda/lib/python3.6/site-packages (from efficientnet_pytorch) (1.0.1.post2)
Installing collected packages: efficientnet-pytorch
Successfully installed efficientnet-pytorch-0.1.0
You are using pip version 19.0.3, however version 19.1.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.
In [9]:
from efficientnet_pytorch import EfficientNet
In [10]:
model_name = 'efficientnet-b3'
def getModel(pret):
    model = EfficientNet.from_pretrained(model_name)
#     model._bn1 = nn.Identity()
    model._fc = nn.Linear(1536,data.c)
    return model
In [11]:
# learn = cnn_learner(data,models.densenet201,metrics=[error_rate],model_dir='/kaggle/working',pretrained=True,loss_func=LabelSmoothingCrossEntropy()).mixup()
In [12]:
learn = Learner(data,getModel(False),metrics=[error_rate],model_dir='/kaggle/working',loss_func=LabelSmoothingCrossEntropy()).mixup().to_fp16()
Downloading: "http://storage.googleapis.com/public-models/efficientnet-b3-c8376fa2.pth" to /tmp/.torch/models/efficientnet-b3-c8376fa2.pth
49380963it [00:00, 152758227.72it/s]
Loaded pretrained weights for efficientnet-b3
In [13]:
learn.lr_find()
learn.recorder.plot()
LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.
In [14]:
learn.fit_one_cycle(3,1e-3)
66.67% [2/3 1:39:07<49:33]
epoch train_loss valid_loss error_rate time
0 4.651476 3.750852 0.711023 49:33
1 3.969582 2.883739 0.512937 49:33

43.76% [466/1065 19:53<25:33 3.6636]
In [15]:
SZ=224
cutout_frac = 0.25
p_cutout = 0.75
cutout_sz = round(SZ*cutout_frac)
cutout_tfm = cutout(n_holes=(1,1), length=(cutout_sz, cutout_sz), p=p_cutout)
In [16]:
learn.data = (
    src
    .transform(get_transforms(xtra_tfms=[cutout_tfm]),size=SZ)
    .databunch(bs=64)
    .normalize(imagenet_stats)
)
In [17]:
learn.fit_one_cycle(7,1e-3)
0.00% [0/7 00:00<00:00]
epoch train_loss valid_loss error_rate time

Interrupted
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-17-2a1f08144d6a> in <module>()
----> 1 learn.fit_one_cycle(7,1e-3)

/opt/conda/lib/python3.6/site-packages/fastai/train.py in fit_one_cycle(learn, cyc_len, max_lr, moms, div_factor, pct_start, final_div, wd, callbacks, tot_epochs, start_epoch)
     20     callbacks.append(OneCycleScheduler(learn, max_lr, moms=moms, div_factor=div_factor, pct_start=pct_start,
     21                                        final_div=final_div, tot_epochs=tot_epochs, start_epoch=start_epoch))
---> 22     learn.fit(cyc_len, max_lr, wd=wd, callbacks=callbacks)
     23 
     24 def lr_find(learn:Learner, start_lr:Floats=1e-7, end_lr:Floats=10, num_it:int=100, stop_div:bool=True, wd:float=None):

/opt/conda/lib/python3.6/site-packages/fastai/basic_train.py in fit(self, epochs, lr, wd, callbacks)
    194         callbacks = [cb(self) for cb in self.callback_fns] + listify(callbacks)
    195         if defaults.extra_callbacks is not None: callbacks += defaults.extra_callbacks
--> 196         fit(epochs, self, metrics=self.metrics, callbacks=self.callbacks+callbacks)
    197 
    198     def create_opt(self, lr:Floats, wd:Floats=0.)->None:

/opt/conda/lib/python3.6/site-packages/fastai/basic_train.py in fit(epochs, learn, callbacks, metrics)
     98             for xb,yb in progress_bar(learn.data.train_dl, parent=pbar):
     99                 xb, yb = cb_handler.on_batch_begin(xb, yb)
--> 100                 loss = loss_batch(learn.model, xb, yb, learn.loss_func, learn.opt, cb_handler)
    101                 if cb_handler.on_batch_end(loss): break
    102 

/opt/conda/lib/python3.6/site-packages/fastai/basic_train.py in loss_batch(model, xb, yb, loss_func, opt, cb_handler)
     23     if not is_listy(xb): xb = [xb]
     24     if not is_listy(yb): yb = [yb]
---> 25     out = model(*xb)
     26     out = cb_handler.on_loss_begin(out)
     27 

/opt/conda/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    487             result = self._slow_forward(*input, **kwargs)
    488         else:
--> 489             result = self.forward(*input, **kwargs)
    490         for hook in self._forward_hooks.values():
    491             hook_result = hook(self, input, result)

/opt/conda/lib/python3.6/site-packages/efficientnet_pytorch/model.py in forward(self, inputs)
    167 
    168         # Convolution layers
--> 169         x = self.extract_features(inputs)
    170 
    171         # Head

/opt/conda/lib/python3.6/site-packages/efficientnet_pytorch/model.py in extract_features(self, inputs)
    152 
    153         # Stem
--> 154         x = relu_fn(self._bn0(self._conv_stem(inputs)))
    155 
    156         # Blocks

/opt/conda/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    487             result = self._slow_forward(*input, **kwargs)
    488         else:
--> 489             result = self.forward(*input, **kwargs)
    490         for hook in self._forward_hooks.values():
    491             hook_result = hook(self, input, result)

/opt/conda/lib/python3.6/site-packages/efficientnet_pytorch/utils.py in forward(self, x)
     91         if pad_h > 0 or pad_w > 0:
     92             x = F.pad(x, [pad_w//2, pad_w - pad_w//2, pad_h//2, pad_h - pad_h//2])
---> 93         return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
     94 
     95 

RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.cuda.HalfTensor) should be the same
In [18]:
learn.save('cutout-efficient')
In [19]:
# learn.unfreeze()
# learn.fit_one_cycle(8,slice(1e-6,1e-4))
In [20]:
preds,y = learn.get_preds(DatasetType.Test)
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-20-dba8a276a455> in <module>()
----> 1 preds,y = learn.get_preds(DatasetType.Test)

/opt/conda/lib/python3.6/site-packages/fastai/basic_train.py in get_preds(self, ds_type, with_loss, n_batch, pbar)
    332         lf = self.loss_func if with_loss else None
    333         return get_preds(self.model, self.dl(ds_type), cb_handler=CallbackHandler(self.callbacks),
--> 334                          activ=_loss_func2activ(self.loss_func), loss_func=lf, n_batch=n_batch, pbar=pbar)
    335 
    336     def pred_batch(self, ds_type:DatasetType=DatasetType.Valid, batch:Tuple=None, reconstruct:bool=False) -> List[Tensor]:

/opt/conda/lib/python3.6/site-packages/fastai/basic_train.py in get_preds(model, dl, pbar, cb_handler, activ, loss_func, n_batch)
     41     "Tuple of predictions and targets, and optional losses (if `loss_func`) using `dl`, max batches `n_batch`."
     42     res = [torch.cat(o).cpu() for o in
---> 43            zip(*validate(model, dl, cb_handler=cb_handler, pbar=pbar, average=False, n_batch=n_batch))]
     44     if loss_func is not None:
     45         with NoneReduceOnCPU(loss_func) as lf: res.append(lf(res[0], res[1]))

/opt/conda/lib/python3.6/site-packages/fastai/basic_train.py in validate(model, dl, loss_func, cb_handler, pbar, average, n_batch)
     56         for xb,yb in progress_bar(dl, parent=pbar, leave=(pbar is not None)):
     57             if cb_handler: xb, yb = cb_handler.on_batch_begin(xb, yb, train=False)
---> 58             val_loss = loss_batch(model, xb, yb, loss_func, cb_handler=cb_handler)
     59             val_losses.append(val_loss)
     60             if not is_listy(yb): yb = [yb]

/opt/conda/lib/python3.6/site-packages/fastai/basic_train.py in loss_batch(model, xb, yb, loss_func, opt, cb_handler)
     23     if not is_listy(xb): xb = [xb]
     24     if not is_listy(yb): yb = [yb]
---> 25     out = model(*xb)
     26     out = cb_handler.on_loss_begin(out)
     27 

/opt/conda/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    487             result = self._slow_forward(*input, **kwargs)
    488         else:
--> 489             result = self.forward(*input, **kwargs)
    490         for hook in self._forward_hooks.values():
    491             hook_result = hook(self, input, result)

/opt/conda/lib/python3.6/site-packages/efficientnet_pytorch/model.py in forward(self, inputs)
    167 
    168         # Convolution layers
--> 169         x = self.extract_features(inputs)
    170 
    171         # Head

/opt/conda/lib/python3.6/site-packages/efficientnet_pytorch/model.py in extract_features(self, inputs)
    152 
    153         # Stem
--> 154         x = relu_fn(self._bn0(self._conv_stem(inputs)))
    155 
    156         # Blocks

/opt/conda/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    487             result = self._slow_forward(*input, **kwargs)
    488         else:
--> 489             result = self.forward(*input, **kwargs)
    490         for hook in self._forward_hooks.values():
    491             hook_result = hook(self, input, result)

/opt/conda/lib/python3.6/site-packages/efficientnet_pytorch/utils.py in forward(self, x)
     91         if pad_h > 0 or pad_w > 0:
     92             x = F.pad(x, [pad_w//2, pad_w - pad_w//2, pad_h//2, pad_h - pad_h//2])
---> 93         return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
     94 
     95 

RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.cuda.HalfTensor) should be the same
In [21]:
results = torch.topk(preds,5)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-21-72b74067e8c1> in <module>()
----> 1 results = torch.topk(preds,5)

NameError: name 'preds' is not defined
In [22]:
out = []
for i in results[1].numpy():
    temp = ""
    for j in i:
        temp += (" "+str(data.classes[j])) 
    out.append(temp)
# print(out)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-22-e55f5aed5bfc> in <module>()
      1 out = []
----> 2 for i in results[1].numpy():
      3     temp = ""
      4     for j in i:
      5         temp += (" "+str(data.classes[j]))

NameError: name 'results' is not defined
In [23]:
sam_sub_df = pd.read_csv('../input/kaggle_sample_submission.csv')
# sam_sub_df.head()
sam_sub_df["predicted"] = out
sam_sub_df.head()
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-23-fb66247ff237> in <module>()
      1 sam_sub_df = pd.read_csv('../input/kaggle_sample_submission.csv')
      2 # sam_sub_df.head()
----> 3 sam_sub_df["predicted"] = out
      4 sam_sub_df.head()

/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py in __setitem__(self, key, value)
   3117         else:
   3118             # set column
-> 3119             self._set_item(key, value)
   3120 
   3121     def _setitem_slice(self, key, value):

/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py in _set_item(self, key, value)
   3192 
   3193         self._ensure_valid_index(value)
-> 3194         value = self._sanitize_column(key, value)
   3195         NDFrame._set_item(self, key, value)
   3196 

/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py in _sanitize_column(self, key, value, broadcast)
   3389 
   3390             # turn me into an ndarray
-> 3391             value = _sanitize_index(value, self.index, copy=False)
   3392             if not isinstance(value, (np.ndarray, Index)):
   3393                 if isinstance(value, list) and len(value) > 0:

/opt/conda/lib/python3.6/site-packages/pandas/core/series.py in _sanitize_index(data, index, copy)
   3999 
   4000     if len(data) != len(index):
-> 4001         raise ValueError('Length of values does not match length of ' 'index')
   4002 
   4003     if isinstance(data, ABCIndexClass) and not copy:

ValueError: Length of values does not match length of index
In [24]:
sam_sub_df.to_csv("submission.csv",index=False)