Pytorch Resnet50 starter

From: https://www.kaggle.com/gskdhiman/pytorch-resnet50-starter

Author: Gursewak Dhiman

Score: 0.449

In [1]:
import os
import pandas as pd
import numpy as np
from random import randint

import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models

from PIL import Image
from matplotlib import pyplot as plt
%matplotlib inline

input_dir = os.path.join('..','input','imet-2019-fgvc6')
train_dir = os.path.join(input_dir,'train')
test_dir  = os.path.join(input_dir,'test')
labels_csv= os.path.join(input_dir,'labels.csv')
train_csv = os.path.join(input_dir,'train.csv')
resnet_weights_path = os.path.join('..','input','resnet50','resnet50.pth')
In [2]:
BATCH_SIZE = 128
NUM_EPOCHS = 20
PERCENTILE = 99.7
LEARNING_RATE = 0.002
In [3]:
df = pd.read_csv(labels_csv)
attribute_dict = dict(zip(df.attribute_id,df.attribute_name))
del df,labels_csv
In [4]:
tag_count = 0
culture_count = 0
for name in attribute_dict.values():
    prefix = name.split("::")[0]
    if prefix == 'tag':
        tag_count += 1
    elif prefix == 'culture':
        culture_count += 1
print('total_categories: {0}\ntag_categories: {1} \nculture_categories: {2} ' \
      .format(len(attribute_dict), tag_count, culture_count))
# cross-check: every attribute is either a tag or a culture
assert tag_count + culture_count == len(attribute_dict)
total_categories: 1103
tag_categories: 705 
culture_categories: 398 
In [5]:
df = pd.read_csv(train_csv)
labels_dict = dict(zip(df.id,df.attribute_ids))
In [6]:
file_names = os.listdir(train_dir)
number = randint(0, len(file_names) - 1)  # randint is inclusive on both ends
image_name = file_names[number]
def imshow(image):
    """Display image"""
    plt.figure(figsize=(6, 6))
    plt.imshow(image)
    plt.show()
# Example image
x = Image.open(os.path.join(train_dir, image_name))
for i in labels_dict[image_name.split('.')[0]].split():
    print(attribute_dict[int(i)])
np.array(x).shape
imshow(x)
culture::japan
tag::women
In [7]:
# more transforms (augmentation, normalization) could be added here; see the sketch after this cell
data_transforms = transforms.Compose([
        transforms.Resize((224,224)),
        transforms.ToTensor(),
    ])
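A richer pipeline is a common next step. Below is a minimal sketch, assuming the standard ImageNet normalization statistics (the ResNet50 backbone used later was pretrained on ImageNet) and a flip augmentation for training only; train_transforms and test_transforms are hypothetical names not used elsewhere in this kernel.

# Hypothetical pipelines: augmentation for training, deterministic for test.
# Mean/std below are the standard ImageNet statistics, matching the pretrained backbone.
train_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
test_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])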

Custom Dataset class

In [8]:
from torch.utils import data
class ImageData(data.Dataset):
    def __init__(self, df, dirpath, transform, num_classes=None, test=False):
        self.df = df
        self.test = test
        self.dirpath = dirpath
        self.conv_to_tensor = transform
        # image paths
        if not self.test:
            self.image_arr = np.asarray(str(self.dirpath)+'/'+self.df.iloc[:, 0]+'.png')
        else:
            self.image_arr = np.asarray(str(self.dirpath)+'/'+self.df.iloc[:, 0])

        # labels: multi-hot matrix indexed by attribute id, so column j always
        # corresponds to attribute j (building it via DataFrame.from_records on
        # Counter dicts would order columns by first appearance and scramble
        # the id-to-column mapping)
        if not self.test:
            label_lists = self.df.iloc[:, 1].str.split().apply(lambda x: [int(i) for i in x])
            self.label_arr = np.zeros((len(self.df), num_classes), dtype=np.float32)
            for row, ids in enumerate(label_lists):
                self.label_arr[row, ids] = 1.0

        # Calculate length of df
        self.data_len = len(self.df.index)

    def __len__(self):
        return self.data_len

    def __getitem__(self, idx):
        image_name = self.image_arr[idx]
        img = Image.open(image_name).convert('RGB')  # some images carry an alpha channel
        img_tensor = self.conv_to_tensor(img)
        if not self.test:
            image_label = torch.tensor(self.label_arr[idx], dtype=torch.float32)
            return (img_tensor, image_label)
        return img_tensor
In [9]:
train_df = pd.read_csv(train_csv)
train_dataset = ImageData(train_df, train_dir, data_transforms, num_classes=len(attribute_dict))
train_loader = data.DataLoader(dataset=train_dataset,batch_size=BATCH_SIZE,shuffle=False)

test_df = pd.DataFrame(os.listdir(test_dir))
test_dataset = ImageData(test_df,test_dir,data_transforms,test = True)
test_loader = data.DataLoader(dataset=test_dataset,batch_size=BATCH_SIZE,shuffle=False)
In [10]:
print("************Train*****************")
features, labels = next(iter(train_loader))
print(f'Features Size: {features.shape}\nLabels Size: {labels.shape}')
print()
print("************Test*****************")
features = next(iter(test_loader))
print(f'Features Size: {features.shape}\n')
************Train*****************
Features Size: torch.Size([128, 3, 224, 224])
Labels Size: torch.Size([128, 1103])

************Test*****************
Features Size: torch.Size([128, 3, 224, 224])

In [11]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device
Out[11]:
device(type='cuda', index=0)

Model using ResNet50

In [12]:
resnet_cls = models.resnet50()
resnet_cls.load_state_dict(torch.load(resnet_weights_path))

class AvgPool(nn.Module):
    def forward(self, x):
        return F.avg_pool2d(x, x.shape[2:])
    
class ResNet50(nn.Module):
    def __init__(self,num_outputs):
        super(ResNet50,self).__init__()
        self.resnet = resnet_cls
        self.resnet.avgpool = AvgPool()
        self.resnet.fc = nn.Linear(2048, num_outputs)
        for param in self.resnet.parameters():
            param.requires_grad = False

        for param in self.resnet.layer4.parameters():
            param.requires_grad = True

        for param in self.resnet.fc.parameters():
            param.requires_grad = True
            
    def forward(self,x):
        out = self.resnet(x)
        return out
In [13]:
output_dim = len(attribute_dict) 
NeuralNet = ResNet50(num_outputs = output_dim) 
In [14]:
total_params = sum(p.numel() for p in NeuralNet.parameters())
print(f'{total_params:,} total parameters.')
total_trainable_params = sum(p.numel() for p in NeuralNet.parameters() if p.requires_grad)
print(f'{total_trainable_params:,} trainable parameters.')
25,768,079 total parameters.
17,224,783 trainable parameters.
In [15]:
print("training examples: ",len(train_dataset))
print("batch size: ",BATCH_SIZE)
print("batches available: ",len(train_loader))
training examples:  109237
batch size:  128
batches available:  854

Train the Model

In [16]:
import time
from tqdm import tqdm_notebook as tqdm

NeuralNet = NeuralNet.to(device)
loss_func = torch.nn.BCEWithLogitsLoss()
# pass only the trainable parameters (layer4 + the new fc head) to the optimizer
optimizer = optim.Adam(filter(lambda p: p.requires_grad, NeuralNet.parameters()), lr=LEARNING_RATE)

total_loss = {}
for epoch in range(NUM_EPOCHS):
    start_time = time.time()
    NeuralNet.train()
    avg_loss = 0.
    avg_loss_history = {}
    i = 0
    for images_batch, labels_batch in tqdm(train_loader):
        images_batch = images_batch.to(device)
        labels_batch = labels_batch.to(device)
        pred_batch = NeuralNet(images_batch)
        loss = loss_func(pred_batch, labels_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        avg_loss += loss.item() / len(train_loader)
        if i % 250 == 0:  # record the running average every 250 batches
            print(avg_loss)
            avg_loss_history[i] = avg_loss
        i += 1
    print("Epoch: {}/{} | loss: {} | time: {:.3f}sec".format(epoch+1, NUM_EPOCHS, avg_loss, time.time()-start_time))
    total_loss[epoch] = avg_loss_history

# total_loss can be used for loss visualization; see the sketch after this cell's output
0.000822587789361315
0.005104648626564326
0.008426213607549731
0.011510890205139932

Epoch: 1/20 | loss: 0.012730456577871321 | time: 1267.988sec
1.2587236820674333e-05
0.0028839116268278703
0.005666550590200851
0.008365670910467173

Epoch: 2/20 | loss: 0.009446554939399984 | time: 1265.157sec
1.1255504428223648e-05
0.002582797529808391
0.0051023387988108826
0.007553324041194529

Epoch: 3/20 | loss: 0.008533664118057975 | time: 1271.659sec
1.025486484484036e-05
0.002345847415757437
0.004635426674400227
0.006854893596062054

Epoch: 4/20 | loss: 0.007739207403084361 | time: 1250.236sec
9.265696669141359e-06
0.002110298088832432
0.0041710946379574015
0.006163859285689034

Epoch: 5/20 | loss: 0.0069552415870653495 | time: 1245.020sec
8.356240545279527e-06
0.001888960200215774
0.0037389542308769014
0.005529996403002237

Epoch: 6/20 | loss: 0.0062435360830407965 | time: 1279.448sec
7.489481308351356e-06
0.0016956274665944468
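
The total_loss dict collected above maps each epoch to its running-average loss checkpoints (one every 250 batches). A minimal sketch for plotting them with the already-imported matplotlib:

for epoch, history in total_loss.items():
    iters, losses = zip(*sorted(history.items()))
    plt.plot(iters, losses, label='epoch {}'.format(epoch + 1))
plt.xlabel('batch index')
plt.ylabel('running average loss')
plt.legend()
plt.show()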

Predictions from the model

In [17]:
NeuralNet.eval()
predictions = np.zeros((len(test_dataset), output_dim))
i = 0
with torch.no_grad():  # no gradients needed at inference time
    for test_batch in tqdm(test_loader):
        test_batch = test_batch.to(device)
        batch_prediction = NeuralNet(test_batch).cpu().numpy()
        predictions[i * BATCH_SIZE:(i+1) * BATCH_SIZE, :] = batch_prediction
        i += 1
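
Note that the model outputs raw logits. Because the sigmoid is monotonic, the per-image percentile threshold used below selects the same attributes either way; converting to probabilities is optional and only useful for inspection. A one-line sketch:

probabilities = 1 / (1 + np.exp(-predictions))  # element-wise sigmoid; does not change the ranking below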

Generating submission

In [18]:
"""credits to https://www.kaggle.com/ateplyuk/keras-starter"""
"""this is little modified version of above with tqdm and percentile addition"""
"""np.where will be much faster than this"""

import operator
predicted_class_idx = []
for i in tqdm(range(len(predictions))):         
    d = {}
    for index, value in enumerate(predictions[i]):               
        if value > np.percentile(predictions[i],PERCENTILE):            
            d[index] = value 
    sorted_d = sorted(d.items(), key=operator.itemgetter(1), reverse=True)
    
    predicted_class_idx.append([i[0] for i in sorted_d[:10]])
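
As the comment above says, a vectorized version is much faster. A minimal sketch that reproduces the same selection (per-image percentile threshold, at most 10 indices, highest score first); predicted_class_idx_fast is a hypothetical name:

# One percentile per image, then a single np.where pass per row.
thresholds = np.percentile(predictions, PERCENTILE, axis=1)
predicted_class_idx_fast = []
for row, thr in zip(predictions, thresholds):
    idx = np.where(row > thr)[0]
    idx = idx[np.argsort(row[idx])[::-1]][:10]  # at most 10, sorted by descending score
    predicted_class_idx_fast.append(idx.tolist())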

In [19]:
test_df['attribute_ids'] = predicted_class_idx
test_df['attribute_ids'] = test_df['attribute_ids'].apply(lambda x : ' '.join(map(str,list(x))))
test_df = test_df.rename(columns={0: 'id'})
test_df['id'] = test_df['id'].apply(lambda x : x.split('.')[0])
test_df.head()
Out[19]:
id attribute_ids
0 b7bb1254bf81c71e 1062 79 1059 194
1 a6689c35fce7a4ea 1059 962 575 738
2 d0801eed43ebf742 949 25 1092 147
3 8cbcbba676d26bec 612 1046 671 369
4 b899b628ae6db634 1039 974 147 813
In [20]:
test_df.to_csv('submission.csv',index = False)