Using Pytorch and VGG

From: https://www.kaggle.com/koushikcon/using-pytorch-and-vgg

Author: Koushik

Score: 0.235

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import torch
import cv2
from glob import glob
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image, ImageFile
from torch.utils.data import TensorDataset, DataLoader, Dataset
from sklearn.model_selection import train_test_split
import copy 

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
# Allow PIL to load truncated/corrupt image files instead of raising an error
ImageFile.LOAD_TRUNCATED_IMAGES = True
#print(os.listdir("../input"))
#../input/train/train
#../input/train/test

# Any results you write to the current directory are saved as output.
use_cuda = torch.cuda.is_available()
if not use_cuda:
    print('No GPU found. Please use a GPU to train your neural network.')
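Aside: this kernel moves tensors with `.cuda()` calls guarded by use_cuda; newer PyTorch code usually captures the device once and uses `.to(device)` instead. A minimal sketch of that idiom (not what the cells below do):

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# later: model.to(device); data, target = data.to(device), target.to(device)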
In [2]:
df_train = pd.read_csv('../input/train.csv')
df_label = pd.read_csv('../input/labels.csv')
df_test = pd.read_csv('../input/sample_submission.csv')
---------------------------------------------------------------------------
FileNotFoundError: File b'../input/train.csv' does not exist
(full pandas traceback trimmed; the competition data was not attached when this kernel was re-run, so these CSVs cannot be read and the dependent cells below fail with NameError)
In [3]:
df_train = df_train[:50000]  # keep only the first 50,000 rows to bound training time
---------------------------------------------------------------------------
NameError: name 'df_train' is not defined (traceback trimmed)
In [4]:
label_names = df_label['attribute_name'].values
label_id = df_label['attribute_id'].values
train_labels = np.zeros((df_train.shape[0], len(label_names)))

#for row_index, row in enumerate(df_train['attribute_ids']):
    #for label in row.split():
        #train_labels[row_index, int(label)] = 1

#print(train_labels[:20][:50])
#for col in range(len(label_names)):
    #df_train[col] = 0
    
#df_train[label_id] = train_labels
---------------------------------------------------------------------------
NameError: name 'df_label' is not defined (traceback trimmed)
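The commented-out block in the cell above is the multi-hot encoding idea in embryo; a minimal runnable sketch with toy stand-ins for df_train and df_label (the real competition has 1103 attributes, not 3):

import numpy as np
import pandas as pd

# Hypothetical miniature of df_train: space-separated attribute ids per image
df_train_demo = pd.DataFrame({'id': ['a', 'b'], 'attribute_ids': ['0 2', '1']})
n_labels = 3

# One row per image, one column per attribute; 1 where the attribute applies
train_labels_demo = np.zeros((len(df_train_demo), n_labels))
for row_index, row in enumerate(df_train_demo['attribute_ids']):
    for label in row.split():
        train_labels_demo[row_index, int(label)] = 1
print(train_labels_demo)  # [[1. 0. 1.] [0. 1. 0.]]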
In [5]:
#df_test = df_train[:5]
df_test.head()
---------------------------------------------------------------------------
NameError: name 'df_test' is not defined (traceback trimmed)
In [6]:
# NOTE: class is inherited from Dataset
class ImageLabelDataset(Dataset):
    def __init__(self, df_data, prediction, folder="../input"):
        super().__init__()
        self.df = df_data.values
        self.prediction = prediction.values
        self.folder = folder

    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, index):
        tensorimage = self.preprocess_image(self.df[index])
        label = self.prediction[index]
        label_tensor = self.get_dummies(label)
        #x = np.squeeze(label_tensor.detach().cpu().numpy())
        #print(x.argsort()[-6:][::-1])
        return [tensorimage, label_tensor]
    
    def preprocess_image(self, img_path):
        data_transform = transforms.Compose([transforms.ToPILImage(),
                                             transforms.Resize(224), 
                                             transforms.CenterCrop(224), 
                                             transforms.RandomRotation(30), 
                                             transforms.ToTensor()
                                            ])
        image = cv2.imread("{}/{}.png".format(self.folder, img_path))
        image = data_transform(image)
        return image
    
    def get_dummies(self, attribute_id):
        label_tensor = torch.zeros((1, 1103))
        for label in attribute_id.split():
            label_tensor[0, int(label)] = 1
        return label_tensor
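One thing the transform above skips: torchvision's pretrained VGG weights were fit on ImageNet-normalized inputs. A hedged variant of the pipeline that adds that step (the mean/std values are the standard ImageNet statistics, not something this kernel uses):

imagenet_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    # standard ImageNet channel statistics expected by torchvision's pretrained models
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])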
In [7]:
# df_train.head()
batch_size = 1
train_image = df_train["id"]
#target = df_train.drop(['id', 'attribute_ids'],axis=1)
target = df_train["attribute_ids"]
X_train, X_val, y_train, y_val = train_test_split(train_image, target, random_state=42, test_size=0.1)


test_image = df_test["id"]
test_target = df_test["attribute_ids"]
#test_target = df_test.drop(['id', 'attribute_ids'],axis=1)

train_set = ImageLabelDataset(df_data=X_train, prediction=y_train, folder="../input/train")
val_set = ImageLabelDataset(df_data=X_val, prediction=y_val, folder="../input/train")
predict_set = ImageLabelDataset(df_data=test_image, prediction=test_target, folder="../input/test")

train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=1)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=batch_size, shuffle=True, num_workers=1)
test_loader = torch.utils.data.DataLoader(predict_set, batch_size=1, num_workers=0)
---------------------------------------------------------------------------
NameError: name 'df_train' is not defined (traceback trimmed)
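A quick way to confirm the loaders produce what the model expects, once the data is actually attached (a hedged smoke test, not part of the original kernel):

images, labels = next(iter(train_loader))
print(images.shape)   # expected: torch.Size([1, 3, 224, 224])
print(labels.shape)   # expected: torch.Size([1, 1, 1103]) -- flattened to (1, 1103) in training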
In [8]:
#pd.set_option('display.max_columns', None)
#pd.set_option('display.max_rows', None)
#print(y_train.head())
In [9]:
# Hyperparameters
n_output = 1  # note: not used anywhere below
# Number of Epochs
num_epochs = 2
# Learning Rate
learning_rate = 0.0001
# Model parameters

# Show stats for every n number of batches
show_every_n_batches = 1
In [10]:
def train_rnn(model, batch_size, optimizer, criterion, n_epochs, show_every_n_batches=100):
    # Despite the name (kept to match the call below), this trains the CNN.
    valid_loss_min = np.Inf

    print("Training for %d epoch(s)..." % n_epochs)
    for epoch_i in range(1, n_epochs + 1):
        # reset the running losses at the start of every epoch
        train_loss = 0
        valid_loss = 0

        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            # reshape the (1, 1, 1103) label tensor to (1, 1103) to match the model output
            target = target.view(1, -1)
            # clear the gradients of all optimized variables
            optimizer.zero_grad()
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(data)
            # calculate the train batch loss
            loss = criterion(output, target)
            # backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()
            # perform a single optimization step (parameter update)
            optimizer.step()
            # accumulate the average training loss over the epoch
            train_loss += loss.item() / len(train_loader)

        model.eval()
        with torch.no_grad():  # no gradients needed for validation
            for batch_idx, (data, target) in enumerate(val_loader):
                # move to GPU
                if use_cuda:
                    data, target = data.cuda(), target.cuda()
                target = target.view(1, -1)
                # forward pass and average validation loss
                output = model(data)
                loss = criterion(output, target)
                valid_loss += loss.item() / len(val_loader)

        # print training/validation statistics
        if epoch_i % show_every_n_batches == 0:
            print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
                epoch_i, train_loss, valid_loss))

            # save the model if validation loss has decreased
            if valid_loss < valid_loss_min:
                print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
                    valid_loss_min, valid_loss))
                torch.save(model.state_dict(), 'trained_rnn_new')
                valid_loss_min = valid_loss

    return model
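The loop above only tracks BCE loss, never the competition metric. If the leaderboard score at the top (0.235) is an F2-style measure, a validation check could look like this sketch, where the toy arrays, the 0.1 threshold, and the 'samples' averaging are all assumptions:

# Hedged sketch: score multi-label predictions with F2, which weights recall
# over precision. Toy arrays; the cutoff and averaging mode are assumptions.
import numpy as np
from sklearn.metrics import fbeta_score

y_true = np.array([[1, 0, 1, 0], [0, 1, 0, 0]])                  # toy multi-hot targets
y_prob = np.array([[0.9, 0.2, 0.4, 0.1], [0.3, 0.8, 0.1, 0.2]])  # toy sigmoid outputs
y_pred = (y_prob > 0.1).astype(int)                              # binarize at the cutoff
print(fbeta_score(y_true, y_pred, beta=2, average='samples'))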
In [11]:
model_transfer = models.vgg16(pretrained=True)
print(model_transfer)
Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /tmp/.torch/models/vgg16-397923af.pth
---------------------------------------------------------------------------
ConnectionError: HTTPSConnectionPool(host='download.pytorch.org', port=443): Max retries exceeded with url: /models/vgg16-397923af.pth (Caused by NewConnectionError: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution)
(full urllib3/requests/torch traceback trimmed; internet access was disabled for this re-run, so the pretrained VGG16 weights could not be downloaded)
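When a Kaggle kernel runs with internet disabled, the usual workaround is to attach a dataset containing the weight file and load it locally. A hedged sketch (the ../input/pytorch-vgg/ path is an assumption about how such a dataset might be named):

model_transfer = models.vgg16(pretrained=False)
# Hypothetical path to an attached Kaggle dataset holding the torchvision VGG16 checkpoint
model_transfer.load_state_dict(torch.load('../input/pytorch-vgg/vgg16-397923af.pth'))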
In [12]:
# Freeze training for all "features" layers so only the new classifier learns
for param in model_transfer.features.parameters():
    param.requires_grad = False

# Replace VGG16's classifier: 25088 flattened conv features -> 1103 sigmoid
# outputs, one per attribute, for multi-label prediction
custom_model = nn.Sequential(nn.Linear(25088, 1024),
                             nn.ReLU(),
                             nn.Dropout(p=0.5),
                             nn.Linear(1024, 1103),
                             nn.Sigmoid()
                            )

model_transfer.classifier = custom_model

if use_cuda:
    model_transfer = model_transfer.cuda()

# specify loss function: binary cross-entropy, matching the sigmoid outputs
# (moved to GPU only if one is actually available)
criterion_scratch = nn.BCELoss(reduction="mean").to('cuda:0' if use_cuda else 'cpu')

# specify optimizer: only the classifier parameters are trainable
optimizer_scratch = optim.Adam(model_transfer.classifier.parameters(), lr=learning_rate)

trained_rnn = train_rnn(model_transfer, batch_size, optimizer_scratch, criterion_scratch, num_epochs, show_every_n_batches)
---------------------------------------------------------------------------
NameError: name 'model_transfer' is not defined (traceback trimmed)
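Sigmoid + BCELoss is the right pairing here because each image can carry several attributes at once; softmax + CrossEntropyLoss would force the 1103 outputs to compete for a single label. A toy illustration (values made up):

probs = torch.sigmoid(torch.tensor([2.0, -1.0, 1.5]))  # independent per-class probabilities
print(probs)        # each entry lies in (0, 1) on its own
print(probs.sum())  # sums above 1 are fine for multi-label targets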
In [13]:
# reload the best checkpoint saved during training and switch to inference mode
model_transfer.load_state_dict(torch.load('trained_rnn_new'))
model_transfer.eval()
---------------------------------------------------------------------------
NameError: name 'model_transfer' is not defined (traceback trimmed)
In [14]:
preds = []
np.set_printoptions(threshold=100)
submission = pd.read_csv('../input/sample_submission.csv')
with torch.no_grad():  # inference only; no gradients needed
    for batch_idx, (data, target) in enumerate(test_loader):
        # move to GPU
        if use_cuda:
            data, target = data.cuda(), target.cuda()
        # forward pass: predicted attribute probabilities for this image
        output = model_transfer(data)
        pr = np.squeeze(output.detach().cpu().numpy())
        # keep the 4 highest-probability attribute ids, highest first
        x = list(pr.argsort()[-4:][::-1])
        # submission format: space-separated attribute ids per image
        preds.append(' '.join(map(str, x)))
submission["attribute_ids"] = preds
print(submission.head())
---------------------------------------------------------------------------
FileNotFoundError: File b'../input/sample_submission.csv' does not exist (traceback trimmed)
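Always predicting exactly four attributes is a blunt choice; a probability threshold lets the count vary per image. A hedged alternative for the inside of the loop (the 0.1 cutoff is an assumption to tune on validation data):

import numpy as np

pr = np.array([0.02, 0.45, 0.08, 0.30])  # toy sigmoid outputs for one image
chosen = np.where(pr > 0.1)[0]           # attribute ids whose probability clears the cutoff
if len(chosen) == 0:                     # fall back to the single best guess
    chosen = np.array([pr.argmax()])
print(' '.join(map(str, chosen)))        # -> "1 3"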
In [15]:
submission.to_csv('submission.csv', index=False)
---------------------------------------------------------------------------
NameError: name 'submission' is not defined (traceback trimmed)
In [16]:
# import the modules we'll need
from IPython.display import HTML
import base64

# function that takes in a dataframe and creates a text link to
# download it (will only work for files < 2MB or so)
def create_download_link(df, title="Download CSV file", filename="submission_1.csv"):
    csv = df.to_csv(index=False)
    b64 = base64.b64encode(csv.encode())
    payload = b64.decode()
    html = '<a download="{filename}" href="data:text/csv;base64,{payload}" target="_blank">{title}</a>'
    html = html.format(payload=payload,title=title,filename=filename)
    return HTML(html)

# create a link to download the dataframe
create_download_link(submission)
---------------------------------------------------------------------------
NameError: name 'submission' is not defined (traceback trimmed)
In [17]:
# Fallback: since this re-run had neither the competition data nor internet access,
# the final submission is copied from a previously generated file in an attached dataset.
predictoutput = pd.read_csv("../input/mydatasetforaerialcactus/imetsubmission_1.csv")
predictoutput.to_csv('submission.csv', index=False)