# From: https://www.kaggle.com/khursani8/fast-ai-ootb-cutout-efficientnet
# Author: khursani
# Score: 0.38584
# Try
# Load the iNaturalist 2019 training annotations and build a
# (file_name, category_id) dataframe suitable for fastai's ImageList.
from fastai.vision import *
from fastai.metrics import *

PATH = Path('../input')
ann_file = '../input/train2019.json'
# NOTE: json, pd, Path all come from fastai's star imports above.
with open(ann_file) as data_file:
    train_anns = json.load(data_file)

# One row per annotation: which image belongs to which category.
train_anns_df = pd.DataFrame(train_anns['annotations'])[['image_id','category_id']]
train_img_df = pd.DataFrame(train_anns['images'])[['id', 'file_name']].rename(columns={'id':'image_id'})
df_train_file_cat = pd.merge(train_img_df, train_anns_df, on='image_id')
# fastai's label_from_df treats string labels as categories, so cast here.
df_train_file_cat['category_id'] = df_train_file_cat['category_id'].astype(str)
df_train_file_cat = df_train_file_cat.drop(['image_id'], axis=1)
df_train_file_cat.head()
%%time
# Try Oversampling
res = None
sample_to = df_train_file_cat.category_id.value_counts().max() # which is 500
for grp in df_train_file_cat.groupby('category_id'):
n = grp[1].shape[0]
additional_rows = grp[1].sample(0 if sample_to < n else sample_to - n, replace=True)
rows = pd.concat((grp[1], additional_rows))
if res is None: res = rows
else: res = pd.concat((res, rows))
res.category_id.value_counts()[:10]
# Load the test-set image listing (file_name + image_id) for inference.
test_ann_file = '../input/test2019.json'
with open(test_ann_file) as data_file:
    test_anns = json.load(data_file)

test_img_df = pd.DataFrame(test_anns['images'])[['file_name','id']].rename(columns={'id':'image_id'})
test_img_df.head()
# Build the fastai item lists: a 30% subsample of the oversampled frame with
# a random 10% validation split, labels taken from the dataframe, and the
# test images attached for later inference.
train_items = ImageList.from_df(df=res, path=PATH/"train_val2019")
test_items = ImageList.from_df(df=test_img_df, path=PATH/"test2019")
src = (train_items
       .use_partial_data(0.3)
       .split_by_rand_pct(0.1)
       .label_from_df()
       .add_test(test_items))
# Stage-1 databunch: default augmentations at 128px, batch size 128,
# normalized with ImageNet statistics (pretrained backbone expects this).
data = src.transform(get_transforms(), size=128).databunch(bs=64*2).normalize(imagenet_stats)
# Install the EfficientNet implementation (notebook shell magic — requires
# internet-enabled kernel).
!pip install efficientnet_pytorch
from efficientnet_pytorch import EfficientNet
# Backbone variant used throughout; B3's classifier head has 1536 inputs.
model_name = 'efficientnet-b3'
def getModel(pret):
    """Return an EfficientNet (model_name) with its head resized to data.c classes.

    NOTE(review): `pret` is accepted but ignored — ImageNet weights are
    always loaded, matching the original behavior for the existing
    getModel(False) caller.
    """
    model = EfficientNet.from_pretrained(model_name)
    # model._bn1 = nn.Identity()
    # Read the head's input width instead of hard-coding 1536 so switching
    # model_name to another EfficientNet variant keeps this line correct.
    model._fc = nn.Linear(model._fc.in_features, data.c)
    return model
# Earlier experiment kept for reference: densenet201 via cnn_learner.
# learn = cnn_learner(data,models.densenet201,metrics=[error_rate],model_dir='/kaggle/working',pretrained=True,loss_func=LabelSmoothingCrossEntropy()).mixup()
# Learner with label smoothing + mixup, trained in mixed precision (fp16).
learn = Learner(data,getModel(False),metrics=[error_rate],model_dir='/kaggle/working',loss_func=LabelSmoothingCrossEntropy()).mixup().to_fp16()
# LR range test to pick the one-cycle max learning rate.
learn.lr_find()
learn.recorder.plot()
# Stage 1: 3 epochs at 128px.
learn.fit_one_cycle(3,1e-3)
# Stage-2 augmentation config: cutout holes sized at 25% of the 224px side,
# applied with probability 0.75.
SZ=224
cutout_frac = 0.25
p_cutout = 0.75
cutout_sz = round(SZ*cutout_frac)
cutout_tfm = cutout(n_holes=(1,1), length=(cutout_sz, cutout_sz), p=p_cutout)
# Progressive resizing: swap the learner's data for 224px images with the
# cutout transform added; halve the batch size to fit the larger images.
learn.data = (
src
.transform(get_transforms(xtra_tfms=[cutout_tfm]),size=SZ)
.databunch(bs=64)
.normalize(imagenet_stats)
)
# Stage 2: 7 epochs at 224px with cutout.
learn.fit_one_cycle(7,1e-3)
# Persist weights, then predict on the attached test set.
learn.save('cutout-efficient')
# learn.unfreeze()
# learn.fit_one_cycle(8,slice(1e-6,1e-4))
preds,y = learn.get_preds(DatasetType.Test)
# Top-5 class indices per image (competition scores on top-5 predictions).
results = torch.topk(preds,5)
# Turn each image's top-5 class indices into the space-separated label
# string the competition expects. Each string intentionally keeps a leading
# space, matching the original accumulation behavior.
out = []
for row in results[1].numpy():
    labels = "".join(" " + str(data.classes[j]) for j in row)
    out.append(labels)
# print(out)
# Fill the sample submission's 'predicted' column (row order matches the
# test ImageList order) and write the final CSV.
sam_sub_df = pd.read_csv('../input/kaggle_sample_submission.csv')
# sam_sub_df.head()
sam_sub_df["predicted"] = out
sam_sub_df.head()
sam_sub_df.to_csv("submission.csv",index=False)