# From: https://www.kaggle.com/deeplearningzy/angtk-0190409-test1
# Author: Zhangyao
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load in
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory
import os
# Show which files are present in the input directory.
print(os.listdir("../input"))
# Any results you write to the current directory are saved as output.

# Read the three CSV files used by the rest of the script.
train_img = pd.read_csv("../input/train.csv")
labels_img = pd.read_csv("../input/labels.csv")
submission = pd.read_csv("../input/sample_submission.csv")

# Report the size of each table; DataFrame.shape is (rows, columns).
# The "Test" line describes the frame read from sample_submission.csv.
print(f"Train data shape - rows: {train_img.shape[0]} columns: {train_img.shape[1]}")
print(f"labels data shape - rows: {labels_img.shape[0]} columns: {labels_img.shape[1]}")
# Fixed typo in the emitted message: "roes" -> "rows".
print(f"Test data shape - rows: {submission.shape[0]} columns: {submission.shape[1]}")
# Frequency table of the `attribute_ids` column of the training data.
# value_counts() orders by descending count by default, so head(5) holds the
# five most frequent values and tail(5) the five least frequent ones.
# Each result is a two-column frame: the value itself (column "landmark_id")
# and how many rows carry it (column "count").
th5 = (
    train_img["attribute_ids"]
    .value_counts()
    .head(5)
    .rename_axis("landmark_id")
    .reset_index(name="count")
)
# Notebook-style bare expression (shows the frame when it ends a cell).
th5
tb5 = (
    train_img["attribute_ids"]
    .value_counts()
    .tail(5)
    .rename_axis("landmark_id")
    .reset_index(name="count")
)
# Notebook-style bare expression (shows the frame when it ends a cell).
tb5
# Bar chart of the most frequent entries: one bar per row of th5, with the
# "landmark_id" column on the x axis and the "count" column as bar height.
sns.set_color_codes("pastel")
plt.figure(figsize=(6, 10))
plt.title('most frequent landmarks')
sns.barplot(
    data=th5,
    x="landmark_id",
    y="count",
    color="blue",
    label="Count",
)
plt.show()
# Bar chart of the least frequent entries: one bar per row of tb5, with the
# "landmark_id" column on the x axis and the "count" column as bar height.
sns.set_color_codes("pastel")
plt.figure(figsize=(6, 10))
plt.title('Least frequent landmarks')
sns.barplot(
    data=tb5,
    x="landmark_id",
    y="count",
    color="orange",
    label="Count",
)
plt.show()