From 4437b0e3f0af8930830f4c79fad1e36a437a634a Mon Sep 17 00:00:00 2001
From: wizardforcel <562826179@qq.com>
Date: Sat, 29 Dec 2018 15:58:46 +0800
Subject: [PATCH] 20

---
 20.md | 986 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 986 insertions(+)
 create mode 100644 20.md

diff --git a/20.md b/20.md
new file mode 100644
index 0000000..c9966c0
--- /dev/null
+++ b/20.md
@@ -0,0 +1,986 @@
+# 二十、数据可视化
+
+> 作者：[Chris Albon](https://chrisalbon.com/)
+> 
+> 译者：[飞龙](https://github.com/wizardforcel)
+> 
+> 协议：[CC BY-NC-SA 4.0](http://creativecommons.org/licenses/by-nc-sa/4.0/)
+
+## MatPlotLib 中的双向条形图
+
+```
+%matplotlib inline
+import pandas as pd
+import matplotlib.pyplot as plt
+import numpy as np
+
+# 创建数据帧
+raw_data = {'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
+        'pre_score': [4, 24, 31, 2, 3],
+        'mid_score': [25, 94, 57, 62, 70],
+        'post_score': [5, 43, 23, 23, 51]}
+df = pd.DataFrame(raw_data, columns = ['first_name', 'pre_score', 'mid_score', 'post_score'])
+df
+```
+
+|  | first_name | pre_score | mid_score | post_score |
+| --- | --- | --- | --- | --- |
+| 0 | Jason | 4 | 25 | 5 |
+| 1 | Molly | 24 | 94 | 43 |
+| 2 | Tina | 31 | 57 | 23 |
+| 3 | Jake | 2 | 62 | 23 |
+| 4 | Amy | 3 | 70 | 51 |
+
+```
+# 输入数据，特别是第二和
+# 第三行，跳过第一列
+x1 = df.ix[1, 1:]
+x2 = df.ix[2, 1:]
+
+# 创建条形标签
+bar_labels = ['Pre Score', 'Mid Score', 'Post Score']
+
+# 创建图形
+fig = plt.figure(figsize=(8,6))
+
+# 设置 y 的位置
+y_pos = np.arange(len(x1))
+y_pos = [x for x in y_pos]
+plt.yticks(y_pos, bar_labels, fontsize=10)
+
+# 在 y_pos 的位置上创建水平条形
+plt.barh(y_pos, 
+         # 使用数据 x1
+         x1, 
+         # 中心对齐
+         align='center', 
+         # 透明度为 0.4
+         alpha=0.4, 
+         # 颜色为绿色
+         color='#263F13')
+
+# 在 y_pos 的位置上创建水平条形
+plt.barh(y_pos, 
+         # 使用数据 -x2
+         -x2,
+         # 中心对齐
+         align='center', 
+         # 透明度为 0.4
+         alpha=0.4, 
+         # 颜色为绿色
+         color='#77A61D')
+
+# 注解和标签
+plt.xlabel('Tina\'s Score: Light Green. Molly\'s Score: Dark Green')
+t = plt.title('Comparison of Molly and Tina\'s Score')
+plt.ylim([-1,len(x1)+0.1])
+plt.xlim([-max(x2)-10, max(x1)+10])
+plt.grid()
+
+plt.show()
+```
+
+![png](https://chrisalbon.com/python/data_visualization/matplotlib_back_to_back_bar_plot_6_0.png)
+
+## MatPlotLib 中的条形图
+
+```
+%matplotlib inline
+import pandas as pd
+import matplotlib.pyplot as plt
+import numpy as np
+
+# 创建数据帧
+raw_data = {'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
+        'pre_score': [4, 24, 31, 2, 3],
+        'mid_score': [25, 94, 57, 62, 70],
+        'post_score': [5, 43, 23, 23, 51]}
+df = pd.DataFrame(raw_data, columns = ['first_name', 'pre_score', 'mid_score', 'post_score'])
+df
+```
+
+|  | first_name | pre_score | mid_score | post_score |
+| --- | --- | --- | --- | --- |
+| 0 | Jason | 4 | 25 | 5 |
+| 1 | Molly | 24 | 94 | 43 |
+| 2 | Tina | 31 | 57 | 23 |
+| 3 | Jake | 2 | 62 | 23 |
+| 4 | Amy | 3 | 70 | 51 |
+
+```
+# 为每个变量创建得分均值的列表
+mean_values = [df['pre_score'].mean(), df['mid_score'].mean(), df['post_score'].mean()]
+
+# 创建变动列表，设为得分上下 .25
+variance = [df['pre_score'].mean() * 0.25, df['pre_score'].mean() * 0.25, df['pre_score'].mean() * 0.25]
+
+# 设置条形标签
+bar_labels = ['Pre Score', 'Mid Score', 'Post Score']
+
+# 创建条形的 x 位置
+x_pos = list(range(len(bar_labels)))
+
+# 在 x 位置上创建条形图
+plt.bar(x_pos,
+        # 使用 mean_values 中的数据
+        mean_values, 
+        # y-error 直线设置为变动
+        yerr=variance, 
+        # 中心对齐
+        align='center',
+        # 颜色
+        color='#FFC222',
+        # 透明度为 0.5
+        alpha=0.5)
+
+# 添加网格
+plt.grid()
+
+# 设置 y 轴高度
+max_y = max(zip(mean_values, variance)) # 返回一个元组，这里是 (61.6, 3.2)
+plt.ylim([0, (max_y[0] + max_y[1]) * 1.1])
+
+# 设置轴标签和标题
+plt.ylabel('Score')
+plt.xticks(x_pos, bar_labels)
+plt.title('Mean Scores For Each Test')
+
+plt.show()
+```
+
+![png](https://chrisalbon.com/python/data_visualization/matplotlib_bar_plot_6_0.png)
+
+## Seaborn 中的调色板
+
+```
+import pandas as pd
+%matplotlib inline
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+# 创建数据帧
+data = {'date': ['2014-05-01 18:47:05.069722', '2014-05-01 18:47:05.119994', '2014-05-02 18:47:05.178768', '2014-05-02 18:47:05.230071', '2014-05-02 18:47:05.230071', '2014-05-02 18:47:05.280592', '2014-05-03 18:47:05.332662', '2014-05-03 18:47:05.385109', '2014-05-04 18:47:05.436523', '2014-05-04 18:47:05.486877'], 
+        'deaths_regiment_1': [34, 43, 14, 15, 15, 14, 31, 25, 62, 41],
+        'deaths_regiment_2': [52, 66, 78, 15, 15, 5, 25, 25, 86, 1],
+        'deaths_regiment_3': [13, 73, 82, 58, 52, 87, 26, 5, 56, 75],
+        'deaths_regiment_4': [44, 75, 26, 15, 15, 14, 54, 25, 24, 72],
+        'deaths_regiment_5': [25, 24, 25, 15, 57, 68, 21, 27, 62, 5],
+        'deaths_regiment_6': [84, 84, 26, 15, 15, 14, 26, 25, 62, 24],
+        'deaths_regiment_7': [46, 57, 26, 15, 15, 14, 26, 25, 62, 41]}
+df = pd.DataFrame(data, columns = ['date', 'battle_deaths', 'deaths_regiment_1', 'deaths_regiment_2',
+                                   'deaths_regiment_3', 'deaths_regiment_4', 'deaths_regiment_5',
+                                   'deaths_regiment_6', 'deaths_regiment_7'])
+df = df.set_index(df.date)
+
+sns.palplot(sns.color_palette("deep", 10))
+```
+
+![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_5_0.png)
+
+```
+sns.palplot(sns.color_palette("muted", 10))
+```
+
+![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_6_0.png)
+
+```
+sns.palplot(sns.color_palette("bright", 10))
+```
+
+![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_7_0.png)
+
+```
+sns.palplot(sns.color_palette("dark", 10))
+```
+
+![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_8_0.png)
+
+```
+sns.palplot(sns.color_palette("colorblind", 10))
+```
+
+![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_9_0.png)
+
+```
+sns.palplot(sns.color_palette("Paired", 10))
+```
+
+![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_10_0.png)
+
+```
+sns.palplot(sns.color_palette("BuGn", 10))
+```
+
+![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_11_0.png)
+
+```
+sns.palplot(sns.color_palette("GnBu", 10))
+```
+
+![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_12_0.png)
+
+```
+sns.palplot(sns.color_palette("OrRd", 10))
+```
+
+![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_13_0.png)
+
+```
+sns.palplot(sns.color_palette("PuBu", 10))
+```
+
+![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_14_0.png)
+
+```
+sns.palplot(sns.color_palette("YlGn", 10))
+```
+
+![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_15_0.png)
+
+```
+sns.palplot(sns.color_palette("YlGnBu", 10))
+```
+
+![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_16_0.png)
+
+```
+sns.palplot(sns.color_palette("YlOrBr", 10))
+```
+
+![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_17_0.png)
+
+```
+sns.palplot(sns.color_palette("YlOrRd", 10))
+```
+
+![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_18_0.png)
+
+```
+sns.palplot(sns.color_palette("BrBG", 10))
+```
+
+![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_19_0.png)
+
+```
+sns.palplot(sns.color_palette("PiYG", 10))
+```
+
+![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_20_0.png)
+
+```
+sns.palplot(sns.color_palette("PRGn", 10))
+```
+
+![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_21_0.png)
+
+```
+sns.palplot(sns.color_palette("PuOr", 10))
+```
+
+![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_22_0.png)
+
+```
+sns.palplot(sns.color_palette("RdBu", 10))
+```
+
+![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_23_0.png)
+
+```
+sns.palplot(sns.color_palette("RdGy", 10))
+```
+
+![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_24_0.png)
+
+```
+sns.palplot(sns.color_palette("RdYlBu", 10))
+```
+
+![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_25_0.png)
+
+```
+sns.palplot(sns.color_palette("RdYlGn", 10))
+```
+
+![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_26_0.png)
+
+```
+sns.palplot(sns.color_palette("Spectral", 10))
+```
+
+![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_27_0.png)
+
+```
+# 创建调色板并将其设为当前调色板
+flatui = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71"]
+sns.set_palette(flatui)
+sns.palplot(sns.color_palette())
+```
+
+![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_29_0.png)
+
+```
+# 设置绘图颜色
+sns.tsplot([df.deaths_regiment_1, df.deaths_regiment_2, df.deaths_regiment_3, df.deaths_regiment_4,
+            df.deaths_regiment_5, df.deaths_regiment_6, df.deaths_regiment_7], color="#34495e")
+
+# <matplotlib.axes._subplots.AxesSubplot at 0x116f5db70> 
+```
+
+![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_31_1.png)
+
+## 使用 Seaborn 和 pandas 创建时间序列绘图
+
+```
+import pandas as pd
+%matplotlib inline
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+data = {'date': ['2014-05-01 18:47:05.069722', '2014-05-01 18:47:05.119994', '2014-05-02 18:47:05.178768', '2014-05-02 18:47:05.230071', '2014-05-02 18:47:05.230071', '2014-05-02 18:47:05.280592', '2014-05-03 18:47:05.332662', '2014-05-03 18:47:05.385109', '2014-05-04 18:47:05.436523', '2014-05-04 18:47:05.486877'], 
+        'deaths_regiment_1': [34, 43, 14, 15, 15, 14, 31, 25, 62, 41],
+        'deaths_regiment_2': [52, 66, 78, 15, 15, 5, 25, 25, 86, 1],
+        'deaths_regiment_3': [13, 73, 82, 58, 52, 87, 26, 5, 56, 75],
+        'deaths_regiment_4': [44, 75, 26, 15, 15, 14, 54, 25, 24, 72],
+        'deaths_regiment_5': [25, 24, 25, 15, 57, 68, 21, 27, 62, 5],
+        'deaths_regiment_6': [84, 84, 26, 15, 15, 14, 26, 25, 62, 24],
+        'deaths_regiment_7': [46, 57, 26, 15, 15, 14, 26, 25, 62, 41]}
+df = pd.DataFrame(data, columns = ['date', 'battle_deaths', 'deaths_regiment_1', 'deaths_regiment_2',
+                                   'deaths_regiment_3', 'deaths_regiment_4', 'deaths_regiment_5',
+                                   'deaths_regiment_6', 'deaths_regiment_7'])
+df = df.set_index(df.date)
+
+sns.tsplot([df.deaths_regiment_1, df.deaths_regiment_2, df.deaths_regiment_3, df.deaths_regiment_4,
+            df.deaths_regiment_5, df.deaths_regiment_6, df.deaths_regiment_7], color="indianred")
+
+# <matplotlib.axes._subplots.AxesSubplot at 0x1140be780> 
+```
+
+![png](https://chrisalbon.com/python/data_visualization/seaborn_pandas_timeseries_plot_5_1.png)
+
+```
+# 带有置信区间误差条，但不绘制连线的时间序列绘图
+sns.tsplot([df.deaths_regiment_1, df.deaths_regiment_2, df.deaths_regiment_3, df.deaths_regiment_4,
+            df.deaths_regiment_5, df.deaths_regiment_6, df.deaths_regiment_7], err_style="ci_bars", interpolate=False)
+
+# <matplotlib.axes._subplots.AxesSubplot at 0x116400668> 
+```
+
+![png](https://chrisalbon.com/python/data_visualization/seaborn_pandas_timeseries_plot_7_1.png)
+
+## 使用 Seaborn 创建散点图
+
+```
+import pandas as pd
+%matplotlib inline
+import random
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+# 创建空数据帧
+df = pd.DataFrame()
+
+# 添加列
+df['x'] = random.sample(range(1, 1000), 5)
+df['y'] = random.sample(range(1, 1000), 5)
+df['z'] = [1,0,0,1,0]
+df['k'] = ['male','male','male','female','female']
+
+# 查看前几行数据
+df.head()
+```
+
+|  | x | y | z | k |
+| --- | --- | --- | --- | --- |
+| 0 | 466 | 948 | 1 | male |
+| 1 | 832 | 481 | 0 | male |
+| 2 | 978 | 465 | 0 | male |
+| 3 | 510 | 206 | 1 | female |
+| 4 | 848 | 357 | 0 | female |
+
+```
+# 设置散点图样式
+sns.set_context("notebook", font_scale=1.1)
+sns.set_style("ticks")
+
+# 创建数据帧的散点图
+sns.lmplot('x', # 横轴
+           'y', # 纵轴
+           data=df, # 数据源
+           fit_reg=False, # 不要拟合回归直线
+           hue="z", # 设置颜色
+           scatter_kws={"marker": "D", # 设置标记样式
+                        "s": 100}) # 设置标记大小
+
+# 设置标题
+plt.title('Histogram of IQ')
+
+# 设置横轴标签
+plt.xlabel('Time')
+
+# 设置纵轴标签
+plt.ylabel('Deaths')
+
+# <matplotlib.text.Text at 0x112b7bb70> 
+```
+
+![png](https://chrisalbon.com/python/data_visualization/seaborn_scatterplot_7_1.png)
+
+## MatPlotLib 中的分组条形图
+
+```
+%matplotlib inline
+import pandas as pd
+import matplotlib.pyplot as plt
+import numpy as np
+
+raw_data = {'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
+        'pre_score': [4, 24, 31, 2, 3],
+        'mid_score': [25, 94, 57, 62, 70],
+        'post_score': [5, 43, 23, 23, 51]}
+df = pd.DataFrame(raw_data, columns = ['first_name', 'pre_score', 'mid_score', 'post_score'])
+df
+```
+
+|  | first_name | pre_score | mid_score | post_score |
+| --- | --- | --- | --- | --- |
+| 0 | Jason | 4 | 25 | 5 |
+| 1 | Molly | 24 | 94 | 43 |
+| 2 | Tina | 31 | 57 | 23 |
+| 3 | Jake | 2 | 62 | 23 |
+| 4 | Amy | 3 | 70 | 51 |
+
+```
+# 设置条形的位置和宽度
+pos = list(range(len(df['pre_score']))) 
+width = 0.25 
+
+# 绘制条形
+fig, ax = plt.subplots(figsize=(10,5))
+
+# 使用 pre_score 数据，
+# 在位置 pos 上创建条形
+plt.bar(pos, 
+        # 使用数据 df['pre_score']
+        df['pre_score'], 
+        # 宽度
+        width, 
+        # 透明度为 0.5
+        alpha=0.5, 
+        # 颜色
+        color='#EE3224', 
+        # 标签是 first_name 的第一个值
+        label=df['first_name'][0]) 
+
+# 使用 mid_score 数据，
+# 在位置 pos + 一定宽度上创建条形
+plt.bar([p + width for p in pos], 
+        # 使用数据 df['mid_score']
+        df['mid_score'],
+        # 宽度
+        width, 
+        # 透明度为 0.5
+        alpha=0.5, 
+        # 颜色
+        color='#F78F1E', 
+        # 标签是 first_name 的第二个值
+        label=df['first_name'][1]) 
+
+# 使用 post_score 数据，
+# 在位置 pos + 一定宽度上创建条形
+plt.bar([p + width*2 for p in pos], 
+        # 使用数据 df['post_score']
+        df['post_score'], 
+        # 宽度
+        width, 
+        # 透明度为 0.5
+        alpha=0.5, 
+        # 颜色
+        color='#FFC222', 
+        # 标签是 first_name 的第三个值
+        label=df['first_name'][2]) 
+
+# 设置纵轴标签
+ax.set_ylabel('Score')
+
+# 设置标题
+ax.set_title('Test Subject Scores')
+
+# 设置 x 刻度的位置
+ax.set_xticks([p + 1.5 * width for p in pos])
+
+# 设置 x 刻度的标签
+ax.set_xticklabels(df['first_name'])
+
+# 设置横轴和纵轴的区域
+plt.xlim(min(pos)-width, max(pos)+width*4)
+plt.ylim([0, max(df['pre_score'] + df['mid_score'] + df['post_score'])] )
+
+# 添加图例并展示绘图
+plt.legend(['Pre Score', 'Mid Score', 'Post Score'], loc='upper left')
+plt.grid()
+plt.show()
+```
+
+![png](https://chrisalbon.com/python/data_visualization/matplotlib_grouped_bar_plot_6_0.png)
+
+## MatPlotLib 中的直方图
+
+```
+%matplotlib inline
+import pandas as pd
+import matplotlib.pyplot as plt
+import numpy as np
+import math
+
+# 设置 ipython 的最大行数
+pd.set_option('display.max_row', 1000)
+
+# 将 ipython 显示的最大列数设为 50
+pd.set_option('display.max_columns', 50)
+
+df = pd.read_csv('https://www.dropbox.com/s/52cb7kcflr8qm2u/5kings_battles_v1.csv?dl=1')
+df.head()
+```
+
+|  | name | year | battle_number | attacker_king | defender_king | attacker_1 | attacker_2 | attacker_3 | attacker_4 | defender_1 | defender_2 | defender_3 | defender_4 | attacker_outcome | battle_type | major_death | major_capture | attacker_size | defender_size | attacker_commander | defender_commander | summer | location | region | note |
+| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
+| 0 | Battle of the Golden Tooth | 298 | 1 | Joffrey/Tommen Baratheon | Robb Stark | Lannister | NaN | NaN | NaN | Tully | NaN | NaN | NaN | win | pitched battle | 1 | 0 | 15000 | 4000 | Jaime Lannister | Clement Piper, Vance | 1 | Golden Tooth | The Westerlands | NaN |
+| 1 | Battle at the Mummer's Ford | 298 | 2 | Joffrey/Tommen Baratheon | Robb Stark | Lannister | NaN | NaN | NaN | Baratheon | NaN | NaN | NaN | win | ambush | 1 | 0 | NaN | 120 | Gregor Clegane | Beric Dondarrion | 1 | Mummer's Ford | The Riverlands | NaN |
+| 2 | Battle of Riverrun | 298 | 3 | Joffrey/Tommen Baratheon | Robb Stark | Lannister | NaN | NaN | NaN | Tully | NaN | NaN | NaN | win | pitched battle | 0 | 1 | 15000 | 10000 | Jaime Lannister, Andros Brax | Edmure Tully, Tytos Blackwood | 1 | Riverrun | The Riverlands | NaN |
+| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
+| 3 | Battle of the Green Fork | 298 | 4 | Robb Stark | Joffrey/Tommen Baratheon | Stark | NaN | NaN | NaN | Lannister | NaN | NaN | NaN | loss | pitched battle | 1 | 1 | 18000 | 20000 | Roose Bolton, Wylis Manderly, Medger Cerwyn, H... | Tywin Lannister, Gregor Clegane, Kevan Lannist... | 1 | Green Fork | The Riverlands | NaN |
+| 4 | Battle of the Whispering Wood | 298 | 5 | Robb Stark | Joffrey/Tommen Baratheon | Stark | Tully | NaN | NaN | Lannister | NaN | NaN | NaN | win | ambush | 1 | 1 | 1875 | 6000 | Robb Stark, Brynden Tully | Jaime Lannister | 1 | Whispering Wood | The Riverlands | NaN |
+
+```
+# 制作攻击方和防守方大小的两个变量
+# 但是当有超过 90000 个攻击方时将其排除在外
+data1 = df['attacker_size'][df['attacker_size'] < 90000]
+data2 = df['defender_size'][df['attacker_size'] < 90000]
+
+# 创建宽度为 2000 的桶
+bins = np.arange(data1.min(), data2.max(), 2000) # 固定桶的大小
+
+# 绘制攻击方大小的直方图
+plt.hist(data1, 
+         bins=bins, 
+         alpha=0.5, 
+         color='#EDD834',
+         label='Attacker')
+
+# 绘制防守方大小的直方图
+plt.hist(data2, 
+         bins=bins, 
+         alpha=0.5, 
+         color='#887E43',
+         label='Defender')
+
+# 设置图形的 x 和 y 边界
+plt.ylim([0, 10])
+
+# 设置标题和标签
+plt.title('Histogram of Attacker and Defender Size')
+plt.xlabel('Number of troops')
+plt.ylabel('Number of battles')
+plt.legend(loc='upper right')
+
+plt.show()
+```
+
+![png](https://chrisalbon.com/python/data_visualization/matplotlib_histogram_6_0.png)
+
+```
+# 制作攻击方和防守方大小的两个变量
+# 但是当有超过 90000 个攻击方时将其排除在外
+data1 = df['attacker_size'][df['attacker_size'] < 90000]
+data2 = df['defender_size'][df['attacker_size'] < 90000]
+
+# 创建 10 个桶，最小值为 
+# data1 和 data2 的最小值
+bins = np.linspace(min(data1 + data2), 
+                   # 最大值为它们的最大值
+                   max(data1 + data2),
+                   # 并分为 10 个桶
+                   10)
+
+# 绘制攻击方大小的直方图
+plt.hist(data1, 
+         # 使用定义好的桶
+         bins=bins, 
+         # 透明度
+         alpha=0.5, 
+         # 颜色
+         color='#EDD834',
+         # 攻击方的标签
+         label='Attacker')
+
+# 绘制防守方大小的直方图
+plt.hist(data2, 
+         # 使用定义好的桶
+         bins=bins, 
+         # 透明度
+         alpha=0.5, 
+         # 颜色
+         color='#887E43',
+         # 防守方的标签
+         label='Defender')
+
+# 设置图形的 x 和 y 边界
+plt.ylim([0, 10])
+
+# 设置标题和标签
+plt.title('Histogram of Attacker and Defender Size')
+plt.xlabel('Number of troops')
+plt.ylabel('Number of battles')
+plt.legend(loc='upper right')
+
+plt.show()
+```
+
+![png](https://chrisalbon.com/python/data_visualization/matplotlib_histogram_8_0.png)
+
+## 从 Pandas 数据帧生成 MatPlotLib 散点图
+
+```
+%matplotlib inline
+import pandas as pd
+import matplotlib.pyplot as plt
+import numpy as np
+
+raw_data = {'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'], 
+        'last_name': ['Miller', 'Jacobson', 'Ali', 'Milner', 'Cooze'], 
+        'female': [0, 1, 1, 0, 1],
+        'age': [42, 52, 36, 24, 73], 
+        'preTestScore': [4, 24, 31, 2, 3],
+        'postTestScore': [25, 94, 57, 62, 70]}
+df = pd.DataFrame(raw_data, columns = ['first_name', 'last_name', 'age', 'female', 'preTestScore', 'postTestScore'])
+df
+```
+
+|  | first_name | last_name | age | female | preTestScore | postTestScore |
+| --- | --- | --- | --- | --- | --- | --- |
+| 0 | Jason | Miller | 42 | 0 | 4 | 25 |
+| 1 | Molly | Jacobson | 52 | 1 | 24 | 94 |
+| 2 | Tina | Ali | 36 | 1 | 31 | 57 |
+| 3 | Jake | Milner | 24 | 0 | 2 | 62 |
+| 4 | Amy | Cooze | 73 | 1 | 3 | 70 |
+
+```
+# preTestScore 和 postTestScore 的散点图
+# 每个点的大小取决于年龄
+plt.scatter(df.preTestScore, df.postTestScore
+, s=df.age)
+
+# <matplotlib.collections.PathCollection at 0x10ca42b00> 
+```
+
+![png](https://chrisalbon.com/python/data_visualization/matplotlib_scatterplot_from_pandas_6_1.png)
+
+```
+# preTestScore 和 postTestScore 的散点图
+# 大小为 300，颜色取决于性别
+plt.scatter(df.preTestScore, df.postTestScore, s=300, c=df.female)
+
+# <matplotlib.collections.PathCollection at 0x10cb90a90> 
+```
+
+![png](https://chrisalbon.com/python/data_visualization/matplotlib_scatterplot_from_pandas_8_1.png)
+
+## Matplotlib 的简单示例
+
+```
+# 让 Jupyter 加载 matplotlib 
+# 并内联创建所有绘图（也就是在页面上）
+%matplotlib inline
+
+import matplotlib.pyplot as pyplot
+
+pyplot.plot([1.6, 2.7])
+
+# [<matplotlib.lines.Line2D at 0x10c4e7978>] 
+```
+
+![png](https://chrisalbon.com/python/data_visualization/matplotlib_simple_example_6_1.png)
+
+## MatPlotLib 中的饼图
+
+```
+%matplotlib inline
+import pandas as pd
+import matplotlib.pyplot as plt
+
+raw_data = {'officer_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
+        'jan_arrests': [4, 24, 31, 2, 3],
+        'feb_arrests': [25, 94, 57, 62, 70],
+        'march_arrests': [5, 43, 23, 23, 51]}
+df = pd.DataFrame(raw_data, columns = ['officer_name', 'jan_arrests', 'feb_arrests', 'march_arrests'])
+df
+```
+
+|  | officer_name | jan_arrests | feb_arrests | march_arrests |
+| --- | --- | --- | --- | --- |
+| 0 | Jason | 4 | 25 | 5 |
+| 1 | Molly | 24 | 94 | 43 |
+| 2 | Tina | 31 | 57 | 23 |
+| 3 | Jake | 2 | 62 | 23 |
+| 4 | Amy | 3 | 70 | 51 |
+
+```
+# 创建一列，其中包含每个官员的总逮捕数
+df['total_arrests'] = df['jan_arrests'] + df['feb_arrests'] + df['march_arrests']
+df
+```
+
+|  | officer_name | jan_arrests | feb_arrests | march_arrests | total_arrests |
+| --- | --- | --- | --- | --- | --- |
+| 0 | Jason | 4 | 25 | 5 | 34 |
+| 1 | Molly | 24 | 94 | 43 | 161 |
+| 2 | Tina | 31 | 57 | 23 | 111 |
+| 3 | Jake | 2 | 62 | 23 | 87 |
+| 4 | Amy | 3 | 70 | 51 | 124 |
+
+```
+# （从 iWantHue）创建一列颜色
+colors = ["#E13F29", "#D69A80", "#D63B59", "#AE5552", "#CB5C3B", "#EB8076", "#96624E"]
+
+# 创建饼图
+plt.pie(
+    # 使用数据 total_arrests
+    df['total_arrests'],
+    # 标签为官员名称
+    labels=df['officer_name'],
+    # 没有阴影
+    shadow=False,
+    # 颜色
+    colors=colors,
+    # 将一块扇形移出去
+    explode=(0, 0, 0, 0, 0.15),
+    # 起始角度为 90 度
+    startangle=90,
+    # 以保留一位小数的百分比形式显示各扇形的占比
+    autopct='%1.1f%%',
+    )
+
+# 使饼状图为正圆
+plt.axis('equal')
+
+# 查看绘图
+plt.tight_layout()
+plt.show()
+```
+
+![png](https://chrisalbon.com/python/data_visualization/matplotlib_pie_chart_7_0.png)
+
+## MatPlotLib 中的散点图
+
+```
+%matplotlib inline
+import pandas as pd
+import matplotlib.pyplot as plt
+import numpy as np
+
+# 设置 ipython 的最大显示行数
+pd.set_option('display.max_row', 1000)
+
+# 将 ipython 显示的最大列数设为 50
+pd.set_option('display.max_columns', 50)
+
+df = pd.read_csv('https://raw.githubusercontent.com/chrisalbon/war_of_the_five_kings_dataset/master/5kings_battles_v1.csv')
+df.head()
+```
+
+|  | name | year | battle_number | attacker_king | defender_king | attacker_1 | attacker_2 | attacker_3 | attacker_4 | defender_1 | defender_2 | defender_3 | defender_4 | attacker_outcome | battle_type | major_death | major_capture | attacker_size | defender_size | attacker_commander | defender_commander | summer | location | region | note |
+| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
+| 0 | Battle of the Golden Tooth | 298 | 1 | Joffrey/Tommen Baratheon | Robb Stark | Lannister | NaN | NaN | NaN | Tully | NaN | NaN | NaN | win | pitched battle | 1.0 | 0.0 | 15000.0 | 4000.0 | Jaime Lannister | Clement Piper, Vance | 1.0 | Golden Tooth | The Westerlands | NaN |
+| 1 | Battle at the Mummer's Ford | 298 | 2 | Joffrey/Tommen Baratheon | Robb Stark | Lannister | NaN | NaN | NaN | Baratheon | NaN | NaN | NaN | win | ambush | 1.0 | 0.0 | NaN | 120.0 | Gregor Clegane | Beric Dondarrion | 1.0 | Mummer's Ford | The Riverlands | NaN |
+| 2 | Battle of Riverrun | 298 | 3 | Joffrey/Tommen Baratheon | Robb Stark | Lannister | NaN | NaN | NaN | Tully | NaN | NaN | NaN | win | pitched battle | 0.0 | 1.0 | 15000.0 | 10000.0 | Jaime Lannister, Andros Brax | Edmure Tully, Tytos Blackwood | 1.0 | Riverrun | The Riverlands | NaN |
+| 3 | Battle of the Green Fork | 298 | 4 | Robb Stark | Joffrey/Tommen Baratheon | Stark | NaN | NaN | NaN | Lannister | NaN | NaN | NaN | loss | pitched battle | 1.0 | 1.0 | 18000.0 | 20000.0 | Roose Bolton, Wylis Manderly, Medger Cerwyn, H... | Tywin Lannister, Gregor Clegane, Kevan Lannist... | 1.0 | Green Fork | The Riverlands | NaN |
+| 4 | Battle of the Whispering Wood | 298 | 5 | Robb Stark | Joffrey/Tommen Baratheon | Stark | Tully | NaN | NaN | Lannister | NaN | NaN | NaN | win | ambush | 1.0 | 1.0 | 1875.0 | 6000.0 | Robb Stark, Brynden Tully | Jaime Lannister | 1.0 | Whispering Wood | The Riverlands | NaN |
+
+```
+# 创建图形
+plt.figure(figsize=(10,8))
+
+# 创建散点图
+            # 298 年的攻击方大小为 x 轴
+plt.scatter(df['attacker_size'][df['year'] == 298], 
+            # 298 年的防守方大小为 y 轴
+            df['defender_size'][df['year'] == 298], 
+            # 标记
+            marker='x', 
+            # 颜色
+            color='b',
+            # 透明度
+            alpha=0.7,
+            # 大小
+            s = 124,
+            # 标签
+            label='Year 298')
+
+            # 299 年的攻击方大小为 x 轴
+plt.scatter(df['attacker_size'][df['year'] == 299], 
+            # 299 年的防守方大小为 y 轴
+            df['defender_size'][df['year'] == 299], 
+            # 标记
+            marker='o', 
+            # 颜色
+            color='r', 
+            # 透明度
+            alpha=0.7,
+            # 大小
+            s = 124,
+            # 标签
+            label='Year 299')
+
+            # 300 年的攻击方大小为 x 轴
+plt.scatter(df['attacker_size'][df['year'] == 300], 
+            # 300 年的防守方大小为 y 轴
+            df['defender_size'][df['year'] == 300], 
+            # 标记
+            marker='^', 
+            # 颜色
+            color='g', 
+            # 透明度
+            alpha=0.7, 
+            # 大小
+            s = 124,
+            # 标签
+            label='Year 300')
+
+# 标题
+plt.title('Battles Of The War Of The Five Kings')
+
+# y 标签
+plt.ylabel('Defender Size')
+
+# x 标签
+plt.xlabel('Attacker Size')
+
+# 图例
+plt.legend(loc='upper right')
+
+# 设置图形边界
+plt.xlim([min(df['attacker_size'])-1000, max(df['attacker_size'])+1000])
+plt.ylim([min(df['defender_size'])-1000, max(df['defender_size'])+1000])
+
+plt.show()
+```
+
+![png](https://chrisalbon.com/python/data_visualization/matplotlib_simple_scatterplot_6_0.png)
+
+## MatPlotLib 中的栈式百分比条形图
+
+```
+%matplotlib inline
+import pandas as pd
+import matplotlib.pyplot as plt
+
+raw_data = {'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
+        'pre_score': [4, 24, 31, 2, 3],
+        'mid_score': [25, 94, 57, 62, 70],
+        'post_score': [5, 43, 23, 23, 51]}
+df = pd.DataFrame(raw_data, columns = ['first_name', 'pre_score', 'mid_score', 'post_score'])
+df
+```
+
+|  | first_name | pre_score | mid_score | post_score |
+| --- | --- | --- | --- | --- |
+| 0 | Jason | 4 | 25 | 5 |
+| 1 | Molly | 24 | 94 | 43 |
+| 2 | Tina | 31 | 57 | 23 |
+| 3 | Jake | 2 | 62 | 23 |
+| 4 | Amy | 3 | 70 | 51 |
+
+```
+# 创建带有一个子图的图形
+f, ax = plt.subplots(1, figsize=(10,5))
+
+# 将条宽设为 1
+bar_width = 1
+
+# 条形左边界的位置
+bar_l = [i for i in range(len(df['pre_score']))] 
+
+# x 轴刻度的位置（条形的中心是条形标签）
+tick_pos = [i+(bar_width/2) for i in bar_l] 
+
+# 创建每个参与者的总得分
+totals = [i+j+k for i,j,k in zip(df['pre_score'], df['mid_score'], df['post_score'])]
+
+# 创建每个参与者的 pre_score 和总得分的百分比
+pre_rel = [i / j * 100 for  i,j in zip(df['pre_score'], totals)]
+
+# 创建每个参与者的 mid_score 和总得分的百分比
+mid_rel = [i / j * 100 for  i,j in zip(df['mid_score'], totals)]
+
+# 创建每个参与者的 post_score 和总得分的百分比
+post_rel = [i / j * 100 for  i,j in zip(df['post_score'], totals)]
+
+# 在位置 bar_l 创建条形图
+ax.bar(bar_l, 
+       # 使用数据 pre_rel
+       pre_rel, 
+       # 标签 
+       label='Pre Score', 
+       # 透明度
+       alpha=0.9, 
+       # 颜色
+       color='#019600',
+       # 条形宽度
+       width=bar_width,
+       # 边框颜色
+       edgecolor='white'
+       )
+
+# 在位置 bar_l 创建条形图
+ax.bar(bar_l, 
+       # 使用数据 mid_rel
+       mid_rel, 
+       # 底部为 pre_rel
+       bottom=pre_rel, 
+       # 标签
+       label='Mid Score', 
+       # 透明度
+       alpha=0.9, 
+       # 颜色
+       color='#3C5F5A', 
+       # 条形宽度
+       width=bar_width,
+       # 边框颜色
+       edgecolor='white'
+       )
+
+# 在位置 bar_l 创建条形图
+ax.bar(bar_l, 
+       # 使用数据 post_rel
+       post_rel, 
+       # 底部为 pre_rel 和 mid_rel
+       bottom=[i+j for i,j in zip(pre_rel, mid_rel)], 
+       # 标签
+       label='Post Score',
+       # 透明度
+       alpha=0.9, 
+       # 颜色
+       color='#219AD8', 
+       # 条形宽度
+       width=bar_width,
+       # 边框颜色
+       edgecolor='white'
+       )
+
+# 将刻度设为 first_name
+plt.xticks(tick_pos, df['first_name'])
+ax.set_ylabel("Percentage")
+ax.set_xlabel("")
+
+# 设置图形边界
+plt.xlim([min(tick_pos)-bar_width, max(tick_pos)+bar_width])
+plt.ylim(-10, 110)
+
+# 旋转轴标签
+plt.setp(plt.gca().get_xticklabels(), rotation=45, horizontalalignment='right')
+
+# 展示绘图
+plt.show()
+```
+
+![png](https://chrisalbon.com/python/data_visualization/matplotlib_percentage_stacked_bar_plot_6_0.png)
-- 
GitLab