參考:https://www.machinelearningplus.com/plots/top-50-matplotlib-visualizations-the-master-plots-python/
散點圖(Scatterplot)是用於研究兩個變量之間關係的經典和基本圖。如果數據中有多個組,則可能需要以不同顏色可視化每個組。在 Matplotlib 中,你可以方便地使用 plt.scatter() 來實現。
# Scatter plot of the midwest dataset, one colour per category.
# NOTE: `%matplotlib` is an IPython magic, not Python syntax; uncomment it only
# when running this cell inside an IPython/Jupyter session.
# %matplotlib
import warnings

import matplotlib as mpl
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import font_manager as fm
from matplotlib import patches
from matplotlib import pyplot as plt
from scipy.spatial import ConvexHull

warnings.simplefilter('ignore')

# Import dataset
midwest = pd.read_csv("https://raw.githubusercontent.com/selva86/datasets/master/midwest_filter.csv")
# Raw string so the backslashes in the Windows path are never treated as escapes.
zhongwen_font = fm.FontProperties(fname=r'C:\Windows\Fonts\華文楷體.ttf')

# Step 1: prepare the data.
# Build one colour per unique value of midwest['category'].
categories = np.unique(midwest['category'])
# max(..., 1) keeps the scaling defined when there is only one category.
color_span = max(len(categories) - 1, 1)
colors = [plt.cm.tab10(i / float(color_span)) for i in range(len(categories))]

# Step 2: draw one scatter series per category.
plt.figure(figsize=(16, 10), dpi=80, facecolor='w', edgecolor='k')
for i, category in enumerate(categories):
    # `color=` instead of `c=`: a bare RGBA tuple passed as `c` is ambiguous and
    # would be value-mapped if the series happened to contain exactly 4 points.
    plt.scatter('area', 'poptotal',
                data=midwest.loc[midwest.category == category, :],
                s=20, color=colors[i], label=str(category))

# Step 3: decorations (limits, labels, title, legend).
ax = plt.gca()
ax.set(xlim=(0.0, 0.1), ylim=(0, 90000))
# The axis labels are Chinese, so they need the CJK font as well.
ax.set_xlabel('地區', fontproperties=zhongwen_font)
ax.set_ylabel('人口', fontproperties=zhongwen_font)
plt.xticks(fontsize=12, fontproperties=zhongwen_font)
plt.yticks(fontsize=12, fontproperties=zhongwen_font)
plt.title("中西部地區人口分佈圖", fontsize=22, fontproperties=zhongwen_font)
# `fontsize` is ignored by legend() when `prop` is given, so carry the size
# on a copy of the font properties instead.
legend_font = zhongwen_font.copy()
legend_font.set_size(12)
plt.legend(prop=legend_font)
plt.show()
有時,您希望在邊界內顯示一組點以強調其重要性。在此示例中,您將從數據幀中取出應該被環繞的記錄,並將其傳遞給下面代碼中定義的 encircle() 函數。
# Bubble plot of the midwest dataset with a convex-hull outline around one state.
# NOTE: `%matplotlib` is an IPython magic, not Python syntax; uncomment it only
# when running this cell inside an IPython/Jupyter session.
# %matplotlib
import warnings

import matplotlib as mpl
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import font_manager as fm
from matplotlib import patches
from matplotlib import pyplot as plt
from scipy.spatial import ConvexHull

warnings.simplefilter('ignore')
sns.set_style("white")

# S1: prepare the data.
midwest = pd.read_csv("https://raw.githubusercontent.com/selva86/datasets/master/midwest_filter.csv")
# Raw string so the backslashes in the Windows path are never treated as escapes.
zhongwen_font = fm.FontProperties(fname=r'C:\Windows\Fonts\simsun.ttc')

# Build one colour per unique value of midwest['category'].
categories = np.unique(midwest['category'])
colors = [plt.cm.tab10(i / float(len(categories) - 1)) for i in range(len(categories))]

# S2: draw one scatter series per category.
fig = plt.figure(figsize=(16, 10), dpi=80, facecolor='w', edgecolor='k')
for i, category in enumerate(categories):
    # s='dot_size' is looked up through `data=`; this presumes the frame has a
    # column with that name -- TODO confirm against the CSV.
    # `color=` instead of `c=`: a bare RGBA tuple passed as `c` is ambiguous and
    # would be value-mapped if the series happened to contain exactly 4 points.
    plt.scatter('area', 'poptotal',
                data=midwest.loc[midwest.category == category, :],
                s='dot_size', color=colors[i], label=str(category),
                edgecolors='black', linewidths=.5)


# S3: boundary
# https://stackoverflow.com/questions/44575681/how-do-i-encircle-different-data-sets-in-scatter-plot
def encircle(x, y, ax=None, **kw):
    """Add a polygon patch tracing the convex hull of the points (x, y).

    Args:
        x: x coordinates of the points.
        y: y coordinates of the points, paired with ``x``.
        ax: Axes to draw on; the current axes are used when omitted.
        **kw: Forwarded unchanged to ``plt.Polygon`` (e.g. ``ec``, ``fc``,
            ``alpha``, ``linewidth``).
    """
    if ax is None:
        ax = plt.gca()
    points = np.c_[x, y]
    hull = ConvexHull(points)
    # hull.vertices indexes the input points that form the hull outline.
    ax.add_patch(plt.Polygon(points[hull.vertices, :], **kw))


# Select the records to encircle.
midwest_encircle_data = midwest.loc[midwest.state == 'IN', :]

# Draw the hull twice: a faint filled area, then a crisp outline on top.
encircle(midwest_encircle_data.area, midwest_encircle_data.poptotal, ec="k", fc="gold", alpha=0.1)
encircle(midwest_encircle_data.area, midwest_encircle_data.poptotal, ec="firebrick", fc="none", linewidth=1.5)

# S4: decorations (limits, labels, title, legend).
plt.gca().set(xlim=(0.0, 0.1), ylim=(0, 90000), xlabel='Area', ylabel='Population')
plt.xticks(fontsize=12, fontproperties=zhongwen_font)
plt.yticks(fontsize=12, fontproperties=zhongwen_font)
plt.title("氣泡圖", fontsize=22, fontproperties=zhongwen_font)
# `fontsize` is ignored by legend() when `prop` is given, so carry the size
# on a copy of the font properties instead.
legend_font = zhongwen_font.copy()
legend_font.set_size(12)
plt.legend(prop=legend_font)
plt.show()
如果你想了解兩個變量如何相互改變,那麼最佳擬合線就是最合適的選擇。下圖顯示了數據中各組之間最佳擬合線的差別。要禁用分組並僅爲整個數據集繪製一條最佳擬合線,請從下面的 sns.lmplot() 調用中刪除 hue="cyl" 參數。
# Scatter plot with one robust linear best-fit line per cylinder group.
# NOTE: `%matplotlib` is an IPython magic, not Python syntax; uncomment it only
# when running this cell inside an IPython/Jupyter session.
# %matplotlib
import warnings

import matplotlib as mpl
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import font_manager as fm
from matplotlib import patches
from matplotlib import pyplot as plt
from scipy.spatial import ConvexHull

warnings.simplefilter('ignore')

# S1: data
df = pd.read_csv("https://raw.githubusercontent.com/selva86/datasets/master/mpg_ggplot2.csv")
# Raw string so the backslashes in the Windows path are never treated as escapes.
zhongwen_font = fm.FontProperties(fname=r'C:\Windows\Fonts\simsun.ttc')
# Keep only the rows whose `cyl` value is 4 or 8.
df_select = df.loc[df.cyl.isin([4, 8]), :]

# S2: plot
sns.set_style("white")
gridobj = sns.lmplot(
    x="displ", y="hwy", hue="cyl", data=df_select,
    aspect=1.6, robust=True, palette='tab10',
    scatter_kws=dict(s=60, linewidths=.7, edgecolors='black'),
)

# S3: decorations
gridobj.set(xlim=(0.5, 7.5), ylim=(0, 50))
plt.title("帶線性迴歸最佳擬合線的散點圖", fontsize=20, fontproperties=zhongwen_font)
plt.show()
或者,您可以在各自的列中顯示每個組的最佳擬合線。你可以通過在 sns.lmplot() 裏面設置 col 參數(此處爲 col="cyl")來實現這一點。
# Best-fit lines with each cylinder group drawn in its own column.
# Imports are repeated here so the cell runs on its own.
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

# Import Data
df = pd.read_csv("https://raw.githubusercontent.com/selva86/datasets/master/mpg_ggplot2.csv")
# Keep only the rows whose `cyl` value is 4 or 8.
df_select = df.loc[df.cyl.isin([4, 8]), :]

# Each line in its own column
sns.set_style("white")
gridobj = sns.lmplot(
    x="displ", y="hwy", data=df_select,
    height=7, robust=True, palette='Set1', col="cyl",
    scatter_kws=dict(s=60, linewidths=.7, edgecolors='black'),
)

# Decorations
gridobj.set(xlim=(0.5, 7.5), ylim=(0, 50))
plt.show()
通常,多個數據點具有完全相同的 X 和 Y 值。結果,多個點相互重疊繪製而被隱藏。爲避免這種情況,請稍微抖動這些點,以便您可以直觀地看到它們。使用 seaborn 的 stripplot() 可以很方便地做到這一點。
# Jittered strip plot so that coincident (cty, hwy) points stay visible.
# NOTE: `%matplotlib` is an IPython magic, not Python syntax; uncomment it only
# when running this cell inside an IPython/Jupyter session.
# %matplotlib
import warnings

import matplotlib as mpl
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import font_manager as fm
from matplotlib import patches
from matplotlib import pyplot as plt
from scipy.spatial import ConvexHull

warnings.simplefilter('ignore')

# S1: data
df = pd.read_csv("https://raw.githubusercontent.com/selva86/datasets/master/mpg_ggplot2.csv")
# Raw string so the backslashes in the Windows path are never treated as escapes.
zhongwen_font = fm.FontProperties(fname=r'C:\Windows\Fonts\simsun.ttc')

# S2: plot
fig, ax = plt.subplots(figsize=(16, 10), dpi=80)
# x and y are passed by keyword: current seaborn accepts only `data`
# positionally, and the keyword form also works on older releases.
sns.stripplot(x=df.cty, y=df.hwy, jitter=0.25, size=8, ax=ax, linewidth=.5)

# S3: decorations
plt.title('使用抖動圖避免點重疊', fontsize=22, fontproperties=zhongwen_font)
plt.show()
相關圖(Correlogram)用於直觀地查看給定數據幀(或2D數組)中所有可能的數值變量對之間的相關度量。
# Correlogram: annotated heatmap of the pairwise correlations in mtcars.
# NOTE: `%matplotlib` is an IPython magic, not Python syntax; uncomment it only
# when running this cell inside an IPython/Jupyter session.
# %matplotlib
import warnings

import matplotlib as mpl
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import font_manager as fm
from matplotlib import patches
from matplotlib import pyplot as plt
from scipy.spatial import ConvexHull

warnings.simplefilter('ignore')

# S1: data
df = pd.read_csv("https://github.com/selva86/datasets/raw/master/mtcars.csv")
# Raw string so the backslashes in the Windows path are never treated as escapes.
zhongwen_font = fm.FontProperties(fname=r'C:\Windows\Fonts\simsun.ttc')

# Compute the correlation matrix once. Restricting to numeric columns first
# keeps this working on pandas >= 2.0, where DataFrame.corr() no longer drops
# non-numeric columns silently and raises instead.
corr = df.select_dtypes(include='number').corr()

# S2: plot
plt.figure(figsize=(12, 10), dpi=80)
sns.heatmap(corr, xticklabels=corr.columns, yticklabels=corr.columns,
            cmap='RdYlGn', center=0, annot=True)

# S3: decorations
plt.title('相關圖', fontsize=22, fontproperties=zhongwen_font)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.show()
成對圖是探索性分析中的最愛,用以理解所有可能的數值變量對之間的關係。它是雙變量分析的必備工具。
# Pair plot of the iris dataset, coloured by species.
# NOTE: `%matplotlib` is an IPython magic, not Python syntax; uncomment it only
# when running this cell inside an IPython/Jupyter session.
# %matplotlib
import warnings

import matplotlib as mpl
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import font_manager as fm
from matplotlib import patches
from matplotlib import pyplot as plt
from scipy.spatial import ConvexHull

warnings.simplefilter('ignore')

# Load Dataset
df = sns.load_dataset('iris')

# Plot
# No plt.figure() call here: pairplot builds its own figure, so a figure
# created beforehand would only be left behind as an empty extra window.
sns.pairplot(df, kind="scatter", hue="species",
             plot_kws=dict(s=80, edgecolor="white", linewidth=2.5))
plt.show()
帶有直方圖的密度曲線將兩個圖表傳達的信息匯集在一起,這樣您就可以將它們放在一個圖形而不是兩個圖形中。
# Histogram-plus-density curves of city mileage for three vehicle classes.
# NOTE: `%matplotlib` is an IPython magic, not Python syntax; uncomment it only
# when running this cell inside an IPython/Jupyter session.
# %matplotlib
import warnings

import matplotlib as mpl
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import font_manager as fm
from matplotlib import patches
from matplotlib import pyplot as plt
from scipy.spatial import ConvexHull

warnings.simplefilter('ignore')

# S1: data
df = pd.read_csv("https://github.com/selva86/datasets/raw/master/mpg_ggplot2.csv")
# Raw string so the backslashes in the Windows path are never treated as escapes.
zhongwen_font = fm.FontProperties(fname=r'C:\Windows\Fonts\simsun.ttc')

# S2: plot
plt.figure(figsize=(13, 10), dpi=80)
# (value of df['class'], colour, legend label) for each series drawn.
series_specs = [
    ('compact', 'dodgerblue', 'Compact'),
    ('suv', 'orange', 'SUV'),
    ('minivan', 'g', 'minivan'),
]
# NOTE(review): sns.distplot is deprecated in newer seaborn releases in favour
# of histplot/displot; it is kept because the rest of this file targets the
# older API -- migrate when the seaborn version is pinned.
for vehicle_class, series_color, series_label in series_specs:
    sns.distplot(df.loc[df['class'] == vehicle_class, "cty"],
                 color=series_color, label=series_label,
                 hist_kws={'alpha': .7}, kde_kws={'linewidth': 3})
plt.ylim(0, 0.35)

# S3: decorations
plt.title('不一樣車型類型城市裏程密度圖', fontsize=22, fontproperties=zhongwen_font)
plt.legend()
plt.show()
與時間序列圖相比,日曆圖是可視化基於時間的數據的一種備選且不太常用的選項。雖然它在視覺上很吸引人,但數值並不十分明顯。然而,它可以很好地描繪極端值和假日效應。
# Calendar heatmap of the 2014 'VIX.Close' values from the yahoo dataset.
# pandas and pyplot are imported here so the cell runs on its own.
import calmap
import matplotlib as mpl
import pandas as pd
from matplotlib import pyplot as plt

# S1: data
df = pd.read_csv("https://raw.githubusercontent.com/selva86/datasets/master/yahoo.csv", parse_dates=['date'])
df.set_index('date', inplace=True)

# Select the 2014 rows through .loc: df['2014'] is a column lookup on
# pandas >= 2.0 and no longer selects rows of a DatetimeIndex by year.
vix_close_2014 = df.loc['2014', 'VIX.Close']

# S2: plot
# calendarplot creates its own figure from fig_kws, so the size and dpi are
# passed there rather than through a separate plt.figure() call, which would
# only leave an empty extra figure behind.
calmap.calendarplot(vix_close_2014,
                    fig_kws={'figsize': (16, 10), 'dpi': 80},
                    yearlabel_kws={'color': 'black', 'fontsize': 14},
                    subplot_kws={'title': 'Yahoo Stock Prices'})
plt.show()
by : 一隻阿木木