圖表類別:線形圖、柱狀圖、密度圖,以橫縱座標兩個維度爲主
同時可延展出多種其他圖表樣式
plt.plot(kind='line', ax=None, figsize=None, use_index=True, title=None, grid=None, legend=False,
style=None, logx=False, logy=False, loglog=False, xticks=None, yticks=None, xlim=None, ylim=None,
rot=None, fontsize=None, colormap=None, table=False, yerr=None, xerr=None, label=None, secondary_y=False, **kwds)
app
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# %matplotlib inline  -> IPython/Jupyter magic; enable it only inside a notebook.

# Plot straight from a Series: the index is the x axis, the values are the y axis.
ts = pd.Series(np.random.randn(1000),
               index=pd.date_range('1/1/2000', periods=1000))
ts = ts.cumsum()
ts.plot(
    kind='line',        # line, bar, barh, ... (line chart, bar chart, horizontal bars)
    label='hello',      # legend label; a DataFrame uses its column names instead
    # Style string: linestyle (--) and marker (.). The colour letter is left
    # out because recent pandas refuses a colour in `style` when `color` is
    # also passed.
    style='--.',
    color='red',        # line colour
    alpha=0.4,          # transparency, 0-1
    use_index=True,     # use the index as tick labels (default True)
    rot=45,             # tick-label rotation in degrees
    grid=True,          # show the grid; plt.grid() gives finer control
    ylim=[-50, 50],     # y-axis limits (xlim works the same way)
    yticks=list(range(-50, 50, 10)),  # y-axis tick values (xticks likewise)
    figsize=(8, 4),     # figure size
    title='test',       # figure title
    legend=True,        # show the legend; plt.legend() is the usual alternative
)
# Optional finer grid control:
# plt.grid(True, linestyle="--", color="gray", linewidth=0.5, axis='x')
plt.legend()
# The same data can also be drawn with plt.plot().
# Plot every column of a DataFrame as one line on a shared axes.
df = pd.DataFrame(np.random.randn(1000, 4), index=ts.index, columns=list('ABCD'))
df = df.cumsum()
df.plot(
    kind='line',
    style='--.',
    alpha=0.4,
    use_index=True,
    rot=45,
    grid=True,
    figsize=(8, 4),
    title='test',
    legend=True,
    subplots=False,     # False (default) draws all four lines in one chart;
                        # True gives each column its own subplot
    colormap='Greens',
)
# Passing the frame to matplotlib directly also works: plt.plot(df)
plt.plot(kind='bar/barh') , plt.bar()
s.plot(kind='bar',color = 'k',grid = True,alpha = 0.5,ax = axes[0])單系列柱狀圖、
df.plot(kind='bar',ax = axes[1],grid = True,colormap='Reds_r')# 多系列柱狀圖 、
df.plot(kind='bar',ax = axes[2],grid = True,colormap='Blues_r',stacked=True) # 多系列堆疊圖
df.plot.barh(ax = axes[3],grid = True,stacked=True,colormap = 'BuGn_r')
# Bar charts through the pandas .plot() interface, one chart per subplot.
fig, axes = plt.subplots(4, 1, figsize=(10, 10))   # four empty subplots

s = pd.Series(np.random.randint(0, 10, 16), index=list('abcdefghijklmnop'))
df = pd.DataFrame(np.random.rand(10, 3), columns=['a', 'b', 'c'])

# Single-series bar chart; `ax` selects which subplot receives the drawing.
s.plot(kind='bar', color='k', grid=True, alpha=0.5, ax=axes[0])

# Multi-series (grouped) bar chart.
df.plot(kind='bar', ax=axes[1], grid=True, colormap='Reds_r')

# Multi-series stacked bar chart; `stacked` turns stacking on.
df.plot(kind='bar', ax=axes[2], grid=True, colormap='Blues_r', stacked=True)

# Horizontal stacked bars using the newer accessor form .plot.<kind>().
df.plot.barh(ax=axes[3], grid=True, stacked=True, colormap='BuGn_r')
plt.bar(x, y1, width=1, facecolor='yellowgreen', edgecolor='white', yerr=y1*0.1)
for i,j in zip(x, y1): plt.text(i+0.3, j-0.15, '%.2f' % j, color = 'white')
# Mirrored bar chart drawn with plt.bar(), with a value label on every bar.
plt.figure(figsize=(10, 4))

x = np.arange(10)
y1 = np.random.rand(10)
y2 = -np.random.rand(10)

# x, height : bar positions and bar heights
# width     : bar width relative to the spacing of x
# facecolor : fill colour; edgecolor : border colour
# bottom    : y coordinate of each bar's base (varying it is how a Gantt
#             chart can be built)
# align     : 'center' (default) centres the bar on x, 'edge' starts it at x
# xerr/yerr : error bars in the x / y direction
plt.bar(x, y1, width=1, facecolor='yellowgreen', edgecolor='white',
        yerr=y1 * 0.1)
# Error-bar lengths must be non-negative, so take the magnitude of y2.
plt.bar(x, y2, width=1, facecolor='lightskyblue', edgecolor='white',
        yerr=np.abs(y2) * 0.1)

# zip() pairs each x position with its bar height so the text can be placed.
for xpos, height in zip(x, y1):
    plt.text(xpos + 0.3, height - 0.15, '%.2f' % height, color='white')
for xpos, height in zip(x, y2):
    plt.text(xpos + 0.3, height + 0.05, '%.2f' % -height, color='white')
print(list(zip(x,y1)))
---->>>> [(0, 0.47736539792043764), (1, 0.82954841542507074), (2, 0.20566514192862784), (3, 0.27679883800197358), (4, 0.45433494683444564),
(5, 0.89112910457025774), (6, 0.66065810313224915), (7, 0.91252133491535792), (8, 0.50006974665511594), (9, 0.13332483000972351)]
# Key call of the table example; data, rows and columns are defined in the
# full listing that follows.
plt.table(
    cellText=data,
    cellLoc='center',       # centre the text inside each cell
    cellColours=None,       # cell background colours
    rowLabels=rows,         # row labels
    # Row-label colours; BuPu can be swapped for any other colormap.
    rowColours=plt.cm.BuPu(np.linspace(0, 0.5, 5))[::-1],
    rowLoc='right',         # alignment of the row labels
    colLabels=columns,      # column labels
    # Column-label colours; [::-1] reverses the colour order.
    colColours=plt.cm.Reds(np.linspace(0, 0.5, 5))[::-1],
    loc='bottom',           # where the table is placed
)
# Stacked bar chart with a data table attached underneath it.
data = [[66386, 174296, 75131, 577908, 32015],
        [58230, 381139, 78045, 99308, 160454],
        [89135, 80552, 152558, 497981, 603535],
        [78415, 81858, 150656, 193263, 69638],
        [139361, 331509, 343164, 781380, 52269]]
columns = ('Freeze', 'Wind', 'Flood', 'Quake', 'Hail')
rows = ['%d year' % x for x in (100, 50, 20, 10, 5)]

# Reuse the label sequences so the chart and the table cannot drift apart.
df = pd.DataFrame(data, columns=columns, index=rows)
print(df)

df.plot(kind='bar', grid=True, colormap='Blues_r', stacked=True, figsize=(8, 3))

# cellText  : the table text
# cellLoc   : text alignment inside each cell
# rowLabels : row labels;  colLabels : column labels
# rowLoc    : alignment of the row labels
# loc       : table position -> left, right, top, bottom
plt.table(
    cellText=data,
    cellLoc='center',
    cellColours=None,
    rowLabels=rows,
    rowColours=plt.cm.BuPu(np.linspace(0, 0.5, 5))[::-1],  # any colormap works
    colLabels=columns,
    colColours=plt.cm.Reds(np.linspace(0, 0.5, 5))[::-1],
    rowLoc='right',
    loc='bottom',
)

# Hide the x tick labels; they would otherwise collide with the table.
plt.xticks([])
--------->>
          Freeze    Wind   Flood   Quake    Hail
100 year   66386  174296   75131  577908   32015
50 year 58230 381139 78045 99308 160454
20 year 89135 80552 152558 497981 603535
10 year 78415 81858 150656 193263 69638
5 year 139361 331509 343164 781380 52269
plt.plot.area()
plt.fill(), plt.fill_between()
plt.pie()
spa
上邊的多系列折線圖只能並列地表示4條線各自的變化狀況;而面積圖能夠堆疊,能夠把4個系列堆疊到一塊兒,看總體的變化趨勢。
df1.plot.area(colormap = 'Greens_r',alpha = 0.5,ax = axes[0]) df2.plot.area(stacked=False,colormap = 'Set2',alpha = 0.5,ax = axes[1])
# Area charts via Series.plot.area() / DataFrame.plot.area().
fig, axes = plt.subplots(2, 1, figsize=(8, 6))

df1 = pd.DataFrame(np.random.rand(10, 4), columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.random.randn(10, 4), columns=['a', 'b', 'c', 'd'])

# Stacked is the default; a stacked area chart needs every column to be
# all-positive or all-negative, which rand() guarantees for df1.
df1.plot.area(colormap='Greens_r', alpha=0.5, ax=axes[0])

# df2 mixes signs (randn), so it is drawn unstacked.
df2.plot.area(stacked=False, colormap='Set2', alpha=0.5, ax=axes[1])

# NaN values are filled with 0 automatically, so clean missing data first
# if that is not what you want.
填圖
axes[0].fill(x, y1, 'r',alpha=0.5,label='y1')
axes[1].fill_between(x, y1, y2, color ='b',alpha=0.5,label='area')
# Filled plots: fill() and fill_between().
fig, axes = plt.subplots(2, 1, figsize=(8, 6))

# fill(): fill the region enclosed by each curve.
x = np.linspace(0, 1, 500)
y1 = np.sin(4 * np.pi * x) * np.exp(-5 * x)
y2 = -np.sin(4 * np.pi * x) * np.exp(-5 * x)
axes[0].fill(x, y1, 'r', alpha=0.5, label='y1')
axes[0].fill(x, y2, 'g', alpha=0.5, label='y2')
# One-call equivalent: plt.fill(x, y1, 'r', x, y2, 'g', alpha=0.5)

# fill_between(): fill the region between two curves.
x = np.linspace(0, 5 * np.pi, 1000)
y1 = np.sin(x)
y2 = np.sin(2 * x)
axes[1].fill_between(x, y1, y2, color='b', alpha=0.5, label='area')  # label names the filled patch

# Add legend and grid to both subplots.
for ax in axes:
    ax.legend()
    ax.grid()
plt.pie(s, explode = [0.1,0,0,0], labels = s.index, colors=['r', 'g', 'b', 'c'], autopct='%.2f%%', pctdistance=0.6, labeldistance = 1.2, shadow = True, startangle=0, radius=1.5, frame=False)
# Pie chart with plt.pie().
s = pd.Series(3 * np.random.rand(4), index=['a', 'b', 'c', 'd'], name='series')

plt.axis('equal')   # equal aspect ratio so the pie is drawn as a circle
plt.pie(
    s,                            # the data
    explode=[0.1, 0, 0, 0],       # radial offset of each wedge
    labels=s.index,               # wedge labels
    colors=['r', 'g', 'b', 'c'],  # wedge colours
    autopct='%.2f%%',             # format of the value text drawn on each wedge
    pctdistance=0.6,              # distance of the autopct text from the centre, as a fraction of the radius
    labeldistance=1.2,            # radial distance at which the labels are drawn (default 1.1)
    shadow=True,                  # draw a shadow
    startangle=0,                 # starting angle
    radius=1.5,                   # pie radius
    frame=False,                  # whether to draw the axes frame
)
# counterclock (not set here) chooses counter-clockwise or clockwise wedge order.
print(s)
plt.hist(x, bins=10, range=None, normed=False, weights=None, cumulative=False, bottom=None,
histtype='bar', align='mid', orientation='vertical',rwidth=None, log=False, color=None, label=None,
stacked=False, hold=None, data=None, **kwargs)
指針
# `density=True` replaces the removed `normed=True` keyword.
s.hist(bins=20, histtype='bar', align='mid', orientation='vertical',
       alpha=0.5, density=True)
s.plot(kind='kde', style='k--')
# Histogram with a density (KDE) curve drawn over it.
s = pd.Series(np.random.randn(1000))

# bins        : number of bins
# density     : normalise so the histogram integrates to 1 (this keyword
#               replaces the removed `normed`), putting it on the same scale
#               as the KDE curve
# histtype    : bar, barstacked, step, stepfilled
# orientation : 'horizontal' or 'vertical'
# align       : 'left', 'mid' or 'right'
s.hist(bins=20, histtype='bar', align='mid', orientation='vertical',
       alpha=0.5, density=True)

# Density plot.
s.plot(kind='kde', style='k--')
df.plot.hist(stacked=True, bins=20, colormap='Greens_r', alpha=0.5, grid=True)
df.hist(bins=50) 生成多個直方圖
# Stacked histogram via DataFrame.plot.hist() (Series.plot.hist() also exists).
plt.figure(num=1)
df = pd.DataFrame(
    {'a': np.random.randn(1000) + 1,
     'b': np.random.randn(1000),
     'c': np.random.randn(1000) - 1,
     'd': np.random.randn(1000) - 2},
    columns=['a', 'b', 'c', 'd'],
)

# stacked: whether the series are stacked on top of each other.
df.plot.hist(stacked=True, bins=20, colormap='Greens_r', alpha=0.5, grid=True)

# DataFrame.hist() draws one separate histogram per column (a, b, c, d).
df.hist(bins=50)
plt.scatter(), pd.plotting.scatter_matrix()
plt.scatter(
    x, y,
    marker='.',
    # Marker sizes: a scalar such as 10 or one value per point. They are
    # areas, so take the magnitude of the random draw.
    s=np.abs(np.random.randn(1000)) * 100,
    cmap='Reds',
    c=y,
    alpha=0.8,
)
X、Y軸座標是兩個維度,點的大小是一個維度,點的顏色也是一個維度。
# Scatter plot with plt.scatter(): position, size and colour each carry a dimension.
plt.figure(figsize=(8, 6))
x = np.random.randn(1000)
y = np.random.randn(1000)

plt.scatter(
    x, y,
    marker='.',
    # s: marker size, a scalar (e.g. 10) or one value per point. Sizes are
    # areas and must not be negative, hence the abs() around the random draw.
    s=np.abs(np.random.randn(1000)) * 100,
    cmap='Reds',    # colormap used to translate `c` into colours
    c=y,            # marker colour values
    alpha=0.8,
)
# vmin / vmax (not set here) are scalars that bound the colour scale.
plt.grid()          # show the grid
# scatter_matrix lives in pandas.plotting; the top-level pd.scatter_matrix alias was removed.
pd.plotting.scatter_matrix(df, figsize=(10, 6), marker='o', diagonal='kde',
                           alpha=0.5, range_padding=0.1)
# Scatter matrix: pairwise scatter plots of every column against every other.
df = pd.DataFrame(np.random.randn(100, 4), columns=['a', 'b', 'c', 'd'])

# The function lives in pandas.plotting; the old top-level pd.scatter_matrix
# alias was removed.
pd.plotting.scatter_matrix(
    df,
    figsize=(10, 6),
    marker='o',
    diagonal='kde',     # 'hist' or 'kde': what to draw on the diagonal for each column
    alpha=0.5,
    range_padding=0.1,  # extra margin around the x/y range; larger values
                        # push the data further away from the axes
)
---->> 散點矩陣,a b c d之間互相比較
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x000000002039B5C0>, <matplotlib.axes._subplots.AxesSubplot object at 0x000000002054FDD8>, <matplotlib.axes._subplots.AxesSubplot object at 0x00000000206FF278>, <matplotlib.axes._subplots.AxesSubplot object at 0x00000000207349E8>], [<matplotlib.axes._subplots.AxesSubplot object at 0x000000002077CCF8>, <matplotlib.axes._subplots.AxesSubplot object at 0x00000000207B9B70>, <matplotlib.axes._subplots.AxesSubplot object at 0x0000000020801B38>, <matplotlib.axes._subplots.AxesSubplot object at 0x0000000020811D68>], [<matplotlib.axes._subplots.AxesSubplot object at 0x0000000020891710>, <matplotlib.axes._subplots.AxesSubplot object at 0x00000000208DC748>, <matplotlib.axes._subplots.AxesSubplot object at 0x000000002091C630>, <matplotlib.axes._subplots.AxesSubplot object at 0x000000002096C128>], [<matplotlib.axes._subplots.AxesSubplot object at 0x00000000209A3978>, <matplotlib.axes._subplots.AxesSubplot object at 0x00000000209F4208>, <matplotlib.axes._subplots.AxesSubplot object at 0x0000000020A2C978>, <matplotlib.axes._subplots.AxesSubplot object at 0x0000000020A73C88>]], dtype=object)
在平面內取一個定點O, 叫極點,引一條射線Ox,叫作極軸,再選定一個長度單位和角度的正方向(一般取逆時針方向)。對於平面內任何一點M,用ρ表示線段OM的長度,θ表示從Ox到OM的角度,ρ叫作點M的極徑,θ叫作點M的極角,有序數對 (ρ,θ)就叫點M的極座標,這樣創建的座標系叫作極座標系。
在極座標中,x被ρcosθ代替,y被ρsinθ代替,且 ρ² = x² + y²。
直角座標系座標與極座標的轉化:
例如:(2,π/3)爲極座標,它所對應的直角座標爲(2×cos π/3,2×sin π/3)。
座標是用該點到定點(稱做極點)的距離及該點和極點的連線與過極點的射線(稱爲極軸)所成的角度來肯定座標的。
調用subplot()建立子圖時經過設置projection='polar',即可建立一個極座標子圖,而後調用plot()在極座標子圖中繪圖。
fig = plt.figure(figsize=(8,4)) ax1 = plt.subplot(121, projection = 'polar') ax2 = plt.subplot(122)
ax1.plot(theta,theta*3,linestyle = '--',lw=1) ax2.plot(theta,theta*3,linestyle = '--',lw=1)
# 建立極座標軸
# Draw the same data on a polar axes and on a cartesian axes for comparison.
s = pd.Series(np.arange(20))
theta = np.arange(0, 2 * np.pi, 0.02)
print(s.head())
print(theta[:10])

fig = plt.figure(figsize=(8, 4))
ax1 = plt.subplot(121, projection='polar')   # polar subplot
ax2 = plt.subplot(122)                       # ordinary cartesian subplot
# Alternative way to create a polar axes: ax = fig.add_subplot(111, polar=True)

# On a polar axes the first argument is the angle (radians), the second the value.
ax1.plot(theta, theta * 3, linestyle='--', lw=1)   # lw -> line width
ax1.plot(s, linestyle='--', marker='.', lw=2)
ax2.plot(theta, theta * 3, linestyle='--', lw=1)
ax2.plot(s)
plt.grid()
theta=np.arange(0,2*np.pi,0.02) plt.figure(figsize=(8,4)) ax1= plt.subplot(121, projection='polar') ax1.plot(theta,theta/6,'--',lw=2)
ax2.set_theta_direction(-1) 設置座標軸正方向爲順時針,默認是逆時針;
ax2.set_thetagrids(np.arange(0.0, 360.0, 90),['a','b','c','d'])設置極座標角度--網格線與角度標籤數量要一致;
ax2.set_rgrids(np.arange(0.2,2,0.4))設置極徑網格線顯示,0.2--2之間,相隔0.4;
ax2.set_theta_offset(np.pi/2) 設置角度偏移,逆時針、弧度制;
ax2.set_rlim(0.2,1.2) 設置極徑0.2--1.2的範圍; ax2.set_rmax(2)設置顯示的極徑最大值;
ax2.set_rticks(np.arange(0.1, 1.5, 0.2))設置極徑網格線的顯示範圍0.1--1.5,每隔0.2個;
# Polar axes settings: ax1 keeps the defaults, ax2 shows each customisation.
theta = np.arange(0, 2 * np.pi, 0.02)
plt.figure(figsize=(8, 4))
ax1 = plt.subplot(121, projection='polar')
ax2 = plt.subplot(122, projection='polar')
ax1.plot(theta, theta / 6, '--', lw=2)
ax2.plot(theta, theta / 6, '--', lw=2)

# set_theta_direction(): positive angle direction; -1 is clockwise,
# the default is counter-clockwise.
ax2.set_theta_direction(-1)

# set_thetagrids(): angular grid lines and their labels; the number of
# labels must match the number of angles.
ax2.set_thetagrids(np.arange(0.0, 360.0, 90), ['a', 'b', 'c', 'd'])

# set_rgrids(): radial grid lines; the radii must be positive.
ax2.set_rgrids(np.arange(0.2, 2, 0.4))

# set_theta_offset(): rotate the zero-angle position (radians, counter-clockwise).
ax2.set_theta_offset(np.pi / 2)

# set_rlim(): displayed radial range.
ax2.set_rlim(0.2, 1.2)
# set_rmax(): displayed maximum radius (called after set_rlim here).
ax2.set_rmax(2)
# set_rticks(): positions of the radial grid lines / ticks.
ax2.set_rticks(np.arange(0.1, 1.5, 0.2))
-->
[<matplotlib.axis.YTick at 0x20ee45c0>, <matplotlib.axis.YTick at 0x20eeacc0>, <matplotlib.axis.YTick at 0x20f0d5c0>, <matplotlib.axis.YTick at 0x20f0dcf8>, <matplotlib.axis.YTick at 0x20f0f470>, <matplotlib.axis.YTick at 0x20eefbe0>, <matplotlib.axis.YTick at 0x20f14400>]
先建立極座標--->>建立數據
ax1.plot(theta,data1,'.--',label='data1') 繪製軌跡 --------->>> ax1.fill(theta,data1,alpha=0.2) 圈起來;
# Radar chart 1: a line plot on a polar axes. The outline is NOT joined
# end-to-end, because the first point is not repeated at the end.
plt.figure(figsize=(8, 4))
ax1 = plt.subplot(111, projection='polar')
ax1.set_title('radar map\n')
ax1.set_rlim(0, 12)

data1 = np.random.randint(1, 10, 10)
data2 = np.random.randint(1, 10, 10)
data3 = np.random.randint(1, 10, 10)
theta = np.arange(0, 2 * np.pi, 2 * np.pi / 10)

# For each series draw the outline, then fill the enclosed region.
for label, values in (('data1', data1), ('data2', data2), ('data3', data3)):
    ax1.plot(theta, values, '.--', label=label)
    ax1.fill(theta, values, alpha=0.2)
data1 = np.concatenate((data1, [data1[0]])); angles = np.concatenate((angles, [angles[0]])) # 閉合
plt.polar(angles, data1, 'o-', linewidth=1)作極座標; plt.fill(angles, data1, alpha=0.25) 填充;
# Radar chart 2: plt.polar() with a closed outline, obtained by repeating the
# first point at the end of every series.
labels = np.array(['a', 'b', 'c', 'd', 'e', 'f'])
data_length = 6
data1 = np.random.randint(0, 10, 6)
data2 = np.random.randint(0, 10, 6)

# Split the full circle into data_length equal angles.
angles = np.linspace(0, 2 * np.pi, data_length, endpoint=False)

# Close the polygons: the 6 values become 7, with the first one repeated.
data1 = np.concatenate((data1, [data1[0]]))
data2 = np.concatenate((data2, [data2[0]]))
angles = np.concatenate((angles, [angles[0]]))

plt.polar(angles, data1, 'o-', linewidth=1)
plt.fill(angles, data1, alpha=0.25)   # the outline is closed even without the fill
plt.polar(angles, data2, 'o-', linewidth=1)
plt.fill(angles, data2, alpha=0.25)

# Grid lines and labels: drop the duplicated closing angle so the number of
# grid angles matches the six labels.
plt.thetagrids(angles[:-1] * 180 / np.pi, labels)

# On a polar plot the radial limits are set through ylim.
plt.ylim(0, 10)
bar = ax1.bar(theta,data,alpha=0.5)
for r,bar in zip(data, bar): bar.set_facecolor(plt.cm.jet(r/10.))
# Polar bar chart: bars drawn on a polar axes.
plt.figure(figsize=(8, 4))
ax1 = plt.subplot(111, projection='polar')
ax1.set_title('radar map\n')
ax1.set_rlim(0, 12)

data = np.random.randint(1, 10, 10)
theta = np.arange(0, 2 * np.pi, 2 * np.pi / 10)

bars = ax1.bar(theta, data, alpha=0.5)
# Colour each bar by its value; a separate loop name avoids shadowing the
# container that is being iterated.
for value, patch in zip(data, bars):
    patch.set_facecolor(plt.cm.jet(value / 10.))

# Grid lines every 90 degrees with empty labels, so no angle text is shown.
plt.thetagrids(np.arange(0.0, 360.0, 90), [])
箱型圖:又稱爲盒須圖、盒式圖、盒狀圖或箱線圖,是一種用做顯示一組數據分散狀況資料的統計圖
包含一組數據的:最大值、最小值(這裏的最大、小值並非整個數據中的最大小值,而是拋開異常值以外的)、中位數、下四分位數(Q1)、上四分位數(Q3)、異常值
① 中位數 → 一組數據平均分紅兩份,中間的數
② 下四分位數Q1 → 將序列從小到大排序後平均分紅四份,位於25%處的數;位置的計算有(n+1)/4與(n-1)/4兩種,通常使用(n+1)/4
③ 上四分位數Q3 → 將排序後的序列平均分紅四份,位於75%處的數,位置爲3(n+1)/4;例如n=8時,3×(8+1)/4=6.75
④ 內限 → T形的盒須就是內限,最大值區間Q3+1.5IQR,最小值區間Q1-1.5IQR (IQR=Q3-Q1)
⑤ 外限 → 比內限更寬的界限,最大值區間Q3+3IQR,最小值區間Q1-3IQR (IQR=Q3-Q1)
⑥ 異常值 → 內限以外 - 中度異常,外限以外 - 極度異常
plt.plot.box( ),plt.boxplot( )
df.plot.box(ylim=[0,1.2], grid = True, color = color, ax = axes[0])
# Box plots through DataFrame.plot.box().
fig, axes = plt.subplots(2, 1, figsize=(10, 6))
df = pd.DataFrame(np.random.rand(10, 5), columns=['A', 'B', 'C', 'D', 'E'])

# Colours of the individual box-plot parts:
#   boxes    -> the box outline
#   whiskers -> the vertical lines between the box and the caps
#   medians  -> the median line (not the mean)
#   caps     -> the horizontal end lines of the whiskers
color = dict(boxes='DarkGreen', whiskers='DarkOrange',
             medians='DarkBlue', caps='Gray')

# Vertical boxes (default); `color` applies the styling above.
df.plot.box(ylim=[0, 1.2], grid=True, color=color, ax=axes[0])

# Horizontal boxes at explicit positions.
df.plot.box(
    vert=False,                  # vertical or not, default True
    positions=[1, 4, 5, 6, 8],   # where each column's box (A..E) is placed
    ax=axes[1],
    grid=True,
    color=color,
)
# Box plot through DataFrame.boxplot(), which forwards these options to
# matplotlib's boxplot.
df = pd.DataFrame(np.random.rand(10, 5), columns=['A', 'B', 'C', 'D', 'E'])
plt.figure(figsize=(10, 4))

f = df.boxplot(
    sym='o',              # outlier marker shape (see matplotlib markers)
    vert=True,            # vertical boxes, default True
    whis=1.5,             # whisker reach in IQR units: 1.5 -> inner fence,
                          # 3 -> outer fence; a pair such as [5, 95] pins the
                          # whiskers to those percentiles instead
    patch_artist=True,    # fill the box between the quartiles
    meanline=False,       # draw the mean as a line instead of a point
    showmeans=True,       # show the mean (hidden by default)
    showbox=True,         # show the box
    showcaps=True,        # show the whisker caps
    showfliers=True,      # show the outliers
    notch=False,          # notched box or not
    return_type='dict',   # return a dict of the drawn artists
)
plt.title('boxplot')
print(f)
-->
{'caps': [<matplotlib.lines.Line2D object at 0x0000000023BD49B0>, <matplotlib.lines.Line2D object at 0x0000000023BD4B38>,
<matplotlib.lines.Line2D object at 0x0000000023BEB940>, <matplotlib.lines.Line2D object at 0x0000000023BEBAC8>,
<matplotlib.lines.Line2D object at 0x0000000023C028D0>, <matplotlib.lines.Line2D object at 0x0000000023C02A58>,
<matplotlib.lines.Line2D object at 0x0000000023C19860>, <matplotlib.lines.Line2D object at 0x0000000023C19F98>,
<matplotlib.lines.Line2D object at 0x0000000023C2E7F0>, <matplotlib.lines.Line2D object at 0x0000000023B94A20>],
'means': [<matplotlib.lines.Line2D object at 0x0000000023BD8BA8>, <matplotlib.lines.Line2D object at 0x0000000023BF2B38>,
<matplotlib.lines.Line2D object at 0x0000000023C07AC8>, <matplotlib.lines.Line2D object at 0x0000000023C1FA58>,
<matplotlib.lines.Line2D object at 0x0000000023B97A58>], 'fliers': [<matplotlib.lines.Line2D object at 0x0000000023BE0A58>,
<matplotlib.lines.Line2D object at 0x0000000023BF69E8>, <matplotlib.lines.Line2D object at 0x0000000023C0D978>,
<matplotlib.lines.Line2D object at 0x0000000023C23908>, <matplotlib.lines.Line2D object at 0x0000000023B90BA8>],
'whiskers': [<matplotlib.lines.Line2D object at 0x0000000023BCE780>, <matplotlib.lines.Line2D object at 0x0000000023BCE9E8>,
<matplotlib.lines.Line2D object at 0x0000000023BE47F0>, <matplotlib.lines.Line2D object at 0x0000000023BE4A58>,
<matplotlib.lines.Line2D object at 0x0000000023BFB780>, <matplotlib.lines.Line2D object at 0x0000000023BFBF98>,
<matplotlib.lines.Line2D object at 0x0000000023C13710>, <matplotlib.lines.Line2D object at 0x0000000023C13EF0>,
<matplotlib.lines.Line2D object at 0x0000000023C2A6A0>, <matplotlib.lines.Line2D object at 0x0000000023C2AF98>],
'boxes': [<matplotlib.patches.PathPatch object at 0x0000000023BCE128>, <matplotlib.patches.PathPatch object at 0x0000000023BE4198>,
<matplotlib.patches.PathPatch object at 0x0000000023BFB128>, <matplotlib.patches.PathPatch object at 0x0000000023C130B8>,
<matplotlib.patches.PathPatch object at 0x0000000023C2A048>],
'medians': [<matplotlib.lines.Line2D object at 0x0000000023BD8390>, <matplotlib.lines.Line2D object at 0x0000000023BF2320>,
<matplotlib.lines.Line2D object at 0x0000000023C072B0>, <matplotlib.lines.Line2D object at 0x0000000023C1F240>,
<matplotlib.lines.Line2D object at 0x0000000023B94978>]}
#能夠進行遍歷字典去改裏邊的參數樣式
# Restyle the artists in the dict returned by boxplot(return_type='dict').
#   boxes    -> the boxes
#   medians  -> the median lines
#   whiskers -> the vertical lines from the box to the caps
#   fliers   -> the outliers
#   caps     -> the horizontal whisker end lines
#   means    -> the mean markers / lines
for box in f['boxes']:
    box.set(color='b', linewidth=1)        # box border colour
    box.set(facecolor='b', alpha=0.5)      # box fill colour

for whisker in f['whiskers']:
    whisker.set(color='k', linewidth=0.5, linestyle='-')

for cap in f['caps']:
    cap.set(color='gray', linewidth=2)

for median in f['medians']:
    median.set(color='DarkBlue', linewidth=2)

for flier in f['fliers']:
    flier.set(marker='o', color='y', alpha=0.5)
# Grouped box plots with DataFrame.boxplot(by=...).
df = pd.DataFrame(np.random.rand(10, 2), columns=['Col1', 'Col2'])
df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'B'])
df['Y'] = pd.Series(['A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B'])
print(df)

# by: column(s) whose values define the groups.
df.boxplot(by='X')

# column: the data columns to plot, one subplot each; here grouped by (X, Y).
df.boxplot(column=['Col1', 'Col2'], by=['X', 'Y'])
------> >>>
Col1 Col2 X Y 0 0.864057 0.773447 A A 1 0.866570 0.620441 A B 2 0.105994 0.944422 A A 3 0.959510 0.007522 A B 4 0.341135 0.421671 A A 5 0.357656 0.999898 B B 6 0.889062 0.440674 B A 7 0.656127 0.548576 B B 8 0.306237 0.841735 B A 9 0.961797 0.611649 B B
array([<matplotlib.axes._subplots.AxesSubplot object at 0x000000002E578208>, <matplotlib.axes._subplots.AxesSubplot object at 0x000000002E698F98>], dtype=object)