數據分析
數據分析
numpy+scipy+matplotlib+pandas
scikit-learn + tensorflow
1、Numpy的特色
一、擅長數值計算
二、足夠高的運算性能
三、支持矢量化運算
四、免費、開源
import datetime as dt

import numpy as np

# Compare a pure-Python loop with the vectorised NumPy equivalent for
# computing i**2 + i**3 over n integers.
n = 100000

# --- Pure-Python implementation ---
start = dt.datetime.now()
A, B = [], []
for i in range(n):
    A.append(i ** 2)
    B.append(i ** 3)
C = [a + b for a, b in zip(A, B)]
# total_seconds() covers the whole elapsed interval; the .microseconds
# attribute is only the sub-second component and wraps around every second.
print(int((dt.datetime.now() - start).total_seconds() * 10 ** 6))  # e.g. 143709 (microseconds)

# --- NumPy implementation ---
start = dt.datetime.now()
# The arithmetic runs in compiled code, which is why it is faster.
# int64 is requested explicitly so the cubes cannot overflow where the
# platform default integer is 32-bit.
A = np.arange(n, dtype=np.int64) ** 2
B = np.arange(n, dtype=np.int64) ** 3
C = A + B
print(int((dt.datetime.now() - start).total_seconds() * 10 ** 6))  # e.g. 34831 (microseconds)
2、Numpy的數組
一、Numpy中的數組是ndarray類類型的對象,將實際數據和元數據分開存放,獨立操做,以此提高性能
二、Numpy數組的元素類型必須相同(同質性)
三、Numpy數組的元素能夠經過基0的下標單獨訪問,size個元素,合理下標範圍[0,size-1],[-size,-1]
數組[i][j][k]等價於數組[i,j,k]
四、Numpy數組經過dtype和shape屬性表示元素的類型和維度,其中維度的類型是元組,按照從高到低的順序排列每一維的大小(頁,行,列)
五、建立數組的方法:
一、numpy.arange(起始值(0),終止值,步長(1)),只能生成一維數組
二、numpy.array(任意能夠被解釋爲數據的序列)
from __future__ import unicode_literals import numpy as np a = np.arange(1,3) print(a, a.shape, sep=' ')#[1 2] (2,) b = np.array([{1,2,3}, [4,5,6]]) print(b, b.shape, sep=' ')# [{1, 2, 3} list([4, 5, 6])] (2,) print(b.dtype) # object,是python對象,由於nump會將不一樣類型的元素轉爲相同的對象 c = np.array(['A','B',"CD"]) print(c.dtype)# <U2, 表示小端字節序Unicode2字節,‘A’實際上是<U1,由於數組的同質性,因此變爲了<U2
3、Numpy的內置數據類型
一、布爾
bool_
二、整數
一、有符號整數
int8/int16/int32/int64
二、無符號整數
uint8/uint16/uint32/uint64
三、浮點型
float16/float32/float64
四、複數
complex64/complex128,其中實部和虛部都是float型的
五、字符串
str_
4、顯式使用數據類型
一、默認數據類型不知足須要,人爲指定
二、複合類型,在一個元素中含有多個字段
三、用不一樣類型訪問同一個元素
numpy.array(...., dtype = 類型)
import numpy as np

# --- Converting between element types ---
a = np.array([1, 2, 3, 4, 5], dtype=np.int8)
print('a.dtype:', a.dtype)
b = a.astype(float)
print('b.dtype:', b.dtype)
c = a.astype(np.float32)
print('c.dtype:', c.dtype)
d = a.astype(np.str_)
print('d.dtype:', d.dtype, d)

# The following three spellings are equivalent:
# e = np.array([1234], dtype=np.int32)
# e = np.array([1234], dtype='int32')
e = np.array([1234], dtype='i4')
print('e:', e.dtype, e.shape)

# dtype = (variable-length type, length)
f = np.array(['1234'], dtype=(np.str_, 2))  # same as dtype='U2'
print('f:', f.dtype, f[0])

# dtype = (fixed-length type, shape)
g = np.array([(1, 2, 3, 4)], dtype=(np.int32, 4))  # each element is 4 int32 values
g = np.array([((1, 2), (3, 4))], dtype=(np.int32, (2, 2)))
print('g:', g.dtype, g.shape)  # the original printed e.shape here by mistake

# dtype = 'type string 1, type string 2, ...'
h = np.array([('1234', (1, 2, 3, 4))], dtype='U4 ,4i4')
# f0 and f1 are the field names NumPy generates automatically.
print('h:', h.dtype, h[0]['f0'], h[0]['f1'])

# dtype = {'names': [field names], 'formats': [field types]}
i = np.array([('1234', (1, 2, 3, 4)), ('5678', (5, 6, 7, 8))],
             dtype={'names': ['fa', 'fb'], 'formats': ['U4', '4i4']})
print('i:', i.dtype, i.shape, i[0]['fa'], i[0]['fb'])

# dtype = [(field name, field type, field shape), ...]
j = np.array([('1234', (1, 2, 3, 4))],
             dtype=[('fa', np.str_, 4), ('fb', np.int32, 4)])
print('j:', j.dtype, j.shape, j[0]['fa'], j[0]['fb'])
k = np.array([('1234', (1, 2, 3, 4))], dtype=[('fa', 'U4'), ('fb', '4i4')])
print('k:', k.dtype, k[0]['fa'], k[0]['fb'])

# dtype = (base type, interpreting type): view one uint16 as two uint8 bytes.
# 0x... is a hexadecimal literal; 'u1' is a 1-byte unsigned integer.
l = np.array([0x1234], dtype=('<u2', {'names': ['lo', 'hi'], 'formats': ['u1', 'u1']}))
# :x formats the number as hexadecimal.
print('{:x} {:x} {:x}'.format(l[0], l['lo'][0], l['hi'][0]))

m = np.array([('ABC', (1, 2, 3))], dtype='U3, 3u1')
print(m)  # [('ABC', [1, 2, 3])]
print(m.dtype)  # [('f0', '<U3'), ('f1', 'u1', (3,))]
m1 = np.array([('ABC', (1, 2, 3))], dtype=[('fa', np.str_, 3), ('fb', '3u1')])
print(m1)  # [('ABC', [1, 2, 3])]
print(m1.dtype)  # [('fa', '<U3'), ('fb', 'u1', (3,))]
5、切片
數組[起始:終止:步長,...],
缺省起始:首(+步長)/尾(-步長)
缺省終止:尾後(+步長)/首前(-步長)
缺省步長:1
6、改變維度
一、視圖變維:根據指定的新維度,構造新的元數據,實際數據共享
源數組.reshape(新維度) ->目標數組
\--------------/
共享實際數據
/--------------\
源數組.ravel()->一維目標數組
二、複製變維
源數組.flatten()->一維目標數組
| |
實際數據<- 非共享 ->實際數據副本
三、就地變維
源數組.shape = 新維度
原數組.resize(新維度)
四、轉置
原數組.transpose()->轉置視圖
7、合併與拆分
一、垂直合分
vstack((上,下))->垂直合併
concatenate((上,下),axis=0)
shape:(3,4)
0 1
vsplit(數組,等分份數)->垂直拆分
split(數組,等分份數,axis=0)
二、水平合分
hstack((左,右))->水平合併
concatenate((左,右),axis=1)
hsplit(數組,等分份數)->水平拆分
三、深度合分
dstack((前,後))->深度合併
dsplit(數組,等分份數)->深度拆分
四、行列合併
row_stack((上,下))->行合併
column_stack((左,右))->列合併
8、ndarray對象的屬性
dtype:元素的數據類型
shape:數組維度
ndim:數組維數
size:元素數,只有一維數組纔等價於len()
itemsize:每一個元素的字節數
nbytes:全部元素的總字節數,size × itemsize
T :轉置視圖
real/imag:複數數組的實部和虛部
flat:扁平迭代器
import numpy as np

# A 3x3 complex matrix used to show the basic ndarray attributes.
a = np.array([
    [1 + 1j, 2 + 4j, 3 + 7j],
    [4 + 2j, 5 + 5j, 6 + 8j],
    [7 + 3j, 8 + 6j, 9 + 9j],
])

print(a.dtype)     # complex128
print(a.shape)     # (3, 3)
print(a.ndim)      # 2
print(a.size)      # 9
print(a.itemsize)  # 16 bytes per element (16 * 8 = 128 bits)
print(a.nbytes)    # 144 (9 * 16)

print(a)
# [[1.+1.j 2.+4.j 3.+7.j]
#  [4.+2.j 5.+5.j 6.+8.j]
#  [7.+3.j 8.+6.j 9.+9.j]]

print(a.T)
# [[1.+1.j 4.+2.j 7.+3.j]
#  [2.+4.j 5.+5.j 8.+6.j]
#  [3.+7.j 6.+8.j 9.+9.j]]

print(a.real)
# [[1. 2. 3.]
#  [4. 5. 6.]
#  [7. 8. 9.]]

print(a.imag)
# [[1. 4. 7.]
#  [2. 5. 8.]
#  [3. 6. 9.]]

print(a.flat)  # <numpy.flatiter object at 0x...>
# The flat iterator walks every element in row-major order:
# (1+1j)(2+4j)(3+7j)(4+2j)(5+5j)(6+8j)(7+3j)(8+6j)(9+9j)
for elem in a.flat:
    print(elem)

# list() only splits along the first axis, giving one 1-D array per row:
# [array([1.+1.j, 2.+4.j, 3.+7.j]), array([4.+2.j, 5.+5.j, 6.+8.j]), array([7.+3.j, 8.+6.j, 9.+9.j])]
print(list(a))
9、數據可視化(matplotlib,數學繪圖庫)
一、缺省樣式
曲線圖:plot(水平座標數組,垂直座標數組)
import matplotlib.pyplot as mp
import numpy as np

# 1000 evenly spaced samples between -pi and pi.
x = np.linspace(-np.pi, np.pi, 1000)

# Draw sin(x) and cos(x)/2 with the default line style.
sin_y = np.sin(x)
mp.plot(x, sin_y)
cos_y = np.cos(x) / 2
mp.plot(x, cos_y)

mp.show()
二、線型、線寬和顏色
plot(...,linestyle=線型,linewidth=線寬,color=顏色)
三、設置座標範圍
mp.xlim(最小水平座標,最大水平座標)
mp.ylim(最小垂直座標,最大垂直座標)
四、設置刻度標籤
mp.xticks(刻度位置數組[,刻度文本數組])
mp.yticks(刻度位置數組[,刻度文本數組])
五、十字座標軸
ax = mp.gca()#獲取當前座標圖
ax.spines['left'].set_position((座標系,位置座標))
ax.spines['right'].set_color(顏色)
六、圖例
plot(..,label=圖例文本)
legend(loc=位置)
loc的取值:
lower right
center
center left
upper center
right
center right
lower center
lower left
upper right
upper left
best
七、添加特殊點
scatter(水平座標數組,垂直座標數組)
八、備註
annotate(備註文本,xy=目標位置,xycoords=目標座標系,
xytext=備註位置,textcoords=備註座標系,fontsize=字體大小,
arrowprops=箭頭屬性props)
import matplotlib.pyplot as mp
import numpy as np

# 1000 evenly spaced samples between -pi and pi.
x = np.linspace(-np.pi, np.pi, 1000)
sin_y = np.sin(x)
cos_y = np.cos(x) / 2

# Special point highlighted on both curves.
xo = np.pi * 3 / 4
yo_sin = np.sin(xo)
yo_cos = np.cos(xo) / 2

# Leave a 10% margin around the data.
mp.xlim(x.min() * 1.1, x.max() * 1.1)
mp.ylim(sin_y.min() * 1.1, sin_y.max() * 1.1)

# Move the left/bottom spines to the data origin and hide the other two,
# which produces a cross-shaped coordinate system.
ax = mp.gca()
ax.spines['left'].set_position(('data', 0))
ax.spines['bottom'].set_position(('data', 0))
ax.spines['top'].set_color('none')
ax.spines['right'].set_color('none')

# Tick positions and their LaTeX labels.
mp.yticks([-1, -0.5, 0, 0.5, 1])
mp.xticks(
    [-np.pi, -np.pi / 2, 0, np.pi / 2, np.pi * 3 / 4, np.pi],
    [r'$-\pi$', r'$-\frac{\pi}{2}$', r'$0$', r'$\frac{\pi}{2}$',
     r'$\frac{3\pi}{4}$', r'$\pi$'])

# The sine legend entry read "x=sin(x)" in the original; the curve is y=sin(x).
mp.plot(x, sin_y, linestyle='-', linewidth=2, color='orangered',
        label=r'$y=sin(x)$')
mp.plot(x, cos_y, linestyle='-', linewidth=2, color='dodgerblue',
        label=r'$y=\frac{1}{2}cos(x)$')

# Dashed connector between the two special points (linewidth is a number).
mp.plot([xo, xo], [yo_cos, yo_sin], linestyle='--', linewidth=1,
        color='limegreen')
# s is the marker size, edgecolor/facecolor the outline/fill colours;
# a larger zorder is drawn later, i.e. on top.
mp.scatter([xo, xo], [yo_sin, yo_cos], s=60, edgecolor='limegreen',
           facecolor='white', zorder=3)

# Annotations; the values match the functions actually plotted:
# cos(3*pi/4)/2 = -sqrt(2)/4 and sin(3*pi/4) = sqrt(2)/2.
mp.annotate(r'$\frac{1}{2}cos(\frac{3\pi}{4})=-\frac{\sqrt{2}}{4}$',
            xy=(xo, yo_cos), xycoords='data',
            xytext=(-90, -40), textcoords='offset points',
            fontsize=14,
            arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0.2'))
mp.annotate(r'$sin(\frac{3\pi}{4})=\frac{\sqrt{2}}{2}$',
            xy=(xo, yo_sin), xycoords='data',
            xytext=(20, 20), textcoords='offset points',
            fontsize=14,
            arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0.2'))

# Legend position.
mp.legend(loc='upper left')
mp.show()
九、圖形對象
figure(對象名(窗口標題),figsize=大小,dpi=分辨率,facecolor=顏色,..)
該方法會返回圖像對象(建立/設置爲當前)
十、子圖
subplot(總行數,總列數,圖號)
subplot(總行數×100+總列數×10+圖號)
import matplotlib.pyplot as mp

mp.figure('Subplot', facecolor='lightgray')

# (subplot code, text, horizontal alignment, vertical alignment)
panels = [
    (221, '1', 'center', 'center'),
    (222, '2', 'left', 'top'),
    (223, '3', 'left', 'bottom'),
    (224, '4', 'right', 'bottom'),
]
for code, caption, h_align, v_align in panels:
    mp.subplot(code)
    # Empty tuples remove the ticks.
    mp.xticks(())
    mp.yticks(())
    # text(x, y, string, horizontal alignment, vertical alignment,
    #      font size, opacity)
    mp.text(0.5, 0.5, caption, ha=h_align, va=v_align, size=36, alpha=0.5)

# Shrink the gaps between the subplots.
mp.tight_layout()
mp.show()
十一、柵格佈局
import matplotlib.gridspec as mg
gs = mg.GridSpec(行數,列數)
subplot(gs[切片])
import matplotlib.gridspec as mg
import matplotlib.pyplot as mp

mp.figure('Gridspec', facecolor='lightgray')

# Three rows by two columns.
gs = mg.GridSpec(3, 2)

cells = [
    gs[0, :],   # whole first row
    gs[1:, 0],  # rows 2-3, first column
    gs[1, 1],   # row 2, column 2
    gs[2, 1],   # row 3, column 2
]
for cell in cells:
    mp.subplot(cell)
    # Remove the ticks.
    mp.xticks(())
    mp.yticks(())

# Shrink the gaps between the subplots.
mp.tight_layout()
mp.show()
十二、自由佈局
axes([左,底,寬,高])
說明:這些參數都是相對值
import matplotlib.pyplot as mp

mp.figure('Axes', facecolor='lightgray')

# Each rectangle is [left, bottom, width, height]; all values are fractions
# of the figure size.
boxes = [
    ([0.03, 0.038, 0.94, 0.924], '1'),
    ([0.63, 0.076, 0.31, 0.308], '2'),
]
for rect, caption in boxes:
    mp.axes(rect)
    mp.xticks(())
    mp.yticks(())
    mp.text(0.5, 0.5, caption, ha='center', va='center', size=36, alpha=0.5)

mp.show()
1三、座標刻度
xxxLocator(...)->座標定位器對象
ax = mp.gca()#獲取座標對象
設置水平軸主刻度定位器
ax.xaxis.set_major_locator(座標定位器)
設置水平次刻度定位器
ax.xaxis.set_minor_locator(座標定位器)
設置垂直主刻度定位器
ax.yaxis.set_major_locator(座標定位器)
設置垂直次刻度定位器
ax.yaxis.set_minor_locator(座標定位器)
import matplotlib.pyplot as mp
import numpy as np

mp.figure('Locator')

# (text shown on the axis, locator object).  The locators are built directly
# instead of eval()-ing source strings, so no code is executed from text.
locators = [
    ('NullLocator()', mp.NullLocator()),
    ('MaxNLocator(nbins=3,steps=[1,3,5,7,9])',
     mp.MaxNLocator(nbins=3, steps=[1, 3, 5, 7, 9])),
    ('FixedLocator(locs=[0,2.5,5,7.5,10])',
     mp.FixedLocator(locs=[0, 2.5, 5, 7.5, 10])),
    ('AutoLocator()', mp.AutoLocator()),
    # offset is the first tick position, base the spacing between ticks.
    ('IndexLocator(offset=0.5,base=1.5)',
     mp.IndexLocator(offset=0.5, base=1.5)),
    ('MultipleLocator()', mp.MultipleLocator()),
    # numticks is the number of major ticks.
    ('LinearLocator(numticks=21)', mp.LinearLocator(numticks=21)),
    # base is the logarithm base.
    ('LogLocator(base=2,subs=[1.0])', mp.LogLocator(base=2, subs=[1.0])),
]
n_locators = len(locators)

for i, (caption, locator) in enumerate(locators):
    mp.subplot(n_locators, 1, i + 1)
    # Axis ranges.
    mp.xlim(0, 10)
    mp.ylim(-1, 1)
    # No ticks on the y axis.
    mp.yticks(())
    ax = mp.gca()
    # Keep only the bottom spine and place it at y == 0.
    ax.spines['left'].set_color('none')
    ax.spines['right'].set_color('none')
    ax.spines['top'].set_color('none')
    ax.spines['bottom'].set_position(('data', 0))
    ax.xaxis.set_major_locator(locator)
    # MultipleLocator places a tick at every multiple of the given step.
    ax.xaxis.set_minor_locator(mp.MultipleLocator(0.1))
    # Invisible line so the axes have data to display.
    mp.plot(np.arange(0, 11), np.zeros(11), c='none')
    mp.text(5, 0.3, caption, ha='center', va='center', size=12)

mp.tight_layout()
mp.show()
1四、散點圖
scatter(水平座標,垂直座標,s=大小,c=顏色,cmap=顏色映射,
marker=點型,alpha=透明度,label=標籤)
x y z
10 40 1
20 30 2
30 20 3
40 10 4
#jet深藍到深紅的漸變
scatter([10,20,30,40],[40,30,20,10],c=[1,2,3,4],cmap='jet')
import matplotlib.pyplot as mp
import numpy as np

n = 1000
# normal(mean, standard deviation, count): standard normal samples.
x = np.random.normal(0, 1, n)
y = np.random.normal(0, 1, n)
# Distance from the origin; used to colour the points.
z = np.sqrt(x ** 2 + y ** 2)

mp.figure('Scatter', facecolor='lightgray')
mp.title('Scatter', fontsize=20)
mp.xlabel('x', fontsize=14)
mp.ylabel('y', fontsize=14)
mp.tick_params(labelsize=10)  # font size of the tick labels
mp.grid(linestyle=':')

mp.scatter(x, y, c=z, s=60, cmap='jet_r', alpha=0.5)
mp.axis('equal')  # same scale on both axes
mp.show()
1五、區域填充
fill_between(水平座標,起始垂直座標,終止垂直座標,填充條件,
color=顏色,alpha=透明度)
import matplotlib.pyplot as mp
import numpy as np

n = 1000
# n samples over four periods of sin(x).
x = np.linspace(0, 8 * np.pi, n)
sin_y = np.sin(x)
cos_y = np.cos(x / 2) / 2

mp.figure('Fill', facecolor='lightgray')
mp.title('Fill', fontsize=20)
mp.xlabel('x', fontsize=14)
mp.ylabel('y', fontsize=14)
mp.tick_params(labelsize=10)  # font size of the tick labels
mp.grid(linestyle=':')

mp.plot(x, sin_y, color='dodgerblue', label=r'$y=sin(x)$')
mp.plot(x, cos_y, color='orangered',
        label=r'$y=\frac{1}{2}cos(\frac{x}{2})$')

# Fill the area between the curves, coloured by which curve is on top.
sin_above = cos_y < sin_y
cos_above = cos_y > sin_y
mp.fill_between(x, sin_y, cos_y, where=sin_above, color='dodgerblue', alpha=0.5)
mp.fill_between(x, sin_y, cos_y, where=cos_above, color='orangered', alpha=0.5)

mp.legend()
mp.show()
1六、條形圖
bar(水平座標,高度,ec=邊緣色,fc=填充色,label=標籤)
說明:高度爲正值,矩形條畫在零軸上方,高度爲負值,矩形條畫在零軸下方
import matplotlib.pyplot as mp
import numpy as np

n = 12
x = np.arange(n)
# Two random samples whose envelope shrinks linearly from left to right.
y1 = (1 - x / n) * np.random.uniform(0.5, 1.0, n)
y2 = (1 - x / n) * np.random.uniform(0.5, 1.0, n)

mp.figure('Bar', facecolor='lightgray')
mp.title('Bar', fontsize=20)
mp.ylim(-1.25, 1.25)
mp.xlabel('x', fontsize=14)
mp.ylabel('y', fontsize=14)
mp.xticks(x, x + 1)
mp.tick_params(labelsize=10)  # font size of the tick labels
mp.grid(axis='y', linestyle=':')  # horizontal grid lines only

# Sample 1 is drawn above the zero axis with its values on top of the bars.
mp.bar(x, y1, ec='white', fc='dodgerblue', label='sample 1')
for pos, height in zip(x, y1):
    mp.text(pos, height, '%.2f' % height, ha='left', va='bottom', size=8)

# Sample 2 is mirrored below the zero axis (negative height).
mp.bar(x, -y2, ec='white', fc='dodgerblue', label='sample 2', alpha=0.5)
for pos, height in zip(x, y2):
    mp.text(pos, -height - 0.015, '%.2f' % height, ha='left', va='top', size=8)

mp.legend()
mp.show()
1七、等高線圖
contourf(點陣水平座標,點陣垂直座標,點陣直立座標,高差份數,cmap=顏色映射),填充
contour(點陣水平座標,點陣垂直座標,點陣直立座標,
高差份數,colors=顏色,linewidths=線寬)
clabel(等高線圖對象,inline_spacing=線內空白,fmt=格式化串,fontsize=字體大小)
import matplotlib.pyplot as mp
import numpy as np

n = 1000
# n x n grid of sample points over [-3, 3] x [-3, 3].
x, y = np.meshgrid(np.linspace(-3, 3, n), np.linspace(-3, 3, n))
# np.exp(v) is e raised to the power v.
z = (1 - x / 2 + x ** 5 + y ** 3) * np.exp(-x ** 2 - y ** 2)

mp.figure('Contour', facecolor='lightgray')
mp.title('Contour', fontsize=20)
mp.xlabel('x', fontsize=14)
mp.ylabel('y', fontsize=14)
mp.tick_params(labelsize=10)  # font size of the tick labels
mp.grid(linestyle=':')

# Filled contours: the keyword is cmap (the original misspelled it "camp").
mp.contourf(x, y, z, 8, cmap='jet')
# Contour lines: contour() takes the plural keyword linewidths.
cntr = mp.contour(x, y, z, 8, colors='black', linewidths=0.5)
# Label the contour lines with their level.
mp.clabel(cntr, inline_spacing=1, fmt='%.1f', fontsize=10)
mp.show()
填充圖:
輪廓圖:
填充加輪廓圖
1八、熱成像圖
imshow(矩陣,cmap=顏色映射,origin=縱軸方向)
說明:origin默認值爲'upper'(縱軸向下增大),還有一個相反的取值'lower'(縱軸向上增大)
0 1 2
-------
0 |1 2 3
1 |4 5 6
2 |7 8 9
import matplotlib.pyplot as mp
import numpy as np

n = 1000
# n x n grid of sample points over [-3, 3] x [-3, 3].
x, y = np.meshgrid(np.linspace(-3, 3, n), np.linspace(-3, 3, n))
# np.exp(v) is e raised to the power v.
z = (1 - x / 2 + x ** 5 + y ** 3) * np.exp(-x ** 2 - y ** 2)

mp.figure('Hot', facecolor='lightgray')
mp.title('Hot', fontsize=20)
mp.xlabel('x', fontsize=14)
mp.ylabel('y', fontsize=14)
# imshow addresses pixels 0 .. n-1; relabel them with the data range -3 .. 3.
mp.xticks(np.linspace(0, n - 1, 7), np.linspace(-3, 3, 7).astype(int))
mp.yticks(np.linspace(0, n - 1, 7), np.linspace(-3, 3, 7).astype(int))
mp.tick_params(labelsize=10)  # font size of the tick labels
mp.grid(linestyle=':')

# Heat map; 'lower' puts row 0 at the bottom (valid values: 'upper'/'lower').
mp.imshow(z, cmap='jet', origin='lower')
# Colour bar acting as the legend for z.
mp.colorbar().set_label('z', fontsize=14)
mp.show()
1九、餅圖
pie(值,間隙,標籤,顏色,格式,shadow=陰影,startangle=起始角度)
說明:畫圖的順序是逆時針,startangle的值默認是0單位是度不是弧度
import matplotlib.pyplot as mp

# One entry per wedge.
values = [26, 17, 21, 29, 11]
spaces = [0.05, 0.01, 0.01, 0.01, 0.01]  # wedge offset as a fraction of the radius
labels = ['Python', 'Javascrip', 'C++', 'C', 'PHP']
colors = ['dodgerblue', 'orangered', 'limegreen', 'violet', 'gold']

mp.figure('Pie', facecolor='lightgray')
mp.title('Pie', fontsize=20)
# Wedges are drawn counter-clockwise, starting at startangle (degrees).
mp.pie(
    values,
    explode=spaces,
    labels=labels,
    colors=colors,
    autopct='%d%%',
    shadow=True,
    startangle=90,
)
mp.axis('equal')  # equal axis scale so the pie is a circle
mp.show()
20、座標線
grid(which='major/minor',axis='x/y/both',
linewidth=線寬,linestyle=線型,color=顏色)
import matplotlib.pyplot as mp

mp.figure('Grid', facecolor='lightgray')
mp.title('Grid', fontsize=20)
mp.xlabel('x', fontsize=14)
mp.ylabel('y', fontsize=14)
mp.xlim(0, 10)
mp.ylim(0, 10)

# Major ticks every 1.0 and minor ticks every 0.1 on both axes.
ax = mp.gca()
for axis in (ax.xaxis, ax.yaxis):
    axis.set_major_locator(mp.MultipleLocator(1.0))
    axis.set_minor_locator(mp.MultipleLocator(0.1))
mp.tick_params(labelsize=10)  # font size of the tick labels

# which defaults to 'major' and axis defaults to 'both'.
mp.grid(which='major', axis='x', linewidth=0.75, linestyle='-',
        color='lightgray')
mp.grid(which='minor', axis='y', linewidth=0.25, linestyle='-',
        color='red')
mp.show()
2一、極座標(ρ,θ)
mp.subplot(projection='polar')(舊版寫法 mp.gca(projection='polar') 在新版matplotlib中已不可用)
plot/scatter(θ,ρ,...)
import matplotlib.pyplot as mp
import numpy as np

# Angle samples over one full turn and the two radius functions.
t = np.linspace(0, 2 * np.pi, 1000)
r_spiral = 0.8 * t
r_rose = 5 * np.sin(6 * t)

mp.figure('Polar', facecolor='lightgray')
# Create the polar axes. gca() no longer accepts a projection keyword in
# current matplotlib, so the axes are created with subplot() instead.
mp.subplot(projection='polar')
mp.title('Polar', fontsize=20)
mp.xlabel(r'$\theta$', fontsize=14)
mp.ylabel(r'$\rho$', fontsize=14)
mp.tick_params(labelsize=10)  # font size of the tick labels
mp.grid(linestyle=':')

# In polar axes plot() takes (theta, rho).
mp.plot(t, r_spiral, c='dodgerblue', label=r'$\rho=0.8\theta$')
mp.plot(t, r_rose, c='orangered', label=r'$\rho=5sin(6\theta)$')
mp.legend(loc='upper left')
mp.show()
2二、三維曲面
from mpl_toolkits.mplot3d import axes3d
ax = mp.subplot(projection='3d')(舊版寫法 gca(projection='3d') 在新版matplotlib中已不可用)
ax.set_xlabel(...)
ax.set_ylabel(...)
ax.set_zlabel(...)
ax.plot_surface(水平座標,垂直座標,直立座標,
rstride=行距,cstride=列距,cmap=顏色映射),填充
ax.plot_wireframe(水平座標,垂直座標,直立座標,
rstride=行距,cstride=列距,linewidth=線寬,color=顏色) ,輪廓
說明:rstride、cstride的值越小圖像越潤滑
import matplotlib.pyplot as mp
import numpy as np
from mpl_toolkits.mplot3d import axes3d

n = 1000
# n x n grid of sample points over [-3, 3] x [-3, 3].
x, y = np.meshgrid(np.linspace(-3, 3, n), np.linspace(-3, 3, n))
# np.exp(v) is e raised to the power v.
z = (1 - x / 2 + x ** 5 + y ** 3) * np.exp(-x ** 2 - y ** 2)

# --- Filled surface ---
mp.figure('3D Surface')
# The 3-D axes are created first (gca() no longer accepts a projection
# keyword), then titled, so no stray 2-D axes is created by mp.title().
ax = mp.subplot(projection='3d')
mp.title('3D Surface', fontsize=20)
ax.set_xlabel('x', fontsize=14)
ax.set_ylabel('y', fontsize=14)
ax.set_zlabel('z', fontsize=14)
mp.tick_params(labelsize=10)  # font size of the tick labels
# rstride/cstride: row/column step between sampled grid lines.
ax.plot_surface(x, y, z, rstride=10, cstride=10, cmap='jet', linewidth=0)

# --- Wireframe ---
mp.figure('3D Wireframe')
ax = mp.subplot(projection='3d')
mp.title('3D Wireframe', fontsize=20)
ax.set_xlabel('x', fontsize=14)
ax.set_ylabel('y', fontsize=14)
ax.set_zlabel('z', fontsize=14)
mp.tick_params(labelsize=10)  # font size of the tick labels
ax.plot_wireframe(x, y, z, rstride=20, cstride=20, linewidth=0.5,
                  color='orangered')
mp.show()
填充圖:
輪廓圖:
2三、半對數座標(y軸取對數,x軸是正常座標)
semilogy(參數同plot)
import matplotlib.pyplot as mp
import numpy as np

y = np.array([1, 10, 100, 1000, 100, 10, 1])

mp.figure('Normal & Log', facecolor='lightgray')

# --- Linear coordinates ---
mp.subplot(211)
mp.title('Normal', fontsize=20)
mp.ylabel('y', fontsize=14)
ax = mp.gca()
ax.xaxis.set_major_locator(mp.MultipleLocator(1))
ax.xaxis.set_minor_locator(mp.MultipleLocator(0.1))
ax.yaxis.set_major_locator(mp.MultipleLocator(250))
ax.yaxis.set_minor_locator(mp.MultipleLocator(50))
mp.tick_params(labelsize=10)
ax.grid(which='major', axis='both', linewidth=0.75, linestyle='-',
        color='lightgray')
ax.grid(which='minor', axis='both', linewidth=0.2, linestyle='-',
        color='lightgray')
mp.plot(y, 'o-', c='dodgerblue', label='plot')
mp.ylim(min(y) - 10, max(y) * 1.1)
mp.legend()

# --- Semi-log coordinates (logarithmic y axis, linear x axis) ---
mp.subplot(212)
mp.title('Log', fontsize=20)
mp.ylabel('y', fontsize=14)
ax = mp.gca()
ax.xaxis.set_major_locator(mp.MultipleLocator(1))
ax.xaxis.set_minor_locator(mp.MultipleLocator(0.1))
# No linear y locators here: switching the axis to log scale installs
# logarithmic locators, so MultipleLocator(250/50) would be discarded.
mp.tick_params(labelsize=10)
ax.grid(which='major', axis='both', linewidth=0.75, linestyle='-',
        color='lightgray')
ax.grid(which='minor', axis='both', linewidth=0.2, linestyle='-',
        color='lightgray')
mp.semilogy(y, 'o-', c='dodgerblue', label='semilogy')
mp.legend()
# A log axis cannot show values <= 0, so the lower limit must stay positive
# (the original min(y) - 50 was negative).
mp.ylim(min(y) / 2, max(y) * 1.1)

mp.tight_layout()
mp.show()
2四、簡單動畫
import matplotlib.animation as ma
ma.FuncAnimation(圖像對象,更新函數,生成器函數,interval=間隔毫秒)
說明:每間隔interval毫秒,調用一次更新函數
不帶生成器函數
import matplotlib.animation as ma
import matplotlib.pyplot as mp
import numpy as np

n_bubbles = 100

# One-dimensional structured array; every element has four fields:
#   position (x, y) | size | growth | color (r, g, b, alpha)
# all stored as floats.
bubble_dtype = [
    ('position', float, 2),
    ('size', float),
    ('growth', float),
    ('color', float, 4),
]
bubbles = np.zeros(n_bubbles, dtype=bubble_dtype)
bubbles['position'] = np.random.uniform(0, 1, (n_bubbles, 2))
bubbles['size'] = np.random.uniform(50, 750, n_bubbles)
bubbles['growth'] = np.random.uniform(30, 150, n_bubbles)
bubbles['color'] = np.random.uniform(0, 1, (n_bubbles, 4))

mp.figure('Bubbles', facecolor='lightgray')
mp.title('Bubbles', fontsize=20)
mp.xticks(())
mp.yticks(())
sc = mp.scatter(
    bubbles['position'][:, 0],
    bubbles['position'][:, 1],
    s=bubbles['size'],
    c=bubbles['color'],
)


def update(number):
    """Grow every bubble and restart one bubble, chosen by frame number."""
    bubbles['size'] += bubbles['growth']
    # The bubble that bursts on this frame gets a new random position,
    # growth rate and colour, and starts again from size 0.
    burst = number % n_bubbles
    bubbles['position'][burst] = np.random.uniform(0, 1, 2)
    bubbles['size'][burst] = 0
    bubbles['growth'][burst] = np.random.uniform(30, 150)
    bubbles['color'][burst] = np.random.uniform(0, 1, 4)
    # Push the new state to the scatter artist.
    sc.set_offsets(bubbles['position'])
    sc.set_facecolors(bubbles['color'])
    sc.set_sizes(bubbles['size'])


# The animation object is bound to a name so that it stays referenced while
# the window is shown.
anim = ma.FuncAnimation(mp.gcf(), update, interval=10)
mp.show()
動態圖
帶生成器函數
import matplotlib.animation as ma
import matplotlib.pyplot as mp
import numpy as np

mp.figure('Signal', facecolor='lightgray')
mp.title('Signal', fontsize=20)
mp.xlabel('Time', fontsize=14)
mp.ylabel('Signal', fontsize=14)
mp.xticks(())
ax = mp.gca()
ax.set_ylim(-3, 3)
ax.set_xlim(0, 10)
mp.tick_params(labelsize=10)
mp.grid(linestyle=':')

# plot() returns a list of Line2D objects; take the (still empty) curve.
pl = mp.plot([], [], c='orangered')[0]

# Sample buffers owned by this script. The original appended to whatever
# Line2D.get_data() returned, which only works while that happens to be a
# Python list; keeping our own lists avoids relying on that.
xs, ys = [], []


def update(data):
    """Append one (t, v) sample produced by generator() and redraw the curve."""
    t, v = data
    xs.append(t)
    ys.append(v)
    # Scroll the x range once the curve reaches the right edge.
    x_min, x_max = ax.get_xlim()
    if t >= x_max:
        ax.set_xlim(t - (x_max - x_min), t)
        ax.figure.canvas.draw()
    pl.set_data(xs, ys)


def generator():
    """Yield an endless stream of (time, value) samples."""
    t = 0
    while True:
        # v could just as well be data acquired from an external source.
        v = np.sin(2 * np.pi * t) * np.exp(np.sin(0.2 * np.pi * t))
        yield t, v
        t += 0.05


# The animation object is bound to a name so that it stays referenced while
# the window is shown.
anim = ma.FuncAnimation(mp.gcf(), update, generator, interval=5)
mp.show()
動態圖
10、numpy的通用函數
一、讀文件
loadtxt(文件名,delimiter=分隔符,usecols=選擇列,unpack=True/False,
dtype=目標類型,converters={列號:轉換器})->二維數組/一維數組的元組
說明:unpack=False(默認)時獲得的是二維數組;unpack=True時按列拆分,可解包爲多個一維數組
import datetime as dt

import matplotlib.dates as md  # date locators/formatters for plotting
import matplotlib.pyplot as mp
import numpy as np


def dmy2ymd(dmy):
    """Convert a 'day-month-year' field into a datetime.date.

    Depending on the NumPy version, loadtxt hands converters either bytes
    or str, so both are accepted.
    """
    if isinstance(dmy, bytes):
        dmy = dmy.decode('utf-8')
    # strptime: the "p" stands for "parse".
    return dt.datetime.strptime(dmy, '%d-%m-%Y').date()


# M8[D] is an 8-byte date type with day resolution.
dates, opening_prices, highest_prices, lowest_prices, closing_prices = \
    np.loadtxt('data/aapl.csv', delimiter=',', usecols=(1, 3, 4, 5, 6),
               unpack=True, dtype=np.dtype('M8[D], f8, f8, f8, f8'),
               converters={1: dmy2ymd})

mp.figure('Candlestick', facecolor='lightgray')
mp.title('Candlestick', fontsize=20)
mp.xlabel('Date', fontsize=14)
mp.ylabel('Price', fontsize=14)
ax = mp.gca()
# Major ticks on Mondays.
ax.xaxis.set_major_locator(md.WeekdayLocator(byweekday=md.MO))
# Minor ticks every day (DayLocator without arguments).
ax.xaxis.set_minor_locator(md.DayLocator())
# Tick label format, e.g. "31 Jan 2011".
ax.xaxis.set_major_formatter(md.DateFormatter('%d %b %Y'))
mp.tick_params(labelsize=10)
mp.grid(linestyle=':')

# Convert NumPy dates into objects matplotlib can plot.
dates = dates.astype(dt.datetime)

# Boolean masks: rising days (close above open) and falling days.
rise = closing_prices - opening_prices >= 0.01
fall = opening_prices - closing_prices >= 0.01

# Per-bar RGB fill and edge colours.
fc = np.zeros(dates.size, dtype='3f4')
ec = np.zeros(dates.size, dtype='3f4')
fc[rise], fc[fall] = (1, 1, 1), (0, 0.5, 0)  # white / green
ec[rise], ec[fall] = (1, 0, 0), (0, 0.5, 0)  # red / green

# Wicks: zero-width bars from the lowest to the highest price.
mp.bar(dates, highest_prices - lowest_prices, 0, lowest_prices,
       color=fc, edgecolor=ec)
# Bodies: bars from the opening to the closing price.
mp.bar(dates, closing_prices - opening_prices, 0.8, opening_prices,
       color=fc, edgecolor=ec)

# Rotate/align the date labels automatically.
mp.gcf().autofmt_xdate()
mp.show()
二、算數平均值
樣本:S= [s1,s2,s3,...sn]
算數平均數:m = (s1+s2+s3+...+sn)/n
numpy.mean(S)-> m
s1 = s + d1, s表示真值,d1表示偏差,s1表示測量值(樣本值)
s2 = s + d2
.......
sn = s + dn
m= s + (d1+d2...+dn)/n ,當n趨於無限大,(d1+d2...+dn)/n趨於0
算數平均值就是對真值的無偏估計
import numpy as np

closing_prices = np.loadtxt('data/aapl.csv', delimiter=',', usecols=(6),
                            unpack=True)

# Arithmetic mean computed by hand: running total divided by the count.
mean = 0
for closing_price in closing_prices:
    mean = mean + closing_price
mean = mean / closing_prices.size
print(mean)

# The same value computed by NumPy.
mean = np.mean(closing_prices)
print(mean)
三、加權平均值
樣本:S= [s1,s2,s3,...sn]
權重:W=[w1,w2,w3,.....,wn]
加權平均值:a = (s1w1+s2w2+s3w3+...+snwn)/(w1+w2+w3+...+wn)
numpy.average(S,weights=W) ->a
算數平均值就是各個樣本權重相等時的加權平局值
成交量加權平均價格
import numpy as np

closing_prices, volumes = np.loadtxt('data/aapl.csv', delimiter=',',
                                     usecols=(6, 7), unpack=True)

# Volume-weighted average price computed by hand:
# sum(price * volume) / sum(volume).
vwap = 0
for closing_price, volume in zip(closing_prices, volumes):
    vwap = vwap + closing_price * volume
vwap = vwap / volumes.sum()
print(vwap)

# The same value computed with NumPy's weighted average.
vwap = np.average(closing_prices, weights=volumes)
print(vwap)
時間加權平均價格(時間越近的影響較大)
import datetime as dt

import numpy as np


def dmy2days(dmy):
    """Convert a 'day-month-year' field into days elapsed since date.min.

    Depending on the NumPy version, loadtxt hands converters either bytes
    or str, so both are accepted.
    """
    if isinstance(dmy, bytes):
        dmy = dmy.decode('utf-8')
    date = dt.datetime.strptime(dmy, '%d-%m-%Y').date()
    return (date - dt.date.min).days


days, closing_prices = np.loadtxt('data/aapl.csv', delimiter=',',
                                  usecols=(1, 6), unpack=True,
                                  converters={1: dmy2days})

# Time-weighted average price computed by hand; the weight of each sample is
# its day number, so later dates weigh more.
twap = 0
for day, closing_price in zip(days, closing_prices):
    twap += closing_price * day
twap /= days.sum()
print(twap)

# The same value computed with NumPy's weighted average.
twap = np.average(closing_prices, weights=days)
print(twap)
四、最大值和最小值
max/min:在一個數組中尋找最大值/最小值
argmax/argmin:在一個數組中尋找最大值/最小值的下標,將多維的轉換爲一維求下標
maximum/minimum:在兩個數組的對應位置元素中尋找最大值/最小值
ptp:一個數組中尋找最大值和最小值之差
import numpy as np

# 3x3 matrix of random integers drawn from [10, 100).
a = np.random.randint(10, 100, 9).reshape(3, 3)
print(a)

# Largest / smallest element.
b = np.max(a)
c = np.min(a)
print(b, c)

# Their positions in the flattened array.
d = np.argmax(a)
e = np.argmin(a)
print(d, e)
# Example output:
# [[44 32 78]
#  [28 72 75]
#  [74 14 81]]
# 81 14
# 8 7

# Element-wise maximum / minimum of two arrays.
f = np.random.randint(10, 100, 9).reshape(3, 3)
g = np.maximum(a, f)
h = np.minimum(a, f)
print('a:', a)
print('f:', f)
print("g:", g)
print('h:', h)
# Example output:
# a: [[67 22 73]
#     [87 93 94]
#     [82 74 16]]
# f: [[67 33 50]
#     [56 65 24]
#     [52 21 54]]
# g: [[67 33 73]
#     [87 93 94]
#     [82 74 54]]
# h: [[67 22 50]
#     [56 65 24]
#     [52 21 16]]
import numpy as np

highest_prices, lowest_prices = np.loadtxt('data/aapl.csv', delimiter=',',
                                           usecols=(4, 5), unpack=True)

# Plain-Python approach: track the running extremes.
max_highest_price, min_lowest_price = highest_prices[0], lowest_prices[0]
for highest_price, lowest_price in zip(highest_prices, lowest_prices):
    if max_highest_price < highest_price:
        max_highest_price = highest_price
    if min_lowest_price > lowest_price:
        # Store the single value; the original assigned the whole
        # lowest_prices array here.
        min_lowest_price = lowest_price
print(max_highest_price - min_lowest_price)

# NumPy approach.
max_highest_price, min_lowest_price = np.max(highest_prices), np.min(lowest_prices)
print(max_highest_price - min_lowest_price)
import numpy as np

heightest_prices, lowest_prices = np.loadtxt('data/aapl.csv', delimiter=',',
                                             usecols=(4, 5), unpack=True)

# Plain-Python approach: track the running max and min of both columns.
max_heightest_price = min_heightest_price = heightest_prices[0]
max_lowest_price = min_lowest_price = lowest_prices[0]
for heightest_price, lowest_price in zip(heightest_prices, lowest_prices):
    if heightest_price > max_heightest_price:
        max_heightest_price = heightest_price
    if heightest_price < min_heightest_price:
        min_heightest_price = heightest_price
    if lowest_price > max_lowest_price:
        max_lowest_price = lowest_price
    if lowest_price < min_lowest_price:
        min_lowest_price = lowest_price
print("最大值幅度:", max_heightest_price - min_heightest_price)
print("最小值幅度", max_lowest_price - min_lowest_price)

# NumPy approach: ptp is max minus min in one call.
print("最大值幅度:", np.ptp(heightest_prices))
print("最小值幅度", np.ptp(lowest_prices))
五、中位數
將多個樣本按照大小順序排列,居於中間位置的元素即爲中位數
13 22 27 31 43 :27是中位數
13 22 27 31 43 51 :(27+31)/2 是中位數
L:序列長度
M: (A[(L-1)/2]+A[L/2])/2
驗證:
L=5
M=(A[4/2]+A[5/2])/2 =(A[2]+A[2])/2=27,下標下取整
L=6
M=(A[5/2]+A[6/2])/2 =(A[2]+A[3])/2=(27+31)/2
numpy.median(A)->M
import numpy as np

closing_prices = np.loadtxt('data/aapl.csv', delimiter=',', usecols=(6),
                            unpack=True)

# np.sort returns a sorted copy; closing_prices.sort() would instead sort the
# array in place.
sorted_closing_prices = np.sort(closing_prices)

# Median by hand: mean of the two middle elements (they coincide when the
# length is odd).
l = closing_prices.size
lower_mid = sorted_closing_prices[(l - 1) // 2]
upper_mid = sorted_closing_prices[l // 2]
median = (lower_mid + upper_mid) / 2
print(median)

# The same value computed by NumPy.
median = np.median(closing_prices)
print(median)
六、標準差
樣本:S=[s1,s2,...sn]
均值:m=(s1+s2+..+sn)/n
離差:D=[s1-m,s2-m,...,sn-m]
離差方:Q=[(s1-m)^2,(s2-m)^2,...,(sn-m)^2]
方差:v = (q1+q2+...+qn)/n ,q1表示(s1-m)^2
標準差:std = sqrt(v),方均根偏差
numpy.std(S,ddof=非自由因子) -> std
說明:
整體標準差:根號內除以n,ddof=0
樣本標準差:根號內除以(n-1),ddof=1
ddof默認爲0,獲得的結果是整體標準差
import numpy as np

closing_prices = np.loadtxt('data/aapl.csv', delimiter=',', usecols=(6),
                            unpack=True)

# --- Step by step ---
mean = closing_prices.mean()         # arithmetic mean
devs = closing_prices - mean         # deviations
dev2 = devs ** 2                     # squared deviations
pvar = dev2.mean()                   # population variance (divide by n)
svar = dev2.sum() / (dev2.size - 1)  # sample variance (divide by n - 1)
pstd = np.sqrt(pvar)                 # population standard deviation
sstd = np.sqrt(svar)                 # sample standard deviation
print("整體標準差:", pstd)
print("樣本標準差", sstd)

# --- With NumPy ---
pstd = np.std(closing_prices)
sstd = np.std(closing_prices, ddof=1)
print("整體標準差:", pstd)
# Prints the value NumPy just computed; the original stored it under a
# different name and re-printed the hand-computed result.
print("樣本標準差", sstd)
七、時間數據
一、經過布爾型掩碼數組過濾數組中知足特定條件的元素:
數組[掩碼數組],只有與掩碼數組中值爲True的元素相對應的元素可被訪問
二、numpy.where(關係/邏輯表達)->知足關係表達/邏輯表達式的元素的下標數組
三、numpy.take(數組,下標數組)->提取數組中由下標數組所指示的元素
import datetime as dt

import numpy as np


def dmy2wday(dmy):
    """Convert a 'day-month-year' field into a weekday number (0 = Monday).

    Depending on the NumPy version, loadtxt hands converters either bytes
    or str, so both are accepted.
    """
    if isinstance(dmy, bytes):
        dmy = dmy.decode('utf-8')
    date = dt.datetime.strptime(dmy, '%d-%m-%Y').date()
    return date.weekday()


wdays, closing_prices = np.loadtxt('data/aapl.csv', delimiter=',',
                                   usecols=(1, 6), unpack=True,
                                   converters={1: dmy2wday})

#   0   1   2   3   4   5   6
# MON TUE WED THU FRI SAT SUN
ave_closing_prices = np.zeros(5)
for wday in range(ave_closing_prices.size):
    # Three equivalent ways to select the prices of one weekday:
    #   closing_prices[wdays == wday]            boolean mask
    #   closing_prices[np.where(wdays == wday)]  index array from the mask
    #   np.take(closing_prices, np.where(...))   explicit take
    ave_closing_prices[wday] = np.take(
        closing_prices, np.where(wdays == wday)).mean()

# zip stops at the shorter sequence, so only MON..FRI are printed. The loop
# variables use their own names so the result array is not overwritten.
for name, average in zip(['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN'],
                         ave_closing_prices):
    # Round to two decimals.
    print(name, np.round(average, 2))
numpy.apply_along_axis(N-1維函數,軸向,N維數組):
將N維數組按照軸向拆分紅若干N-1維數組,做爲參數傳遞給N-1維函數,並將該函數的返回值按照相同的軸向從新組合成數組返回給調用者
import numpy as np


def foo(x):
    """Print the slice it receives and return its (sum, mean, std)."""
    print("foo:", x)
    total = x.sum()
    average = x.mean()
    spread = x.std()
    return total, average, spread


# Called directly on a 1-D array.
a = np.array([1, 2, 3, 4, 5])
print(foo(a))

b = np.array([[1, 2, 3],
              [4, 5, 6],
              [7, 8, 9]])
separator = "-" * 40
# Axis 0: foo receives each column; axis 1: foo receives each row.
for axis in (0, 1):
    print(separator)
    print(np.apply_along_axis(foo, axis, b))
import datetime as dt

import numpy as np


def dmy2wday(dmy):
    """Convert a 'day-month-year' field into a weekday number (0 = Monday).

    Depending on the NumPy version, loadtxt hands converters either bytes
    or str, so both are accepted.
    """
    if isinstance(dmy, bytes):
        dmy = dmy.decode('utf-8')
    date = dt.datetime.strptime(dmy, '%d-%m-%Y').date()
    return date.weekday()


wdays, opening_prices, highest_prices, lowest_prices, closing_prices = \
    np.loadtxt('data/aapl.csv', delimiter=',', usecols=(1, 3, 4, 5, 6),
               unpack=True, converters={1: dmy2wday})

#   0   1   2   3   4   5   6
# MON TUE WED THU FRI SAT SUN
# Only the first 16 rows are used in this example.
wdays = wdays[:16]
opening_prices = opening_prices[:16]
highest_prices = highest_prices[:16]
lowest_prices = lowest_prices[:16]
closing_prices = closing_prices[:16]

first_monday = np.where(wdays == 0)[0][0]
last_friday = np.where(wdays == 4)[0][-1]
# Row indices of the complete weeks, one sub-array per week.
# NOTE(review): assumes every week in the range has exactly 5 trading days;
# np.split raises if the range does not divide evenly.
indices = np.arange(first_monday, last_friday + 1)
indices = np.split(indices, indices.size // 5)
# e.g. [array([1, 2, 3, 4, 5]), array([6, ..., 10]), array([11, ..., 15])]


def week_summary(indices):
    """Return (open, high, low, close) for the rows of one week."""
    opening_price = opening_prices[indices[0]]
    highest_price = np.max(np.take(highest_prices, indices))
    lowest_price = np.min(np.take(lowest_prices, indices))
    closing_price = closing_prices[indices[-1]]
    return opening_price, highest_price, lowest_price, closing_price


summaries = np.apply_along_axis(week_summary, 1, indices)
print(summaries)
# %g is the compact float format: 360. is written as 360.
np.savetxt('data/summary.csv', summaries, delimiter=',', fmt='%g')
八、卷積
激勵函數:f(t)
單位響應函數:g(t)
則該激勵函數做用下的響應爲上兩者的卷積:
$(f*g)(t)=\int_{-\infty}^{+\infty} f(\tau)\,g(t-\tau)\,d\tau$
在已知瞬間激勵下的響應的條件下,求持續激勵下的響應:
a = [1,2,3,4,5]
b = [6,7,8]
numpy.convolve(a,b,'valid/same/full'); a:表示被卷積數組,激勵強度。b:表示核數組,單位激勵的響應因數,第三個參數默認爲full
0 0 1 2 3 4 5 0 0
8 7 6(結果:0*8+0*7+1*6=6)
8 7 6(結果:19)
8 7 6(結果:40)
8 7 6(結果:61)
8 7 6(結果:82)
8 7 6(結果:67)
8 7 6(結果:40)
結果集合6, 19,40,61,82,67,40就是徹底卷積(full)
19,40,61,82,67是同維卷積(same)(維度與a相同)
40,61,82是有效卷積(valid)(計算時不須要補0)
import numpy as np

# Convolved array (excitation) and kernel (unit response).
a = np.arange(1, 6)
print(a)  # [1 2 3 4 5]
b = np.arange(6, 9)
print(b)  # [6 7 8]

# Full convolution ('full' is also the default mode).
c = np.convolve(a, b, mode='full')
print(c)  # [ 6 19 40 61 82 67 40]

# Same-size convolution: result has the length of a.
d = np.convolve(a, b, mode='same')
print(d)  # [19 40 61 82 67]

# Valid convolution: only positions that need no zero padding.
e = np.convolve(a, b, mode='valid')
print(e)  # [40 61 82]
簡單移動平均線和指數移動平均線
import numpy as np
import datetime as dt
import matplotlib.pyplot as mp
import matplotlib.dates as md


def dmy2ymd(dmy):
    """Rewrite a ``dd-mm-YYYY`` date field as a ``YYYY-mm-dd`` string.

    ``np.loadtxt`` hands converters ``bytes`` on older NumPy releases and
    ``str`` on newer ones, so both are accepted here.
    """
    if isinstance(dmy, bytes):
        dmy = dmy.decode('utf-8')
    date = dt.datetime.strptime(dmy, '%d-%m-%Y').date()
    return date.strftime("%Y-%m-%d")


dates, closing_prices = np.loadtxt(
    "../data/aapl.csv", delimiter=',', usecols=(1, 6), unpack=True,
    dtype=np.dtype('M8[D], f8'), converters={1: dmy2ymd})

# 5-day simple moving average, computed with an explicit loop ...
sma51 = np.zeros(closing_prices.size - 4)
for i in range(sma51.size):
    sma51[i] = closing_prices[i:i + 5].mean()
# ... and the same values obtained by convolving with a uniform kernel.
sma52 = np.convolve(closing_prices, np.ones(5) / 5, 'valid')
# 10-day simple moving average
sma10 = np.convolve(closing_prices, np.ones(10) / 10, 'valid')

# Exponentially increasing weights, normalised to sum to 1.
weights = np.exp(np.linspace(-1, 0, 5))
weights /= weights.sum()
# convolve() flips the kernel, so it is passed reversed: the largest weight
# then multiplies the most recent price of each window.
ema5 = np.convolve(closing_prices, weights[::-1], 'valid')

mp.figure('Moving Average', facecolor='lightgray')
mp.title('Moving Average', fontsize=20)
mp.xlabel('Date', fontsize=14)
mp.ylabel('Price', fontsize=14)
ax = mp.gca()
ax.xaxis.set_major_locator(md.WeekdayLocator(byweekday=md.MO))
ax.xaxis.set_minor_locator(md.DayLocator())
ax.xaxis.set_major_formatter(md.DateFormatter('%d %b %Y'))
mp.tick_params(labelsize=10)
mp.grid(linestyle=":")
# Convert the NumPy dates to Python date objects for plotting.
dates = dates.astype(md.datetime.datetime)
mp.plot(dates, closing_prices, c='lightgray', label='Closing Price')
mp.plot(dates[4:], sma51, c='orangered', label='SMA-51')
mp.plot(dates[4:], sma52, c='orangered', label='SMA-52', alpha=0.25,
        linewidth=6)
mp.plot(dates[9:], sma10, c='dodgerblue', label='SMA-10')
mp.plot(dates[4:], ema5, c='limegreen', label='EMA-5')
mp.legend()
mp.gcf().autofmt_xdate()
mp.show()
布林帶
中軌:移動平均線,反應趨勢 - 策略
上軌:中軌+標準差*2,反應壓力 - 賣點
下軌:中軌-標準差*2,反應支撐 - 買點
import numpy as np
import datetime as dt
import matplotlib.pyplot as mp
import matplotlib.dates as md


def dmy2ymd(dmy):
    """Rewrite a ``dd-mm-YYYY`` date field as a ``YYYY-mm-dd`` string.

    ``np.loadtxt`` hands converters ``bytes`` on older NumPy releases and
    ``str`` on newer ones, so both are accepted here.
    """
    if isinstance(dmy, bytes):
        dmy = dmy.decode('utf-8')
    date = dt.datetime.strptime(dmy, '%d-%m-%Y').date()
    return date.strftime("%Y-%m-%d")


dates, closing_prices = np.loadtxt(
    "../data/aapl.csv", delimiter=',', usecols=(1, 6), unpack=True,
    dtype=np.dtype('M8[D], f8'), converters={1: dmy2ymd})

N = 5
# Middle band: N-day simple moving average.
medios = np.convolve(closing_prices, np.ones(N) / N, 'valid')
# Standard deviation of each N-day window, doubled to give the band width.
stds = np.zeros(medios.size)
for i in range(medios.size):
    stds[i] = closing_prices[i:i + N].std()
stds *= 2
lowers = medios - stds  # lower band
uppers = medios + stds  # upper band

mp.figure('Bollinger Bands', facecolor='lightgray')
mp.title('Bollinger Bands', fontsize=20)
mp.xlabel('Date', fontsize=14)
mp.ylabel('Price', fontsize=14)
ax = mp.gca()
ax.xaxis.set_major_locator(md.WeekdayLocator(byweekday=md.MO))
ax.xaxis.set_minor_locator(md.DayLocator())
ax.xaxis.set_major_formatter(md.DateFormatter('%d %b %Y'))
mp.tick_params(labelsize=10)
mp.grid(linestyle=":")
# Convert the NumPy dates to Python date objects for plotting.
dates = dates.astype(md.datetime.datetime)
mp.plot(dates, closing_prices, c='lightgray', label='Closing Price')
mp.plot(dates[N - 1:], medios, c='dodgerblue', label='Medio')
mp.plot(dates[N - 1:], lowers, c='limegreen', label='Lower')
mp.plot(dates[N - 1:], uppers, c='orangered', label='Upper')
mp.legend()
mp.gcf().autofmt_xdate()
mp.show()
九、線性模型
一、線性預測
a b c d e f ?
N=3
A B C
aA+bB+cC = d
bA+cB+dC = e
cA+dB+eC = f
經過上面三個三元一次方程求出,A B C的值,從而求出g=dA+eB+fC
/a b c\ /A\ /d\
|b c d| * |B| = |e|
\c d e/ \C/ \f/
------- ---- ----
a x b
x = numpy.linalg.lstsq(a,b)
g = bx
import numpy as np
import datetime as dt
import matplotlib.pyplot as mp
import matplotlib.dates as md
import pandas as pd


def dmy2ymd(dmy):
    """Rewrite a ``dd-mm-YYYY`` date field as a ``YYYY-mm-dd`` string.

    ``np.loadtxt`` hands converters ``bytes`` on older NumPy releases and
    ``str`` on newer ones, so both are accepted here.
    """
    if isinstance(dmy, bytes):
        dmy = dmy.decode('utf-8')
    date = dt.datetime.strptime(dmy, '%d-%m-%Y').date()
    return date.strftime("%Y-%m-%d")


dates, closing_prices = np.loadtxt(
    "../data/aapl.csv", delimiter=',', usecols=(1, 6), unpack=True,
    dtype=np.dtype('M8[D], f8'), converters={1: dmy2ymd})

# Each prediction is built from N*2 consecutive closing prices.
N = 5
pred_prices = np.zeros(closing_prices.size - N * 2 + 1)
for i in range(pred_prices.size):
    # Row j of `a` is the N-price window starting at i + j.
    a = np.zeros((N, N))
    for j in range(N):
        a[j, ] = closing_prices[i + j: i + N + j]
    # b holds the price that follows each of those windows.
    b = closing_prices[i + N: i + N * 2]
    # rcond=None selects the machine-precision cutoff explicitly and avoids
    # the FutureWarning older NumPy releases emit when rcond is omitted.
    x = np.linalg.lstsq(a, b, rcond=None)[0]
    # Apply the fitted coefficients to the latest N prices (dot product).
    pred_prices[i] = b.dot(x)

mp.figure('Linear Prediction', facecolor='lightgray')
mp.title('Linear Prediction', fontsize=20)
mp.xlabel('Date', fontsize=14)
mp.ylabel('Price', fontsize=14)
ax = mp.gca()
ax.xaxis.set_major_locator(md.WeekdayLocator(byweekday=md.MO))
ax.xaxis.set_minor_locator(md.DayLocator())
ax.xaxis.set_major_formatter(md.DateFormatter('%d %b %Y'))
mp.tick_params(labelsize=10)
mp.grid(linestyle=":")
# Convert the NumPy dates to Python date objects for plotting.
dates = dates.astype(md.datetime.datetime)
mp.plot(dates, closing_prices, 'o-', c='lightgray', label='Closing Price')
# The last prediction belongs to the business day after the final date.
last_next_day = dates[-1] + pd.tseries.offsets.BDay()
dates = np.append(dates, last_next_day)
mp.plot(dates[N * 2:], pred_prices, 'o-', c='orangered',
        label='Predicted Price')
for date, price in zip(dates[N * 2:], pred_prices):
    print(date, "->", price)
mp.legend()
mp.gcf().autofmt_xdate()
mp.show()
二、線性擬合
x1 y1
x2 y2
...
xn yn
y = kx+b
y1 = kx1 +b
y2 = kx2 +b
....
yn = kxn +b
/x1 1\ /k\ /y1\
|x2 1| * |b| = |y2|
|....| \ / |..|
\xn 1/ \yn/, 1表示b前面的係數
------ --- ----
a x b
x = numpy.linalg.lstsq(a,b)
import datetime as dt import numpy as np import matplotlib.pyplot as mp import matplotlib.dates as md # 用於畫圖時的日期 # unicode有幾種編碼:包括UCS-4,UTF-8等,默認讀進來的是UTF-8,UTF-8的一個字符是1至4個字節 # USC-4每一個字符是固定的4字節,符合數組同質性的要求 def dmy2ymd(dmy): """日月年轉換爲年月日""" # 文件讀出來的日期是utf-8格式的字符串(每一個字符所佔的字節數在1-4之間),字節序列,非字符序列 #將字節序列轉換爲字符序列 dmy = str(dmy,encoding='utf-8') # 將字符串解析爲日期格式strptime中的p就是parse的縮寫 date = dt.datetime.strptime(dmy,'%d-%m-%Y').date() # 將日期轉換爲日期字符串 # ymd = date.strftime('%Y-%m-%d') return date # M8[D]表示8字節的日期類型 dates, opening_prices, heighest_prices, lowset_prices, closing_prices \ = np.loadtxt('../data/aapl.csv',delimiter=',',usecols=(1,3,4,5,6),unpack=True, dtype=np.dtype('M8[D], f8, f8, f8, f8'),converters={1:dmy2ymd}) # 趨勢點 trend_points = (heighest_prices+lowset_prices+closing_prices)/3 spreads = heighest_prices - lowset_prices # 壓力點 resistance_points = trend_points + spreads # 支撐點 support_points = trend_points - spreads # 將日期類型的數據轉換爲整數 days = dates.astype(int) # 合併矩陣ones_like(days)生成一個與days維度相同的全1的數組 a = np.column_stack((days,np.ones_like(days))) x = np.linalg.lstsq(a, trend_points)[0] trend_line = days * x[0]+ x[1] # 壓力線 x = np.linalg.lstsq(a, resistance_points)[0] resistance_line = days * x[0]+ x[1] # 支撐線 x = np.linalg.lstsq(a, support_points)[0] support_line = days * x[0]+ x[1] mp.figure('Trend', facecolor='lightgray') mp.title('Trend', fontsize=20) mp.xlabel('Date', fontsize=14) mp.ylabel('Price', fontsize=14) ax = mp.gca() # 主刻度以星期一爲標誌 ax.xaxis.set_major_locator(md.WeekdayLocator(byweekday=md.MO)) # 次刻度以天爲單位,沒有參數表示一天一個點, ax.xaxis.set_minor_locator(md.DayLocator()) # 格式化 31 Jan 2011 ax.xaxis.set_major_formatter(md.DateFormatter('%d %b %Y')) # 設置標籤大小 mp.tick_params(labelsize=10) # 設置網格線 mp.grid(linestyle=':') # 將numpy的日期類型轉換爲matiplotlib能夠識別的格式 dates = dates.astype(md.datetime.datetime) # 獲得一個布爾類型的數組 # 陽線,獲得一個bool數組:收盤價大於開盤價爲Ture,不然爲False,掩碼 rise = closing_prices - opening_prices >= 0.01 # 陰線,獲得一個bool數組:收盤價小於開盤價爲Ture,不然爲False fall = opening_prices - closing_prices >= 0.01 
# 設置顏色的rgb fc = np.zeros(dates.size, dtype='3f4') ec = np.zeros(dates.size, dtype='3f4') fc[rise],fc[fall] = (1,1,1),(0.85,0.85,0.85) ec[rise],ec[fall] = (0.85,0.85,0.85),(0.85,0.85,0.85) # 畫影線 mp.bar(dates,heighest_prices-lowset_prices,0,lowset_prices,color=fc,edgecolor=ec) #lowset_prices 表示底的位置 # 畫實體 mp.bar(dates,closing_prices-opening_prices,0.8,opening_prices,color=fc,edgecolor=ec)# 表示底的位置 mp.scatter(dates, trend_points, c='dodgerblue',alpha=0.5, s=60, zorder=2) mp.scatter(dates, resistance_points, c='orangered',alpha=0.5, s=60, zorder=2) mp.scatter(dates, support_points, c='limegreen',alpha=0.5, s=60, zorder=2) mp.plot(dates, trend_line,c='dodgerblue', linewidth=3, label='Trend') mp.plot(dates, resistance_line,c='orangered', linewidth=3, label='Resistance') mp.plot(dates, support_line,c='limegreen', linewidth=3, label='Support') # 下標數據自適應 mp.gcf().autofmt_xdate() mp.show()
十、裁剪、壓縮和累乘
一、裁剪,clip
數組對象.clip(min=最小閾值,max=最大閾值),將數組中小於min時都設置爲min,大於max設置爲max,返回裁剪後的數組對象
二、壓縮,compress
數組對象.compress(條件),返回知足條件的元素所組成的數組
三、累乘,prod
數組對象.prod(),返回元素的累乘之積
數組對象.cumprod,返回元素累乘的過程
# NumPy is imported as `np`, matching the other examples in this file
# (`mp` is the alias they use for matplotlib.pyplot).
import numpy as np

a = np.arange(1, 6)
print(a)  # [1 2 3 4 5]

# clip: values below min become min, values above max become max
b = a.clip(min=2, max=4)
print(b)  # [2 2 3 4 4]

# compress: keep only the elements for which the condition is True
c = a.compress((a >= 2) & (a <= 4))
print(c)  # [2 3 4]

# prod: product of all elements
d = a.prod()
print(d)  # 120

# cumprod: running product
e = a.cumprod()
print(e)  # [  1   2   6  24 120]
十一、協方差、相關係數和相關矩陣
樣本:
a:[a1,a2,...,an]
b:[b1,b2,...,bn]
均值:
ave(a) = (a1+a2+...+an)/n
ave(b) = (b1+b2+...+bn)/n
離差:
dev(a) = [a1,a2,...,an] - ave(a)
dev(b) = [b1,b2,...,bn] - ave(b)
方差:
var(a) = ave(dev(a)*dev(a))
var(b) = ave(dev(b)*dev(b))
標準差:
std(a) = sqrt(var(a))
std(b) = sqrt(var(b))
--------------------------------
a和b的協方差:cov(a,b) = ave(dev(a)*dev(b))
相關係數:cov(a,b)/(std(a)*std(b)),取值在[-1,1],正負號表示相關性爲正相關或負相關,絕對值表示相關性的程度強弱
相關矩陣:
var(a) cov(a,b)
_____________ ______________
std(a)*std(a) std(a)std(b)
cov(b,a) var(b)
_____________ ______________
std(b)*std(a) std(b)std(b)
numpy.corrcoef(a,b) ==> / 1 相關係數\
\相關係數 1 /
import datetime as dt import numpy as np import matplotlib.pyplot as mp import matplotlib.dates as md # 用於畫圖時的日期 # unicode有幾種編碼:包括UCS-4,UTF-8等,默認讀進來的是UTF-8,UTF-8的一個字符是1至4個字節 # USC-4每一個字符是固定的4字節,符合數組同質性的要求 def dmy2ymd(dmy): """日月年轉換爲年月日""" # 文件讀出來的日期是utf-8格式的字符串(每一個字符所佔的字節數在1-4之間),字節序列,非字符序列 #將字節序列轉換爲字符序列 dmy = str(dmy,encoding='utf-8') # 將字符串解析爲日期格式strptime中的p就是parse的縮寫 date = dt.datetime.strptime(dmy,'%d-%m-%Y').date() # 將日期轉換爲日期字符串 # ymd = date.strftime('%Y-%m-%d') return date # M8[D]表示8字節的日期類型 dates, bhp_closing_prices \ = np.loadtxt('../data/bhp.csv',delimiter=',',usecols=(1,6),unpack=True, dtype=np.dtype('M8[D],f8'),converters={1:dmy2ymd}) vale_closing_price = np.loadtxt("../data/vale.csv",delimiter=',', usecols=(6,), unpack=True) # diff(),相鄰的元素求差 bhp_returns = np.diff(bhp_closing_prices)/bhp_closing_prices[:-1] vale_returns = np.diff(vale_closing_price)/vale_closing_price[:-1] ave_a = bhp_returns.mean() dev_a = bhp_returns - ave_a var_a = (dev_a*dev_a).mean() std_a = np.sqrt(var_a) ave_b = vale_returns.mean() dev_b = vale_returns - ave_b var_b = (dev_b*dev_b).mean() std_b = np.sqrt(var_b) cov_ab = (dev_a*dev_b).mean() cov_ba = (dev_b*dev_a).mean() corr = np.array([ [var_a/(std_a*std_a),cov_ab/(std_a*std_b)], [cov_ba/(std_b*std_a),var_b/(std_b*std_b)], ]) print(corr) """ [[ 1. 0.67841747] [ 0.67841747 1. ]] """ # 使用numpy corr = np.corrcoef(bhp_returns,vale_returns) print(corr) """ [[ 1. 0.67841747] [ 0.67841747 1. 
]] """ mp.figure('Correlation', facecolor='lightgray') mp.title('Correlation', fontsize=20) mp.xlabel('Date', fontsize=14) mp.ylabel('Returns', fontsize=14) # 收益 ax = mp.gca() # 主刻度以星期一爲標誌 ax.xaxis.set_major_locator(md.WeekdayLocator(byweekday=md.MO)) # 次刻度以天爲單位,沒有參數表示一天一個點, ax.xaxis.set_minor_locator(md.DayLocator()) # 格式化 31 Jan 2011 ax.xaxis.set_major_formatter(md.DateFormatter('%d %b %Y')) # 設置標籤大小 mp.tick_params(labelsize=10) # 設置網格線 mp.grid(linestyle=':') # 將numpy的日期類型轉換爲matiplotlib能夠識別的格式 dates = dates.astype(md.datetime.datetime) # 獲得一個布爾類型的數組 # 陽線,獲得一個bool數組:收盤價大於開盤價爲Ture,不然爲False,掩碼 # 設置顏色的rgb fc = np.zeros(dates.size, dtype='3f4') ec = np.zeros(dates.size, dtype='3f4') mp.plot(dates[:-1],bhp_returns, c='orangered', label="BHP") mp.plot(dates[:-1],vale_returns, c='dodgerblue', label="VALE") # 下標數據自適應 mp.gcf().autofmt_xdate() mp.legend() mp.show()
十二、多項式擬合
y = p0x^n + p1x^n-1 + p2x^n-2+...+pn
numpy.polyfit(X,Y,n) -->[p0,p1,p2,....,pn]
numpy.polyval([p0,p1,p2,....,pn],X) -> Y
numpy.polyder([p0,p1,p2,....,pn]) -> [n*p0,(n-1)*p1,....,1*p(n-1)],求導(返回導函數的係數)
numpy.roots([p0,p1,p2,....,pn]) ->p0x^n + p1x^(n-1) + p2x^(n-2)+...+pn=0方程的根
import datetime as dt import numpy as np import matplotlib.pyplot as mp import matplotlib.dates as md # 用於畫圖時的日期 # unicode有幾種編碼:包括UCS-4,UTF-8等,默認讀進來的是UTF-8,UTF-8的一個字符是1至4個字節 # USC-4每一個字符是固定的4字節,符合數組同質性的要求 def dmy2ymd(dmy): """日月年轉換爲年月日""" # 文件讀出來的日期是utf-8格式的字符串(每一個字符所佔的字節數在1-4之間),字節序列,非字符序列 # 將字節序列轉換爲字符序列 dmy = str(dmy, encoding='utf-8') # 將字符串解析爲日期格式strptime中的p就是parse的縮寫 date = dt.datetime.strptime(dmy, '%d-%m-%Y').date() # 將日期轉換爲日期字符串 # ymd = date.strftime('%Y-%m-%d') return date # M8[D]表示8字節的日期類型 dates, bhp_closing_prices \ = np.loadtxt('../data/bhp.csv', delimiter=',', usecols=(1, 6), unpack=True, dtype=np.dtype('M8[D],f8'), converters={1: dmy2ymd}) vale_closing_price = np.loadtxt("../data/vale.csv", delimiter=',', usecols=(6,), unpack=True) diff_closing_prices = bhp_closing_prices - vale_closing_price days = dates.astype(int) p = np.polyfit(days,diff_closing_prices,4) print(p) poly_closing_prices = np.polyval(p,days) q = np.polyder(p) roots = np.roots(q) # 取出實根 reals = roots[np.isreal(roots)].real peeks = [[days[0],np.polyval(p,days[0])]] for real in reals: if days[0] < real and real < days[-1]: peeks.append([real,np.polyval(p,real)]) peeks.append([days[-1],np.polyval(p,days[-1])]) peeks.sort() peeks = np.array(peeks) mp.figure('Polynomial Fitting', facecolor='lightgray') mp.title('Polynomial Fitting', fontsize=20) mp.xlabel('Date', fontsize=14) mp.ylabel('difference price', fontsize=14) # ax = mp.gca() # 主刻度以星期一爲標誌 ax.xaxis.set_major_locator(md.WeekdayLocator(byweekday=md.MO)) # 次刻度以天爲單位,沒有參數表示一天一個點, ax.xaxis.set_minor_locator(md.DayLocator()) # 格式化 31 Jan 2011 ax.xaxis.set_major_formatter(md.DateFormatter('%d %b %Y')) # 設置標籤大小 mp.tick_params(labelsize=10) # 設置網格線 mp.grid(linestyle=':') # 將numpy的日期類型轉換爲matiplotlib能夠識別的格式 dates = dates.astype(md.datetime.datetime) mp.plot(dates, poly_closing_prices, c='dodgerblue',linewidth=3,label='Polynomial Fitting') mp.scatter(dates,diff_closing_prices, c='limegreen', alpha=0.5, s=60, label='Difference Price') # 拆分 dates, 
prices = np.hsplit(peeks,2) dates = dates.astype(int).astype('M8[D]').astype(md.datetime.datetime) for i in range(1,dates.size): mp.annotate('', xytext=(dates[i-1],prices[i-1]),xy=(dates[i],prices[i]),size=40, arrowprops = dict(arrowstyle='fancy',color='orangered',alpha=0.25)) mp.scatter(dates, prices, marker='^',c='orangered',s=100,label='Peek',zorder=4) # 下標數據自適應 mp.gcf().autofmt_xdate() mp.legend() mp.show()
1三、符號數組
一、numpy.sign([12 -8 -9 11 -7 -0 25]) -->[1 -1 -1 1 -1 0 1]
二、numpy.piecewise([12 -8 -9 11 -7 -0 25],[條件1,條件2,...],[取值1,取值2,...]),數組中的元素知足哪一個條件就取哪一個值
淨額成交量(OBV)
[10 -5 -1]
[1000 2000 3000]
[1000 -2000 -3000]
import datetime as dt import numpy as np import matplotlib.pyplot as mp import matplotlib.dates as md # 用於畫圖時的日期 # unicode有幾種編碼:包括UCS-4,UTF-8等,默認讀進來的是UTF-8,UTF-8的一個字符是1至4個字節 # USC-4每一個字符是固定的4字節,符合數組同質性的要求 def dmy2ymd(dmy): """日月年轉換爲年月日""" # 文件讀出來的日期是utf-8格式的字符串(每一個字符所佔的字節數在1-4之間),字節序列,非字符序列 # 將字節序列轉換爲字符序列 dmy = str(dmy, encoding='utf-8') # 將字符串解析爲日期格式strptime中的p就是parse的縮寫 date = dt.datetime.strptime(dmy, '%d-%m-%Y').date() # 將日期轉換爲日期字符串 # ymd = date.strftime('%Y-%m-%d') return date # M8[D]表示8字節的日期類型,volumes是成交量 dates, closing_prices, volumes \ = np.loadtxt('../data/bhp.csv', delimiter=',', usecols=(1, 6, 7), unpack=True, dtype=np.dtype('M8[D],f8, f8'), converters={1: dmy2ymd}) # 差分 diff_closing_prices = np.diff(closing_prices) # sign_closing_prices = np.sign(diff_closing_prices) sign_closing_prices = np.piecewise(diff_closing_prices, [diff_closing_prices<0,diff_closing_prices==0,diff_closing_prices>0], [-1,0,1]) print(sign_closing_prices) obvs = volumes[1:] * sign_closing_prices mp.figure('On-Banlance Volumes', facecolor='lightgray') mp.title('On-Banlance Volumes', fontsize=20) mp.xlabel('Date', fontsize=14) mp.ylabel('OBV', fontsize=14) # ax = mp.gca() # 主刻度以星期一爲標誌 ax.xaxis.set_major_locator(md.WeekdayLocator(byweekday=md.MO)) # 次刻度以天爲單位,沒有參數表示一天一個點, ax.xaxis.set_minor_locator(md.DayLocator()) # 格式化 31 Jan 2011 ax.xaxis.set_major_formatter(md.DateFormatter('%d %b %Y')) # 設置標籤大小 mp.tick_params(labelsize=10) # 設置網格線 mp.grid(axis='y',linestyle=':') # 將numpy的日期類型轉換爲matiplotlib能夠識別的格式 dates = dates[1:].astype(md.datetime.datetime) rise = obvs>0 fall = obvs<0 fc = np.zeros(dates.size, '3f4') ec = np.zeros(dates.size, '3f4') fc[rise], fc[fall] = (1,0,0),(0,0.5,0) ec[rise], ec[fall] = (1,1,1),(1,1,1) mp.bar(dates,obvs, color=fc, edgecolor=ec, label="OBV") # 下標數據自適應 mp.gcf().autofmt_xdate() mp.legend() mp.show()
1四、矢量化
def 標量函數(標量):
...
return 標量返回值
矢量返回值 = numpy.vectorize(標量函數)(矢量參數)
import numpy as np


def foo(x, y):
    """Return the sum, difference and product of x and y."""
    return x + y, x - y, x * y


a, b = 3, 4
c, d, e = foo(a, b)
print(c, d, e)

f, g = np.array([5, 6, 7]), np.array([8, 9, 10])
# Apply the scalar function pair by pair, then regroup the results into one
# array per returned value.
h, i, j = (np.array(column)
           for column in zip(*(foo(x, y) for x, y in zip(f, g))))
print(h, i, j)

# np.vectorize performs the same element-wise application in one call.
h, i, j = np.vectorize(foo)(f, g)
print(h, i, j)
import datetime as dt
import numpy as np
import matplotlib.pyplot as mp
import matplotlib.dates as md  # date locators/formatters for the x axis


def dmy2ymd(dmy):
    """Parse a ``dd-mm-YYYY`` date field into a ``datetime.date``.

    ``np.loadtxt`` hands converters ``bytes`` on older NumPy releases and
    ``str`` on newer ones, so both are accepted here.
    """
    if isinstance(dmy, bytes):
        dmy = dmy.decode('utf-8')
    # strptime: the "p" stands for "parse"
    date = dt.datetime.strptime(dmy, '%d-%m-%Y').date()
    return date


# M8[D] is the 8-byte date type with day resolution.
dates, opening_prices, highest_prices, lowset_prices, closing_prices = \
    np.loadtxt('../data/bhp.csv', delimiter=',', usecols=(1, 3, 4, 5, 6),
               unpack=True, dtype=np.dtype('M8[D], f8, f8, f8, f8'),
               converters={1: dmy2ymd})


def profit(opening_price, highest_price, lowest_price, closing_price):
    """Return the percentage gain of buying at 99% of the open and selling
    at the close, or NaN when that buying price lies outside [low, high]."""
    buying_price = opening_price * 0.99  # one percent below the open
    if lowest_price <= buying_price <= highest_price:
        return (closing_price - buying_price) * 100 / buying_price
    return np.nan


profits = np.vectorize(profit)(opening_prices, highest_prices,
                               lowset_prices, closing_prices)
print(profits)
nan = np.isnan(profits)
dates, profits = dates[~nan], profits[~nan]  # keep only valid results
gain_dates, gain_profits = dates[profits > 0], profits[profits > 0]
loss_dates, loss_profits = dates[profits < 0], profits[profits < 0]

mp.figure('Trading Simulation', facecolor='lightgray')
mp.title('Trading Simulation', fontsize=20)
mp.xlabel('Date', fontsize=14)
mp.ylabel('Profit', fontsize=14)
ax = mp.gca()
# Major ticks on Mondays
ax.xaxis.set_major_locator(md.WeekdayLocator(byweekday=md.MO))
# Minor ticks every day (DayLocator default)
ax.xaxis.set_minor_locator(md.DayLocator())
# Tick label format, e.g. "31 Jan 2011"
ax.xaxis.set_major_formatter(md.DateFormatter('%d %b %Y'))
mp.tick_params(labelsize=10)
mp.grid(linestyle=':')
if dates.size > 0:
    # Convert the NumPy dates to Python date objects for plotting.
    dates = dates.astype(md.datetime.datetime)
    mp.plot(dates, profits, c='gray', label='Profit')
    # Horizontal line at the mean
    mp.axhline(y=profits.mean(), linestyle='--', color='gray')
if gain_dates.size > 0:
    gain_dates = gain_dates.astype(md.datetime.datetime)
    mp.plot(gain_dates, gain_profits, 'o', c='orangered',
            label='Gain Profit')
    mp.axhline(y=gain_profits.mean(), linestyle='--', color='orangered')
if loss_dates.size > 0:
    loss_dates = loss_dates.astype(md.datetime.datetime)
    mp.plot(loss_dates, loss_profits, 'o', c='limegreen',
            label='Loss Profit')
    mp.axhline(y=loss_profits.mean(), linestyle='--', color='limegreen')
mp.legend()
mp.gcf().autofmt_xdate()
mp.show()
1五、數據平滑
步驟:
一、卷積降噪
二、曲線擬合(爲了尋找交點)
三、計算特徵
四、指導業務
import datetime as dt import numpy as np import matplotlib.pyplot as mp import matplotlib.dates as md # 用於畫圖時的日期 # unicode有幾種編碼:包括UCS-4,UTF-8等,默認讀進來的是UTF-8,UTF-8的一個字符是1至4個字節 # USC-4每一個字符是固定的4字節,符合數組同質性的要求 def dmy2ymd(dmy): """日月年轉換爲年月日""" # 文件讀出來的日期是utf-8格式的字符串(每一個字符所佔的字節數在1-4之間),字節序列,非字符序列 # 將字節序列轉換爲字符序列 dmy = str(dmy, encoding='utf-8') # 將字符串解析爲日期格式strptime中的p就是parse的縮寫 date = dt.datetime.strptime(dmy, '%d-%m-%Y').date() # 將日期轉換爲日期字符串 # ymd = date.strftime('%Y-%m-%d') return date # M8[D]表示8字節的日期類型 dates, bhp_closing_prices = np.loadtxt('../data/bhp.csv', delimiter=',', usecols=(1, 6), unpack=True, dtype=np.dtype('M8[D],f8'), converters={1: dmy2ymd}) vale_closing_price = np.loadtxt("../data/vale.csv", delimiter=',', usecols=(6,), unpack=True) # diff(),相鄰的元素求差 bhp_returns = np.diff(bhp_closing_prices) / bhp_closing_prices[:-1] vale_returns = np.diff(vale_closing_price) / vale_closing_price[:-1] N = 8 weights = np.hanning(N) print(weights) # [ 0. 0.1882551 0.61126047 0.95048443 0.95048443 0.61126047 0.1882551 0. 
] weights /= weights.sum() # 求卷積 bhp_smooth_returns = np.convolve(bhp_returns, weights, 'valid') vale_smooth_returns = np.convolve(vale_returns, weights, 'valid') # 多項式擬合 days = dates[N-1:-1].astype(int) degree = 3 bhp_p = np.polyfit(days, bhp_smooth_returns, degree) bhp_fitted_returns = np.polyval(bhp_p, days) vale_p = np.polyfit(days, vale_smooth_returns, degree) vale_fitted_returns = np.polyval(vale_p, days) # 差函數的係數 sub_p = np.polysub(bhp_p, vale_p) roots = np.roots(sub_p) # 提取實根 reals = roots[np.isreal(roots)].real inters = [] for real in reals: if days[0] <= real and real <= days[-1]: inters.append([real, np.polyval(bhp_p, real)]) inters.sort() inters = np.array(inters) mp.figure('Smoothing Returns', facecolor='lightgray') mp.title('Smoothing Returns', fontsize=20) mp.xlabel('Date', fontsize=14) mp.ylabel('Returns', fontsize=14) # 收益 ax = mp.gca() # 主刻度以星期一爲標誌 ax.xaxis.set_major_locator(md.WeekdayLocator(byweekday=md.MO)) # 次刻度以天爲單位,沒有參數表示一天一個點, ax.xaxis.set_minor_locator(md.DayLocator()) # 格式化 31 Jan 2011 ax.xaxis.set_major_formatter(md.DateFormatter('%d %b %Y')) # 設置標籤大小 mp.tick_params(labelsize=10) # 設置網格線 mp.grid(linestyle=':') # 將numpy的日期類型轉換爲matiplotlib能夠識別的格式 dates = dates.astype(md.datetime.datetime) mp.plot(dates[:-1], bhp_returns, alpha=0.25 , c='orangered', label="BHP") mp.plot(dates[:-1], vale_returns, alpha=0.25, c='dodgerblue', label="VALE") mp.plot(dates[N-1:-1], bhp_smooth_returns, alpha=0.75, c='orangered', label="Smooth BHP") mp.plot(dates[N-1:-1], vale_smooth_returns, alpha=0.75, c='dodgerblue', label="Smooth VALE") mp.plot(dates[N-1:-1], bhp_fitted_returns,linewidth=3, c='orangered', label="Fitted BHP") mp.plot(dates[N-1:-1], vale_fitted_returns,linewidth=3, c='dodgerblue', label="Fitted VALE") dates, returns = np.hsplit(inters,2) dates = dates.astype(int).astype('M8[D]').astype(md.datetime.datetime) mp.scatter(dates, returns, marker='x', c='firebrick', s=120, lw=3, zorder=3) # 下標數據自適應 mp.gcf().autofmt_xdate() mp.legend() mp.show()
2、矩陣和ufunc
一、建立矩陣
numpy中的矩陣是matrix類類型的對象,matrix類是ndarray類的子類,對某些專門針對矩陣的運算作了重載,增長部分新的屬性和方法
numpy.matrix(可被解釋爲矩陣的二維容器,copy=是否複製數據[True])->矩陣對象
說明:copy = False時表示矩陣與二維容器共享數據等價於numpy.mat()
numpy.mat(可被解釋爲矩陣的二維容器) ->矩陣對象,數據共享
可被解釋爲矩陣的二維容器也能夠用字符串表示,如:‘1 2 3;4 5 6;7 8 9’
numpy.bmat(子矩陣的塊組合字符串)->組合矩陣
‘A B; C D’:A B C D都表明一個矩陣或可被解釋爲矩陣的二維容器
import numpy as np

a = np.matrix([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(a, type(a))
b = np.arange(1, 10).reshape(3, 3)
print(b, type(b))
c = np.matrix(b)
print(c, type(c))

d = np.matrix('1 2 3; 4 5 6;7 8 9 ')
e = d  # e and d are the same object
print(id(e), id(d))
# np.asmatrix replaces np.mat, which was removed in NumPy 2.0; like
# np.matrix(..., copy=False) it shares data instead of copying.
f = np.asmatrix(e)
print(id(f))
g = np.matrix(f, copy=False)
print(id(g))
h = np.matrix(g)  # copy=True by default, so h is independent
print(id(h))
d += 10
print(d, e, f, g, h, sep='\n')

# For an ndarray, ** is element-wise:
#   1 2 3     1 2 3      1  4  9
#   4 5 6  *  4 5 6  =  16 25 36
#   7 8 9     7 8 9     49 64 81
print(b**2)

# For a matrix, ** is the matrix product (same as b.dot(b) for arrays):
# each row is multiplied with each column and the products are summed.
#             1   2   3
#   *         4   5   6
#             7   8   9
#   1 2 3    30  36  42
#   4 5 6    66  81  96
#   7 8 9   102 126 150
print(h**2)

print(b.T, h.T, sep='\n')

i = np.asmatrix('1 2 6; 3 5 7; 4 8 9')
j = i.I  # inverse matrix; plain ndarrays have no such attribute
print(j)
print(i * j)  # a matrix times its inverse gives the identity matrix

k = np.ones((2, 2))
l = k * 2
m = k * 3
n = k * 4
# bmat resolves the names in the string against the caller's variables.
o = np.bmat('k l; m n')
print(type(o))
二、ufunc,經過函數,統一函數
numpy.ufunc是一個類,由該類所建立的對象可被當作函數調用,在執行過程當中會根據所接收的矢量化參數中的元素依次調用其內部封裝
的標量化函數,並將其返回的標量結果組織成矢量返回調用者
標量函數(標量) -> 標量
numpy.frompyfunc(標量函數,參數個數,返回值的個數) ->ufunc類型的對象
ufunc類型的對象(矢量)->矢量
import numpy as np


def foo(x, y):
    """Return the sum, difference and product of x and y."""
    return x + y, x - y, x * y


a, b = 3, 4
c, d, e = foo(a, b)
print(c, d, e)

f, g = np.array([5, 6, 7]), np.array([8, 9, 10])
# Apply the scalar function pair by pair, then regroup the results into one
# array per returned value.
h, i, j = (np.array(column)
           for column in zip(*(foo(x, y) for x, y in zip(f, g))))
print(h, i, j)

# frompyfunc(function, number of inputs, number of outputs) wraps the scalar
# function in a ufunc that performs the same element-wise application.
uf = np.frompyfunc(foo, 2, 3)
print(type(uf))  # <class 'numpy.ufunc'>
h, i, j = uf(f, g)
print(h, i, j)
三、加法ufunc預約義對象(省去了numpy.frompyfunc()這個步驟)
numpy.add.reduce(數組) ->元素累加和
numpy.add.accumulate(數組)->元素累加的過程
numpy.add.reduceat(數組,位置)->分段累加
numpy.add.outer(數組1,數組2)->外和,數組1放在行的位置上,數組2放在列的位置上求和
a:[1 2 3]
b:[4 5 6 7 ]
numpy.add.outer(a,b)->獲得的二維數組就是外和
4 5 6 7
1 5 6 7 8
2 6 7 8 9
3 7 8 9 10
import numpy as np

print(type(np.add))  # <class 'numpy.ufunc'>

a = np.array([10, 20, 30])
b = np.array([1, 2, 3])

c = np.add(a, b)  # same as a + b
print(c)  # [11 22 33]

e = np.add.reduce(a)  # sum of all elements
print(e)  # 60

f = np.add.accumulate(a)  # running sum
print(f)  # [10 30 60]

g = np.arange(1, 7)
# values:  1 2 3 4 5 6
# indices: 0 1 2 3 4 5
#          ^   ^   ^      a new segment starts at indices 0, 2 and 4
segment_starts = [0, 2, 4]
h = np.add.reduceat(g, segment_starts)  # sum of each segment
print(h)  # [ 3  7 11]

# Outer sum: a runs down the rows, b across the columns.
#       1  2  3
#  10  11 12 13
#  20  21 22 23
#  30  31 32 33
i = np.add.outer(a, b)
print(i)

# Outer product, same layout.
#       1  2  3
#  10  10 20 30
#  20  20 40 60
#  30  30 60 90
j = np.outer(a, b)
print(j)
四、除法ufunc預約對象
一、真除:[5 5 -5 -5]<真除>[2 -2 2 -2]=[2.5 -2.5 -2.5 2.5]
numpy.true_divide()
numpy.divide()
/
二、地板除:[5 5 -5 -5]<地板除>[2 -2 2 -2]=[2 -3 -3 2]
numpy.floor_divide()
//
三、天花板除:[5 5 -5 -5]<天花板除>[2 -2 2 -2]=[3 -2 -2 3]
numpy.ceil(a/b).astype(int)
四、截斷除:[5 5 -5 -5]<截斷除>[2 -2 2 -2]=[2 -2 -2 2]
numpy.trunc(a/b).astype(int)
(a/b).astype(int)
五、取餘ufunc預約義對象
被除數<除以>除數=商...餘數
5<除以>2 = 2...1
除數<乘以>商+餘數 = 被除數
一、地板模
[5 5 -5 -5]<地板除>[2 -2 2 -2]=[2 -3 -3 2]...[1 -1 1 -1]
numpy.remainder()
numpy.mod()
%
二、截斷模
[5 5 -5 -5]<截斷除>[2 -2 2 -2]=[2 -2 -2 2]...[1 1 -1 -1]
numpy.fmod()
import numpy as np

a = np.array([5, 5, -5, -5])
b = np.array([2, -2, 2, -2])
quotient = a / b

# True division: three equivalent spellings
c = np.true_divide(a, b)
d = np.divide(a, b)
e = quotient
print(c, d, e, sep='\n')

# Floor division: round towards negative infinity
f = np.floor_divide(a, b)
g = a // b
print(f, g, sep='\n')

# Ceiling division: round towards positive infinity
h = np.ceil(quotient).astype(int)
print(h)  # [ 3 -2 -2  3]

# Truncating division: round towards zero
i = np.trunc(quotient).astype(int)
print(i)  # [ 2 -2 -2  2]
j = quotient.astype(int)
print(j)  # [ 2 -2 -2  2]

# Floor modulo: the remainder takes the sign of the divisor
k = np.remainder(a, b)
l = np.mod(a, b)
n = a % b
print(k, l, n)  # [ 1 -1  1 -1] three times

# Truncating modulo: the remainder takes the sign of the dividend
m = np.fmod(a, b)
print(m)  # [ 1  1 -1 -1]
六、python語言中絕大部分的運算符都被numpy經過ufunc進行了重載定義,使之可以支持數組間的運算
1 1 2 3 5 8 13 21 ....?
f(n) = f(n-1)+f(n-2),n >= 3
f(1) = f(2) = 1
import functools

import numpy as np

n = 35


# Recursive implementation. Results are memoised so that each f(k) is
# computed once instead of an exponential number of times.
@functools.lru_cache(maxsize=None)
def fibo(n):
    """Return the n-th Fibonacci number, with f(1) = f(2) = 1."""
    return 1 if n < 3 else fibo(n - 1) + fibo(n - 2)


print(fibo(n))

# Iterative implementation: fn_1 holds f(k-1) and fn_2 holds f(k-2).
fn_1, fn_2 = 0, 1
for i in range(n):
    fn = fn_1 + fn_2
    fn_1, fn_2 = fn, fn_1
print(fn)

# NumPy implementation: the top-left entry of [[1, 1], [1, 0]] ** (n - 1)
# is f(n). np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
res = (np.asmatrix('1 1; 1 0') ** (n - 1))[0, 0]
print(res)

# Closed-form (Binet) formula:
#
#   ((1 + sqrt(5)) / 2) ** n  -  ((1 - sqrt(5)) / 2) ** n
#   -----------------------------------------------------
#                         sqrt(5)
#
# The float result is rounded, not truncated: a value that lands just below
# the exact integer would otherwise come out one too small.
r = np.sqrt(5)
res = int(round(float((((1 + r) / 2) ** n - ((1 - r) / 2) ** n) / r)))
print(res)
七、實現三角函數的ufunc預約義對象
利薩如曲線
lissa.py
方波發生器
y = 4sin(1x)/(1pi) + 4sin(3x)/(3pi) + 4sin(5x)/(5pi) + ... + 4sin((2k-1)x)/((2k-1)pi) + ...,k = 1, 2, 3, ...
$y = \frac{4}{\pi}\sum_{k=1}^{\infty}\frac{\sin\big((2k-1)x\big)}{2k-1}$
squr.py
八、位運算
一、異或:^/__xor__/bitwise_xor,相同爲0,不一樣爲1
0 ^ 0 = 0
0 ^ 1 = 1
1 ^ 0 = 1
1 ^ 1 = 0
判斷a、b是否同號(符號位0表示正數,1表示負數)
if a^b <0 then a和b 異號
二、與:&/__and__/bitwise_and
0 & 0 = 0
0 & 1 = 0
1 & 0 = 0
1 & 1 = 1
1 00000001 - 1 = 00000000
2 00000010 - 1 = 00000001
4 00000100 - 1 = 00000011
8 00001000 - 1 = 00000111
16 00010000 - 1 = 00001111
\------&-----/
|
0
if a & (a-1) == 0 then a 是2的冪
三、移位:
右移:>>/__rshift__/right_shift, >>1至關於/2
左移:<</__lshift__/left_shift, <<1至關於*2
import numpy as np

a = np.array([10, 20, -30, 40, -50])
b = np.array([60, -70, -80, -90, -100])

# XOR of two integers is negative exactly when their sign bits differ.
# Equivalent spellings: a ^ b and a.__xor__(b)
c = np.bitwise_xor(a, b)
print(c)
opposite_signs = c < 0
print(a[opposite_signs], b[opposite_signs])  # [20 40] [-70 -90]

# A positive integer x is a power of two exactly when x & (x - 1) == 0.
d = np.arange(1, 21)
print(d)  # [ 1  2  3 ... 20]
print(d[d & (d - 1) == 0])  # [ 1  2  4  8 16]
print(d[d.__and__(d - 1) == 0])
print(d[np.bitwise_and(d, (d - 1)) == 0])
3、Numpy的子模塊
一、線性代數子模塊(linalg)
一、矩陣的逆矩陣:inv
若矩陣A與矩陣B的乘積是一個單位矩陣,則稱A和B互爲可逆
矩陣:A * A^-1 = E , A必須是方陣(行列數相等)
import numpy as np

# np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
A = np.asmatrix('1 2 3; 8 9 4; 7 6 5')
print(A)
B = np.linalg.inv(A)
print(B)
C = np.linalg.inv(B)  # inverting the inverse gives back A
print(C)
D = A * B  # identity matrix, up to rounding error
print(D)

E = np.asmatrix('0 1 2 3;0 8 9 4;0 7 6 5')
# np.linalg.inv(E) raises LinAlgError ("Last 2 dimensions of the array must
# be square") because E is not square.
# The .I attribute still works on it: for a non-square matrix it returns the
# generalized (pseudo) inverse.
print(E.I * E)
二、解線性方程組
/x-2y+z = 0
|2y-8z-8= 0
\-4x+5y+9z+9=0
/1x-2y+1z = 0
|0x+2y-8z = 8
\-4x+5y+9z= -9
/ 1 -2  1 \   /x\   / 0\
| 0  2 -8 | * |y| = | 8|
\-4  5  9 /   \z/   \-9/
---------- ---- ------
a x b
x = numpy.linalg.lstsq(a,b)[0] ,近似解,若是未知數與方程個數相同獲得的是精確解
x = numpy.linalg.solve(a,b) ,精確解
import numpy as np

# np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
a = np.asmatrix('1 -2 1; 0 2 -8; -4 5 9')
b = np.asmatrix('0;8;-9')

# Exact solution of the square system
x = np.linalg.solve(a, b)
# Least-squares solution; rcond=None selects the machine-precision cutoff
# explicitly and avoids the FutureWarning older NumPy releases emit when
# rcond is omitted.
x1 = np.linalg.lstsq(a, b, rcond=None)[0]
print(x)
print(x1)
三、特徵值和特徵向量
對於一個n階的方陣A,若是存在一個數a和一個非零n維向量(包含n個元素)x,使得Ax=ax成立,則稱a是矩陣A的特徵值,
x是矩陣A屬於特徵值a的特徵向量,一個方陣能夠有任意個特徵值和特徵向量
x z
* x * a z
x z
A A A y
A A A y
A A A y
矩陣Y和矩陣Z相等
numpy.linalg.eig(A) ->[特徵值1 特徵值2...]
[[特徵向量1 特徵向量2...]
... ... ...]
import numpy as np

# np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
A = np.asmatrix('3 -2;1 0')
print(A)

# eigvals[k] is the eigenvalue belonging to the column eigvecs[:, k].
eigvals, eigvecs = np.linalg.eig(A)
print(eigvals, eigvecs, sep='\n')

# Check A * x == a * x for each eigenpair.
a = eigvals[0]
x = eigvecs[:, 0]
print(A * x, a * x, sep='\n')

a = eigvals[1]
x = eigvecs[:, 1]
print(A * x, a * x, sep='\n')

四、奇異值分解
M = U * Sigma * V
| | |
正交矩陣 | 正交矩陣(矩陣與矩陣的轉置相乘爲單位矩陣)(單位矩陣:主對角線全爲1其他爲零)
UU^T=E | VV^T=E
只有主對角線上的元素非零,其餘元素所有爲零,主對角線上的非零元素稱爲矩陣M的奇異值
numpy.linalg.svd(M, full_matrices=False) ->U,奇異值(Sigma的主對角線元素組成的矩陣),V
import numpy as np

# np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
M = np.asmatrix('4 11 14; 8 7 -2')
print(M)

# full_matrices defaults to True. For this M:
#   True  -> V is 3x3:
#     [[-0.33333333 -0.66666667 -0.66666667]
#      [ 0.66666667  0.33333333 -0.66666667]
#      [-0.66666667  0.66666667 -0.33333333]]
#   False -> V keeps only the first two rows:
#     [[-0.33333333 -0.66666667 -0.66666667]
#      [ 0.66666667  0.33333333 -0.66666667]]
U, sv, V = np.linalg.svd(M, full_matrices=False)
print(U, sv, V, sep='\n')

# The rows of U and of V are orthonormal, so both products are identities.
print(U * U.T)
print(V * V.T)

# sv holds only the singular values; rebuild the diagonal matrix Sigma.
Sigma = np.diag(sv)
print(Sigma, type(Sigma))
print(U * Sigma * V)  # reproduces M
五、廣義逆矩陣
廣義逆矩陣是將矩陣求逆的運算法則由方陣推廣到非方陣,只要A*B=E,即便A並不是方陣,仍然能夠稱B爲其廣義逆矩陣。
非方陣的I屬性即其廣義逆矩陣
numpy.linalg.pinv(A) ->A的廣義逆矩陣
import numpy as np

# np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
A = np.asmatrix('11 12 13 14; 20 21 22 15; 19 18 17 16')
print(A)

# np.linalg.inv(A) would raise LinAlgError ("Last 2 dimensions of the array
# must be square") because A is 3x4; pinv computes the generalized inverse.
B = np.linalg.pinv(A)
print(B)

C = A.I  # for a non-square matrix .I gives the same result as pinv
print(C)
print(A * C)  # identity matrix, up to rounding error
六、行列式
二階:
a b
c d
二階行列式:ad - bc
三階:
a b c
d e f
g h i
三階行列式:
a*|e f; h i| - b*|d f; g i| + c*|d e; g h| (其中|p q; r s|表示二階行列式ps-qr)
==> a(ei-fh) - b(di-fg) + c(dh-eg) ==>aei-afh-bdi+bfg+cdh-ceg
numpy.linalg.det(方陣)->行列式的值
import numpy as np

# np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
# 2x2 determinant: 2*4 - 1*3
A = np.asmatrix('2 1; 3 4')
print(A, np.linalg.det(A))

# 3x3 determinant, expanded along the first row
B = np.asmatrix('3 2 1; 4 9 8; 5 6 7')
print(B, np.linalg.det(B))
二、快速傅里葉變換子模塊(fft)
傅里葉定理:
任何知足狄利克雷條件的周期函數,總能夠被分解爲一系列(一般是無窮多個)不一樣幅值、頻率和初相位的正弦函數之和。
f(t) - 時間域
(A幅值,fai初相位)(w頻率) - 頻率域
numpy.fft.fftfreq(採樣數,採樣週期)->頻率數組(Hz爲單位)
numpy.fft.fft(時域信號)->頻域複數數組
numpy.fft.ifft(頻域複數數組)->時域信號
import numpy as np import numpy.fft as nf import matplotlib.pyplot as mp times = np.linspace(0, 2*np.pi,201) sigs1 = 4/(1 * np.pi) * np.sin(1 * times) sigs2 = 4/(3 * np.pi) * np.sin(3 * times) sigs3 = 4/(5 * np.pi) * np.sin(5 * times) sigs4 = 4/(7 * np.pi) * np.sin(7 * times) sigs5 = 4/(9 * np.pi) * np.sin(9 * times) sigs6 = sigs1+sigs2+sigs3+sigs4+sigs5 # 快速傅里葉變換 freqs = nf.fftfreq(times.size, times[1]-times[0]) ffts = nf.fft(sigs6) # 求複數的長度:abs對於實數來講數是求絕對值,對複數就是就模,就是求點到原點的距離 pows = np.abs(ffts) sigs7 = nf.ifft(ffts).real mp.figure('FFT', facecolor='lightgray') mp.subplot(121) mp.title("Time Domain", fontsize=16) mp.xlabel('Time', fontsize=12) mp.ylabel('Signal', fontsize=12) mp.tick_params(labelsize=10) mp.grid(linestyle=':') mp.plot(times, sigs1, label='{:.4f}'.format(1/(2*np.pi))) mp.plot(times, sigs2, label='{:.4f}'.format(3/(2*np.pi))) mp.plot(times, sigs3, label='{:.4f}'.format(5/(2*np.pi))) mp.plot(times, sigs4, label='{:.4f}'.format(7/(2*np.pi))) mp.plot(times, sigs5, label='{:.4f}'.format(9/(2*np.pi))) mp.plot(times, sigs6, label='{:.4f}'.format(1/(2*np.pi))) mp.plot(times, sigs7, label='{:.4f}'.format(1/(2*np.pi)), alpha=0.5, linewidth=6) mp.legend() mp.tight_layout() mp.subplot(122) mp.title("Frequency Domain", fontsize=16) mp.xlabel('Frequency', fontsize=12) mp.ylabel('Power', fontsize=12) mp.tick_params(labelsize=10) mp.grid(linestyle=':') mp.plot(freqs[freqs>=0], pows[freqs>=0], c='orangered', label='Frequency Spectrum') mp.legend() mp.tight_layout() mp.show()
import numpy as np
import numpy.fft as nf
import matplotlib.pyplot as mp
import scipy.io.wavfile as wf


def _panel(position, title=None, xlabel=None, ylabel=None):
    """Select a subplot and apply the shared title/label/tick/grid styling."""
    mp.subplot(position)
    if title is not None:
        mp.title(title, fontsize=16)
    if xlabel is not None:
        mp.xlabel(xlabel, fontsize=12)
    if ylabel is not None:
        mp.ylabel(ylabel, fontsize=12)
    mp.tick_params(labelsize=10)
    mp.grid(linestyle=':')


# Read the noisy recording: sampling rate (samples per second) and samples.
sample_rate, noised_sigs = wf.read('../data/noised.wav')
print(sample_rate)  # sample run: 44100
print(noised_sigs.shape)  # sample run: (220500,)

# Time axis in seconds and the matching frequency axis in Hz.
times = np.arange(len(noised_sigs)) / sample_rate
freqs = nf.fftfreq(times.size, d=1 / sample_rate)

# Spectrum of the noisy signal and the magnitude of each bin.
noised_ffts = nf.fft(noised_sigs)
noised_pows = np.abs(noised_ffts)

# The strongest bin is taken as the fundamental frequency.
print(noised_pows.argmax())  # sample run: 5000
fund_freq = np.abs(freqs[noised_pows.argmax()])

# Zero every bin whose |frequency| differs from the fundamental.
is_noise = np.abs(freqs) != fund_freq
filter_ffts = noised_ffts.copy()
filter_ffts[is_noise] = 0
filter_pows = np.abs(filter_ffts)
print(fund_freq)  # sample run: 1000.0

# Back to the time domain, then save the cleaned signal as 16-bit samples.
filter_sigs = nf.ifft(filter_ffts).real
wf.write('../data/filter.wav', sample_rate, filter_sigs.astype(np.int16))

head = slice(None, 178)  # only the first 178 samples are drawn
non_negative = freqs >= 0

_panel(221, title="Time Domain", ylabel='Signal')
mp.plot(times[head], noised_sigs[head], label='Noised', c='orangered')
mp.legend()

_panel(222, title="Frequency Domain", ylabel='Signal')
# Semi-log axis for the noisy spectrum.
mp.semilogy(freqs[non_negative], noised_pows[non_negative], label='Noised',
            c='limegreen')
mp.legend()

_panel(223, xlabel='Time', ylabel='Signal')
mp.plot(times[head], filter_sigs[head], label='Filter', c='hotpink')
mp.legend()

_panel(224, xlabel='Frequency', ylabel='Power')
mp.plot(freqs[non_negative], filter_pows[non_negative], label='Filter',
        c='dodgerblue')
mp.legend()

mp.tight_layout()
mp.show()
三、隨機數子模塊(random)
一、二項分佈
numpy.random.binomial(n,p,size)->size個隨機數,每一個隨機數來自n次嘗試中成功的次數,其中每次嘗試成功的機率爲p
猜硬幣遊戲:
初始籌碼1000,每輪猜9次,每次猜對的機率0.5,猜對5次以及以上爲贏,籌碼加1,不然爲輸,籌碼減1,問1w輪,籌碼的變化軌跡
numpy.random.binomial(9,0.5,10000)
import numpy as np
import matplotlib.pyplot as mp

# 10000 rounds of 9 guesses each; every entry is the number of correct guesses.
outcomes = np.random.binomial(9, 0.5, 10000)

# A round with 5 or more hits wins one chip, otherwise one chip is lost.
# The stack starts at 1000 and the running total is kept for every round.
steps = np.where(outcomes >= 5, 1, -1)
chips = np.concatenate(([1000], 1000 + np.cumsum(steps)))

mp.figure('Binomial Distribution', facecolor='lightgray')
mp.title('Binomial Distribution', fontsize=20)
mp.xlabel('Round', fontsize=14)
mp.ylabel('Chip', fontsize=14)
mp.tick_params(labelsize=10)
mp.grid(linestyle=":")

# Indices of the opening, highest, lowest and closing positions.
first, peak, trough, last = 0, chips.argmax(), chips.argmin(), chips.size - 1

# Curve colour reflects whether the game ended above, below or at the start.
net_change = chips[last] - chips[first]
if net_change > 0:
    color = 'orangered'
elif net_change < 0:
    color = 'limegreen'
else:
    color = 'dodgerblue'
mp.plot(chips, c=color, label='Chip')

markers = (
    (first, 'deepskyblue', 'Opening'),
    (peak, 'crimson', 'Highest'),
    (trough, 'seagreen', 'Lowest'),
    (last, 'orange', 'Closing'),
)
# Horizontal guide line through each of the four key values.
for index, shade, _ in markers:
    mp.axhline(y=chips[index], linestyle='--', color=shade, linewidth=1)
# Mark the four key points themselves.
for index, shade, caption in markers:
    mp.scatter(index, chips[index], edgecolors=shade, s=60,
               label='%s:%d' % (caption, chips[index]), zorder=3)
mp.legend()
mp.show()
二、超幾何分佈
numpy.random.hypergeometric(ngood,nbad,nsample,size)->size個隨機數,每一個隨機數來自隨機抽取的nsample個樣本中的好樣本數,
總樣本由ngood個好樣本和nbad個壞樣本組成
摸球遊戲:
將25個好球和一個壞球放在一塊兒,每輪隨機摸3個球,全爲好球加一分,否則減6分,問100輪,分值變化軌跡
numpy.random.hypergeometric(25,1,3,100)
import numpy as np
import matplotlib.pyplot as mp

# 100 rounds: draw 3 balls from 25 good + 1 bad; each entry is the number of
# good balls drawn in that round.
outcomes = np.random.hypergeometric(25, 1, 3, 100)

# Three good balls score +1, anything else costs 6 points.
# The score starts at 0 and the running total is kept for every round.
steps = np.where(outcomes == 3, 1, -6)
scores = np.concatenate(([0], np.cumsum(steps)))

mp.figure('Hypergeometric Distribution', facecolor='lightgray')
mp.title('Hypergeometric Distribution', fontsize=20)
mp.xlabel('Round', fontsize=14)
mp.ylabel('Score', fontsize=14)
mp.tick_params(labelsize=10)
mp.grid(linestyle=":")

# Indices of the opening, highest, lowest and closing positions.
first, peak, trough, last = 0, scores.argmax(), scores.argmin(), scores.size - 1

# Curve colour reflects whether the game ended above, below or at the start.
if scores[first] < scores[last]:
    color = 'orangered'
elif scores[first] > scores[last]:
    color = 'limegreen'
else:
    color = 'dodgerblue'
# The legend entry is 'Score' to match the y-axis; this curve plots scores,
# not chips.
mp.plot(scores, c=color, label='Score')

markers = (
    (first, 'deepskyblue', 'Opening'),
    (peak, 'crimson', 'Highest'),
    (trough, 'seagreen', 'Lowest'),
    (last, 'orange', 'Closing'),
)
# Horizontal guide line through each of the four key values.
for index, shade, _ in markers:
    mp.axhline(y=scores[index], linestyle='--', color=shade, linewidth=1)
# Mark the four key points themselves.
for index, shade, caption in markers:
    mp.scatter(index, scores[index], edgecolors=shade, s=60,
               label='%s:%d' % (caption, scores[index]), zorder=3)
mp.legend()
mp.show()
三、標準正態分佈(平均值爲0,標準差爲1)
numpy.random.normal(size=size) -> size個服從標準正態分佈的隨機數
import numpy as np
import matplotlib.pyplot as mp

# Draw 10000 samples from the standard normal distribution.
samples = np.random.normal(size=10000)

mp.figure('Normal Distribution', facecolor='lightgray')
mp.title('Normal Distribution', fontsize=20)
mp.xlabel('Sample', fontsize=14)
mp.ylabel('Occurence', fontsize=14)
mp.tick_params(labelsize=10)
mp.grid(axis='y', linestyle=":")

# Histogram with 100 bins. density=True scales the bars to unit total area so
# they can be compared with the probability density curve drawn below.
# (The former `normed=True` keyword is no longer accepted by Matplotlib.)
# Element [1] of hist()'s return value is the array of bin edges.
bins = mp.hist(samples, 100, density=True, edgecolor='steelblue',
               facecolor='deepskyblue', label='Normal')[1]

# Standard normal probability density evaluated at every bin edge.
probs = np.exp(-bins ** 2 / 2) / np.sqrt(2 * np.pi)
mp.plot(bins, probs, 'o-', c='orangered', label='Probability')
mp.legend()
mp.show()
4、numpy的專用函數
一、聯合間接排序
a數組:b數組: c:
張三 27 0 170
李四 22 1 165
王五 25 2 175
趙六 22 3 158
... ...
直接排序:22 22 25 27
間接排序:3 1 2 0 ——>有序下標
\ /
-----
聯合參考序列c的升序
numpy.lexsort((參考序列c,待排序列b))->待排序列中的有序下標
numpy.sort_complex(複數數組)->有序複數數組,按照實部升序,實部相同參考虛部升序
max/min/argmax/argmin將數組中的無效值nan視做正負無窮大,既是最大值也是最小值
nanmax/nanmin/nanargmax/nanargmin在排除數組中的無效值以後,計算其最大值和最小值
numpy.searchsorted(有序數組,被插入數組)——>將被插入數組中的元素插入到有序數組中,不改變其有序性的位置數組。
numpy.insert(原數組,位置數組,被插入數組)->將被插入數組中元素插入到原數組由位置數組所標記的位置處,返回插入後的結果
import numpy as np

# --- Joint indirect sort -------------------------------------------------
# b is the primary key; c breaks ties between equal entries of b.
a = np.array(['Z3', 'L4', 'W5', 'Z6'])
b = np.array([27, 22, 25, 22])
c = np.array([170, 165, 175, 158])
order = np.lexsort((c, b))
print(order)
print(a.take(order))  # a rearranged by the sorted indices

# --- Complex sort --------------------------------------------------------
# Packing b into the real part and c into the imaginary part gives the same
# ordering: real part first, imaginary part for ties.
d = b + c * 1j
print(d)
e = np.sort_complex(d)
print(e)

# --- NaN-aware extrema ---------------------------------------------------
f = np.array([13, 11, np.nan, 19, 17])
print(np.nanmax(f), np.nanmin(f))  # NaN ignored: 19.0 11.0
print(np.nanargmax(f), np.nanargmin(f))  # NaN ignored: 3 1

# --- Sorted insertion ----------------------------------------------------
# searchsorted finds where each element of h must go to keep g sorted;
# insert then places the elements of h at those positions.
g = np.array([1, 2, 4, 5, 6, 8, 9])
h = np.array([7, 3])
i = g.searchsorted(h)
print(i)  # [5 2]
j = np.insert(g, i, h)
print(j)
二、插值
import scipy.interpolate as si
一維插值器 = si.interp1d(離散樣本水平座標,離散樣本垂直座標,kind=‘插值算法’)
插值樣本垂直座標 = 一維插值器(插值樣本水平座標)
插值算法:
一、linear,缺省,線性插值
二、cubic,三次樣條插值
import numpy as np
import scipy.interpolate as si
import matplotlib.pyplot as mp

min_x, max_x = -2.5, 2.5

# Densely sampled sinc curve, drawn as the "continuous" reference.
con_x = np.linspace(min_x, max_x, 1001)
con_y = np.sinc(con_x)

# The 11 discrete samples that both interpolators are built from.
dis_x = np.linspace(min_x, max_x, 11)
dis_y = np.sinc(dis_x)

# Linear interpolator (interp1d's default kind), evaluated on 51 points.
linear = si.interp1d(dis_x, dis_y)
lin_x = np.linspace(min_x, max_x, 51)
lin_y = linear(lin_x)

# Cubic interpolator, evaluated on the same 51-point grid.
cubic = si.interp1d(dis_x, dis_y, kind='cubic')
cub_x = np.linspace(min_x, max_x, 51)
cub_y = cubic(cub_x)

mp.figure('Interpolation', facecolor='lightgray')

# Top-left: the reference curve.
mp.subplot(221)
mp.title('Interpolation', fontsize=16)
mp.ylabel('y', fontsize=12)
mp.tick_params(labelsize=10)
mp.grid(linestyle=':')
mp.plot(con_x, con_y, c='hotpink', label='Continuous')
mp.legend()

# Top-right: the discrete samples only.
mp.subplot(222)
mp.title('Discrete', fontsize=16)
mp.tick_params(labelsize=10)
mp.grid(linestyle=':')
mp.scatter(dis_x, dis_y, c='orangered', label='Discrete')
mp.legend()

# Bottom-left: linear interpolation over the samples.
mp.subplot(223)
mp.title('Linear', fontsize=16)
mp.xlabel('x', fontsize=12)
mp.ylabel('y', fontsize=12)
mp.tick_params(labelsize=10)
mp.grid(linestyle=':')
# Legend entry is 'Linear' (this curve is the linear interpolation, not the
# continuous reference), matching the 'Cubic' entry of the next panel.
mp.plot(lin_x, lin_y, 'o-', c='limegreen', label='Linear')
mp.scatter(dis_x, dis_y, c='orangered', label='Discrete', zorder=3, s=60)
mp.legend()

# Bottom-right: cubic interpolation over the samples.
mp.subplot(224)
mp.title('Cubic', fontsize=16)
mp.xlabel('x', fontsize=12)
mp.tick_params(labelsize=10)
mp.grid(linestyle=':')
mp.plot(cub_x, cub_y, 'o-', c='dodgerblue', label='Cubic')
mp.scatter(dis_x, dis_y, c='orangered', label='Discrete', zorder=3, s=60)
mp.legend()

mp.show()
三、定積分
import scipy.integrate as si
si.quad(積分函數,積分下限,積分上限)[0]->積分結果
$\int_a^b f(x)\,dx$
import numpy as np
import matplotlib.pyplot as mp
import matplotlib.patches as mc
import scipy.integrate as si


def f(x):
    """Integrand: the parabola 2x^2 + 3x + 4."""
    return 2 * x ** 2 + 3 * x + 4


a, b = -5, 5

# Fine grid used to draw the curve itself.
x1 = np.linspace(a, b, 1001)
y1 = f(x1)

# Coarse grid: n trapezoids between a and b.
n = 50
x2 = np.linspace(a, b, n + 1)
y2 = f(x2)

# Trapezoid rule: add up (sum of parallel sides) * width / 2 for every strip,
# walking over neighbouring grid points pairwise.
area = 0
for left, right, y_left, y_right in zip(x2[:-1], x2[1:], y2[:-1], y2[1:]):
    area += (y_left + y_right) * (right - left) / 2
print(area)  # sample run: 206.8

# Same area from scipy's quad; element [0] is the integral value.
area = si.quad(f, a, b)[0]
print(area)  # sample run: 206.66666666666669

mp.figure('Intergral', facecolor='lightgray')
mp.title('Intergral', fontsize=16)
mp.xlabel('x', fontsize=14)
mp.ylabel('y', fontsize=14)
mp.tick_params(labelsize=10)
mp.grid(linestyle=':')
mp.plot(x1, y1, c='orangered', linewidth=8, label=r'$y=2x^2+3x+4$', zorder=0)

# Draw each trapezoid from its four corners (base-left, top-left, top-right,
# base-right).
axes = mp.gca()
for left, right, y_left, y_right in zip(x2[:-1], x2[1:], y2[:-1], y2[1:]):
    corners = [[left, 0], [left, y_left], [right, y_right], [right, 0]]
    axes.add_patch(
        mc.Polygon(corners, fc='deepskyblue', ec='dodgerblue', alpha=0.5))
mp.legend()
mp.show()
四、金融計算
import numpy as np

# NumPy no longer ships the financial helpers this demo originally called
# (np.fv, np.pv, np.npv, np.irr, np.pmt, np.nper, np.rate). The small helpers
# below implement the same time-value-of-money formulas with plain NumPy.
# They assume payments fall at the END of each period and a non-zero rate.
# Sign convention: money paid out is negative, money received is positive.


def _fv(rate, periods, payment, present):
    """Future value of `present` plus a `payment` made every period."""
    growth = (1 + rate) ** periods
    return -(present * growth + payment * (growth - 1) / rate)


def _pv(rate, periods, payment, future=0):
    """Present value needed to reach `future` given a periodic `payment`."""
    growth = (1 + rate) ** periods
    return -(future + payment * (growth - 1) / rate) / growth


def _npv(rate, cashflows):
    """Net present value; the first cash flow is at time 0 (undiscounted)."""
    return sum(flow / (1 + rate) ** period
               for period, flow in enumerate(cashflows))


def _irr(cashflows):
    """Rate at which the net present value of `cashflows` is zero.

    NPV is a polynomial in x = 1 / (1 + rate); its positive real roots are
    converted back to rates and the one closest to zero is returned.
    Returns NaN when no such root exists.
    """
    roots = np.roots(cashflows[::-1])
    usable = roots[(roots.imag == 0) & (roots.real > 0)].real
    if usable.size == 0:
        return float('nan')
    rates = 1 / usable - 1
    return float(rates[np.argmin(np.abs(rates))])


def _pmt(rate, periods, present, future=0):
    """Constant payment per period that takes `present` to `future`."""
    growth = (1 + rate) ** periods
    return -(future + present * growth) * rate / (growth - 1)


def _nper(rate, payment, present, future=0):
    """Number of periods needed to take `present` to `future`."""
    scaled = payment / rate
    return float(np.log((scaled - future) / (scaled + present))
                 / np.log(1 + rate))


def _rate(periods, payment, present, future, guess=0.1, tol=1e-6, maxiter=100):
    """Per-period interest rate, found by Newton iteration.

    Solves future + present*(1+r)^n + payment*((1+r)^n - 1)/r = 0 for r.
    Returns NaN if the iteration does not converge within `maxiter` steps.
    """
    r = guess
    for _ in range(maxiter):
        t1 = (1 + r) ** periods
        t2 = (1 + r) ** (periods - 1)
        value = future + t1 * present + payment * (t1 - 1) / r
        slope = (periods * t2 * present
                 - payment * (t1 - 1) / r ** 2
                 + periods * payment * t2 / r)
        step = value / slope
        r -= step
        if abs(step) < tol:
            return float(r)
    return float('nan')


# Future value: deposit 1000 at 1% for 5 years, adding 100 each year.
fv = _fv(0.01, 5, -100, -1000)
print(round(fv, 2))  # 1561.11

# Present value: how much must be deposited (with 100 added each year) to end
# up with fv after 5 years at 1%.
pv = _pv(0.01, 5, -100, fv)
print(pv)  # approximately -1000.0

# Net present value: the single up-front deposit equivalent to depositing
# 1000 now and 100 at the end of each of the next 5 years.
npv = _npv(0.01, [-1000, -100, -100, -100, -100, -100])
print(round(npv, 2))  # -1485.34
fv = _fv(0.01, 5, 0, npv)
print(round(fv, 2))  # 1561.11

# Internal rate of return: deposit 1000, then withdraw 100..500 over 5 years;
# the rate at which the account is exactly empty after the last withdrawal,
# i.e. the rate that makes the net present value zero.
irr = _irr([-1000, 100, 200, 300, 400, 500])
print(round(irr, 2))  # 0.12
npv = _npv(irr, [-1000, 100, 200, 300, 400, 500])
print(round(npv, 2))  # approximately 0.0

# Payment per period: borrow 1000 at 1% and repay it in 5 equal instalments.
pmt = _pmt(0.01, 5, 1000)
print(round(pmt, 2))  # -206.04

# Number of periods: how long it takes to repay 1000 at 1% paying pmt a year.
nper = _nper(0.01, pmt, 1000)
print(nper)  # approximately 5

# Rate: the interest rate implied by repaying 1000 with pmt over nper years.
rate = _rate(nper, pmt, 1000, 0)
print(rate)  # approximately 0.01
暱稱:
退出 訂閱評論
[Ctrl+Enter快捷鍵提交]
【活動】華爲雲普惠季 1折秒殺 狂歡繼續
【工具】SpreadJS純前端表格控件,可嵌入應用開發的在線Excel
【騰訊雲】拼團福利,AMD雲服務器8元/月
· Python數據分析I
· [學習筆記] [數據分析] 01.Python入門
· linux下安裝numpy,pandas,scipy,matplotlib,scikit-learn
· Numpy數組優點
· windows下安裝python numpy+scipy+matlotlib+scikit-learn等流行庫
· 微軟發佈下一代神經機器翻譯技術
· 小米在英國「翻車」,國內運營套路在海外惹衆怒
· 黑莓宣佈以14億美圓收購美國AI網絡安全公司Cylance
· 阿里巴巴盯上巴基斯坦 將成繼印度後又一全球投資熱土?
· 「自動關注」下的畸形粉絲經濟 起底聚合支付灰產鏈
» 更多新聞...