可以到個人 GitHub 看原文,歡迎交流。
https://github.com/AsuraDong/Blog/blob/master/Articles/%E6%9C%BA%E5%99%A8%E5%AD%A6%E4%B9%A0/numpy%E6%95%B0%E7%BB%84%E3%80%81%E5%90%91%E9%87%8F%E3%80%81%E7%9F%A9%E9%98%B5%E8%BF%90%E7%AE%97.md
import numpy as np
import pandas as pd
# Build arrays from plain Python lists.
data1 = [6, 7.5, 8, 0, 1]
arr1 = np.array(data1)  # a flat list gives a 1-D array
print(arr1)
data2 = [data1, data1]
arr2 = np.array(data2)  # a list of lists gives a multi-dimensional array
print(arr2)
[ 6. 7.5 8. 0. 1. ] [[ 6. 7.5 8. 0. 1. ] [ 6. 7.5 8. 0. 1. ]]
# Dimensions of each array.
print(arr1.shape, arr2.shape)
# Element type of each array.
print(arr1.dtype, arr2.dtype)
(5,) (2, 5) float64 float64
print(np.zeros(10))  # array filled with zeros
# np.empty only allocates memory: the contents are whatever happened to be
# there, so the zeros that may show up in the output are not guaranteed.
print(np.empty([2, 3, 2], dtype=float))
[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] [[[ 0. 0.] [ 0. 0.] [ 0. 0.]] [[ 0. 0.] [ 0. 0.] [ 0. 0.]]]
# np.arange is similar to the built-in range, but more powerful:
# it produces an ndarray.
print(np.arange(0, 15))
[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14]
print(np.diag((1, 2, 3)))  # 1-D input: build a diagonal matrix
x = np.arange(10, 19).reshape((3, 3))
print(x)
print(np.diag(x))  # 2-D input: extract the main diagonal
print(np.diag(x, 1))  # offset 1: the diagonal just above the main one
[[1 0 0] [0 2 0] [0 0 3]] [[10 11 12] [13 14 15] [16 17 18]] [10 14 18] [11 15]
arr1 = np.array([1, 2, 3], dtype=np.float64)  # declare the dtype explicitly at creation
print(arr1.dtype)
float64
# Type conversion with astype. The np.int / np.float aliases were removed in
# NumPy 1.24 and np.string_ in NumPy 2.0, so the builtins int / float and
# np.bytes_ are used instead.
int_arr1 = arr1.astype(int)
# float -> int drops the fractional part (truncates toward zero); the printed
# dtype is the platform's default integer (int32 or int64).
print(int_arr1.dtype)
strintArr = np.array(['15', '1.6'], dtype=np.bytes_)
print(strintArr)
print(strintArr.astype(float))  # byte strings holding numbers are parsed into floats
int32 [b'15' b'1.6'] [ 15. 1.6]
不用再寫循環了,運算符號會被映射到每一個元素(element)。
注意:如果需要一份拷貝(副本)而不是視圖,需要調用 arr.copy() 或 arr[索引].copy()。
arr = np.array([[1, 2, 3], [4, 5, 6]])
print(arr)
print(arr[0])  # first row
print(arr[0, 2])  # element at position (0, 2)
print(arr[:, 1])  # second column
[[1 2 3] [4 5 6]] [1 2 3] 3 [2 5]
注意:組合多個布爾條件時要用 |(或 &),而不是 or(或 and)運算。
# One label per row of `data`; the labels are used below to build boolean masks.
names = np.array(['a', 'b', 'c', 'd', 'e', 'f', 'g'])
data = np.random.randn(7, 4)
print(names)
print(data)
['a' 'b' 'c' 'd' 'e' 'f' 'g'] [[ 1.19251264 0.22746816 -1.05968475 0.36553691] [-0.87272129 0.15983765 1.05599441 -0.34609556] [-0.09671786 -0.19573923 -0.83697376 -1.07516871] [-1.91951334 0.29178043 0.77103957 -0.00420115] [ 0.86798399 0.25577025 0.34729878 -0.35262573] [-1.7260201 0.45041534 0.41955063 -0.05338469] [-1.28550254 -0.21678863 0.52706647 -0.32517928]]
# Boolean indexing: names == 'b' is a mask that selects the matching rows.
print(data[names == 'b'])
# Note the shape: a mask may match any number of rows, so the result stays
# 2-D even when exactly one row matches.
print(data[names == 'b', :])
[[-0.87272129 0.15983765 1.05599441 -0.34609556]] [[-0.87272129 0.15983765 1.05599441 -0.34609556]]
# Combine element-wise conditions with | ; each comparison needs its own parentheses.
mask = (names == 'a') | (names == 'b')
print(mask)
[ True True False False False False False]
注意 [] 的嵌套
arr = np.empty((8, 4))
for i in range(8):
    arr[i] = i  # the scalar is broadcast across the whole row
print(arr)
[[ 0. 0. 0. 0.] [ 1. 1. 1. 1.] [ 2. 2. 2. 2.] [ 3. 3. 3. 3.] [ 4. 4. 4. 4.] [ 5. 5. 5. 5.] [ 6. 6. 6. 6.] [ 7. 7. 7. 7.]]
print(arr[4, 3])  # a single element: row 4, column 3
print(arr[[4, 3]])  # a list index selects whole rows: row 4, then row 3
4.0 [[ 4. 4. 4. 4.] [ 3. 3. 3. 3.]]
arr = np.arange(32).reshape((8, 4))
print(arr)
print(arr.shape)
[[ 0 1 2 3] [ 4 5 6 7] [ 8 9 10 11] [12 13 14 15] [16 17 18 19] [20 21 22 23] [24 25 26 27] [28 29 30 31]] (8, 4)
# Two index lists are paired element-wise: picks (1,0), (5,3), (7,1), (2,2).
print(arr[[1, 5, 7, 2], [0, 3, 1, 2]])
# Select the rows first, then reorder the columns: gives a 4x4 block.
print(arr[[1, 5, 7, 2]][:, [0, 3, 1, 2]])
[ 4 23 29 10] [[ 4 7 5 6] [20 23 21 22] [28 31 29 30] [ 8 11 9 10]]
a = [0, 1, 3]
print(np.tile(a, 2))  # repeat the sequence twice along its only axis
print(np.tile(a, (2, 1)))  # becomes a 2-D array: 2 copies as rows, 1 copy per row
a = [[0, 1, 3], [5, 9, 8]]
print(np.tile(a, 3))  # each row is repeated three times along the last axis
[0 1 3 0 1 3] [[0 1 3] [0 1 3]] [[0 1 3 0 1 3 0 1 3] [5 9 8 5 9 8 5 9 8]]
# Three arguments: start point, end point, and the number of samples.
a = np.linspace(1, 10, 10)
print(a)
# The endpoint argument decides whether the end value is included;
# when it is not given, the default is True.
a = np.linspace(1, 10, 10, endpoint=False)
print(a)
[ 1. 2. 3. 4. 5. 6. 7. 8. 9. 10.] [ 1. 1.9 2.8 3.7 4.6 5.5 6.4 7.3 8.2 9.1]
# Samples are base ** exponent with evenly spaced exponents (the base defaults to 10).
a = np.logspace(0, 0, 10)  # every exponent is 0, so every sample is 1
print(a)
a = np.logspace(0, 9, 10, base=2)  # 2**0 ... 2**9
print(a)
[ 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] [ 1. 2. 4. 8. 16. 32. 64. 128. 256. 512.]
元素級,是針對每一個element。而且,函數分爲一元(一個參數)和二元(兩個參數)。
**二元的func:** ![](../../Images/機器學習/numpy數組、向量、矩陣運算/6.png)
# Unary element-wise functions.
arr = np.arange(10)
print(arr)
print(np.exp(arr))
print(np.sqrt(arr))
print(np.modf(arr))  # splits each element into its fractional and integral parts
[0 1 2 3 4 5 6 7 8 9] [ 1.00000000e+00 2.71828183e+00 7.38905610e+00 2.00855369e+01 5.45981500e+01 1.48413159e+02 4.03428793e+02 1.09663316e+03 2.98095799e+03 8.10308393e+03] [ 0. 1. 1.41421356 1.73205081 2. 2.23606798 2.44948974 2.64575131 2.82842712 3. ] (array([ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]), array([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9.]))
# Binary element-wise function.
x = np.random.randn(8)
y = np.random.randn(8)
print("x:", x, ";\ny:", y)
print(np.maximum(x, y))  # element-wise maximum of the two arrays
x: [-0.95850365 -1.04703965 1.1886987 0.56798014 -0.45694937 0.15069229 -0.40014771 1.34484067] ; y: [ 0.13101485 -1.85842918 0.41106248 1.0749786 0.1840417 -0.26923493 0.76304996 1.78898707] [ 0.13101485 -1.04703965 1.1886987 1.0749786 0.1840417 0.15069229 0.76304996 1.78898707]
xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
cond = np.array([True, False, True, True, False])
# The old way: a pure-Python conditional for every element.
result = [(x if c else y) for x, y, c in zip(xarr, yarr, cond)]
print(result)
# The vectorized way: take from xarr where cond is True, otherwise from yarr.
result = np.where(cond, xarr, yarr)
print(result)
[1.1000000000000001, 2.2000000000000002, 1.3, 1.3999999999999999, 2.5] [ 1.1 2.2 1.3 1.4 2.5]
arr = np.random.randn(4, 4)
print(arr)
print(np.where(arr > 0, 2, -2))  # 2 where the value is greater than 0, -2 everywhere else
[[ 0.59503627 0.53821458 -0.46326505 -0.53595914] [-0.25468951 -0.44695157 0.46815475 0.77838542] [-0.3850164 0.39158841 0.11306119 -0.44023648] [ 0.74045044 -0.2408556 -1.01937278 0.32737532]] [[ 2 2 -2 -2] [-2 -2 2 2] [-2 2 2 -2] [ 2 -2 -2 2]]
既可以當作數組的方法調用,也可以當作頂級 NumPy 函數調用。
# sum/mean and similar functions are aggregations.
# cumsum/cumprod do not aggregate: they produce an array made up of the
# intermediate results.
arr = np.random.rand(5, 4)
print(arr.mean())  # mean over all elements
print(arr.mean(axis=1))  # one mean per row
print(np.mean(arr))  # same results through the top-level function
print(np.mean(arr, axis=1))
0.486269616896 [ 0.26406933 0.68176035 0.54759211 0.69419806 0.24372824] 0.486269616896 [ 0.26406933 0.68176035 0.54759211 0.69419806 0.24372824]
# With no axis (the default, spelled out here) the array is flattened first,
# so the running total comes back as a 1-D array.
print(np.cumsum(arr, axis=None))
[ 0.08485774 0.41551307 0.83665035 1.05627732 2.02498055 2.31023776 2.78723692 3.78331871 4.49977774 5.05458786 5.6577695 5.97368714 6.43269243 7.34869806 7.93297889 8.75047938 8.89718805 8.99359637 9.61767525 9.72539234]
注意:對於上面這些方法,bool型會被自動轉化爲1/0
arr = np.random.randn(100)
print((arr > 0).sum())  # True counts as 1, so this is the number of positive values
50
boolArr = np.array([False, False, True, False])
print(boolArr.any())  # True when at least one element is True
print(boolArr.all())  # True only when every element is True
True False
調用後,原來的arr就改變了
arr = np.random.randn(8)
print(arr)
arr.sort()  # sorts in place: the original array itself is changed
print(arr)
arr = np.random.randn(5, 3)
print(arr)
arr.sort(1)  # sort along axis 1, i.e. within each row
print(arr)
[ 0.65805019 1.1477457 -0.13527229 -0.36565657 -0.22426346 -0.5084691 0.57423863 1.67541665] [-0.5084691 -0.36565657 -0.22426346 -0.13527229 0.57423863 0.65805019 1.1477457 1.67541665] [[-0.8629666 -1.14722197 -0.23675518] [ 0.06331798 0.44684519 0.36401337] [ 0.71542329 0.70223347 -0.72877248] [ 1.03857508 0.40552221 1.97313196] [ 0.54667853 -0.45607286 -0.82201063]] [[-1.14722197 -0.8629666 -0.23675518] [ 0.06331798 0.36401337 0.44684519] [-0.72877248 0.70223347 0.71542329] [ 0.40552221 1.03857508 1.97313196] [-0.82201063 -0.45607286 0.54667853]]
位於 np.random 中。速度比 Python 原生的 random 快上幾個數量級;除此以外,還做了 array 的擴充,否則用原生的 random 進行數組運算會報錯。
# Pure-Python version of a 1000-step random walk.
import random

position = 0
walk = [position]
steps = 1000
for _ in range(steps):
    step = 1 if random.randint(0, 1) else -1
    position += step
    walk.append(position)

# NumPy version of the same walk.
steps = 1000
# size can also be a 2-D shape, which produces many random walks at once.
draws = np.random.randint(0, 2, size=steps)
steps = np.where(draws > 0, 1, -1)  # map the 0/1 draws to -1/+1 steps
walk = steps.cumsum()  # running total of the steps
# print(walk.min(),walk.max())
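np.save('name', arr):如果文件名沒有 .npy 後綴,會自動加上
np.load('name.npy'):讀取時需要寫出完整的文件名(包括 .npy 後綴)
np.savez('name.npz', a=arr1, b=arr2, ...):把多個數組保存到一個 .npz 歸檔文件中(不壓縮;需要壓縮時用 np.savez_compressed)。
np.load('name.npz'):這時候,返回一個類似字典的對象。鍵是上面 savez 的關鍵字參數名(a、b…)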
xarr = np.arange(10)
yarr = np.arange(50)
# NOTE(review): this saves `arr` (defined earlier in the script), not `xarr`,
# under the file name 'xarr' -- confirm that this is intended.
np.save('xarr', arr)  # written to disk as xarr.npy
print(np.load('xarr.npy'))
[[-1.14722197 -0.8629666 -0.23675518] [ 0.06331798 0.36401337 0.44684519] [-0.72877248 0.70223347 0.71542329] [ 0.40552221 1.03857508 1.97313196] [-0.82201063 -0.45607286 0.54667853]]
# Store several arrays in one .npz file; the keyword names become the keys.
np.savez('t.npz', x=xarr, y=yarr)
t = np.load('t.npz')
print(t['x'])
print(t['y'])
[0 1 2 3 4 5 6 7 8 9] [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49]