文章發佈於公號【數智物語】 (ID:decision_engine),關注公號不錯過每一篇乾貨。
來源 | Python與算法之美(id:Python_Ai_Road)
01 準備數據集
採用的數據集是sklearn中的breast cancer數據集,30維特徵,569個樣本。訓練前進行MinMax標準化縮放至[0,1]區間。按照75/25比例劃分成訓練集和驗證集。
# 獲取數據集
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
# Load the breast-cancer dataset bundled with scikit-learn.
breast = datasets.load_breast_cancer()
target = breast.target

# Rescale every feature column to the [0, 1] interval.
# NOTE(review): the scaler is fit on the full dataset before the split below,
# so the held-out rows contribute to the min/max used for scaling.
scaler = preprocessing.MinMaxScaler()
data = scaler.fit_transform(breast.data)

# 75% train / 25% validation (0.25 is also train_test_split's default).
# NOTE(review): no random_state is passed, so the split differs on every run.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    target,
    test_size=0.25,
)
02 模型結構圖
03 正反傳播公式
04 NN實現代碼
import numpy as np
import pandas as pd
# Activation functions and their derivatives.
# Each one takes the pre-activation array z and works element-wise.
def ReLu(z):
    """Return max(0, z) element-wise."""
    return np.maximum(0.0, z)


def d_ReLu(z):
    """Return the ReLu derivative: 0 where z < 0, otherwise 1."""
    return np.where(z < 0, 0, 1)


def LeakyReLu(z):
    """Return max(0.01 * z, z) element-wise."""
    return np.maximum(0.01 * z, z)


def d_LeakyReLu(z):
    """Return the LeakyReLu derivative: 0.01 where z < 0, otherwise 1."""
    return np.where(z < 0, 0.01, 1)


def Sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)) element-wise.

    Evaluated as exp(-log(1 + exp(-z))) through np.logaddexp, so a very
    negative z no longer overflows inside exp(-z).
    """
    return np.exp(-np.logaddexp(0.0, -z))


def d_Sigmoid(z):
    """Return the Sigmoid derivative a * (1 - a), with a = Sigmoid(z)."""
    a = Sigmoid(z)
    return a * (1 - a)


Tanh = np.tanh


def d_Tanh(z):
    """Return the Tanh derivative 1 - a**2, with a = Tanh(z)."""
    return 1 - Tanh(z) ** 2


# Hidden-layer activation name -> (function, derivative).
# Replaces the former eval() lookup of the name passed as ``gfunc``.
_ACTIVATIONS = {
    'ReLu': (ReLu, d_ReLu),
    'LeakyReLu': (LeakyReLu, d_LeakyReLu),
    'Sigmoid': (Sigmoid, d_Sigmoid),
    'Tanh': (Tanh, d_Tanh),
}

# Output probabilities are clipped to [_EPS, 1 - _EPS] before taking logs in
# the loss, so a saturated output cannot produce log(0).
_EPS = 1e-12


class NNClassifier(object):
    """Fully connected binary classifier trained by batch gradient descent.

    Hidden layers share one activation (``gfunc``); the output layer is always
    Sigmoid and the loss is binary cross-entropy.

    Args:
        n: Node count per layer, input layer first. ``n[0]`` is a placeholder
            that ``fit`` replaces with the number of input features. The
            sequence is copied, so the caller's object is never modified.
        alpha: Learning rate.
        ITERNUM: Number of gradient-descent iterations.
        gfunc: Hidden-layer activation name: 'ReLu', 'LeakyReLu', 'Sigmoid'
            or 'Tanh'.

    Attributes:
        dfJ: DataFrame with one column 'J' holding the loss per iteration.
        W, b: Per-layer weights and biases after ``fit`` (index 0 is unused
            and holds nan).

    Raises:
        ValueError: If ``gfunc`` is not a known activation name or ``n`` has
            fewer than two layers.
    """

    def __init__(self, n=(np.nan, 5, 5, 1), alpha=0.1, ITERNUM=50000, gfunc='ReLu'):
        if gfunc not in _ACTIVATIONS:
            raise ValueError(
                'unknown gfunc {!r}; expected one of {}'.format(
                    gfunc, sorted(_ACTIVATIONS)))
        layer_sizes = list(n)  # private copy: never alias the default/caller's sequence
        if len(layer_sizes) < 2:
            raise ValueError('n must describe at least an input and an output layer')

        self.n = layer_sizes
        self.gfunc = gfunc
        self.alpha, self.ITERNUM = alpha, ITERNUM
        self.dfJ = pd.DataFrame(data=np.zeros((ITERNUM, 1)), columns=['J'])
        self.W, self.b = np.nan, np.nan

        hidden, d_hidden = _ACTIVATIONS[gfunc]
        # One activation per layer: hidden everywhere, Sigmoid on the output
        # layer, and nothing (nan) on the input layer.
        self.g = [hidden for _ in layer_sizes]
        self.g[-1] = Sigmoid
        self.g[0] = np.nan
        # Derivative of the hidden-layer activation, used in back-propagation.
        self.d_gfunc = d_hidden

    def fit(self, X_train, y_train):
        """Train on ``X_train`` (samples x features) with 0/1 labels ``y_train``.

        Stores the learned parameters in ``self.W`` / ``self.b`` and the
        per-iteration loss in ``self.dfJ``.
        """
        X = np.asarray(X_train).T              # features x samples
        Y = np.asarray(y_train).reshape(1, -1)
        m = X.shape[1]                         # number of samples

        n = list(self.n)
        n[0] = X.shape[0]                      # input width comes from the data
        self.n = n
        last = len(n) - 1                      # index of the output layer

        # Parameters: small random weights, zero biases (index 0 unused).
        W = [np.nan] + [np.random.randn(n[i], n[i - 1]) * 0.01
                        for i in range(1, len(n))]
        b = [np.nan] + [np.zeros((n[i], 1)) for i in range(1, len(n))]

        # Per-layer activations, pre-activations and gradients.
        A = [X] + [None] * last
        Z = [None] * len(n)
        dZ = [None] * len(n)
        dW = [None] * len(n)
        db = [None] * len(n)
        costs = np.zeros(self.ITERNUM)

        for k in range(self.ITERNUM):
            # ---- forward pass ----
            for i in range(1, len(n)):
                Z[i] = np.dot(W[i], A[i - 1]) + b[i]
                A[i] = self.g[i](Z[i])

            # Binary cross-entropy, with probabilities clipped away from 0/1.
            P = np.clip(A[last], _EPS, 1 - _EPS)
            costs[k] = -np.sum(Y * np.log(P) + (1 - Y) * np.log(1 - P)) / m

            # ---- backward pass ----
            # Sigmoid + cross-entropy gives dJ/dZ = (A - Y) / m directly.
            dZ[last] = (A[last] - Y) / m
            dW[last] = np.dot(dZ[last], A[last - 1].T)
            db[last] = np.sum(dZ[last], axis=1, keepdims=True)
            for i in range(last - 1, 0, -1):
                dA = np.dot(W[i + 1].T, dZ[i + 1])
                dZ[i] = dA * self.d_gfunc(Z[i])
                dW[i] = np.dot(dZ[i], A[i - 1].T)
                db[i] = np.sum(dZ[i], axis=1, keepdims=True)

            # ---- gradient-descent step ----
            for i in range(1, len(n)):
                W[i] = W[i] - self.alpha * dW[i]
                b[i] = b[i] - self.alpha * db[i]

            # Progress indicator, rewritten in place every 1000 iterations.
            if (k + 1) % 1000 == 0:
                print('progress rate:{}/{}'.format(k+1,self.ITERNUM),end = '\r')

        # Store the loss curve in one go instead of chained .loc[k]['J']
        # assignment, which does not reliably write back into the DataFrame.
        self.dfJ = pd.DataFrame(data=costs.reshape(-1, 1), columns=['J'])
        self.W, self.b = W, b

    def predict_prob(self, X_test):
        """Return the predicted probability of class 1 for each row of ``X_test``."""
        W, b = self.W, self.b
        Ai = np.asarray(X_test).T
        for i in range(1, len(self.n)):
            Zi = np.dot(W[i], Ai) + b[i]
            Ai = self.g[i](Zi)
        return Ai.reshape(-1)

    def predict(self, X_test):
        """Return 0/1 labels (as floats) by thresholding ``predict_prob`` at 0.5."""
        Y_prob = self.predict_prob(X_test)
        Y_test = Y_prob.copy()
        Y_test[Y_prob >= 0.5] = 1
        Y_test[Y_prob < 0.5] = 0
        return Y_test
05 單隱層神經網絡
設置1個隱藏層,隱藏層節點數爲5,隱藏層使用Sigmoid激活函數。
# 採用Sigmoid激活函數
NN = NNClassifier(n = [np.nan,5,1],alpha = 0.02,
ITERNUM = 200000, gfunc = 'Sigmoid')
NN.fit(X_train,y_train)
# 繪製目標函數迭代曲線
%matplotlib inline
NN.dfJ.plot(figsize = (12,8))
# 測試在驗證集的auc得分
from sklearn.metrics import roc_auc_score
Y_prob = NN.predict_prob(X_test)
roc_auc_score(list(y_test),list(Y_prob))
隱藏層使用Tanh激活函數。
# 採用 Tanh激活函數
NN = NNClassifier(n = [np.nan,5,1],alpha = 0.02,
ITERNUM = 200000, gfunc = 'Tanh')
NN.fit(X_train,y_train)
# 繪製目標函數迭代曲線
%matplotlib inline
NN.dfJ.plot(figsize = (12,8))
# 測試在驗證集的auc得分
from sklearn.metrics import roc_auc_score
Y_prob = NN.predict_prob(X_test)
roc_auc_score(list(y_test),list(Y_prob))
隱藏層使用ReLu激活函數。
# 採用 ReLu激活函數
NN = NNClassifier(n = [np.nan,5,1],alpha = 0.02,
ITERNUM = 200000, gfunc = 'ReLu')
NN.fit(X_train,y_train)
# 繪製目標函數迭代曲線
%matplotlib inline
NN.dfJ.plot(figsize = (12,8))
# 測試在驗證集的auc得分
from sklearn.metrics import roc_auc_score
Y_prob = NN.predict_prob(X_test)
roc_auc_score(list(y_test),list(Y_prob))
隱藏層使用LeakyReLu激活函數。
# 採用 LeakyReLu激活函數
NN = NNClassifier(n = [np.nan,5,1],alpha = 0.02,
ITERNUM = 200000, gfunc = 'LeakyReLu')
NN.fit(X_train,y_train)
# 繪製目標函數迭代曲線
%matplotlib inline
NN.dfJ.plot(figsize = (12,8))
# 測試在驗證集的auc得分
from sklearn.metrics import roc_auc_score
Y_prob = NN.predict_prob(X_test)
roc_auc_score(list(y_test),list(Y_prob))
以上試驗似乎表明,在當前的數據集上,隱藏層採用ReLu激活函數是一個最好的選擇,AUC最高得分爲0.99958。
06雙隱層神經網絡
設置2個隱藏層,隱藏層節點數都爲5,隱藏層都使用ReLu激活函數。
# 設置兩個隱藏層,採用ReLu激活函數
NN = NNClassifier(n = [np.nan,5,5,1],alpha = 0.02,
ITERNUM = 200000, gfunc = 'ReLu')
NN.fit(X_train,y_train)
# 繪製目標函數迭代曲線
%matplotlib inline
NN.dfJ.plot(figsize = (12,8))
# 測試在驗證集的auc得分
from sklearn.metrics import roc_auc_score
Y_prob = NN.predict_prob(X_test)
roc_auc_score(list(y_test),list(Y_prob))
AUC得分0.99874比採用單隱藏層的最優得分0.99958有所下降,可能是模型複雜度太高,我們嘗試減少隱藏層節點的個數至3,以降低模型複雜度。
# 雙隱藏層,隱藏層節點數爲3
NN = NNClassifier(n = [np.nan,3,3,1],alpha = 0.02,
ITERNUM = 200000, gfunc = 'ReLu')
NN.fit(X_train,y_train)
# 繪製目標函數迭代曲線
%matplotlib inline
NN.dfJ.plot(figsize = (12,8))
# 測試在驗證集的auc得分
from sklearn.metrics import roc_auc_score
Y_prob = NN.predict_prob(X_test)
roc_auc_score(list(y_test),list(Y_prob))
AUC得分0.99979,又有所提升。
和sklearn中自帶的神經網絡分類器進行對比。
# Baseline for comparison: scikit-learn's own MLP classifier.
from sklearn.neural_network import MLPClassifier

# Two hidden layers with 3 neurons each, ReLU activation.
MLPClf = MLPClassifier(
    hidden_layer_sizes=(3, 3),
    max_iter=200000,
    activation='relu',
)
MLPClf.fit(X_train, y_train)

# Plot the loss curve reported by the fitted model.
dfJ = pd.DataFrame(data=np.array(MLPClf.loss_curve_), columns=['J'])
dfJ.plot(figsize=(12, 8))

# AUC score on the validation split, using the class-1 probability column.
from sklearn.metrics import roc_auc_score
Y_prob = MLPClf.predict_proba(X_test)[:, 1]
roc_auc_score(y_test, Y_prob)
以上試驗表明,針對當前數據集,選擇ReLu激活函數,採用雙隱藏層,每個隱藏層節點數設置爲3是一個不錯的選擇,AUC得分爲0.99979。該得分高於採用CV交叉驗證優化超參數後的邏輯迴歸模型的0.99897的AUC得分。
星標我,每天多一點智慧