本次的反向傳播算法是基於上篇文章《神經網絡之反向傳播算法(BP)公式詳推導》實現的,若是對反向傳播算法公式不太瞭解,強烈建議參考上篇文章。
咱們將實現一個 4
層的全連接網絡,來完成二分類任務。網絡輸入節點數爲 2
,隱藏層的節點數設計爲:25、50
和 25
,輸出層兩個節點,分別表示屬於類別 1
的機率和類別 2
的機率,如下圖所示。這裏並沒有採用 Softmax
函數將網絡輸出機率值之和進行約束,而是直接利用均方誤差函數計算與 One-hot
編碼的真實標籤之間的誤差,所有的網絡激活函數全部採用 Sigmoid
函數,這些設計都是爲了能直接利用咱們的梯度傳播公式。
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets from sklearn.model_selection import train_test_split
# Build the two-moons toy dataset (1000 noisy 2-D points, fixed seed) and
# hold out 30% of it as a test split.
X, y = datasets.make_moons(
    n_samples=1000,
    noise=0.2,
    random_state=100,
)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)
print(X.shape, y.shape)  # (1000, 2) (1000,)
(1000, 2) (1000,)
def make_plot(X, y, plot_name):
    """Scatter-plot a two-class dataset on a new 12x8 figure.

    :param X: array indexed as ``X[mask, 0]`` / ``X[mask, 1]``; the first two
        columns are used as the plot coordinates
    :param y: label array aligned with the rows of ``X``; rows labelled 0 and
        rows labelled 1 are drawn as two separate scatter series
    :param str plot_name: title shown above the figure
    """
    plt.figure(figsize=(12, 8))
    plt.title(plot_name, fontsize=30)
    # One scatter call per class so each class gets its own default colour.
    for label in (0, 1):
        members = X[y == label]
        plt.scatter(members[:, 0], members[:, 1])
# Visualise the full dataset before any training.
make_plot(X, y, plot_name="Classification Dataset Visualization ")
Layer
實現一個網絡層,須要傳入網絡層的輸入節點數、輸出節點數、激活函數類型等參數。權值張量 weights
和偏置張量 bias
# The weight tensor `weights` and the bias tensor `bias` are generated and
# initialised automatically from the input/output node counts at construction
# time whenever the caller does not supply them.
class Layer:
    """A single fully connected network layer.

    Attributes set by the constructor:
        weights: weight matrix used as ``X @ weights``
        bias: bias vector added after the matrix product
        activation: activation name (``'relu'``, ``'tanh'``, ``'sigmoid'``)
            or ``None``; any other value behaves as the identity function
        activation_output: output ``o`` of the most recent ``activate`` call
        error: intermediate value used while computing ``delta``
        delta: this layer's delta term, used to compute the gradient
    """

    def __init__(self, n_input, n_output, activation=None, weights=None, bias=None):
        """
        :param int n_input: number of input nodes
        :param int n_output: number of output nodes
        :param str activation: activation function type
        :param weights: weight tensor; generated internally when ``None``
        :param bias: bias tensor; generated internally when ``None``
        """
        if weights is None:
            # Scaled normal initialisation, shape (n_input, n_output).
            weights = np.random.randn(n_input, n_output) * np.sqrt(1 / n_output)
        if bias is None:
            # Small uniform values in [0, 0.1).
            bias = np.random.rand(n_output) * 0.1
        self.weights = weights
        self.bias = bias
        self.activation = activation  # activation type, e.g. 'sigmoid'
        self.activation_output = None  # output value o of the activation
        self.error = None  # intermediate variable for computing delta
        self.delta = None  # delta of this layer, used for the gradient

    def activate(self, X):
        """Run the forward pass and cache the result in ``activation_output``.

        :param X: input to the layer
        :return: ``activation(X @ weights + bias)``
        """
        r = np.dot(X, self.weights) + self.bias  # X@W + b
        self.activation_output = self._apply_activation(r)
        return self.activation_output

    def _apply_activation(self, r):
        """Apply the configured activation function to the pre-activation *r*."""
        if self.activation == 'relu':
            return np.maximum(r, 0)
        if self.activation == 'tanh':
            return np.tanh(r)
        if self.activation == 'sigmoid':
            return 1 / (1 + np.exp(-r))
        # None or an unrecognised name: identity.
        return r

    def apply_activation_derivative(self, r):
        """Return the derivative of the activation function.

        The tanh and sigmoid formulas below are written in terms of the
        activation's *output*, so *r* is expected to be the value returned by
        ``activate`` rather than the pre-activation.

        :param r: activated output of this layer
        :return: element-wise derivative with the same shape as *r*
        """
        if self.activation == 'relu':
            grad = np.array(r, copy=True)
            grad[r > 0] = 1.
            grad[r <= 0] = 0.
            return grad
        if self.activation == 'tanh':
            return 1 - r ** 2
        if self.activation == 'sigmoid':
            return r * (1 - r)
        # None or an unrecognised name: the forward pass is the identity, so
        # the derivative is 1 everywhere (previously an unrecognised name
        # returned r itself, which is not a derivative of anything used here).
        return np.ones_like(r)
NeuralNetwork 類
NeuralNetwork 類內部維護各網絡層的 Layer 類對象,能夠經過 add_layer 函數追加網絡層。
y_test.flatten().shape # (300,)
(300,)
class NeuralNetwork:
    """A sequential stack of layers trained one sample at a time.

    Layers are used through duck typing: each must provide ``activate(X)``,
    ``apply_activation_derivative(o)`` and the attributes ``weights``,
    ``bias``, ``activation_output``, ``error`` and ``delta``.
    """

    def __init__(self):
        self._layers = []  # layer objects, in forward order

    def add_layer(self, layer):
        """Append *layer* to the end of the network."""
        self._layers.append(layer)

    def feed_forward(self, X):
        """Pass *X* through every layer in order and return the last output."""
        for layer in self._layers:
            X = layer.activate(X)
        return X

    def backpropagation(self, X, y, learning_rate):
        """Run one forward/backward pass and update every layer in place.

        The update step broadcasts ``delta`` against the transposed input, so
        this is written for a single sample (1-D ``X`` and ``y``).

        :param X: input sample
        :param y: target vector with the same shape as the network output
        :param learning_rate: step size applied to both weights and biases
        """
        # Forward pass; every layer caches its activation_output.
        output = self.feed_forward(X)

        last = len(self._layers) - 1
        for i in reversed(range(len(self._layers))):
            layer = self._layers[i]
            if i == last:
                # Output layer: delta comes straight from the target error.
                layer.error = y - output
                layer.delta = layer.error * layer.apply_activation_derivative(output)
            else:
                # Hidden layer: pull the next layer's delta back through its
                # (not yet updated) weights.
                next_layer = self._layers[i + 1]
                layer.error = np.dot(next_layer.weights, next_layer.delta)
                layer.delta = layer.error * layer.apply_activation_derivative(layer.activation_output)

        # All deltas are known; now apply the updates.
        for i, layer in enumerate(self._layers):
            # o_i is the output of the previous layer (the raw input for layer 0).
            o_i = np.atleast_2d(X if i == 0 else self._layers[i - 1].activation_output)
            # delta already carries the negative sign of the gradient
            # (error = y - output), hence "+=" for gradient descent.
            layer.weights += layer.delta * o_i.T * learning_rate
            # The bias gradient is delta itself; without this step the biases
            # would keep their initial values for the whole training run.
            layer.bias += layer.delta * learning_rate

    def train(self, X_train, X_test, y_train, y_test, learning_rate, max_epochs):
        """Train with per-sample updates and report progress every 10 epochs.

        :param X_train: training inputs, iterated one row at a time
        :param X_test: inputs used only for the reported accuracy
        :param y_train: integer class labels (0 or 1) for ``X_train``
        :param y_test: class labels for ``X_test``
        :param learning_rate: step size passed to ``backpropagation``
        :param max_epochs: number of passes over ``X_train``
        :return: list of the MSE values recorded every 10th epoch
        """
        # One-hot encode the labels into two columns (binary task).
        y_onehot = np.zeros((y_train.shape[0], 2))
        y_onehot[np.arange(y_train.shape[0]), y_train] = 1
        mses = []
        for i in range(max_epochs):
            for sample, target in zip(X_train, y_onehot):  # one sample per update
                self.backpropagation(sample, target, learning_rate)
            if i % 10 == 0:
                # Report the MSE loss on the training set and test accuracy.
                mse = np.mean(np.square(y_onehot - self.feed_forward(X_train)))
                mses.append(mse)
                print('Epoch: #%s, MSE: %f, Accuracy: %.2f%%' %
                      (i, float(mse), self.accuracy(self.predict(X_test), y_test.flatten()) * 100))
        return mses

    def accuracy(self, y_predict, y_test):
        """Return the fraction of entries where *y_predict* equals *y_test*."""
        return np.sum(y_predict == y_test) / len(y_test)

    def predict(self, X_predict):
        """Return the index of the largest output for each row of *X_predict*.

        :param X_predict: 2-D batch of inputs, one sample per row
        :return: 1-D array of predicted class indices
        """
        y_predict = self.feed_forward(X_predict)
        # Network output has one column per output node; pick the larger one.
        y_predict = np.argmax(y_predict, axis=1)
        return y_predict
# Assemble the 4-layer fully connected network: three sigmoid hidden layers
# followed by a two-node sigmoid output layer.
nn = NeuralNetwork()
for n_in, n_out in (
    (2, 25),   # hidden layer 1, 2=>25
    (25, 50),  # hidden layer 2, 25=>50
    (50, 25),  # hidden layer 3, 50=>25
    (25, 2),   # output layer, 25=>2
):
    nn.add_layer(Layer(n_in, n_out, 'sigmoid'))
# Training call is left commented out in this listing:
# nn.train(X_train, X_test, y_train, y_test, learning_rate=0.01, max_epochs=50)
def plot_decision_boundary(model, axis):
    """Draw the model's predicted class regions as filled contours.

    A regular grid with 100 points per unit length is laid over the
    rectangle described by *axis*; every grid point is classified with
    ``model.predict`` and the result is drawn on the current figure.

    :param model: object exposing ``predict(X)`` that returns one class
        index per row of a 2-column input
    :param axis: ``[x_min, x_max, y_min, y_max]`` bounds of the plotted area
    """
    from matplotlib.colors import ListedColormap

    x0, x1 = np.meshgrid(
        np.linspace(axis[0], axis[1], int((axis[1] - axis[0]) * 100)),
        np.linspace(axis[2], axis[3], int((axis[3] - axis[2]) * 100)),
    )
    # Flatten the grid into an (n_points, 2) batch for the model.
    X_new = np.c_[x0.ravel(), x1.ravel()]
    y_predic = model.predict(X_new)
    zz = y_predic.reshape(x0.shape)

    custom_cmap = ListedColormap(['#EF9A9A', '#FFF590', '#90CAF9'])
    # `linewidth` is not a contourf keyword (matplotlib reports it as unused),
    # so it is no longer passed.
    plt.contourf(x0, x1, zz, cmap=custom_cmap)
# Overlay the raw data points on top of the network's decision regions.
plt.figure(figsize=(12, 8))
plot_decision_boundary(nn, [-2, 2.5, -1, 2])
class0, class1 = X[y == 0], X[y == 1]
plt.scatter(class0[:, 0], class0[:, 1])
plt.scatter(class1[:, 0], class1[:, 1])
<matplotlib.collections.PathCollection at 0x29018d6dfd0>
# Predict a class index for every sample in the held-out test split.
y_predict = nn.predict(X_test)
# Peek at the first ten predictions; the trailing comment is the output
# recorded in the original notebook run.
y_predict[:10] # array([1, 1, 0, 1, 0, 0, 0, 1, 1, 1], dtype=int64)
array([1, 1, 0, 1, 0, 0, 0, 1, 1, 1], dtype=int64)
# First ten ground-truth labels, for side-by-side comparison with the
# predictions; the trailing comment is the recorded notebook output.
y_test[:10] # array([1, 1, 0, 1, 0, 0, 0, 1, 1, 1], dtype=int64)
array([1, 1, 0, 1, 0, 0, 0, 1, 1, 1], dtype=int64)
# Fraction of test samples classified correctly. The recorded value below
# presumably comes from a run in which nn.train(...) was executed — the
# training call is commented out in this listing, so verify before relying on it.
nn.accuracy(y_predict, y_test.flatten()) # 0.86
0.86