現有芯片製造廠需對一批芯片進行質量檢查。製造廠對芯片進行兩個不同的性能測試:microchip_test_1、microchip_test_2,那麼,產品經理如何通過這兩個測試數據去判斷芯片的質量。通過正規化的邏輯迴歸模型(Regularized logistic regression)可以完成此任務。
import numpy as np import pandas as pd import matplotlib.pyplot as plt
# Load the microchip QA data: two test scores per chip plus a 0/1 accept label.
fpath = r'../ex2data2.txt'
df = pd.read_table(fpath, sep=',', header=None)
df.columns = ['microchip_test_1', 'microchip_test_2', 'label']
# Figure 1: scatter of the two microchip test scores, split by accept/reject label.
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111)
# Fix: legend label typo "Accpeted" -> "Accepted".
ax.scatter(df[df['label'] == 1]['microchip_test_1'],
           df[df['label'] == 1]['microchip_test_2'],
           marker='+', color='k', label='y=1 Accepted')
ax.scatter(df[df['label'] == 0]['microchip_test_1'],
           df[df['label'] == 0]['microchip_test_2'],
           marker='o', color='y', edgecolors='k', label='y=0 Rejected')
ax.set(xlabel='Microchip Test 1', ylabel='Microchip Test 2',
       title='Figure 1: Plot of training data')
plt.legend()
- 由上圖可知,原始數據是線性不可分的。
def datas(df, Maxpower):
    """Build the training matrices from the raw DataFrame.

    Maps the two raw features into all monomials x1^i * x2^(d-i) for every
    degree d = 1..Maxpower (plus a leading bias row of ones), so that a
    linear decision boundary in feature space becomes a curved boundary in
    the original (x1, x2) plane.

    Returns (x_mat, y_mat, theta_mat):
      x_mat     -- np.matrix, shape (num_features, num_samples)
      y_mat     -- np.matrix, shape (1, num_samples), the 0/1 labels
      theta_mat -- np.matrix, shape (1, num_features), initialized to zeros
    """
    x_1 = df.iloc[:, :1].values
    x_2 = df.iloc[:, 1:2].values
    n = len(x_1)
    # Start with the bias row; append one row per monomial, lowest degree first.
    rows = [np.ones([1, n])]
    for degree in range(1, Maxpower + 1):
        for i in range(degree + 1):
            monomial = (np.power(x_1, i) * np.power(x_2, degree - i)).reshape([n, 1])
            rows.append(monomial.T)
    x_mat = np.mat(np.concatenate(rows, axis=0))
    num_features = x_mat.shape[0]
    theta_mat = np.mat(np.zeros(num_features))
    y_mat = np.mat(df['label'].values)
    return x_mat, y_mat, theta_mat
#sigmoid function
def sgm_f(x_mat, theta_mat):
    """Hypothesis h = sigmoid(theta * X); returns a (1, num_samples) matrix."""
    return 1 / (1 + np.exp(-(theta_mat * x_mat)))

#cost function
def cost_f(x_mat, y_mat, theta_mat, re_lambda):
    """Regularized logistic-regression cost (1x1 matrix).

    The L2 penalty skips theta_0 (the bias weight) by slicing theta_mat[0,1:].
    """
    m = x_mat.shape[1]
    h = sgm_f(x_mat, theta_mat)
    data_term = (1/m) * (-y_mat * np.log(h.T) - (1-y_mat) * np.log((1-h).T))
    reg_term = (re_lambda / (2*m)) * (theta_mat[0, 1:] * theta_mat[0, 1:].T)
    return data_term + reg_term

#derivation function
def deriv_f(x_mat, y_mat, theta_mat, re_lambda):
    """Gradient of the regularized cost w.r.t. theta, shape (1, num_features).

    theta_0 is left out of the regularization term by zeroing its slot in
    the penalty copy of theta.
    """
    n, m = x_mat.shape
    penalized_theta = np.mat(np.zeros(n))
    penalized_theta[0, 1:] = theta_mat[0, 1:]
    h = sgm_f(x_mat, theta_mat)
    return (1/m) * ((h - y_mat) * x_mat.T) + (re_lambda / m) * penalized_theta
#gradient descent algorithm def grsdient_bgd(x_mat, y_mat, theta_mat, alpha, epsilon, MaxIter, re_lambda): J0 = 0 J_lst = [] theta_mtr_lst = [] for iternum in range(MaxIter): J = cost_f(x_mat, y_mat, theta_mat, re_lambda) if abs(J-J0) < epsilon: iternum=iternum-1 break theta_mtr_lst.append(theta_mat) theta_mat = theta_mat - alpha*deriv_f(x_mat, y_mat, theta_mat, re_lambda) J0 = J J_lst.append(J) print('MaxIteration Num is %d'%(iternum+1)) return J_lst, theta_mtr_lst, (iternum+1) def grsdient_sgd(x_mat, y_mat, theta_mat, alpha, epsilon, MaxIter, re_lambda): J0 = 0 J_lst = [] theta_mtr_lst = [] n,m = x_mat.shape for iternum in range(MaxIter): J = cost_f(x_mat, y_mat, theta_mat, re_lambda) if abs(J-J0) < epsilon: iternum=iternum-1 break theta_mtr_lst.append(theta_mat) for i in range(m): theta_mat = theta_mat - alpha*deriv_f(x_mat[:,i], y_mat[:,i], theta_mat, re_lambda) theta_mat = theta_mat - alpha*deriv_f(x_mat, y_mat, theta_mat, re_lambda) J0 = J J_lst.append(J) print('MaxIteration Num is %d'%(iternum+1)) return J_lst, theta_mtr_lst, (iternum+1)
#plt_x_mat
def plt_x_mat(df, Maxpower):
    """Build the polynomial feature matrix for a 50x50 grid over the data range.

    Mirrors the feature mapping used for training (bias row, then monomials
    x1^i * x2^(d-i) for d = 1..Maxpower) so the trained theta can be evaluated
    on the grid.

    Returns (plt_x_mat, plot_x1, plot_x2):
      plt_x_mat -- np.matrix, shape (num_features, 2500)
      plot_x1/plot_x2 -- the (50, 50) meshgrid coordinate arrays
    """
    x1_Min = df['microchip_test_1'].min()
    x1_Max = df['microchip_test_1'].max()
    x2_Min = df['microchip_test_2'].min()
    x2_Max = df['microchip_test_2'].max()
    plot_x1, plot_x2 = np.meshgrid(np.linspace(x1_Min, x1_Max),
                                   np.linspace(x2_Min, x2_Max))
    x_1 = plot_x1.ravel()
    x_2 = plot_x2.ravel()
    n = len(x_1)
    x_1 = x_1.reshape(n, 1)
    x_2 = x_2.reshape(n, 1)
    plt_array_0 = np.ones([1, n])  # bias row
    for powerNum in range(1, Maxpower + 1):
        for power_index in range(powerNum + 1):
            plt_new_feat = np.power(x_1, power_index) * np.power(x_2, powerNum - power_index)
            plt_array_0 = np.concatenate((plt_array_0, plt_new_feat.T), axis=0)
    plt_x_mat = np.matrix(plt_array_0)
    return plt_x_mat, plot_x1, plot_x2

def draw_data_boundary(df, plot_x_mat, plot_x1, plot_x2, theta_mtr_lst, re_lambda):
    """Scatter the training data and overlay the h = 0.5 decision boundary
    implied by the last theta in theta_mtr_lst."""
    fig = plt.figure(figsize=(8, 6))
    ax = fig.add_subplot(111)
    # Fix: legend label typo "Accpeted" -> "Accepted".
    ax.scatter(df[df['label'] == 1]['microchip_test_1'],
               df[df['label'] == 1]['microchip_test_2'],
               marker='+', color='k', label='y=1 Accepted')
    ax.scatter(df[df['label'] == 0]['microchip_test_1'],
               df[df['label'] == 0]['microchip_test_2'],
               marker='o', color='y', edgecolors='k', label='y=0 Rejected')
    ax.set(xlabel='Microchip Test 1', ylabel='Microchip Test 2',
           title=r'Figure 1: Plot of training data ($\lambda=$%d)'%re_lambda)
    #plot boundary
    theta_mat_cal = theta_mtr_lst[-1]
    h_cal = sgm_f(plot_x_mat, theta_mat_cal)
    h_cal = h_cal.reshape(plot_x1.shape)
    # Fix: Axes.contour takes `linewidths` (plural); `linewidth` is not a
    # valid contour keyword.
    ax.contour(plot_x1, plot_x2, h_cal, [0.5], colors='r', linewidths=0.5)
    plt.legend()
    plt.show()
    return
def predict(theta_mat, x_mat):
    """Classify each sample (column of x_mat): 1 where h >= 0.5, else 0.

    Returns a column array of 0/1 labels, one row per sample.
    """
    probs = sgm_f(x_mat, theta_mat)
    return np.where(probs.T >= 0.5, 1, 0)
if __name__=='__main__':
    # Shared hyperparameters (identical for every lambda).
    MaxIter = 1500
    alpha = 1
    epsilon = 1e-10
    Maxpower = 6
    # Train, plot and score the model for several regularization strengths.
    for re_lambda in [0, 1, 10, 100]:
        # Build feature / label / parameter matrices from the raw data.
        x_mat, y_mat, theta_mat = datas(df, Maxpower)
        J_lst, theta_mtr_lst, Maxiternum = grsdient_bgd(
            x_mat, y_mat, theta_mat, alpha, epsilon, MaxIter, re_lambda)
        plot_x_mat, plot_x1, plot_x2 = plt_x_mat(df, Maxpower)
        draw_data_boundary(df, plot_x_mat, plot_x1, plot_x2,
                           theta_mtr_lst, re_lambda)
        # Score on the training set with the final theta.
        theta_mat = theta_mtr_lst[-1]
        df['pre_label'] = predict(theta_mat, x_mat)
        accuracy_rate = len(df[df['label'] == df['pre_label']]) / len(df)
        print('Train Accuracy is: %.4f'%accuracy_rate)
- 展示不同 $\lambda$ 對決策邊界的影響。
- 不同 $\lambda$(0、1、10、100)對應的準確率分別爲:0.8305、0.8305、0.7458、0.6102
⚠️參考博客《斯坦福機器學習筆記》:https://yoyoyohamapi.gitbooks.io/mit-ml/content/
⚠️吳恩達《機器學習》課後作業,源數據下載:https://github.com/nsoojin/coursera-ml-py。