Logistic Regression - Linear Decision Boundary (Python 3 version)

Import the required libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

Load the data

#load data
fpath  = r'.../ex2data1.txt'
df = pd.read_table(fpath, engine='python', header=None, sep=',')
df.rename(columns={0:'Exam_1', 1:'Exam_2', 2:'Admitted'}, inplace=True)

Data preprocessing

# Scale each feature to the [0, 1] range
df_norm = df.apply(lambda x: (x - x.min()) / (x.max() - x.min()))
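
The lambda above applies column-wise min-max scaling, $x' = \dfrac{x - x_{\min}}{x_{\max} - x_{\min}}$, which maps both exam scores into $[0, 1]$ and leaves the 0/1 Admitted column unchanged.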

Data visualization

#plot data
plt.scatter(df[df['Admitted'] == 0]['Exam_1'],df[df['Admitted'] == 0]['Exam_2'], edgecolors='k', color='y', label='Not Admitted')
plt.scatter(df[df['Admitted'] == 1]['Exam_1'],df[df['Admitted'] == 1]['Exam_2'], marker='+', color='k', label='Admitted')
plt.legend(loc='upper right')
plt.xlabel('Exam 1 Score')
plt.ylabel('Exam 2 Score')
plt.title('Figure 1: Scatter plot of training data')


Sigmoid Function

  • hypothesis: $h_{\theta}(x) = g(\theta^{T}x)$

  • sigmoid function: $g(z) = \dfrac{1}{1 + e^{-z}}$

# Define the sigmoid function: given theta (a 1 x (n+1) row vector) and the
# design matrix x_mtr, return h_theta(x) = g(x * theta^T) for every sample
def sgd_f(theta_mtr, x_mtr):
    z = x_mtr * theta_mtr.T          # matrix product, shape (m, 1)
    g = 1 / (1 + np.exp(-z))
    return g

# Define the cost function
def cost_f(theta_mtr, x_mtr, y_mtr):
    m, n = x_mtr.shape
    h = sgd_f(theta_mtr, x_mtr)
    lh = y_mtr.T * np.log(h) + (1 - y_mtr).T * np.log(1 - h)
    J = - lh / m            # the minus sign turns the concave log-likelihood into a convex cost that gradient descent can minimize
    return J[0, 0]
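
For reference, the vectorized cost that cost_f computes is the cross-entropy (negative log-likelihood)

$J(\theta) = -\dfrac{1}{m}\Big[\,y^{T}\log h_{\theta}(X) + (1-y)^{T}\log\big(1-h_{\theta}(X)\big)\Big]$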

Gradient descent

# Gradient of the cost function with respect to theta
def derv_theta(theta_mtr, x_mtr, y_mtr):
    m, _ = x_mtr.shape
    h = sgd_f(theta_mtr, x_mtr)
    derv = (1 / m) * ((h - y_mtr).T * x_mtr)
    return derv
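
In vectorized form, derv_theta returns the standard logistic regression gradient

$\nabla_{\theta} J(\theta) = \dfrac{1}{m}\big(h_{\theta}(X) - y\big)^{T} X$

which is a $1 \times (n+1)$ row vector with the same shape as $\theta$.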

# Batch gradient descent
def gradient_bgd(x_mtr, y_mtr, theta_mtr, alpha, epsilon, MaxIter):
    J0 = 0
    J_lst = []
    theta_mtr_lst = []
    for iternum in range(MaxIter):
        J = cost_f(theta_mtr, x_mtr, y_mtr)
        if abs(J - J0) < epsilon:   # stop once the cost change falls below epsilon
            iternum = iternum - 1
            break
        theta_mtr_lst.append(theta_mtr)
        theta_mtr = theta_mtr - alpha * derv_theta(theta_mtr, x_mtr, y_mtr)
        J0 = J
        J_lst.append(J)
    print('Number of iterations: %d' % (iternum + 1))
    return J_lst, theta_mtr_lst, (iternum + 1)

# Stochastic gradient descent
def gradient_sgd(x_mtr, y_mtr, theta_mtr, alpha, epsilon, MaxIter):
    m, n = x_mtr.shape
    J0 = 0
    J_lst = []
    theta_mtr_lst = []
    for iternum in range(MaxIter):
        J = cost_f(theta_mtr, x_mtr, y_mtr)
        if abs(J - J0) < epsilon:
            iternum = iternum - 1
            break
        theta_mtr_lst.append(theta_mtr)
        for i in range(m):          # update theta one training sample at a time
            theta_mtr = theta_mtr - alpha * derv_theta(theta_mtr, x_mtr[i, :], y_mtr[i, :])
        J0 = J
        J_lst.append(J)
    print('Number of iterations: %d' % (iternum + 1))
    return J_lst, theta_mtr_lst, (iternum + 1)
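
As an optional sanity check (my addition, not part of the original post), the analytic gradient can be compared against a central finite-difference estimate of the cost. The helper below assumes the x_mtr and y_mtr matrices built in the main block; the returned maximum difference should be very small if derv_theta is correct.

# Optional gradient check (not in the original post): compare derv_theta with a
# numerical central-difference estimate of the cost gradient.
# Call it after x_mtr and y_mtr exist, e.g. check_gradient(theta_mtr, x_mtr, y_mtr)
def check_gradient(theta_mtr, x_mtr, y_mtr, eps=1e-5):
    num_grad = np.matrix(np.zeros(theta_mtr.shape))
    for j in range(theta_mtr.shape[1]):
        t_plus = np.matrix(theta_mtr, copy=True)
        t_minus = np.matrix(theta_mtr, copy=True)
        t_plus[0, j] += eps
        t_minus[0, j] -= eps
        num_grad[0, j] = (cost_f(t_plus, x_mtr, y_mtr) - cost_f(t_minus, x_mtr, y_mtr)) / (2 * eps)
    # largest absolute difference between the numerical and analytic gradients
    return np.max(np.abs(num_grad - derv_theta(theta_mtr, x_mtr, y_mtr)))
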
if __name__ == '__main__':
    alpha = 0.1
    epsilon = 1e-7
    MaxIter = 15000
    m, n = df.iloc[:, :2].shape

    # theta: row vector of zeros, one entry per feature plus the intercept
    theta_mtr = np.zeros([1, n + 1])

    # x_matrix: normalized features with a leading column of ones for the intercept
    x_mtr = np.matrix(df_norm.iloc[:, :2].values)
    x0 = np.matrix(np.ones(m))
    x_mtr = np.hstack([x0.T, x_mtr])

    # y_matrix: admission labels
    y_mtr = np.matrix(df.iloc[:, 2:].values)
    J = cost_f(theta_mtr, x_mtr, y_mtr)
    J_lst, theta_mtr_lst, Maxiternum = gradient_bgd(x_mtr, y_mtr, theta_mtr, alpha, epsilon, MaxIter)
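
As a quick convergence check (my addition, not in the original post), the cost history returned by gradient_bgd can be plotted; the curve should decrease and flatten out as $\theta$ converges.

# Optional: plot the cost history from batch gradient descent
fig_cost = plt.figure(figsize=(8, 6))
ax_cost = fig_cost.add_subplot(111)
ax_cost.plot(range(len(J_lst)), J_lst, 'b-')
ax_cost.set(xlabel='Number of Iterations', ylabel=r'Cost $J(\theta)$', title='Cost history of batch gradient descent')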

Linear decision boundary
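
Since $h_{\theta}(x) = 0.5$ exactly when $\theta^{T}x = 0$, the decision boundary in the normalized feature space is the straight line

$x_2 = -\dfrac{\theta_0 + \theta_1 x_1}{\theta_2} = \left(-\dfrac{\theta_1}{\theta_2}\right)x_1 + \left(-\dfrac{\theta_0}{\theta_2}\right)$

which is why the code below divides $\theta_0$ and $\theta_1$ by $-\theta_2$ to obtain the intercept and slope.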

# compute the coefficients of the linear decision boundary (in normalized feature space)
line_theta = theta_mtr_lst[-1]
line_param_0 = line_theta[0,0] / -line_theta[0,2]
line_param_1 = line_theta[0,1] / -line_theta[0,2]
fig = plt.figure(figsize=(8,6))
ax = fig.add_subplot(111)

# plot the decision boundary: compute the line in normalized space,
# then map both coordinates back to the original exam-score scale
plot_x = np.linspace(0, 0.9, 100)
plot_y = line_param_1 * plot_x + line_param_0
plot_x = plot_x * (df['Exam_1'].max() - df['Exam_1'].min()) + df['Exam_1'].min()
plot_y = plot_y * (df['Exam_2'].max() - df['Exam_2'].min()) + df['Exam_2'].min()

ax.plot(plot_x, plot_y, 'r-', label='classify line')

#plot original data
ax.scatter(df[df['Admitted'] == 0]['Exam_1'], df[df['Admitted'] == 0]['Exam_2'], edgecolors='k', color='y', label='Not Admitted')

ax.scatter(df[df['Admitted'] == 1]['Exam_1'],df[df['Admitted'] == 1]['Exam_2'],  marker='+', color='k', label='Admitted')

ax.set(xlim=[30,100], ylim=[30,100], title='Figure 2: Training data with decision boundary', xlabel='Exam 1 Score', ylabel='Exam 2 Score')
plt.legend(loc='upper right')


Prediction

# Predict admission (1/0) for a single applicant from raw exam scores
def predict_f(exam1, exam2):
    theta_mtr = theta_mtr_lst[-1]
    # apply the same min-max scaling that was used on the training data
    x1 = (exam1 - df['Exam_1'].min()) / (df['Exam_1'].max() - df['Exam_1'].min())
    x2 = (exam2 - df['Exam_2'].min()) / (df['Exam_2'].max() - df['Exam_2'].min())
    x_predict = np.matrix([1, x1, x2])
    prob = sgd_f(theta_mtr, x_predict)[0, 0]
    if prob > 0.5:
        return 1
    else:
        return 0
df['Predicted_label'] = df.apply(lambda x: predict_f(x['Exam_1'], x['Exam_2']), axis=1)
df[df['Admitted'] != df['Predicted_label']]
  • Using the fitted logistic regression as a classifier to predict admission for the original data, 8 samples are predicted incorrectly, and these 8 samples correspond exactly to the 8 misclassified points in the figure above (a quick accuracy check is sketched below).
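
A minimal sketch (my addition) that quantifies this using the Predicted_label column created above:

# Training accuracy: fraction of samples whose prediction matches the true label
accuracy = (df['Admitted'] == df['Predicted_label']).mean()
print('Training accuracy: %.2f%%' % (accuracy * 100))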

Evolution of the three $\theta$ values during the iterations

fig = plt.figure(figsize=(8,6))
ax = fig.add_subplot(111)
# collect the three components of theta from every iteration
theta0_lst, theta1_lst, theta2_lst = [], [], []
for i in theta_mtr_lst:
    theta0_lst.append(i[0,0])
    theta1_lst.append(i[0,1])
    theta2_lst.append(i[0,2])

# number of iterations
IterNum = list(range(0,Maxiternum))

ax.plot(IterNum, theta0_lst, 'r-', label=r'$\theta_0$')
ax.plot(IterNum, theta1_lst, 'b-', label=r'$\theta_1$')
ax.plot(IterNum, theta2_lst, 'g-', label=r'$\theta_2$')
ax.set(xlabel='Number of Iterations', ylabel=r'$\theta$', title=r'Relation between Number of Iterations and $\theta$')
plt.legend()
  • As the plot shows, $\theta_0$ first increases and then decreases, while $\theta_1$ and $\theta_2$ increase throughout; eventually all three $\theta$ values level off at constant values.

⚠️ The author did not study a related field and is self-taught after switching; this post is shared purely for discussion and exchange. If you spot any mistakes, please point them out in the comments, thank you 😊.
⚠️ Reference blog, the Stanford Machine Learning notes: https://yoyoyohamapi.gitbooks.io/mit-ml/content/
⚠️ Andrew Ng's Machine Learning course assignments; the source data can be downloaded from: https://github.com/nsoojin/coursera-ml-py.