邏輯迴歸算法:簡單來講,就是在線性迴歸的基礎上加入了 Sigmoid 函數。
import os

import numpy as np

try:
    import matplotlib.pyplot as plt
    from matplotlib.font_manager import FontProperties
except ImportError:
    # Plotting is optional: loading and training only need NumPy.  The
    # plotting helpers raise a clear ImportError when they are called.
    plt = None
    FontProperties = None


# Font file used for the CJK plot labels (only present on Windows).
_SIMSUN_PATH = r"C:\Windows\Fonts\simsun.ttc"


def _require_matplotlib():
    """Raise ImportError if matplotlib could not be imported."""
    if plt is None:
        raise ImportError("matplotlib is required for the plotting helpers")


def _label_font():
    """Return the label font, falling back to the default family.

    The SimSun file only exists on Windows; pointing FontProperties at a
    missing file would make rendering fail, so fall back to the default
    font when the file is absent.
    """
    if os.path.isfile(_SIMSUN_PATH):
        return FontProperties(fname=_SIMSUN_PATH, size=14)
    return FontProperties(size=14)


def _split_by_label(dataMat, labelMat):
    """Split the two feature columns into label == 1 and all other samples.

    Returns:
        Four lists ``(pos_x1, pos_x2, neg_x1, neg_x2)``.
    """
    pos_x1, pos_x2, neg_x1, neg_x2 = [], [], [], []
    for row, label in zip(dataMat, labelMat):
        if label == 1:
            pos_x1.append(row[1])
            pos_x2.append(row[2])
        else:
            neg_x1.append(row[1])
            neg_x2.append(row[2])
    return pos_x1, pos_x2, neg_x1, neg_x2


def loadData(filename):
    """Load a whitespace-separated data set.

    Each non-blank line must hold two float features followed by an integer
    label.  A constant ``1.0`` is prepended to every sample as the bias term.

    Args:
        filename: Path of the text file to read.

    Returns:
        ``(dataMat, labelMat)`` where ``dataMat`` is a list of
        ``[1.0, x1, x2]`` rows and ``labelMat`` is a list of int labels.
    """
    dataMat = []
    labelMat = []
    with open(filename) as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            dataMat.append([1.0, float(fields[0]), float(fields[1])])
            labelMat.append(int(fields[2]))
    return dataMat, labelMat


def plot(dataMat, labelMat):
    """Show a scatter plot of the data set, coloured by label.

    Samples with label 1 are drawn as red squares, all others as green dots.
    """
    _require_matplotlib()
    pos_x1, pos_x2, neg_x1, neg_x2 = _split_by_label(dataMat, labelMat)
    plt.scatter(pos_x1, pos_x2, c='red', s=20, alpha=0.5, marker='s')
    plt.scatter(neg_x1, neg_x2, c='green', s=20, alpha=0.5)
    plt.title('DataSet')
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.show()


def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of *x*."""
    return 1 / (1 + np.exp(-x))


def stogradAscent(dataMat, labelMat, num_iter=150):
    """Train logistic regression with stochastic gradient ascent.

    Every epoch visits each sample exactly once in random order, using a
    step size that decays with the epoch and step counters.

    Args:
        dataMat: Sequence of feature rows (m samples, n features).
        labelMat: Sequence of m labels (0 or 1).
        num_iter: Number of epochs over the data set.

    Returns:
        ``(weights, weights_array)``: the final weights with shape ``(n,)``
        and the weights after every single update with shape
        ``(num_iter * m, n)``.

    Raises:
        ValueError: If ``dataMat`` is not two-dimensional.
    """
    dataMat = np.asarray(dataMat, dtype=float)
    if dataMat.ndim != 2:
        raise ValueError("dataMat must be a 2-D sequence of feature rows")
    m, n = dataMat.shape
    weights = np.ones(n)
    # Collect snapshots in a list; np.append would copy the whole history
    # on every update.
    history = []
    for j in range(num_iter):
        remaining = list(range(m))
        for i in range(m):
            # Decaying step size; the constant keeps it strictly positive.
            alpha = 1 / (i + j + 1.0) + 0.001
            pick = int(np.random.uniform(0, len(remaining)))
            # Map the position in the shrinking list back to the real sample
            # index, so every sample is used exactly once per epoch.
            sample = remaining.pop(pick)
            error = labelMat[sample] - sigmoid(np.dot(dataMat[sample], weights))
            weights = weights + alpha * dataMat[sample] * error
            history.append(weights)
    weights_array = np.array(history, dtype=float).reshape(num_iter * m, n)
    return weights, weights_array


def gradAscent(dataMat, labelMat, alpha=0.001, maxiter=500):
    """Train logistic regression with batch gradient ascent.

    Args:
        dataMat: Sequence of feature rows (m samples, n features).
        labelMat: Sequence of m labels (0 or 1).
        alpha: Constant step size.
        maxiter: Number of full-batch updates.

    Returns:
        ``(weights, weights_array)``: the final weights as an ``(n, 1)``
        array and the weights after every update with shape
        ``(maxiter, n)``.
    """
    features = np.asarray(dataMat, dtype=float)
    labels = np.asarray(labelMat, dtype=float).reshape(-1, 1)
    n = features.shape[1]
    weights = np.ones((n, 1))
    history = []
    for _ in range(maxiter):
        error = labels - sigmoid(features @ weights)  # shape (m, 1)
        weights = weights + alpha * (features.T @ error)
        history.append(weights.ravel())
    weights_array = np.array(history, dtype=float).reshape(maxiter, n)
    return weights, weights_array


def plotWeights(weights_array1, weights_array2):
    """Plot how each weight evolves for two training runs, side by side.

    The figure is a 3x2 grid: one row per weight (w0, w1, w2); the left
    column shows ``weights_array1`` and the right column ``weights_array2``.

    Args:
        weights_array1: Weight history with at least three columns, shown
            under the batch gradient ascent title.
        weights_array2: Weight history with at least three columns, shown
            under the stochastic gradient ascent title.
    """
    _require_matplotlib()
    font = _label_font()
    fig, axs = plt.subplots(nrows=3, ncols=2, sharex=False, sharey=False,
                            figsize=(20, 10))
    columns = (
        (weights_array1, u'梯度上升算法:迴歸係數與迭代次數關係'),
        (weights_array2, u'改進的隨機梯度上升算法:迴歸係數與迭代次數關係'),
    )
    for col, (history, title) in enumerate(columns):
        steps = np.arange(0, len(history), 1)
        for row in range(3):
            ax = axs[row][col]
            ax.plot(steps, history[:, row])
            # Text properties are lowercase; mixed-case keyword names are
            # rejected by current Matplotlib.
            ylabel_text = ax.set_ylabel(u'W%d' % row, fontproperties=font)
            plt.setp(ylabel_text, size=20, weight='bold', color='black')
        title_text = axs[0][col].set_title(title, fontproperties=font)
        plt.setp(title_text, size=20, weight='bold', color='black')
        xlabel_text = axs[2][col].set_xlabel(u'迭代次數', fontproperties=font)
        plt.setp(xlabel_text, size=20, weight='bold', color='black')
    plt.show()


def plotBestFit(weights, dataMat, labelMat):
    """Show the data set together with the learned decision boundary.

    The boundary is the line ``w0 + w1 * x1 + w2 * x2 = 0`` drawn for
    ``x1`` in ``[-3, 3)``.

    Args:
        weights: The three learned weights; either a flat ``(3,)`` array or
            a ``(3, 1)`` column is accepted.
        dataMat: Sequence of ``[1, x1, x2]`` rows.
        labelMat: Sequence of labels matching ``dataMat``.
    """
    _require_matplotlib()
    pos_x1, pos_x2, neg_x1, neg_x2 = _split_by_label(dataMat, labelMat)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(pos_x1, pos_x2, c='red', s=20, alpha=0.5, marker='s')
    ax.scatter(neg_x1, neg_x2, c='green', s=20, alpha=0.5)
    # Flatten so both trainers' return shapes are handled the same way.
    w0, w1, w2 = np.asarray(weights, dtype=float).ravel()[:3]
    slope = -w1 / w2
    intercept = -w0 / w2
    x = np.arange(-3, 3, 0.1)
    y = slope * x + intercept
    ax.plot(x, y)
    plt.show()


def main():
    """Train with both optimisers on ``testSet.txt`` and plot the results."""
    dataMat, labelMat = loadData('testSet.txt')
    weights, weights_array1 = stogradAscent(dataMat, labelMat)
    plotBestFit(weights, dataMat, labelMat)
    print(weights)
    _, weights_array2 = gradAscent(dataMat, labelMat)
    # Left column: batch gradient ascent; right column: stochastic version.
    plotWeights(weights_array2, weights_array1)


if __name__ == '__main__':
    main()