機器學習實戰-邏輯迴歸

時間 2019-11-09
標籤機器學習實戰邏輯迴歸简体版
原文原文鏈接
邏輯迴歸：簡單來講，就是在線性迴歸的基礎上加入了 Sigmoid 函數，把線性輸出映射到 (0, 1) 區間，作爲樣本屬於正類的機率。
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
# Load the data set
def loadData(filename):
    """Read a whitespace-delimited data set from a text file.

    Every non-blank line is expected to hold two float features followed by
    an integer class label. A constant ``1`` is placed in front of the two
    features so that the first weight can act as the intercept term.
    Blank or whitespace-only lines (for example a trailing newline at the
    end of the file) are skipped instead of raising ``IndexError``.

    Args:
        filename: Path of the text file to read.

    Returns:
        A ``(dataMat, labelMat)`` tuple: ``dataMat`` is a list of
        ``[1, x1, x2]`` rows and ``labelMat`` is the list of int labels,
        in file order.

    Raises:
        IndexError: If a non-blank line has fewer than three fields.
        ValueError: If a field cannot be parsed as a number.
    """
    dataMat = []
    labelMat = []
    with open(filename) as f:
        # Iterate the file lazily; no need to materialise every line first.
        for line in f:
            fields = line.split()
            if not fields:
                continue
            dataMat.append([1, float(fields[0]), float(fields[1])])
            labelMat.append(int(fields[2]))
    return dataMat, labelMat

# Plot the data set
def plot(dataMat,labelMat):
    """Show a scatter plot of the samples, coloured by class label.

    Columns 1 and 2 of each row in ``dataMat`` are used as the x and y
    coordinates. Samples whose label equals 1 are drawn as red squares;
    all other samples are drawn in green with the default marker.
    """
    label_one_x, label_one_y = [], []
    other_x, other_y = [], []
    for idx, label in enumerate(labelMat):
        if label == 1:
            xs, ys = label_one_x, label_one_y
        else:
            xs, ys = other_x, other_y
        xs.append(dataMat[idx][1])
        ys.append(dataMat[idx][2])

    plt.scatter(label_one_x, label_one_y, c='red', s=20, alpha=0.5, marker='s')
    plt.scatter(other_x, other_y, c='green', s=20, alpha=0.5)
    plt.title('DataSet')
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.show()

# Train logistic regression with stochastic gradient ascent
def stogradAscent(dataMat,labelMat,num_iter=150):
    """Fit logistic-regression weights by stochastic gradient ascent.

    Each pass visits every sample exactly once in random order (sampling
    without replacement) and updates the weights from that single sample.
    The step size shrinks as ``1 / (i + j + 1) + 0.001`` so it decays over
    time but never reaches zero.

    Args:
        dataMat: Sequence of m feature rows, each of length n.
        labelMat: Sequence of m class labels, indexable by row number.
        num_iter: Number of full passes over the data.

    Returns:
        A ``(weights, weights_array)`` tuple: ``weights`` is the final 1-D
        array of n weights, and ``weights_array`` has shape
        ``(num_iter * m, n)`` holding the weights after every update.
    """
    dataMat = np.array(dataMat)
    m, n = np.shape(dataMat)    # m samples, n features per sample
    weights = np.ones(n)        # parameters to optimise, initialised to 1
    # Collect snapshots in a list; np.append inside the loop would copy the
    # whole history on every update.
    history = []
    for j in range(num_iter):
        dataIndex = list(range(m))  # rows not yet visited in this pass
        for i in range(m):
            alpha = 1 / (i + j + 1.0) + 0.001
            pick = int(np.random.uniform(0, len(dataIndex)))
            # Map the draw through dataIndex: indexing the data with the raw
            # draw would ignore which rows were already used in this pass.
            sample = dataIndex[pick]
            error = labelMat[sample] - sigmoid(np.dot(dataMat[sample], weights))  # scalar
            weights = weights + alpha * dataMat[sample] * error
            history.append(weights)  # weights is rebound above, so no aliasing
            del dataIndex[pick]
    weights_array = np.array(history, dtype=float).reshape(num_iter * m, n)
    return weights, weights_array

# Train logistic regression with batch gradient ascent
def gradAscent(dataMat,labelMat,alpha=0.001,maxiter=500):
    """Fit logistic-regression weights by batch gradient ascent.

    Every iteration uses all samples at once:
    ``weights += alpha * X.T @ (labels - sigmoid(X @ weights))``.

    Args:
        dataMat: Sequence of m feature rows, each of length n.
        labelMat: Sequence of m class labels.
        alpha: Step size applied to each update.
        maxiter: Number of update iterations to run.

    Returns:
        A ``(weights, weights_array)`` tuple: ``weights`` is the final
        ``(n, 1)`` array of weights, and ``weights_array`` has shape
        ``(maxiter, n)`` holding the weights after every iteration.
    """
    # Plain ndarrays with np.dot replace the discouraged np.matrix type.
    features = np.asarray(dataMat, dtype=float)
    labels = np.asarray(labelMat, dtype=float).reshape(-1, 1)   # column vector, m x 1
    n = features.shape[1]           # number of features per sample
    weights = np.ones((n, 1))       # parameters to optimise, initialised to 1
    # Collect snapshots in a list; np.append inside the loop would copy the
    # whole history on every iteration.
    history = []
    for _ in range(maxiter):
        error = labels - sigmoid(np.dot(features, weights))     # m x 1
        weights = weights + alpha * np.dot(features.T, error)
        history.append(weights.ravel())
    weights_array = np.array(history, dtype=float).reshape(maxiter, n)
    return weights, weights_array

def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``.

    The value is computed as ``exp(-logaddexp(0, -x))``, which is
    algebraically the same expression but never evaluates ``exp`` on a large
    positive argument, so strongly negative inputs do not trigger an
    overflow warning.

    Args:
        x: A scalar or NumPy array of real values.

    Returns:
        The element-wise logistic value, with the same shape as ``x``.
    """
    # logaddexp(0, -x) == log(1 + exp(-x)), evaluated without overflow.
    return np.exp(-np.logaddexp(0, -x))

def _plot_weight_history(axes_column, weights_array, title, font):
    """Plot each weight's trajectory down one column of the subplot grid."""
    iterations = np.arange(0, len(weights_array), 1)
    last_row = len(axes_column) - 1
    for row, ax in enumerate(axes_column):
        ax.plot(iterations, weights_array[:, row])
        # Label each row with its own weight index (W0, W1, W2).
        texts = [ax.set_ylabel('W%d' % row, fontproperties=font)]
        if row == 0:
            texts.append(ax.set_title(title, fontproperties=font))
        if row == last_row:
            texts.append(ax.set_xlabel('迭代次數', fontproperties=font))
        plt.setp(texts, size=20, weight='bold', color='black')


def plotWeights(weights_array1,weights_array2):
    """Plot how each weight evolves over the updates of two training runs.

    A 3x2 grid is drawn on a 20x10 figure: the left column shows
    ``weights_array1`` (titled as the batch gradient-ascent run) and the
    right column shows ``weights_array2`` (titled as the improved stochastic
    run). Row k plots column k of the corresponding history array.

    Args:
        weights_array1: 2-D array with at least three columns, one row per
            recorded update.
        weights_array2: Same layout as ``weights_array1``.
    """
    import os

    # The SimSun font is only present on Windows; fall back to Matplotlib's
    # default font elsewhere so the plot still renders (CJK glyphs may then
    # be missing, depending on the default font).
    font_path = r"C:\Windows\Fonts\simsun.ttc"
    if os.path.exists(font_path):
        font = FontProperties(fname=font_path, size=14)
    else:
        font = FontProperties(size=14)

    # 3 rows x 2 columns, no shared axes; axs[r][c] is row r, column c.
    fig, axs = plt.subplots(nrows=3, ncols=2, sharex=False, sharey=False, figsize=(20,10))
    _plot_weight_history(axs[:, 0], weights_array1,
                         '梯度上升算法：迴歸係數與迭代次數關係', font)
    _plot_weight_history(axs[:, 1], weights_array2,
                         '改進的隨機梯度上升算法：迴歸係數與迭代次數關係', font)

    plt.show()
def plotBestFit(weights,dataMat,labelMat):
    """Show the samples together with the line defined by ``weights``.

    Samples whose label equals 1 are drawn as red squares and all others in
    green. The line drawn is ``w0 + w1*x + w2*y = 0`` solved for ``y``,
    evaluated for x from -3 up to (but excluding) 3 in steps of 0.1.

    Args:
        weights: Indexable holding ``w0, w1, w2`` at positions 0, 1, 2.
        dataMat: Sequence of rows; columns 1 and 2 are the plotted x and y.
        labelMat: Sequence of class labels, one per row of ``dataMat``.
    """
    label_one = ([], [])
    others = ([], [])
    for idx, label in enumerate(labelMat):
        bucket = label_one if label == 1 else others
        bucket[0].append(dataMat[idx][1])
        bucket[1].append(dataMat[idx][2])

    axes = plt.figure().add_subplot(111)
    axes.scatter(label_one[0], label_one[1], c='red', s=20, alpha=0.5, marker='s')
    axes.scatter(others[0], others[1], c='green', s=20, alpha=0.5)

    # y = -(w1 / w2) * x - w0 / w2
    slope = - weights[1] / weights[2]
    intercept = -weights[0] / weights[2]
    xs = np.arange(-3, 3, 0.1)
    axes.plot(xs, slope * xs + intercept)
    plt.show()


if __name__=='__main__':
    # Read the samples and their labels from the data file.
    features, labels = loadData('testSet.txt')

    # Stochastic gradient ascent: fit, draw the resulting line over the
    # samples, then print the final weights.
    sgd_weights, sgd_history = stogradAscent(features, labels)
    plotBestFit(sgd_weights, features, labels)
    print(sgd_weights)

    # Batch gradient ascent: fit, then compare both weight histories
    # (batch history is passed first, stochastic history second).
    batch_weights, batch_history = gradAscent(features, labels)
    plotWeights(batch_history, sgd_history)
相關標籤/搜索
每日一句
每一个你不满意的现在，都有一个你没有努力的曾经。