[TOC]
```python
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from matplotlib.font_manager import FontProperties
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
%matplotlib inline
font = FontProperties(fname='/Library/Fonts/Heiti.ttc')
```
```python
# use only petal length and petal width as features
iris_data = datasets.load_iris()
X = iris_data.data[:, [2, 3]]
y = iris_data.target
label_list = ['Iris setosa', 'Iris versicolor', 'Iris virginica']
```
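As a quick sanity check of what was just loaded, a minimal sketch that prints the feature matrix shape and the per-class sample counts (the `np.bincount` call is an addition for illustration, not part of the original code):

```python
# inspect the selected features and the class balance
print(X.shape)          # (150, 2): 150 samples, 2 features (petal length, petal width)
print(np.bincount(y))   # [50 50 50]: 50 samples per class
```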
```python
def plot_decision_regions(X, y, classifier):
    marker_list = ['o', 'x', 's']
    color_list = ['r', 'b', 'g']
    cmap = ListedColormap(color_list[:len(np.unique(y))])

    # build a grid covering the feature space, padded by 1 on each side
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    t1 = np.linspace(x1_min, x1_max, 666)
    t2 = np.linspace(x2_min, x2_max, 666)
    x1, x2 = np.meshgrid(t1, t2)

    # predict on every grid point and shade the decision regions
    y_hat = classifier.predict(np.array([x1.ravel(), x2.ravel()]).T)
    y_hat = y_hat.reshape(x1.shape)
    plt.contourf(x1, x2, y_hat, alpha=0.2, cmap=cmap)
    plt.xlim(x1_min, x1_max)
    plt.ylim(x2_min, x2_max)

    # scatter the training samples, one marker/color per class
    for ind, clas in enumerate(np.unique(y)):
        plt.scatter(X[y == clas, 0], X[y == clas, 1], alpha=0.8, s=50,
                    c=color_list[ind], marker=marker_list[ind],
                    label=label_list[clas])
```
```python
# C is the inverse of the regularization parameter λ, so decreasing C increases the regularization strength
# lbfgs optimizes the parameters with a quasi-Newton method
# classification is done One-vs-Rest (OvR)
lr = LogisticRegression(C=100, random_state=1, solver='lbfgs', multi_class='ovr')
lr.fit(X, y)
```
```
LogisticRegression(C=100, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, max_iter=100, multi_class='ovr',
                   n_jobs=None, penalty='l2', random_state=1, solver='lbfgs',
                   tol=0.0001, verbose=0, warm_start=False)
```
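Under OvR, the fitted model holds one weight vector per class. A minimal sketch showing the coefficient layout and the per-class probabilities (the choice of the first sample is arbitrary, not from the original):

```python
# one row of coefficients per class (3 classes x 2 features) under OvR
print(lr.coef_.shape)        # (3, 2)

# per-class probabilities for the first sample; they sum to 1
print(lr.predict_proba(X[:1]))
print(lr.predict(X[:1]))     # the class with the highest probability
```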
```python
weights, params = [], []
for c in np.arange(-5, 5):
    lr = LogisticRegression(C=10.**c, random_state=1,
                            solver='lbfgs', multi_class='ovr')
    lr.fit(X, y)
    # lr.coef_[1] is the weight vector of class 1 (Iris versicolor)
    weights.append(lr.coef_[1])
    params.append(10.**c)

# convert weights to a numpy array: one row per C, one column per feature
weights = np.array(weights)
'''
params:
[1e-05, 0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0, 10000.0]
'''
'''
weights:
[[ 2.50572107e-04  6.31528229e-05]
 [ 2.46565843e-03  6.15303747e-04]
 [ 2.13003731e-02  4.74899392e-03]
 [ 9.09176960e-02 -1.80703318e-03]
 [ 1.19168871e-01 -2.19313511e-01]
 [ 8.35644722e-02 -9.08030470e-01]
 [ 1.60682631e-01 -2.15860167e+00]
 [ 5.13026897e-01 -2.99137299e+00]
 [ 1.14643413e+00 -2.79518356e+00]
 [ 1.90317264e+00 -2.26818639e+00]]
'''
plt.plot(params, weights[:, 0], linestyle='--', c='r', label='petal length (cm)')
plt.plot(params, weights[:, 1], c='g', label='petal width (cm)')
plt.xlabel('C')
# put the x axis on a log scale
plt.xscale('log')
plt.ylabel('weight coefficient', fontproperties=font)
plt.legend(prop=font)
plt.show()
```
![png](http://www.chenyoude.com/ml/02-15 Logistic迴歸(鳶尾花分類)_10_0.png?x-oss-process=style/watermark)
The figure above shows logistic regression models fitted with ten different values of the inverse regularization parameter C, collecting only the weight coefficients of class 1 (Iris versicolor). Because the data were not preprocessed, the curves are not especially pretty, but the overall trend is still clear: decreasing C increases the regularization strength, and below roughly $10^{-3}$ the weight coefficients shrink toward zero.
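To make the relation between C and λ concrete, sklearn's documented L2-penalized logistic objective (binary case, labels $y_i \in \{-1, +1\}$, intercept $c$) can be written as:

$$\min_{w,\,c}\; \frac{1}{2} w^{\top} w + C \sum_{i=1}^{n} \log\!\left(1 + \exp\!\left(-y_i \left(x_i^{\top} w + c\right)\right)\right)$$

Dividing through by C shows the penalty enters with weight $\lambda = 1/C$, so a smaller C penalizes large weights more heavily and drives the coefficients toward zero.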
```python
# note: lr here is the last model fitted in the loop above, i.e. C=10.**4
plot_decision_regions(X, y, classifier=lr)
plt.xlabel('petal length (cm)', fontproperties=font)
plt.ylabel('petal width (cm)', fontproperties=font)
plt.legend(prop=font)
plt.show()
```
![png](http://www.chenyoude.com/ml/02-15 Logistic迴歸(鳶尾花分類)_13_0.png?x-oss-process=style/watermark)
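As a short follow-up not in the original, the fit can be summarized with the mean accuracy on the training data; note this is in-sample accuracy, since no train/test split was used:

```python
# mean accuracy on the training data (no held-out test set was used)
print(lr.score(X, y))
```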