02-34 非線性支持向量機(鳶尾花分類)+自定義數據分類

時間 2019-11-10

標籤非線性支持向量鳶尾花分類自定義數據简体版

原文原文鏈接

目錄

更新、更全的《機器學習》的更新網站，更有python、go、數據結構與算法、爬蟲、人工智能教學等着你：http://www.javashuo.com/article/p-vozphyqp-cm.html

非線性支持向量機(鳶尾花分類)+自定義隨機數據

1、導入模塊

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from matplotlib.font_manager import FontProperties
from sklearn import datasets
from sklearn.svm import SVC
# IPython/Jupyter magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Font object passed to the titles, axis labels and legends below, which
# contain Chinese text.
# NOTE(review): the path looks macOS-specific — confirm it exists on the
# machine running this notebook.
font = FontProperties(fname='/Library/Fonts/Heiti.ttc')

2、自定義數據分類

2.1 自定義數據

# Fix the seed so the same pseudo-random samples are produced on every run.
np.random.seed(1)
# 100 samples with 2 features each, drawn from the standard normal distribution.
X_custom = np.random.randn(100, 2)
# The sign of each feature decides which quadrant a sample falls in.
first_is_positive = X_custom[:, 0] > 0
second_is_positive = X_custom[:, 1] > 0
# XOR is True when exactly one feature is positive (quadrants II and IV) and
# False when both signs agree (quadrants I and III) -- the classic layout that
# a single straight line cannot separate.
# Encode True as class 1 and False as class -1.
y_custom = np.where(np.logical_xor(first_is_positive, second_is_positive), 1, -1)

2.2 構建決策邊界

def plot_decision_regions(X, y, classifier=None):
    """Scatter-plot 2-D samples and shade a classifier's decision regions.

    Everything is drawn through ``plt`` onto the current matplotlib axes;
    the function neither creates a figure nor calls ``plt.show()``.

    Args:
        X: 2-D array of samples. Column 0 is plotted on the x axis and
            column 1 on the y axis.
        y: 1-D array of class labels, one per row of ``X``. At most three
            distinct labels are supported, because only three markers and
            colours are defined.
        classifier: Object exposing ``predict``; it is called on a
            666 x 666 grid of points covering the data range. When ``None``
            (the default) the shaded regions are skipped and only the
            samples are drawn, instead of failing on ``None.predict``.
    """
    marker_list = ['o', 'x', 's']
    color_list = ['r', 'b', 'g']
    # Computed once; reused for the colormap size and the scatter loop.
    classes = np.unique(y)

    # Pad the plotted area by one unit on every side of the data.
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1

    if classifier is not None:
        cmap = ListedColormap(color_list[:len(classes)])
        t1 = np.linspace(x1_min, x1_max, 666)
        t2 = np.linspace(x2_min, x2_max, 666)
        x1, x2 = np.meshgrid(t1, t2)
        # Predict a label for every grid point, then restore the grid shape
        # so the predictions can be drawn as filled contours.
        y_hat = classifier.predict(np.array([x1.ravel(), x2.ravel()]).T)
        y_hat = y_hat.reshape(x1.shape)
        plt.contourf(x1, x2, y_hat, alpha=0.2, cmap=cmap)

    plt.xlim(x1_min, x1_max)
    plt.ylim(x2_min, x2_max)

    # One scatter call per class so each gets its own colour, marker and
    # legend entry (the raw class label).
    for ind, clas in enumerate(classes):
        plt.scatter(X[y == clas, 0], X[y == clas, 1], alpha=0.8, s=50,
                    c=color_list[ind], marker=marker_list[ind], label=clas)

2.3 訓練模型

# 'rbf' selects the Gaussian (radial basis function) kernel.
svm = SVC(kernel='rbf', gamma='auto', C=1, random_state=1)
# Fit on the XOR-labelled custom samples generated above.
svm.fit(X_custom, y_custom)

SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=1, shrinking=True,
  tol=0.001, verbose=False)

2.4 可視化

# Shade the fitted SVM's decision regions and overlay the custom samples.
plot_decision_regions(X_custom, y_custom, classifier=svm)
# The title is Chinese text, so it is drawn with the `font` object.
plt.title('非線性支持向量機(自定義數據分類)',fontproperties=font)
# Legend entries come from the `label=` values set inside plot_decision_regions.
plt.legend()
plt.show()

3、鳶尾花分類

3.1 獲取數據

# Load the iris dataset through sklearn.datasets.
iris_data = datasets.load_iris()
# Keep only feature columns 2 and 3; the plots below label them as petal
# length and petal width (cm).
X = iris_data.data[:, [2, 3]]
# Class labels, one per sample.
y = iris_data.target
# Chinese display names for the classes; plot_decision_regions indexes this
# list by class label to build the legend.
label_list = ['山鳶尾', '雜色鳶尾', '維吉尼亞鳶尾']

3.2 構建決策邊界

def plot_decision_regions(X, y, classifier=None):
    """Scatter-plot 2-D samples and shade a classifier's decision regions.

    Everything is drawn through ``plt`` onto the current matplotlib axes;
    the function neither creates a figure nor calls ``plt.show()``.
    Legend text is read from the module-level ``label_list``, indexed by
    class label, so the values in ``y`` must be valid indices into it.

    Args:
        X: 2-D array of samples. Column 0 is plotted on the x axis and
            column 1 on the y axis.
        y: 1-D array of class labels, one per row of ``X``. At most three
            distinct labels are supported, because only three markers and
            colours are defined.
        classifier: Object exposing ``predict``; it is called on a
            666 x 666 grid of points covering the data range. When ``None``
            (the default) the shaded regions are skipped and only the
            samples are drawn, instead of failing on ``None.predict``.
    """
    marker_list = ['o', 'x', 's']
    color_list = ['r', 'b', 'g']
    # Computed once; reused for the colormap size and the scatter loop.
    classes = np.unique(y)

    # Pad the plotted area by one unit on every side of the data.
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1

    if classifier is not None:
        cmap = ListedColormap(color_list[:len(classes)])
        t1 = np.linspace(x1_min, x1_max, 666)
        t2 = np.linspace(x2_min, x2_max, 666)
        x1, x2 = np.meshgrid(t1, t2)
        # Predict a label for every grid point, then restore the grid shape
        # so the predictions can be drawn as filled contours.
        y_hat = classifier.predict(np.array([x1.ravel(), x2.ravel()]).T)
        y_hat = y_hat.reshape(x1.shape)
        plt.contourf(x1, x2, y_hat, alpha=0.2, cmap=cmap)

    plt.xlim(x1_min, x1_max)
    plt.ylim(x2_min, x2_max)

    # One scatter call per class so each gets its own colour, marker and
    # legend entry (the display name looked up in label_list).
    for ind, clas in enumerate(classes):
        plt.scatter(X[y == clas, 0], X[y == clas, 1], alpha=0.8, s=50,
                    c=color_list[ind], marker=marker_list[ind], label=label_list[clas])

3.3 訓練模型(gamma=1)

# 'rbf' selects the Gaussian (radial basis function) kernel; gamma is set to 1 here.
svm = SVC(kernel='rbf', gamma=1, C=1, random_state=1)
# Fit on the two selected iris feature columns.
svm.fit(X, y)

SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=1, kernel='rbf',
  max_iter=-1, probability=False, random_state=1, shrinking=True,
  tol=0.001, verbose=False)

3.4 可視化

# Shade the decision regions of the gamma=1 model and overlay the iris samples.
plot_decision_regions(X, y, classifier=svm)
# Axis labels are Chinese for "petal length (cm)" / "petal width (cm)",
# drawn with the `font` object.
plt.xlabel('花瓣長度（cm）', fontproperties=font)
plt.ylabel('花瓣寬度（cm）', fontproperties=font)
plt.title('非線性支持向量機代碼(鳶尾花分類, gamma=1)', fontproperties=font, fontsize=20)
# Pass the same font to the legend, whose entries are the Chinese class names.
plt.legend(prop=font)
plt.show()

3.5 訓練模型(gamma=100)

# 'rbf' selects the Gaussian (radial basis function) kernel; gamma is raised
# to 100 here, with every other setting the same as the gamma=1 model.
svm = SVC(kernel='rbf', gamma=100, C=1, random_state=1)
# Refit on the same two iris feature columns.
svm.fit(X, y)

SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=100, kernel='rbf',
  max_iter=-1, probability=False, random_state=1, shrinking=True,
  tol=0.001, verbose=False)

3.6 可視化

# Shade the decision regions of the gamma=100 model and overlay the iris samples.
plot_decision_regions(X, y, classifier=svm)
# Axis labels are Chinese for "petal length (cm)" / "petal width (cm)",
# drawn with the `font` object.
plt.xlabel('花瓣長度（cm）', fontproperties=font)
plt.ylabel('花瓣寬度（cm）', fontproperties=font)
plt.title('非線性支持向量機代碼(鳶尾花分類, gamma=100)', fontproperties=font, fontsize=20)
# Pass the same font to the legend, whose entries are the Chinese class names.
plt.legend(prop=font)
plt.show()