Soft Margin(邊緣) Classification
import numpy as np
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
# Binary classification on two iris feature columns: is the sample class 2?
iris = datasets.load_iris()
X = iris['data'][:, (2, 3)]  # keep feature columns 2 and 3 only
y = (iris['target'] == 2).astype(np.float64)  # 1.0 where target == 2, else 0.0

# Standardize the features, then train a linear SVM with hinge loss.
# The steps are passed as a list, which is the documented Pipeline input type.
svm_clf = Pipeline([
    ('scaler', StandardScaler()),
    ('linear_svc', LinearSVC(C=1, loss='hinge')),
])
svm_clf.fit(X, y)
svm_clf.predict([[5.5, 1.7]])  # yields a class label, not a probability
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier
# Two further linear classifiers trained on the same X, y for comparison:
# a kernel SVC restricted to a linear kernel, and an SGD classifier with
# hinge loss.
svc_clf = Pipeline([
    ('scaler', StandardScaler()),
    ('svc', SVC(kernel='linear', C=1)),
])

m = len(X)
C = 1
sgd_clf = Pipeline([
    ('scaler', StandardScaler()),
    # The regularization strength is derived from the sample count and C.
    ('sgd', SGDClassifier(loss='hinge', alpha=1/(m*C))),
])

svc_clf.fit(X, y)
sgd_clf.fit(X, y)
print(svc_clf.predict([[5.5, 1.7]]), sgd_clf.predict([[5.5, 1.7]]))
from sklearn.datasets import make_moons
# Noisy two-moons data; random_state fixes the sample for reproducibility.
X, y = make_moons(n_samples=100, noise=0.15, random_state=42)
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures

# Expand to degree-3 polynomial features, standardize, then fit a linear SVM.
polynomial_svm_clf = Pipeline([
    ('poly_features', PolynomialFeatures(degree=3)),
    ('scaler', StandardScaler()),
    ('svm_clf', LinearSVC(C=10, loss='hinge')),
])
polynomial_svm_clf.fit(X, y)
import matplotlib.pyplot as plt
def plot_predictions(clf, axes):
    """Shade a classifier's predictions and decision values over a 2-D grid.

    Args:
        clf: Fitted estimator exposing ``predict`` and ``decision_function``
            for two-column inputs.
        axes: Sequence ``[x0_min, x0_max, x1_min, x1_max]`` bounding the grid.
    """
    # Build a 100 x 100 evaluation grid covering the requested rectangle.
    x0s = np.linspace(axes[0], axes[1], 100)
    x1s = np.linspace(axes[2], axes[3], 100)
    x0, x1 = np.meshgrid(x0s, x1s)
    X = np.c_[x0.ravel(), x1.ravel()]
    y_pred = clf.predict(X).reshape(x0.shape)
    y_decision = clf.decision_function(X).reshape(x0.shape)
    # Predicted classes first, then the raw decision values, both translucent
    # so that markers plotted afterwards remain visible.
    # NOTE(review): an argument between the data and ``alpha`` was lost in
    # this file (probably a colormap); the default colormap is used — confirm.
    plt.contourf(x0, x1, y_pred, alpha=0.2)
    plt.contourf(x0, x1, y_decision, alpha=0.1)
def plot_datasets(X, y, axes):
    """Scatter a two-class, two-feature dataset on the current axes.

    Args:
        X: Array whose columns 0 and 1 are plotted as x and y coordinates.
        y: Label array aligned with ``X``; 0 is drawn as blue squares and
            1 as green triangles.
        axes: Sequence ``[x_min, x_max, y_min, y_max]`` applied as the view
            limits.
    """
    plt.plot(X[:, 0][y == 0], X[:, 1][y == 0], 'bs')
    plt.plot(X[:, 0][y == 1], X[:, 1][y == 1], 'g^')
    # Honour the requested view limits; previously ``axes`` was ignored.
    plt.axis(axes)
    plt.grid(True, which='both')
    plt.xlabel(r'$x_1$', fontsize=10)
    plt.ylabel(r'$x_2$', fontsize=10, rotation=0)
# Draw the fitted polynomial classifier's regions, then overlay the samples,
# using the same bounding box for both calls.
moons_axes = [-1.5, 2.5, -1, 1.5]
plot_predictions(polynomial_svm_clf, moons_axes)
plot_datasets(X, y, moons_axes)
Adding Similarity Feature
Gauss RBF Kernel
# Standardize the features, then fit an SVC with a Gaussian RBF kernel.
rbf_kernel_svm_clf = Pipeline([
    ('scaler', StandardScaler()),
    ('svm_clf', SVC(kernel='rbf', gamma=5, C=0.001)),
])
rbf_kernel_svm_clf.fit(X, y)
%matplotlib inline
from sklearn.svm import SVC
# Train one RBF-kernel SVC per (gamma, C) combination: low/high gamma
# crossed with low/high C.
gamma1, gamma2 = 0.1, 5
C1, C2 = 0.001, 1000
hyperparams = (gamma1, C1), (gamma1, C2), (gamma2, C1), (gamma2, C2)

svm_clfs = []
for gamma, C in hyperparams:
    rbf_kernel_svm_clf = Pipeline([
        ("scaler", StandardScaler()),
        ("svm_clf", SVC(kernel="rbf", gamma=gamma, C=C)),
    ])
    rbf_kernel_svm_clf.fit(X, y)
    # Keep every fitted model, in the same order as ``hyperparams``.
    svm_clfs.append(rbf_kernel_svm_clf)
# 2 x 2 grid: one panel per fitted model, titled with its hyperparameters.
plt.figure(figsize=(14, 9))
for i, (svm_clf, (gamma, C)) in enumerate(zip(svm_clfs, hyperparams)):
    plt.subplot(221 + i)  # 221, 222, 223, 224 select the four panels in turn
    plot_predictions(svm_clf, [-1.5, 2.5, -1, 1.5])
    plot_datasets(X, y, [-1.5, 2.5, -1, 1.5])
    plt.title(r"$\gamma = {}, C = {}$".format(gamma, C), fontsize=10)
Computational Complexity
Class | Time Complexity | Out-of-core Support | Scaling required | Kernel Trick |
LinearSVC | $O(m\times n)$ | No | Yes | No |
SGDClassifier | $O(m\times n)$ | Yes | Yes | No |
SVC | $O(m^2\times n)$ to $O(m^3\times n)$ | No | Yes | Yes |
from sklearn.svm import LinearSVR
import numpy.random as rnd
# Synthetic 1-D regression data: y = 4 + 3x + standard-normal noise, with x
# drawn from rnd.rand and scaled by 2.
m = 50
X = 2 * rnd.rand(m, 1)
y = (4 + 3 * X + rnd.randn(m, 1)).ravel()  # flattened to 1-D

svm_reg = LinearSVR(epsilon=1.5)
svm_reg.fit(X, y)

from sklearn.svm import SVR
svm_poly_reg = SVR(kernel='poly', degree=2, C=100, epsilon=0.1)
svm_poly_reg.fit(X, y)

# Two linear SVRs differing only in epsilon, for comparison.
svm_reg1 = LinearSVR(epsilon=1.5)
svm_reg2 = LinearSVR(epsilon=0.5)
svm_reg1.fit(X, y)
svm_reg2.fit(X, y)
def find_support_vectors(svm_reg, X, y):
    """Return the indices of samples lying on or outside the epsilon margin.

    Args:
        svm_reg: Fitted regressor exposing ``predict`` and an ``epsilon``
            attribute.
        X: Inputs passed to ``svm_reg.predict``.
        y: Targets aligned with the predictions.

    Returns:
        The ``np.argwhere`` result for the off-margin mask: one row per
        sample whose absolute residual is at least ``svm_reg.epsilon``.
    """
    y_pred = svm_reg.predict(X)
    off_margin = np.abs(y - y_pred) >= svm_reg.epsilon
    # argwhere gives the positions where the condition holds.
    return np.argwhere(off_margin)
# Attach to each regressor the indices of the samples that fall on or outside
# its epsilon margin (strictly speaking these are the off-margin samples).
for reg in (svm_reg1, svm_reg2):
    reg.support_ = find_support_vectors(reg, X, y)

# x-position at which epsilon is illustrated, and svm_reg1's prediction there.
eps_x1 = 1
eps_y_pred = svm_reg1.predict([[eps_x1]])
def plot_svm_regression(svm_reg, X, y, axes):
    """Plot a fitted SVR's prediction line, its epsilon band and the data.

    Args:
        svm_reg: Fitted regressor exposing ``predict``, ``epsilon`` and a
            ``support_`` index array used to highlight samples.
        X: Single-feature inputs, plotted on the horizontal axis.
        y: Targets aligned with ``X``.
        axes: Sequence ``[x_min, x_max, y_min, y_max]``; the first two bound
            the prediction line and all four set the view limits.
    """
    x1s = np.linspace(axes[0], axes[1], 100).reshape(100, 1)
    y_pred = svm_reg.predict(x1s)
    plt.plot(x1s, y_pred, "k-", linewidth=2, label=r"$\hat{y}$")
    # Dashed lines one epsilon above and below the prediction.
    plt.plot(x1s, y_pred + svm_reg.epsilon, "k--")
    plt.plot(x1s, y_pred - svm_reg.epsilon, "k--")
    # Highlight the samples recorded in support_ behind the regular markers.
    plt.scatter(X[svm_reg.support_], y[svm_reg.support_], s=180, facecolors='#FFAAAA')
    plt.plot(X, y, "bo")
    plt.xlabel(r"$x_1$", fontsize=18)
    plt.legend(loc="upper left", fontsize=18)
    # Apply the requested view limits; previously axes[2:] was ignored.
    plt.axis(axes)
# Compare the two linear SVRs, one panel per epsilon value.
# NOTE(review): the subplot calls are not in the surviving text; without them
# both models are drawn on the same axes and the second title replaces the
# first. Confirm that a side-by-side layout is what was intended.
plt.figure(figsize=(9, 4))
plt.subplot(121)
plot_svm_regression(svm_reg1, X, y, [0, 2, 3, 11])
plt.title(r"$\epsilon = {}$".format(svm_reg1.epsilon), fontsize=18)
plt.ylabel(r"$y$", fontsize=18, rotation=0)
# Double-headed arrow from the prediction at eps_x1 down to the lower margin,
# showing the width epsilon.
plt.annotate(
    '', xy=(eps_x1, eps_y_pred), xycoords='data',
    xytext=(eps_x1, eps_y_pred - svm_reg1.epsilon),
    textcoords='data', arrowprops={'arrowstyle': '<->', 'linewidth': 1.5}
)
plt.text(0.91, 5.6, r"$\epsilon$", fontsize=20)
plt.subplot(122)
plot_svm_regression(svm_reg2, X, y, [0, 2, 3, 11])
plt.title(r"$\epsilon = {}$".format(svm_reg2.epsilon), fontsize=18)
Decision Function and Predictions
# Reload the iris data and rebuild the binary problem used earlier:
# feature columns 2 and 3, with label 1.0 where the target equals 2.
iris = datasets.load_iris()
is_class_2 = iris['target'] == 2
X = iris['data'][:, (2, 3)]
y = is_class_2.astype(np.float64)
from mpl_toolkits.mplot3d import Axes3D
def plot_3D_decision_function(ax, w, b, x1_lim=[4, 6], x2_lim=[0.8, 2.8]):
    """Draw a linear decision function h = w.x + b as a 3-D wireframe.

    Reads the module-level ``X`` and ``y`` for the samples to overlay.

    Args:
        ax: 3-D matplotlib axes to draw on.
        w: Weight vector with two components.
        b: Scalar bias term.
        x1_lim: ``[min, max]`` range of the first feature.
        x2_lim: ``[min, max]`` range of the second feature.
    """
    # Keep only the samples whose first feature lies strictly inside x1_lim.
    x1_in_bounds = (X[:, 0] > x1_lim[0]) & (X[:, 0] < x1_lim[1])
    X_crop = X[x1_in_bounds]
    y_crop = y[x1_in_bounds]

    # 20 x 20 grid on which the decision function is evaluated.
    x1s = np.linspace(x1_lim[0], x1_lim[1], 20)
    x2s = np.linspace(x2_lim[0], x2_lim[1], 20)
    x1, x2 = np.meshgrid(x1s, x2s)
    xs = np.c_[x1.ravel(), x2.ravel()]
    df = (xs.dot(w) + b).reshape(x1.shape)

    # Lines in the h = 0 plane where w.x + b equals 0, +1 and -1.
    boundary_x2s = -x1s*(w[0]/w[1])-b/w[1]
    margin_x2s_1 = -x1s*(w[0]/w[1])-(b-1)/w[1]
    margin_x2s_2 = -x1s*(w[0]/w[1])-(b+1)/w[1]

    # plot_surface needs 2-D arrays, so the h = 0 plane is an explicit
    # zero grid rather than a scalar.
    ax.plot_surface(x1, x2, np.zeros_like(x1), color="b", alpha=0.2, cstride=100, rstride=100)
    ax.plot(x1s, boundary_x2s, 0, "k-", linewidth=2, label=r"$h=0$")
    ax.plot(x1s, margin_x2s_1, 0, "k--", linewidth=2, label=r"$h=\pm 1$")
    ax.plot(x1s, margin_x2s_2, 0, "k--", linewidth=2)
    ax.plot(X_crop[:, 0][y_crop==1], X_crop[:, 1][y_crop==1], 0, "g^")
    ax.plot_wireframe(x1, x2, df, alpha=0.3, color="k")
    ax.plot(X_crop[:, 0][y_crop==0], X_crop[:, 1][y_crop==0], 0, "bs")
    ax.set_xlim(x1_lim)
    ax.set_ylim(x2_lim)
    ax.text(4.5, 2.5, 3.8, "Decision function $h$", fontsize=15)
    ax.set_xlabel(r"Petal length", fontsize=15)
    ax.set_ylabel(r"Petal width", fontsize=15)
    ax.set_zlabel(r"$h = \mathbf{w}^t \cdot \mathbf{x} + b$", fontsize=18)
    ax.legend(loc="upper left", fontsize=16)
# Fit a linear SVM directly on X, y (no scaling step), then plot its
# decision function using the learned weights and bias.
svm_clf2 = LinearSVC(C=10, loss='hinge')
svm_clf2.fit(X, y)

fig = plt.figure(figsize=(11, 6))
ax1 = fig.add_subplot(111, projection='3d')
plot_3D_decision_function(ax1, w=svm_clf2.coef_[0], b=svm_clf2.intercept_[0])
Training Objective
Quadratic Programming
The Dual Problem
Kernelized SVM
A kernel is a function capable of computing the dot product $\phi (a)^T \cdot \phi (b)$ based only on the original vectors $a$ and $b$, without having to compute (or even to know about) the transformation $\phi$.
$$ \begin{align*} Linear &:\ \ \ K(a,b)=a^T \cdot b \\ Polynomial &:\ \ \ K(a,b)=(\gamma a^T \cdot b + r)^d \\ Gaussian\ RBF &:\ \ \ K(a,b)=\exp(-\gamma \left \| a-b \right \|^2) \\ Sigmoid &:\ \ \ K(a,b)=\tanh(\gamma a^T \cdot b + r) \\ \end{align*} $$
Online SVMs