####⑷ Why we need the dual problem

This optimization problem can actually be solved as an ordinary QP. But if our data dimensionality is large, the dimension after a feature transform becomes larger still, and the VC dimension grows with it, especially once we use the polynomial kernel, RBF kernel, and so on later. After transforming the problem through the dual and its KKT conditions, our objective becomes:
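For reference, a standard statement of that dual (following the usual hard-margin derivation, with $\mathbf{z}_n$ the transformed features and one multiplier $\alpha_n$ per sample):

$$
\min_{\boldsymbol{\alpha}}\ \frac{1}{2}\sum_{n=1}^{N}\sum_{m=1}^{N}\alpha_n\alpha_m y_n y_m \mathbf{z}_n^T\mathbf{z}_m-\sum_{n=1}^{N}\alpha_n
\qquad \text{s.t.}\ \sum_{n=1}^{N}y_n\alpha_n=0,\ \ \alpha_n\ge 0
$$

The QP now has $N$ variables (one per sample) instead of $\tilde{d}+1$, and $\mathbf{z}$ only ever appears through the inner product $\mathbf{z}_n^T\mathbf{z}_m$, which is exactly the opening the kernels below exploit.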
####⑹ Comparison of Kernels

The hyperplanes produced by the Polynomial Kernel are built from polynomial curves. Advantage: the degree Q can be set flexibly, so the boundary can be made to fit the actual distribution more closely. Disadvantages: when Q is very large, a value inside the kernel that comes out < 1 is squashed toward 0, while anything > 1 blows up, which adds computational trouble; and with that many parameters it is hard to pick suitable values.
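A quick numeric sketch of that squash/blow-up behavior (toy vectors and the common $(\gamma\,\mathbf{x}^T\mathbf{x}'+c)^Q$ parameterization; `gamma`, `coef0`, and the values are illustrative, not from this post's code):

```python
import numpy as np

def poly_kernel(x1, x2, gamma=1.0, coef0=1.0, Q=2):
    """Polynomial kernel (gamma * <x1, x2> + coef0) ** Q."""
    return (gamma * np.dot(x1, x2) + coef0) ** Q

x_small = np.array([0.3, 0.2])   # <x, x> = 0.13 < 1
x_large = np.array([3.0, 2.0])   # <x, x> = 13  > 1

for Q in (2, 10, 50):
    # with coef0 = 0 the base is the raw inner product
    print(Q,
          poly_kernel(x_small, x_small, coef0=0.0, Q=Q),   # collapses toward 0
          poly_kernel(x_large, x_large, coef0=0.0, Q=Q))   # explodes
```

Tuning $\gamma$ and $c$ to keep the base near 1 is exactly the parameter-picking difficulty mentioned above.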
for every misclassified point we add a 1 to the count, and we minimize that count. SVM can also be restricted with this method.
We add one more condition. The parameter C specifies how heavily those mistakes are penalized, and the constraint changes as well: correctly classified points must still satisfy ≥ 1, while the mistaken ones are left alone, no matter whether the mistake is small or large. Putting the pieces together:
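Written out (a standard statement of this combined form, with $[\![\cdot]\!]$ the 0/1 indicator):

$$
\min_{b,\mathbf{w}}\ \frac{1}{2}\mathbf{w}^T\mathbf{w}+C\sum_{n=1}^{N}[\![\,y_n\ne\operatorname{sign}(\mathbf{w}^T\mathbf{z}_n+b)\,]\!]
\qquad \text{s.t.}\ y_n(\mathbf{w}^T\mathbf{z}_n+b)\ge 1-\infty\cdot[\![\,y_n\ne\operatorname{sign}(\mathbf{w}^T\mathbf{z}_n+b)\,]\!]
$$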
This formulation is actually not very useful. First, it is not linear, so quadratic programming cannot be applied, never mind the dual machinery. Second, big mistakes and small mistakes are punished identically: it cannot distinguish a small error from a large one. So we keep revising the scheme: we introduce ξ as the amount by which a point errs. The bigger the violation, the bigger the penalty, meaning this expression grows in value and the SVM has to spend more effort dealing with that point.
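This gives the familiar soft-margin primal (standard form, one slack $\xi_n$ per sample):

$$
\min_{b,\mathbf{w},\boldsymbol{\xi}}\ \frac{1}{2}\mathbf{w}^T\mathbf{w}+C\sum_{n=1}^{N}\xi_n
\qquad \text{s.t.}\ y_n(\mathbf{w}^T\mathbf{z}_n+b)\ge 1-\xi_n,\ \ \xi_n\ge 0
$$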
This settles the soft margin: whenever the violation $1 - y_n(\mathbf{w}^T\mathbf{z}_n + b)$ is greater than 0 it is counted as $\xi_n$, and otherwise it counts as 0.
That max-style penalty is the insensitive loss, the hinge loss function; there are also the log loss, cross-entropy, and others. Logistic regression uses cross-entropy, while SVM uses the hinge loss. The support vector machine is an approximate implementation of structural risk minimization: structural risk is an upper bound on the expected risk (Eout), and it is the sum of the empirical risk (Ein) and a confidence term (Ω, the model complexity). The empirical risk depends on the choice of decision function f, but the confidence term is an increasing function of the VC dimension of the hypothesis class F, and the two are in tension: when the VC dimension grows we can pick a better f that makes the empirical risk smaller, but the confidence term then becomes larger. This is exactly the VC bound theory. Fortunately I had taken Prof. Hsuan-Tien Lin's course at National Taiwan University, so I had some grounding in these machine-learning theory fundamentals.
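A minimal sketch comparing the losses just mentioned on a few margin scores $s = y_n(\mathbf{w}^T\mathbf{z}_n+b)$ (plain NumPy, illustrative values only):

```python
import numpy as np

s = np.linspace(-2, 2, 9)                  # margin scores y * (w^T z + b)

zero_one = (s <= 0).astype(float)          # 0/1 loss: wrong side counts as 1
hinge = np.maximum(0.0, 1.0 - s)           # hinge loss, used by SVM
cross_entropy = np.log2(1.0 + np.exp(-s))  # scaled cross-entropy, used by logistic regression

# Both hinge and scaled cross-entropy upper-bound the 0/1 loss,
# so driving them down also drives down the classification error.
for row in zip(s, zero_one, hinge, cross_entropy):
    print("s=%5.2f  0/1=%.0f  hinge=%.2f  ce=%.2f" % row)
```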
####⑼ How the variables are selected

SMO calls the process of choosing the first variable the outer loop. The outer loop selects, among the training samples, the point that violates the KKT conditions most severely. The first time I read that I was baffled, but thinking it through, it simply means checking which sample fails to satisfy the KKT conditions:
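With $g(x_i)=\sum_j \alpha_j y_j K(x_j,x_i)+b$, the conditions being checked take the standard form:

$$
\alpha_i=0 \ \Leftrightarrow\ y_i g(x_i)\ge 1, \qquad
0<\alpha_i<C \ \Leftrightarrow\ y_i g(x_i)=1, \qquad
\alpha_i=C \ \Leftrightarrow\ y_i g(x_i)\le 1
$$

A sample that breaks whichever case applies to its current $\alpha_i$ is a KKT violator, and the worst one is handled first.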
```python
import random

def select_jrand(i, m):
    '''Randomly pick an index j in [0, m) that is different from i.'''
    j = i
    while j == i:
        j = int(random.uniform(0, m))
    return j

def clip_alpha(aj, H, L):
    '''Restrain alpha inside the feasible box [L, H].'''
    if aj > H:
        aj = H
    elif aj < L:
        aj = L
    return aj
```
Next comes the support vector machine itself:
```python
import numpy as np
import matplotlib.pyplot as plt
# select_jrand and clip_alpha above are assumed saved as Tools.py;
# dataSet is the data-loading module used at the bottom.
import Tools
import dataSet

def SVM(data_mat, class_label, C, tolar, max_iter):
    data_mat = np.mat(data_mat)
    # labels must be a column vector so label_mat[i] is a single label
    label_mat = np.mat(class_label).transpose()
    b = 0
    m, n = np.shape(data_mat)
    alphas = np.zeros((m, 1))
    iter = 0
    while iter < max_iter:
        # number of alpha pairs changed during this sweep
        alpha_pairs_changed = 0
        # pick the first alpha
        for i in range(m):
            WT_i = np.dot(np.multiply(alphas, label_mat).T, data_mat)
            f_xi = float(np.dot(WT_i, data_mat[i, :].T)) + b
            Ei = f_xi - float(label_mat[i])
            # only optimize alphas that violate the KKT conditions
            if ((label_mat[i] * Ei < -tolar) and (alphas[i] < C)) or \
               ((label_mat[i] * Ei > tolar) and (alphas[i] > 0)):
                # pick the second alpha at random
                j = Tools.select_jrand(i, m)
                WT_j = np.dot(np.multiply(alphas, label_mat).T, data_mat)
                f_xj = float(np.dot(WT_j, data_mat[j, :].T)) + b
                Ej = f_xj - float(label_mat[j])
                alpha_iold = alphas[i].copy()
                alpha_jold = alphas[j].copy()
                # bounds of the box constraint for alpha_j
                if label_mat[i] != label_mat[j]:
                    L = max(0, alphas[j] - alphas[i])
                    H = min(C, C + alphas[j] - alphas[i])
                else:
                    L = max(0, alphas[j] + alphas[i] - C)
                    H = min(C, alphas[j] + alphas[i])
                if H == L:
                    continue
                # eta = 2*K12 - K11 - K22; a usable step needs eta < 0
                eta = 2.0 * data_mat[i, :] * data_mat[j, :].T - \
                      data_mat[i, :] * data_mat[i, :].T - \
                      data_mat[j, :] * data_mat[j, :].T
                if eta >= 0:
                    continue
                # step alpha_j to its unconstrained optimum, then clip
                alphas[j] -= label_mat[j] * (Ei - Ej) / eta
                alphas[j] = Tools.clip_alpha(alphas[j], H, L)
                if abs(alphas[j] - alpha_jold) < 0.00001:
                    continue
                # move alpha_i by the same amount in the opposite direction
                alphas[i] += label_mat[j] * label_mat[i] * (alpha_jold - alphas[j])
                # recompute the threshold b
                b1 = b - Ei + label_mat[i] * (alpha_iold - alphas[i]) * np.dot(data_mat[i, :], data_mat[i, :].T) + \
                     label_mat[j] * (alpha_jold - alphas[j]) * np.dot(data_mat[i, :], data_mat[j, :].T)
                b2 = b - Ej + label_mat[i] * (alpha_iold - alphas[i]) * np.dot(data_mat[i, :], data_mat[j, :].T) + \
                     label_mat[j] * (alpha_jold - alphas[j]) * np.dot(data_mat[j, :], data_mat[j, :].T)
                if (0 < alphas[i]) and (C > alphas[i]):
                    b = b1
                elif (0 < alphas[j]) and (C > alphas[j]):
                    b = b2
                else:
                    b = (b1 + b2) / 2.0
                print(b)
                alpha_pairs_changed += 1
        # count an iteration only when no pair changed in this sweep
        if alpha_pairs_changed == 0:
            iter += 1
        else:
            iter = 0
    # collect support vectors (alpha > 0) and the two classes for plotting
    support_x = []
    support_y = []
    class1_x = []
    class1_y = []
    class01_x = []
    class01_y = []
    for i in range(m):
        if alphas[i] > 0.0:
            support_x.append(data_mat[i, 0])
            support_y.append(data_mat[i, 1])
    for i in range(m):
        if label_mat[i] == 1:
            class1_x.append(data_mat[i, 0])
            class1_y.append(data_mat[i, 1])
        else:
            class01_x.append(data_mat[i, 0])
            class01_y.append(data_mat[i, 1])
    w_best = np.dot(np.multiply(alphas, label_mat).T, data_mat)
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.scatter(support_x, support_y, s=100, c="y", marker="v", label="support_v")
    ax.scatter(class1_x, class1_y, s=30, c="b", marker="o", label="class 1")
    ax.scatter(class01_x, class01_y, s=30, c="r", marker="x", label="class -1")
    lin_x = np.linspace(0, 100)
    lin_y = (-float(b) - w_best[0, 0] * lin_x) / w_best[0, 1]
    plt.plot(lin_x, lin_y, color="black")
    ax.legend()
    ax.set_xlabel("factor1")
    ax.set_ylabel("factor2")
    plt.show()
    return b, alphas

datamat, labelmat = dataSet.load_data_set()
b, alphas = SVM(datamat, labelmat, 0.6, 0.001, 10)
print(b, alphas)
```
Of the trailing parameters passed in, C is the penalty strength, tolar is the tolerance, and max_iter caps the iterations. The most important line is probably this one:

```python
if ((label_mat[i]*Ei < -tolar) and (alphas[i] < C)) or ((label_mat[i]*Ei > tolar) and (alphas[i] > 0)):
```
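Unpacking it: since $E_i = g(x_i) - y_i$ and $y_i^2 = 1$, we have $y_i E_i = y_i g(x_i) - 1$. So `label_mat[i]*Ei < -tolar` with `alphas[i] < C` means $y_i g(x_i) < 1$ while $\alpha_i$ is not at its upper bound, and `label_mat[i]*Ei > tolar` with `alphas[i] > 0` means $y_i g(x_i) > 1$ while $\alpha_i$ is not zero; either way the sample violates the KKT conditions listed earlier, so it is worth optimizing. The complete SMO below alternates the sweep for the first $\alpha$ between the entire training set and the non-bound samples ($0<\alpha_i<C$):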
```python
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import random
import Tool
import smo_class
import KernelTransform

def innerL(i, os):
    Ei = Tool.calculateEi(os, i)
    # only optimize alphas that violate the KKT conditions
    if ((os.labels[i] * Ei < -os.toler) and (os.alphas[i] < os.C)) or \
       ((os.labels[i] * Ei > os.toler) and (os.alphas[i] > 0)):
        # the second alpha is chosen to maximize |Ei - Ej| via the error cache
        j, Ej = Tool.selectj(i, os, Ei)
        alphaIold = os.alphas[i].copy()
        alphaJold = os.alphas[j].copy()
        # bounds of the box constraint for alpha_j
        if os.labels[i] != os.labels[j]:
            L = max(0, os.alphas[j] - os.alphas[i])
            H = min(os.C, os.C + os.alphas[j] - os.alphas[i])
        else:
            L = max(0, os.alphas[j] + os.alphas[i] - os.C)
            H = min(os.C, os.alphas[j] + os.alphas[i])
        if L == H:
            return 0
        eta = 2.0 * os.x[i, :] * os.x[j, :].T - os.x[i, :] * os.x[i, :].T - os.x[j, :] * os.x[j, :].T
        if eta >= 0:
            print('eta >= 0, the kernel matrix is not positive semi-definite')
            return 0
        os.alphas[j] -= os.labels[j] * (Ei - Ej) / eta
        os.alphas[j] = Tool.rangeSelectionForAlpha(os.alphas[j], H, L)
        # keep the error cache in sync after every alpha update
        Tool.updateEk(os, j)
        if abs(os.alphas[j] - alphaJold) < 0.00001:
            print("j not moving enough")
            return 0
        os.alphas[i] += os.labels[j] * os.labels[i] * (alphaJold - os.alphas[j])
        Tool.updateEk(os, i)
        # recompute the threshold b from whichever alpha is unbound
        b1 = os.b - Ei - os.labels[i] * (os.alphas[i] - alphaIold) * \
             os.x[i, :] * os.x[i, :].T - os.labels[j] * \
             (os.alphas[j] - alphaJold) * os.x[i, :] * os.x[j, :].T
        b2 = os.b - Ej - os.labels[i] * (os.alphas[i] - alphaIold) * \
             os.x[i, :] * os.x[j, :].T - os.labels[j] * \
             (os.alphas[j] - alphaJold) * os.x[j, :] * os.x[j, :].T
        if (0 < os.alphas[i]) and (os.C > os.alphas[i]):
            os.b = b1
        elif (0 < os.alphas[j]) and (os.C > os.alphas[j]):
            os.b = b2
        else:
            os.b = (b1 + b2) / 2.0
        return 1
    else:
        return 0

def smo(data, labels, C=0.6, toler=0.001, maxIter=40, kernel=True):
    oS = smo_class.optStruct(np.mat(data), np.mat(labels).transpose(), C, toler)
    iter = 0
    entireSet = True
    alphaPairsChanged = 0
    while (iter < maxIter) and ((alphaPairsChanged > 0) or entireSet):
        alphaPairsChanged = 0
        if entireSet:
            # full pass: try every sample as the first alpha
            for i in range(oS.m):
                if kernel == True:
                    alphaPairsChanged += KernelTransform.innerL(i, oS)
                else:
                    alphaPairsChanged += innerL(i, oS)
                print("fullSet, iter: %d i: %d, pairs changed %d" %
                      (iter, i, alphaPairsChanged))
            iter += 1
        else:
            # non-bound pass: the element-wise product is nonzero only where
            # 0 < alpha < C, e.g. [0,1,1,0,0]*[1,1,0,1,0] = [0,1,0,0,0]
            nonBoundIs = np.nonzero((oS.alphas.A > 0) * (oS.alphas.A < C))[0]
            for i in nonBoundIs:
                alphaPairsChanged += innerL(i, oS)
                print("non-bound, iter: %d i: %d, pairs changed %d" % (iter, i, alphaPairsChanged))
            iter += 1
        # entireSet controls the alternation between the two pass types
        if entireSet:
            entireSet = False
        # return to a full pass only when no alpha pair was updated
        elif alphaPairsChanged == 0:
            entireSet = True
        print("iteration number: %d" % iter)
    return oS.b, oS.alphas
```
entireSet is simply the flag for that alternation strategy; there is not much more to say about it. After that comes the driver code:
```python
import Tool
import SMO
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import KernelTransform

def calculateW(alphas, data, labels):
    '''Calculate w; the samples whose alpha is non-zero are the support vectors.'''
    x = np.mat(data)
    label = np.mat(labels).transpose()
    m, n = np.shape(x)
    w = np.zeros((n, 1))
    # w = sum_i alpha_i * y_i * x_i; only support vectors contribute
    for i in range(m):
        w += np.multiply(alphas[i] * label[i], x[i, :].T)
    return w

if __name__ == '__main__':
    # linear case: train without a kernel and draw the separating line
    data, label = Tool.loadDataSet('../Data/testSet.txt')
    b, alphas = SMO.smo(data, label, kernel=False)
    w = calculateW(alphas, data, label)
    x = np.arange(0, 11)
    print(w)
    y = (-b - w[0] * x) / w[1]
    Tool.drawDataset(data, label, x, y.tolist()[0], line=True, alphas=alphas)
    # non-linear case: train with the RBF-kernel version of innerL
    data, label = Tool.loadDataSet('../Data/testSetRBF.txt')
    b, alphas = SMO.smo(data, label, kernel=True, maxIter=100)
    svInd = np.nonzero(alphas.A > 0)[0]
    Tool.drawDataset(data, label, line=False, alphas=alphas)
```