import numpy as np import math import csv import matplotlib.pyplot as plt class Model(object): def __init__(self,filename,alpha,iter_num,num_samples,k,sigma,u): self.filename = filename self.alpha = alpha self.iter_num = iter_num self.num_samples = num_samples self.k = k self.sigma = sigma self.u = u self.prob = np.zeros((self.num_samples,self.k)) def LoadCsv(self): lines = csv.reader(open(self.filename, 'r')) data = list(lines) for i in range(1, len(data)): data[i] = [float(x) for x in data[i]] result = np.array(data[1:]) self.data = result[:,1:] self.data = np.matrix(self.data) def E_Step(self): #prob = np.zeros((self.num_samples,self.k)) for i in range(self.num_samples): sum_p = 0 for j in range(self.k): sum_p+= self.alpha[j]*math.exp(-0.5*(self.data[i,:]-self.u[j,:]) *self.sigma[j].I*np.transpose(self.data[i,:]-self.u[j,:]))/(np.sqrt(np.linalg.det(self.sigma[j])) *2*math.pi) for j in range(self.k): number = math.exp(-0.5*(self.data[i,:]-self.u[j,:]) *self.sigma[j].I*np.transpose(self.data[i,:]-self.u[j,:]))/(np.sqrt(np.linalg.det(self.sigma[j])) *2*math.pi) self.prob[i,j]=self.alpha[j]*number/sum_p #return prob def M_Step(self): self.E_Step() for i in range(self.k): denom =0 numer = 0 sigma_denom = 0 for j in range(self.num_samples): denom += self.prob[j,i]*self.data[j,:] numer += self.prob[j,i] self.u[i,:] = denom/numer self.alpha[i] = numer/self.num_samples for j in range(self.num_samples): sigma_denom += self.prob[j,i]*np.transpose(self.data[j,:]-self.u[i,:])*(self.data[j,:]-self.u[i,:]) self.sigma[i] = sigma_denom/numer def Update(self): self.LoadCsv() for i in range(self.iter_num): print('------%d'%i) self.M_Step() cluster=np.zeros(self.num_samples) #劃分紅簇 for i in range(self.num_samples): for j in range(self.k): if self.prob[i,j] == max(self.prob[i,:]): cluster[i] = j return cluster def Show(self): cluster = self.Update() plt.figure() color=['red','green','blue'] for i in range(self.num_samples): #print(self.data[i,0]) plt.scatter(self.data[i, 0], self.data[i,1], c=color[int(cluster[i])], s=25, alpha=0.4, marker='o') plt.xlabel('density') plt.ylabel('sugar content') plt.savefig('高斯混合聚類模型-50代.jpg') plt.show() filename = 'D:\DeepLearning(7.8-)\data\watermelon4_0.csv' alpha = [0.33,0.33,0.34] u = np.array([[0.403,0.237],[0.714,0.346],[0.532,0.472]]) u = np.matrix(u) k =3 num_samples = 30 iter_num = 50 sigma =[] sigma.append(np.matrix([[0.1,0.0],[0.0,0.1]])) sigma.append(np.matrix([[0.1,0.0],[0.0,0.1]])) sigma.append(np.matrix([[0.1,0.0],[0.0,0.1]])) m = Model(filename,alpha,iter_num,num_samples,k,sigma,u) m.Show() #cluster =m.Update() #print(cluster)
結果:app
西瓜書上的 西瓜數據集 :https://files.cnblogs.com/files/jzcbest1016/data.rarui
高斯混合聚類模型僞代碼:spa
輸入:樣本集D ={x1,x2,....xm}3d
高斯混合成分個數kcode
過程:blog
初始化高斯混合分佈的模型參數{(ai,ui,∑i)} 三個參數含義:分別爲混合係數,均值向量,協方差矩陣it
迭代: class
for j = 1,2,......,m doimport
根據計算xj的各混合成分生成的後驗機率object
pm等於上面那個式子
end for
for i =1,2,....k do
計算新均值向量
計算新協方差矩陣
計算新混合係數
end for
將模型參數{(ai,ui,∑i)|1<=i<=k}更新爲{(ai’,ui’,∑i’)|1<=i<=k}
until 知足中止條件
Ci = Ø (1<=i<=k)
for j =1,2,...m do
根據(i=1,2,....k) 肯定xj的簇標記λj (肯定屬於哪一個i)
將xj劃分相應的簇:
end for
輸出:簇劃分c = {c1,c2.....ck}