相關機率論知識(機率論)
在生成模型中,我們的學習目標是聯合機率 $p(x, y) = p(x \mid y)\,p(y)$;預測時取 $\arg\max_{y} p(y \mid x) = \arg\max_{y} p(x \mid y)\,p(y)$。
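而在高斯判別分析中,我們有如下假設:
$$y \sim \mathrm{Bernoulli}(\phi), \qquad x \mid y = 0 \sim \mathcal{N}(\mu_0, \Sigma), \qquad x \mid y = 1 \sim \mathcal{N}(\mu_1, \Sigma)$$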
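即:
$$p(y) = \phi^{y}(1-\phi)^{1-y}$$
$$p(x \mid y = 0) = \frac{1}{(2\pi)^{n/2}\lvert\Sigma\rvert^{1/2}} \exp\!\left(-\tfrac{1}{2}(x-\mu_0)^{T}\Sigma^{-1}(x-\mu_0)\right)$$
$$p(x \mid y = 1) = \frac{1}{(2\pi)^{n/2}\lvert\Sigma\rvert^{1/2}} \exp\!\left(-\tfrac{1}{2}(x-\mu_1)^{T}\Sigma^{-1}(x-\mu_1)\right)$$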
其中 $\mu_0$、$\mu_1$ 是高斯分佈的均值向量,$\Sigma$ 是(兩個類別共用的)協方差矩陣。
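然後,我們使用極大似然估計,首先構造對數似然函數 $\ell(\phi, \mu_0, \mu_1, \Sigma)$:
$$\ell(\phi, \mu_0, \mu_1, \Sigma) = \log \prod_{i=1}^{m} p\!\left(x^{(i)}, y^{(i)}\right) = \log \prod_{i=1}^{m} p\!\left(x^{(i)} \mid y^{(i)}\right) p\!\left(y^{(i)}\right)$$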
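對各參數求導並令導數為零,於是得到:
$$\phi = \frac{1}{m}\sum_{i=1}^{m} 1\{y^{(i)} = 1\}$$
$$\mu_0 = \frac{\sum_{i=1}^{m} 1\{y^{(i)} = 0\}\,x^{(i)}}{\sum_{i=1}^{m} 1\{y^{(i)} = 0\}}, \qquad \mu_1 = \frac{\sum_{i=1}^{m} 1\{y^{(i)} = 1\}\,x^{(i)}}{\sum_{i=1}^{m} 1\{y^{(i)} = 1\}}$$
$$\Sigma = \frac{1}{m}\sum_{i=1}^{m}\left(x^{(i)} - \mu_{y^{(i)}}\right)\left(x^{(i)} - \mu_{y^{(i)}}\right)^{T}$$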
於是我們只要將學習到的參數 $\phi, \mu_0, \mu_1, \Sigma$ 代入 $p(x \mid y)$ 和 $p(y)$ 的函數,在預測時,只需要代入數據,計算出不同類別中最大的 $p(x \mid y)\,p(y)$,即可得到預測的類別。
import numpy as np
import random
import matplotlib.pyplot as plt
# 高斯判別分析(Gaussian discriminant analysis)
def load_data(gauss_1, gauss_2, point_num):
    """Draw a labelled two-class sample of 2-D points.

    Every point is assigned to class 0 or class 1 by a fair coin flip. Both of
    its coordinates are then drawn independently from that class's 1-D
    Gaussian, i.e. the same ``mu`` and ``sigma`` are used for x and y.

    :param gauss_1: ``[mu, sigma]`` used for both coordinates of class 0.
    :param gauss_2: ``[mu, sigma]`` used for both coordinates of class 1.
    :param point_num: total number of points to generate (both classes).
    :return: ``(x_0, y_0, x_1, y_1, label)`` where ``x_0``/``y_0`` are the
        coordinates of the class-0 points, ``x_1``/``y_1`` those of the
        class-1 points, and ``label`` holds the class (0 or 1) of every point
        in generation order.
    """
    x_0, y_0 = [], []
    x_1, y_1 = [], []
    label = []
    # The order of calls into ``random`` (coin flip, then x, then y) is kept
    # fixed so that a seeded run is reproducible.
    while len(label) < point_num:
        if random.random() > 0.5:
            mu, sigma = gauss_1[0], gauss_1[1]
            x_0.append(random.gauss(mu, sigma))
            y_0.append(random.gauss(mu, sigma))
            label.append(0)
        else:
            mu, sigma = gauss_2[0], gauss_2[1]
            x_1.append(random.gauss(mu, sigma))
            y_1.append(random.gauss(mu, sigma))
            label.append(1)
    return x_0, y_0, x_1, y_1, label
def train(x_train_0, x_train_1, label):
    """Fit the Gaussian discriminant analysis parameters by maximum likelihood.

    :param x_train_0: class-0 samples, one row per feature and one column per
        sample (the script passes a ``2 x n0`` array built with ``np.vstack``).
    :param x_train_1: class-1 samples, laid out like ``x_train_0``.
    :param label: list with the class label (0 or 1) of every sample; only
        the per-class counts and the total length are used.
    :return: ``(phi, mu_0, mu_1, sigma)`` where ``phi`` is the fraction of
        class-1 samples, ``mu_0``/``mu_1`` are the class means as ``1 x d``
        ``np.matrix`` rows and ``sigma`` is the shared ``d x d`` covariance
        as an ``np.matrix``.
    :raises ValueError: if ``label`` contains no sample of one of the classes
        (the class mean would otherwise be NaN).
    """
    m = len(label)
    label_cnt_0 = float(label.count(0))
    label_cnt_1 = float(label.count(1))
    if label_cnt_0 == 0 or label_cnt_1 == 0:
        raise ValueError("train() needs at least one sample of each class")

    phi = label_cnt_1 / m

    samples_0 = np.asarray(x_train_0, dtype=float)
    samples_1 = np.asarray(x_train_1, dtype=float)

    # Per-feature means: sum along the sample axis, divide by the class count.
    mu_0 = samples_0.sum(axis=1) / label_cnt_0
    mu_1 = samples_1.sum(axis=1) / label_cnt_1

    # Centre every sample on its own class mean, stack both classes (one row
    # per sample) and pool them into the single shared covariance.
    centered = np.concatenate([samples_0.T - mu_0, samples_1.T - mu_1])
    sigma = np.dot(centered.T, centered) / m

    # np.asmatrix (np.mat no longer exists in NumPy 2.0) keeps the return
    # types that predict() relies on: ``*`` as matrix product and ``.I``.
    return phi, np.asmatrix(mu_0), np.asmatrix(mu_1), np.asmatrix(sigma)
def predict(x, gauss):
    """Score one sample under both classes and print the predicted class.

    The returned values are the unnormalised joint scores
    ``p(y) * exp(-0.5 * (x - mu_y) * Sigma^-1 * (x - mu_y)^T)``. The Gaussian
    normalising constant is omitted: it is the same for both classes because
    they share ``Sigma``, so it does not affect which score is larger.

    :param x: a single sample (list, array or ``1 x d`` matrix).
    :param gauss: ``(phi, mu_0, mu_1, sigma)`` as returned by ``train``.
    :return: ``(p_y0, p_y1)``, the class-0 and class-1 scores, each a
        ``1 x 1`` ``np.matrix``.
    """
    phi = gauss[0]
    # Coerce to matrices so ``*`` is a matrix product even when the caller
    # passes plain arrays or lists.
    mu_0 = np.asmatrix(gauss[1])
    mu_1 = np.asmatrix(gauss[2])
    sigma_inv = np.asmatrix(gauss[3]).I  # inverted once, used for both classes
    sample = np.asmatrix(x)

    diff_0 = sample - mu_0
    diff_1 = sample - mu_1
    # -0.5 rather than -1/2: the latter is -1 under Python 2 integer division.
    p_y0 = (1 - phi) * np.exp(-0.5 * diff_0 * sigma_inv * diff_0.T)
    p_y1 = phi * np.exp(-0.5 * diff_1 * sigma_inv * diff_1.T)

    if p_y1[0, 0] > p_y0[0, 0]:
        print(str(x) + "is 1")
    else:
        print(str(x) + "is 0")
    return p_y0, p_y1
if __name__ == '__main__':
    # Two classes centred at (15, 15) and (30, 30), 5000 points in total.
    x_0, y_0, x_1, y_1, label = load_data([15, 3], [30, 3], 5000)
    train_0 = np.vstack((x_0, y_0))
    train_1 = np.vstack((x_1, y_1))
    gauss = train(train_0, train_1, label)

    # Classify points along the diagonal; predict() prints each decision.
    for i in range(10, 40):
        predict([i, i], gauss)

    # Plot the training samples.
    plt.scatter(x_0, y_0, 4, "lightblue")
    plt.scatter(x_1, y_1, 4, "red")

    # Plot the decision boundary that predict() actually implements. Setting
    # the two class scores equal and taking logs gives the line
    #     w . x + b = 0,  w = Sigma^-1 (mu_1 - mu_0),
    #     b = -0.5 * (mu_1' Sigma^-1 mu_1 - mu_0' Sigma^-1 mu_0)
    #         + log(phi / (1 - phi)).
    # The perpendicular bisector of the two means is only the special case of
    # an isotropic Sigma and phi = 0.5.
    phi = gauss[0]
    mu_0 = np.asarray(gauss[1], dtype=float).ravel()
    mu_1 = np.asarray(gauss[2], dtype=float).ravel()
    sigma_inv = np.linalg.inv(np.asarray(gauss[3], dtype=float))
    w = sigma_inv.dot(mu_1 - mu_0)
    b = (-0.5 * (mu_1.dot(sigma_inv).dot(mu_1) - mu_0.dot(sigma_inv).dot(mu_0))
         + np.log(phi / (1 - phi)))

    # Solve w[0] * x + w[1] * y + b = 0 for y over the plotted x range.
    x_point = np.linspace(0, 40, 100)
    y_point = -(w[0] * x_point + b) / w[1]
    plt.plot(x_point, y_point)
    plt.show()
實現後的圖像如下: