kmeans聚類算法(使用西瓜數據集4.0)

number,density,sugercontent
1,0.697,0.460
2,0.774,0.376
3,0.634,0.264
4,0.608,0.318
5,0.556,0.215
6,0.403,0.237
7,0.481,0.149
7,0.666,0.091
8,0.437,0.211
9,0.666,0.091
10,0.243,0.267
11,0.245,0.057
12,0.343,0.099
13,0.639,0.161
14,0.657,0.198
15,0.360,0.370
16,0.593,0.042
17,0.719,0.103
18,0.359,0.188
19,0.339,0.241
20,0.282,0.257
21,0.748,0.232
22,0.714,0.346
23,0.483,0.312
24,0.478,0.437
25,0.525,0.369
26,0.751,0.489
27,0.532,0.472
28,0.473,0.376
29,0.725,0.445
30,0.446,0.459

import numpy as np
import matplotlib.pyplot as plt
# Though the following import is not directly being used, it is required
# for 3D projection to work
from mpl_toolkits.mplot3d import Axes3D
from sklearn.cluster import KMeans
import pandas as pd

# Cluster the watermelon data set into three groups with k-means, print the
# fitted parameters and draw the samples coloured by their cluster.

xigua = pd.read_csv('xigua.csv')
estimator = KMeans(n_clusters=3, max_iter=500)

a1 = xigua.values
# Only columns 1 and 2 are used as features; column 0 is excluded from the fit.
features = a1[:, 1:3]
print(features)

# Compute the cluster centres and predict the cluster index of every sample.
# fit_predict returns the label of each point (the same array as
# estimator.labels_).
lable_pred = estimator.fit_predict(features)
# Coordinates of the cluster centres.
centroids = estimator.cluster_centers_
# Sum of squared distances of the samples to their closest cluster centre.
inertia = estimator.inertia_
print(lable_pred)
print(centroids)
print(inertia)

# Plot the same two columns that were clustered (the original drew column 0
# against column 1, which is not the feature space the model was fitted on).
# One scatter call per cluster instead of one per point.
cluster_colors = ('red', 'black', 'yellow')
for cluster_index, color in enumerate(cluster_colors):
    members = features[lable_pred == cluster_index]
    plt.scatter(members[:, 0], members[:, 1], color=color)
plt.show()

打印參數
[2 2 1 2 1 0 0 1 0 1 0 0 0 1 1 0 1 1 0 0 0 1 2 2 2 2 2 2 2 2 2]
[[0.3492     0.2076 ]
 [0.65311111 0.15522222]
 [0.6005     0.40491667]]
0.41449036111111104



打印圖片