import operator

import numpy as np


def createDataSet():
    """Return a tiny labelled 2-D sample set for demonstrating kNN.

    Returns:
        A ``(group, labels)`` tuple: ``group`` is a 4x2 array of sample
        points and ``labels`` is the list of class labels, one per row.
    """
    group = np.array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels


def classify(inx, dataSet, labels, k):
    """Classify ``inx`` by majority vote among its k nearest samples.

    Distances are Euclidean (the p = 2 case of the L_p distance).

    Args:
        inx: The point to classify; one value per feature column.
        dataSet: Training samples, one row per sample (ndarray or nested
            sequence).
        labels: Class label for each row of ``dataSet``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label with the most votes. If several labels tie, the one whose
        first vote came from the nearest neighbour wins.

    Raises:
        IndexError: If ``k`` is smaller than 1 or larger than the number of
            samples in ``dataSet``.
    """
    dataSet = np.asarray(dataSet)
    dataSetSize = dataSet.shape[0]
    if not 1 <= k <= dataSetSize:
        # Same exception type the unchecked indexing would have produced,
        # but raised up front with an explanatory message.
        raise IndexError(
            f"k must be between 1 and {dataSetSize} (number of samples), got {k}"
        )

    # np.tile stacks inx dataSetSize times so it lines up row-for-row with
    # dataSet and the subtraction is element-wise.
    diffMat = np.tile(inx, (dataSetSize, 1)) - dataSet
    distances = (diffMat ** 2).sum(axis=1) ** 0.5

    # argsort returns the indices that order the distances ascending; a
    # stable sort keeps equidistant samples in their dataSet order.
    sortedDistIndices = distances.argsort(kind="stable")

    classCount = {}
    for sampleIndex in sortedDistIndices[:k]:
        voteLabel = labels[sampleIndex]
        # dict.get(key, default) returns `default` when the key is missing;
        # passing 0 lets a label's first vote be counted as 0 + 1.
        classCount[voteLabel] = classCount.get(voteLabel, 0) + 1

    # sorted() is stable even with reverse=True, so labels with equal votes
    # stay in first-seen (i.e. nearest-first) order.
    sortedClassCount = sorted(
        classCount.items(), key=operator.itemgetter(1), reverse=True
    )
    return sortedClassCount[0][0]


if __name__ == '__main__':
    group, labels = createDataSet()
    test = [1, 2]
    test_class = classify(test, group, labels, 3)
    print(test_class)
tips:
KNN距離公式如下:
$$ L_{p}(x_{i},x_{j})=\left(\sum_{l=1}^{n}\left|x_{i}^{(l)}-x_{j}^{(l)}\right|^{p}\right)^{\frac{1}{p}} $$
對於代碼,值得注意的地方有: 1. np.tile 的使用
2. 字典 get 的使用