# kNN 算法實現(Python)
import numpy as np
import pandas as pd
#這裏直接引入sklearn裏面的數據集,iris 鳶尾花
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split # 切分數據集爲訓練集和測試集
from sklearn.metrics import accuracy_score #計算分類預測的準確率
# Load the iris dataset and build a DataFrame view of it whose 'class'
# column holds the species name instead of the integer target.
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['class'] = pd.Series(iris.target).map(lambda idx: iris.target_names[idx])
df.describe()

# Feature matrix and targets; the targets are reshaped into a single column.
x, y = iris.data, iris.target.reshape(-1, 1)

# Split into training and test sets (30% held out, stratified by class).
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=35,
    stratify=y,
)

# Scratch checks of NumPy helpers: argsort()[:3] yields the indices of the
# three smallest entries, which are then used to pick the values themselves.
# The bare expressions below have no effect when this runs as a script.
arr = np.array([1, 5, 3, 4]).argsort()[:3]
test = list(np.array([1, 5, 3, 4])[arr])
test_2 = np.take(np.array([1, 5, 3, 4]), arr)
test_2.tolist().count(1)
np.argmax([1, 5, 3, 4])
# np.bincount([1,1,2,3,'1x'])
# Distance function definitions
def l1_distance(a: np.ndarray, b: np.ndarray) -> np.ndarray:
    """Return the Manhattan (L1) distance between each row of ``a`` and ``b``.

    Args:
        a: Array with at least two dimensions; the absolute differences are
            summed along axis 1.
        b: Array broadcastable against ``a`` (e.g. a single sample with one
            value per column of ``a``).

    Returns:
        The sum of ``|a - b|`` along axis 1.
    """
    return np.sum(np.abs(a - b), axis=1)
def l2_distance(a: np.ndarray, b: np.ndarray) -> np.ndarray:
    """Return the Euclidean (L2) distance between each row of ``a`` and ``b``.

    Args:
        a: Array with at least two dimensions; the squared differences are
            summed along axis 1.
        b: Array broadcastable against ``a`` (e.g. a single sample with one
            value per column of ``a``).

    Returns:
        The square root of the sum of ``(a - b) ** 2`` along axis 1.
    """
    return np.sqrt(np.sum((a - b) ** 2, axis=1))
# Classifier implementation
class kNN(object):
    """Brute-force k-nearest-neighbours classifier.

    ``fit`` only stores the training data. ``predict`` computes the distance
    from every query point to every stored training point and takes a
    majority vote among the ``n_neighbors`` closest ones.
    """

    def __init__(self, n_neighbors=1, dist_func=l1_distance):
        """Store the hyper-parameters.

        Args:
            n_neighbors: Number of nearest training points that vote.
            dist_func: Callable ``dist_func(train_matrix, sample)`` returning
                one distance per training row.
        """
        self.n_neighbors = n_neighbors
        self.dist_func = dist_func

    def fit(self, x, y):
        """Remember the training features ``x`` and their labels ``y``."""
        self.x_train = x
        self.y_train = y

    def predict(self, x):
        """Predict a label for every row of ``x``.

        Args:
            x: 2-D array of query points, one per row.

        Returns:
            Array of shape ``(x.shape[0], 1)`` with the dtype of the training
            labels, holding the most frequent label among the nearest
            neighbours of each query point. On a tie the smallest label wins.
        """
        # One output row per query point, typed like the training labels.
        y_pred = np.zeros((x.shape[0], 1), dtype=self.y_train.dtype)

        for i, sample in enumerate(x):
            # Distance from this query point to every training point.
            distances = self.dist_func(self.x_train, sample)

            # Indices of the k closest training points (nearest first).
            nn_indexes = np.argsort(distances)[:self.n_neighbors]

            # Use the labels stored by fit(); the previous code read a
            # module-level ``y_train`` here, which broke the model whenever
            # that global was missing or differed from the fitted labels.
            neighbor_labels = self.y_train[nn_indexes].ravel()

            # Majority vote. np.unique returns labels in ascending order, so
            # argmax picks the smallest label among ties -- the same result
            # np.bincount gave for non-negative integer labels, but this also
            # works for other label types.
            labels, counts = np.unique(neighbor_labels, return_counts=True)
            y_pred[i] = labels[np.argmax(counts)]

        return y_pred
# Fit a 5-neighbour model using the L1 distance and score it on the test set.
# The bare accuracy_score expression has no visible effect in a script.
kNN_model = kNN(n_neighbors=5, dist_func=l1_distance)
kNN_model.fit(x_train, y_train)
y_pred = kNN_model.predict(x_test)
accuracy_score(y_test, y_pred)

# Compare how the different parameter combinations perform.
knn = kNN()
knn.fit(x_train, y_train)
result_list = []
for p in [1, 2]:
    # p == 1 selects the L1 distance, p == 2 the L2 distance.
    knn.dist_func = l1_distance if p == 1 else l2_distance
    # Try several odd values of k.
    for k in range(1, 10, 2):
        knn.n_neighbors = k
        y_pred = knn.predict(x_test)
        accuracy = accuracy_score(y_test, y_pred)
        print(accuracy)
        result_list.append([knn.n_neighbors, knn.dist_func.__name__, accuracy])

# One row per (k, distance function) combination with its accuracy.
df = pd.DataFrame(result_list, columns=['k', "距離函數", "準確率"])
df