# 10. KNN 手寫數字識別

# 導包
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier

# Load the data set: for every digit 0-9, read images 1..500 from
# F:/data/<digit>/<digit>_<n>.bmp and pair each image with its digit label.
samples = [
    (plt.imread('F:/data/%d/%d_%d.bmp'%(digit,digit,number)), digit)
    for digit in range(10)
    for number in range(1,501)
]

# Stack the images and the labels into NumPy arrays, kept in the same order.
feature = np.array([image for image, _ in samples])
target = np.array([label for _, label in samples])
print(feature.shape,target.shape)

# Sanity check: pick one random sample, print its label and draw the image.
# The upper bound comes from the data itself rather than a hard-coded 5000,
# so the preview stays valid if the number of loaded images changes.
index = np.random.randint(len(feature))
print('該索引對應的目標值',target[index])
digit = feature[index]
plt.figure(figsize=(2,2))
plt.imshow(digit,cmap='gray')

# Shuffle the samples while keeping every image aligned with its label.
# One permutation index is drawn (seeded for reproducibility) and applied to
# both arrays, instead of relying on two identically seeded in-place shuffles
# happening to reorder both arrays the same way.
np.random.seed(3)
shuffled_order = np.random.permutation(len(feature))
feature = feature[shuffled_order]
target = target[shuffled_order]

# Split into training and test data: the last n_test samples are held out and
# everything before them is used for training (4950 / 50 with 5000 samples).
# Deriving both slices from n_test keeps them complementary for any data size.
n_test = 50
x_train = feature[:-n_test]
y_train = target[:-n_test]
x_test = feature[-n_test:]
y_test = target[-n_test:]

# The feature data must be two-dimensional: one row per sample.
# The -1 tells NumPy to infer the row length from the remaining dimensions
# (784 for 28x28 images), so each image is flattened into a single vector.
x_train = x_train.reshape(len(x_train),-1)
x_test = x_test.reshape(len(x_test),-1)

# Create the k-nearest-neighbours classifier (15 neighbours) and fit it on the
# training split.
knn = KNeighborsClassifier(n_neighbors=15)
knn.fit(x_train,y_train)

# Report the accuracy instead of discarding it. The training score alone is
# optimistic because the model has already seen those samples, so the
# held-out test score is printed as well.
print('訓練集準確率:',knn.score(x_train,y_train))
print('測試集準確率:',knn.score(x_test,y_test))

# Compare the true labels with the predictions on the test split.
y_ = knn.predict(x_test)
print('真實:',y_test)
print('預測:',y_)

# Save the trained model to disk.
# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in
# 0.23, so import the standalone joblib package first and fall back to the
# bundled copy only on old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib
joblib.dump(knn,'./knn.m')

# Load the model back from disk.
# NOTE: joblib files are pickle-based; only load files from a trusted source.
knn = joblib.load('./knn.m')
# Let the model recognise a digit from an image outside the data set.
# Draw the whole picture first.
img_arr = plt.imread('F:/數字.jpg')
plt.imshow(img_arr)

# Cut out the region of interest: rows 95..149 and columns 85..129.
row_span = slice(95,150)
col_span = slice(85,130)
five_img = img_arr[row_span,col_span]
plt.imshow(five_img)
five_img.shape
# Reduce the crop to two dimensions by averaging over the last axis
# (assumes axis 2 holds the colour channels - TODO confirm for this image).
five_img = np.mean(five_img,axis=2)


# Resample the crop to the same resolution as the training samples (28x28).
import scipy.ndimage as ndimage

# Derive the zoom factors from the crop's actual size instead of hard-coding
# 55 and 45, so changing the crop window does not silently break the resize.
target_side = 28
src_rows, src_cols = five_img.shape
five = ndimage.zoom(five_img,zoom=(target_side/src_rows,target_side/src_cols))

# predict() expects a 2-D array with one row per sample, so flatten the image
# into a single row. Print the result; as a bare expression it would be lost
# when the file runs as a script.
print(knn.predict(five.reshape(1,-1)))
# 最終得到結果
# 相關文章
# 相關標籤/搜索