import numpy as np
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler

# Training data: [height in cm, gender (1 = male, 0 = female)]
X_train = np.array([
    [158, 1],
    [170, 1],
    [183, 1],
    [191, 1],
    [155, 0],
    [163, 0],
    [180, 0],
    [158, 0],
    [170, 0]
])
y_train = [64, 86, 84, 80, 49, 59, 67, 54, 67]  # weights in kg

X_test = np.array([
    [160, 1],
    [196, 1],
    [168, 0],
    [177, 0]
])
y_test = [66, 87, 68, 74]

# Standardize to zero mean and unit variance; fit the scaler on the
# training set only, then reuse it to transform the test set
ss = StandardScaler()
X_trainss = ss.fit_transform(X_train)
X_testss = ss.transform(X_test)

# Log-transform the features (+1 keeps the 0/1 gender column finite)
X_train_log = np.log(X_train + 1)
X_test_log = np.log(X_test + 1)

K = 5
clf = KNeighborsRegressor(n_neighbors=K)
clf.fit(X_train, y_train)
clf1 = KNeighborsRegressor(n_neighbors=K)
clf1.fit(X_trainss, y_train)
clf2 = Ridge().fit(X_train_log, y_train)  # note: Ridge, not KNN, on the log features

predictions = clf.predict(X_test)
predictions1 = clf1.predict(X_testss)
predictions2 = clf2.predict(X_test_log)

print('Actual weights: %s' % y_test)
print('Predicted weights: %s' % predictions)
print('Predicted weights by StandardScaler: %s' % predictions1)
print('Predicted weights by Log: %s' % predictions2)
print('mean_squared_error: %s' % mean_squared_error(y_test, predictions))
print('mean_squared_error by StandardScaler: %s' % mean_squared_error(y_test, predictions1))
print('mean_squared_error by Log: %s' % mean_squared_error(y_test, predictions2))
print('r2_score: %s' % r2_score(y_test, predictions))
print('r2_score by StandardScaler: %s' % r2_score(y_test, predictions1))
print('r2_score by Log: %s' % r2_score(y_test, predictions2))
The output is:
Actual weights: [66, 87, 68, 74]
Predicted weights: [62.4 76.8 66. 72.6]
Predicted weights by StandardScaler: [69.4 76.8 59.2 59.2]
Predicted weights by Log: [72.98731557 73.88528401 63.37281696 63.60369452]
mean_squared_error: 30.740000000000023
mean_squared_error by StandardScaler: 103.02
mean_squared_error by Log: 87.57808624078896
r2_score: 0.5424744186046508
r2_score by StandardScaler: -0.5333209302325581
r2_score by Log: -0.30348779521174274

Process finished with exit code 0
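Why does standardization change the KNN predictions at all? KNN is distance-based: on the raw data the height column (roughly 155–196) dominates the Euclidean distance and the 0/1 gender column is nearly ignored, while after scaling both columns contribute comparably, so a different set of neighbours can be selected. A minimal sketch (reusing clf, clf1, X_test, and X_testss from the script above) that prints the neighbours chosen for the first test point:

# Indices of the K nearest training points for the first test sample,
# before and after standardization; the sets can differ because
# scaling rebalances the height and gender columns.
_, idx_raw = clf.kneighbors(X_test[:1])
_, idx_ss = clf1.kneighbors(X_testss[:1])
print('Neighbours on raw features:    %s' % idx_raw[0])
print('Neighbours on scaled features: %s' % idx_ss[0])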
We find that after standardizing or log-transforming the features, the prediction mean squared error actually gets larger rather than smaller. This example is an extension of the last example in Chapter 3 of 《scikit-learn機器學習》, and the book's example still feels flawed, because the sample is simply too small: with nine training points and four test points, a single train/test split is too noisy to tell whether a transformation helps. Note also that the log variant here is fitted with Ridge rather than KNN, so the three results are not strictly comparable.
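With so few samples, a more robust check is leave-one-out cross-validation over the nine training points instead of the single four-point test set. The sketch below reuses X_train, y_train, and K from the script above; it is an illustration of the idea, not the book's method:

from sklearn.model_selection import LeaveOneOut, cross_val_score
from sklearn.pipeline import make_pipeline

# Compare raw vs. standardized features under leave-one-out CV.
# The pipeline refits the scaler inside every fold, avoiding leakage.
for name, model in [
    ('raw', KNeighborsRegressor(n_neighbors=K)),
    ('scaled', make_pipeline(StandardScaler(), KNeighborsRegressor(n_neighbors=K))),
]:
    scores = -cross_val_score(model, X_train, y_train, cv=LeaveOneOut(),
                              scoring='neg_mean_squared_error')  # negated back to MSE
    print('%s: mean MSE over %d folds = %.2f' % (name, len(scores), scores.mean()))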