迴歸(Regression):https://www.tensorflow.org/tutorials/keras/basic_regression
主要步驟:
數據部分
模型部分
Attribute Information:
回調函數是一個函數的合集,在訓練的階段中,用來查看訓練模型的內在狀態和統計。
在訓練時,相應的回調函數的方法就會在各自的階段被調用。
通常是在model.fit函數中調用callbacks(參數爲callbacks,必須輸入list類型的數據)。
簡而言之,Callbacks用於指定在每一個epoch開始和結束的時候進行哪一種特定操作。
EarlyStopping是Callbacks的一種,可用來加快學習的速度,提升調參效率。
使用一個EarlyStopping回調來測試每個迭代的訓練條件,如果某個迭代過後沒有顯示改進,自動停止訓練。
GitHub:https://github.com/anliven/Hello-AI/blob/master/Google-Learn-and-use-ML/3_basic_regression.py
# coding=utf-8
"""Basic regression: predict fuel efficiency (MPG) from the Auto MPG data set.

Main steps:
    1. Data part  - load, clean, split, inspect and normalise the data.
    2. Model part - build, inspect, train (with and without early stopping),
       evaluate and visualise a small fully connected Keras model.

The data file is expected at ``<cwd>/datasets/auto-mpg/auto-mpg.data`` and all
figures are written to ``./outputs``.
"""
import os
import pathlib

# Set before TensorFlow is imported so the setting is already present when the
# library loads.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import tensorflow as tf
from tensorflow import keras

# Use the public ``keras.layers`` namespace rather than the private
# ``tensorflow.python.keras`` package.
layers = keras.layers

# ``tf.__version__`` is used instead of the TF 1.x-only ``tf.VERSION``.
print("# TensorFlow version: {} - tf.keras version: {}".format(tf.__version__, tf.keras.__version__))

# savefig() does not create missing directories, so make sure it exists.
os.makedirs("./outputs", exist_ok=True)

# ### Data part
# Get the data: the file is read from a local copy below the working directory.
ds_file = pathlib.Path.cwd() / "datasets" / "auto-mpg" / "auto-mpg.data"
if not ds_file.is_file():
    raise FileNotFoundError(
        "Data file not found: {} (download auto-mpg.data and place it there)".format(ds_file))

column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight', 'Acceleration', 'Model Year', 'Origin']
raw_dataset = pd.read_csv(
    filepath_or_buffer=str(ds_file),  # path of the data file
    names=column_names,               # column names to use for the result
    na_values="?",                    # "?" entries are read as NaN
    comment='\t',                     # ignore the rest of a line from the first tab onwards
                                      # (presumably the trailing car-name field - verify against the data file)
    sep=" ",                          # field separator
    skipinitialspace=True,            # skip the spaces that follow a separator
)
data_set = raw_dataset.copy()
print("# Data set tail:\n{}".format(data_set.tail()))

# Clean the data
print("# Summary of NaN:\n{}".format(data_set.isna().sum()))  # number of missing values per column
data_set = data_set.dropna()  # drop the rows that contain missing values
# "Origin" is categorical, not numeric: convert it to one-hot columns.
origin = data_set.pop('Origin')
data_set['USA'] = (origin == 1) * 1.0
data_set['Europe'] = (origin == 2) * 1.0
data_set['Japan'] = (origin == 3) * 1.0
print("# Data set tail:\n{}".format(data_set.tail()))

# Split the data into train and test
train_dataset = data_set.sample(frac=0.8, random_state=0)
test_dataset = data_set.drop(train_dataset.index)  # the test set is kept for the final evaluation

# Inspect the data
sns.pairplot(train_dataset[["MPG", "Cylinders", "Displacement", "Weight"]], diag_kind="kde")
plt.figure(num=1)  # select figure 1, which is the pairplot figure as long as no figure was created earlier
plt.savefig("./outputs/sample-3-figure-1.png", dpi=200, format='png')
plt.show()
plt.close()

train_stats = train_dataset.describe()  # overall statistics of the training set
train_stats.pop("MPG")                  # the label is not normalised
train_stats = train_stats.transpose()   # one row per feature
print("# Train statistics:\n{}".format(train_stats))

# Split features from labels
train_labels = train_dataset.pop('MPG')  # the value to predict
test_labels = test_dataset.pop('MPG')


# Normalize the data
def norm(x):
    """Standardise ``x`` with the mean and std of the training features.

    Reads the module-level ``train_stats`` table, so the same training-set
    statistics are applied to whatever data is passed in.
    """
    return (x - train_stats['mean']) / train_stats['std']


normed_train_data = norm(train_dataset)
normed_test_data = norm(test_dataset)


# ### Model part
# Build the model
def build_model():
    """Create and compile the regression model.

    The network is a ``Sequential`` stack of two 64-unit ReLU ``Dense`` layers
    followed by a single-unit output layer. The input width is taken from the
    number of columns of the module-level ``train_dataset``.

    Returns:
        The compiled Keras model (MSE loss, RMSprop optimizer, MAE and MSE
        reported as metrics).
    """
    model = keras.Sequential([
        layers.Dense(64, activation=tf.nn.relu, input_shape=[len(train_dataset.keys())]),
        layers.Dense(64, activation=tf.nn.relu),
        layers.Dense(1),  # output layer returning a single continuous value
    ])
    optimizer = tf.keras.optimizers.RMSprop(0.001)
    model.compile(loss='mean_squared_error',
                  optimizer=optimizer,
                  metrics=['mean_absolute_error', 'mean_squared_error'])
    return model


# Inspect the model
mod = build_model()
mod.summary()  # print a short description of the model
example_batch = normed_train_data[:10]       # first 10 training rows as a sample batch
example_result = mod.predict(example_batch)  # predictions of the untrained model
print("# Example result:\n{}".format(example_result))


# Train the model
class PrintDot(keras.callbacks.Callback):
    """Progress callback: print one dot per epoch, 100 dots per line."""

    def on_epoch_end(self, epoch, logs=None):
        """Start a new line every 100 epochs, then print a dot for this epoch."""
        if epoch % 100 == 0:
            print('')
        print('.', end='')


EPOCHS = 1000  # number of training epochs

history = mod.fit(normed_train_data,
                  train_labels,
                  epochs=EPOCHS,
                  validation_split=0.2,   # fraction of the training data used for validation
                  verbose=0,              # silent; progress is reported by PrintDot instead
                  callbacks=[PrintDot()])
# ``history.history`` holds the per-epoch values that plot_history() reads.


def plot_history(h, n=1):
    """Plot training and validation MAE and MSE per epoch and save the figure.

    Args:
        h: the object returned by ``model.fit``; its ``history`` and ``epoch``
            attributes are read.
        n: figure number, also used in the output file name
            ``./outputs/sample-3-figure-<n>.png``.
    """
    hist = pd.DataFrame(h.history)
    hist['epoch'] = h.epoch
    print("\n# History tail:\n{}".format(hist.tail()))

    plt.figure(num=n, figsize=(6, 8))

    plt.subplot(2, 1, 1)
    plt.xlabel('Epoch')
    plt.ylabel('Mean Abs Error [MPG]')
    plt.plot(hist['epoch'], hist['mean_absolute_error'], label='Train Error')
    plt.plot(hist['epoch'], hist['val_mean_absolute_error'], label='Val Error')
    plt.ylim([0, 5])
    plt.legend()  # without this call the line labels above are never displayed

    plt.subplot(2, 1, 2)
    plt.xlabel('Epoch')
    plt.ylabel('Mean Square Error [$MPG^2$]')
    plt.plot(hist['epoch'], hist['mean_squared_error'], label='Train Error')
    plt.plot(hist['epoch'], hist['val_mean_squared_error'], label='Val Error')
    plt.ylim([0, 20])
    plt.legend()

    filename = "./outputs/sample-3-figure-" + str(n) + ".png"
    plt.savefig(filename, dpi=200, format='png')
    plt.show()
    plt.close()


plot_history(history, 2)

# Tuning: retrain a fresh model, this time with an EarlyStopping callback that
# monitors the validation loss with a patience of 10 epochs.
model2 = build_model()
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
history2 = model2.fit(normed_train_data,
                      train_labels,
                      epochs=EPOCHS,
                      validation_split=0.2,
                      verbose=0,
                      callbacks=[early_stop, PrintDot()])
plot_history(history2, 3)
# evaluate() returns the loss followed by the two compiled metrics.
loss, mae, mse = model2.evaluate(normed_test_data, test_labels, verbose=0)
print("# Testing set Mean Abs Error: {:5.2f} MPG".format(mae))

# Make predictions
test_predictions = model2.predict(normed_test_data).flatten()
plt.figure(num=4, figsize=(6, 8))
plt.scatter(test_labels, test_predictions)
plt.xlabel('True Values [MPG]')
plt.ylabel('Predictions [MPG]')
plt.axis('equal')
plt.axis('square')
plt.xlim([0, plt.xlim()[1]])
plt.ylim([0, plt.ylim()[1]])
plt.plot([-100, 100], [-100, 100])  # y = x reference line
plt.savefig("./outputs/sample-3-figure-4.png", dpi=200, format='png')
plt.show()
plt.close()

# Distribution of the prediction errors on the test set
error = test_predictions - test_labels
plt.figure(num=5, figsize=(6, 8))
plt.hist(error, bins=25)
plt.xlabel("Prediction Error [MPG]")
plt.ylabel("Count")
plt.savefig("./outputs/sample-3-figure-5.png", dpi=200, format='png')
plt.show()
plt.close()
C:\Users\anliven\AppData\Local\conda\conda\envs\mlcc\python.exe D:/Anliven/Anliven-Code/PycharmProjects/Google-Learn-and-use-ML/3_basic_regression.py
# TensorFlow version: 1.12.0 - tf.keras version: 2.1.6-tf
# Data set tail:
MPG Cylinders Displacement ... Acceleration Model Year Origin
393 27.0 4 140.0 ... 15.6 82 1
394 44.0 4 97.0 ... 24.6 82 2
395 32.0 4 135.0 ... 11.6 82 1
396 28.0 4 120.0 ... 18.6 82 1
397 31.0 4 119.0 ... 19.4 82 1
[5 rows x 8 columns]
# Summary of NaN:
MPG 0
Cylinders 0
Displacement 0
Horsepower 6
Weight 0
Acceleration 0
Model Year 0
Origin 0
dtype: int64
# Data set tail:
MPG Cylinders Displacement ... USA Europe Japan
393 27.0 4 140.0 ... 1.0 0.0 0.0
394 44.0 4 97.0 ... 0.0 1.0 0.0
395 32.0 4 135.0 ... 1.0 0.0 0.0
396 28.0 4 120.0 ... 1.0 0.0 0.0
397 31.0 4 119.0 ... 1.0 0.0 0.0
[5 rows x 10 columns]
# Train statistics:
count mean std ... 50% 75% max
Cylinders 314.0 5.477707 1.699788 ... 4.0 8.00 8.0
Displacement 314.0 195.318471 104.331589 ... 151.0 265.75 455.0
Horsepower 314.0 104.869427 38.096214 ... 94.5 128.00 225.0
Weight 314.0 2990.251592 843.898596 ... 2822.5 3608.00 5140.0
Acceleration 314.0 15.559236 2.789230 ... 15.5 17.20 24.8
Model Year 314.0 75.898089 3.675642 ... 76.0 79.00 82.0
USA 314.0 0.624204 0.485101 ... 1.0 1.00 1.0
Europe 314.0 0.178344 0.383413 ... 0.0 0.00 1.0
Japan 314.0 0.197452 0.398712 ... 0.0 0.00 1.0
[9 rows x 8 columns]
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense (Dense) (None, 64) 640
_________________________________________________________________
dense_1 (Dense) (None, 64) 4160
_________________________________________________________________
dense_2 (Dense) (None, 1) 65
=================================================================
Total params: 4,865
Trainable params: 4,865
Non-trainable params: 0
_________________________________________________________________
# Example result:
[[0.3783294 ]
[0.17875314]
[0.68095654]
[0.45696187]
[1.4998233 ]
[0.05698915]
[1.4138494 ]
[0.7885587 ]
[0.10802953]
[1.3029677 ]]
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
# History tail:
val_loss val_mean_absolute_error ... mean_squared_error epoch
995 9.350584 2.267639 ... 2.541113 995
996 9.191998 2.195405 ... 2.594836 996
997 9.559576 2.384058 ... 2.576047 997
998 8.791337 2.145222 ... 2.782730 998
999 9.088490 2.227165 ... 2.425531 999
[5 rows x 7 columns]
.........................................................................................
# History tail:
val_loss val_mean_absolute_error ... mean_squared_error epoch
84 8.258534 2.233329 ... 6.221810 84
85 8.328515 2.208959 ... 6.213853 85
86 8.420452 2.224991 ... 6.427011 86
87 8.418247 2.215443 ... 6.178523 87
88 8.437484 2.193801 ... 6.183405 88
[5 rows x 7 columns]
# Testing set Mean Abs Error: 1.88 MPG
Process finished with exit code 0
問題描述
在Anaconda3建立的運行環境中,執行「from tensorflow.keras import layers」失敗,提示「Unresolved reference」
學習
處理方法
改寫爲「from tensorflow.python.keras import layers」
導入包時,須要根據實際的具體位置進行導入。
確認TensorFlow中Keras的實際位置:「D:\DownLoadFiles\anaconda3\envs\mlcc\Lib\site-packages\tensorflow\python\keras\」。
實際上多了一層目錄「python」,因此正確的導入方式爲「from tensorflow.python.keras import layers」。
參考信息
https://stackoverflow.com/questions/47262955/how-to-import-keras-from-tf-keras-in-tensorflow
問題描述
執行keras.utils.get_file("auto-mpg.data", "https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data")報錯:
Downloading data from https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data
Traceback (most recent call last):
......
Exception: URL fetch failure on https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data: None -- [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond
處理方法
因「網絡」的緣故,導致無法下載。手工下載,然後放置在當前目錄,從當前目錄地址導入數據文件。