Python全棧開發/人工智能公開課_騰訊課堂python
https://ke.qq.com/course/190378
https://github.com/haoran119/ke.qq.com.python/tree/master/src/python-fullstack
# coding: utf-8

# NumPy basics walkthrough: array creation, slicing, universal functions,
# saving/loading arrays, stacking, matrix product and transpose.
# The "#..." blocks before each print show the output that print produces.

# In[1]:


import numpy as np


# In[2]:


# Source data for the arrays built below

list1 = [1, 3, 5, -2, 0, -9]
list2 = [2, 4, -3, -7, 1, -7]
list3 = [[2, 5, 0], [11, 3, 4]]
list4 = [[3, -1, 8], [9, -3, 9]]


# In[3]:


arr1 = np.array(list1)

#[ 1  3  5 -2  0 -9]
print(arr1)


# In[4]:


arr4 = np.array(list3)

#[[ 2  5  0]
# [11  3  4]]
print(arr4)


# In[5]:


arr2 = np.arange(1, 10, 2)

#[1 3 5 7 9]
print(arr2)


# In[6]:


arr3 = np.linspace(1, 10, 4)

#[ 1.  4.  7. 10.]
print(arr3)


# In[7]:


arr_zero = np.zeros((3, 4))  # the shape is passed as a tuple

#[[0. 0. 0. 0.]
# [0. 0. 0. 0.]
# [0. 0. 0. 0.]]
print(arr_zero)


# In[8]:


arr_one = np.ones((3, 3))

#[[1. 1. 1.]
# [1. 1. 1.]
# [1. 1. 1.]]
#[[100. 100. 100.]
# [100. 100. 100.]
# [100. 100. 100.]]
print(arr_one)
print(arr_one * 100)


# In[9]:


arr_eye = np.eye(4, 4)  # ones on the diagonal, zeros elsewhere

#[[1. 0. 0. 0.]
# [0. 1. 0. 0.]
# [0. 0. 1. 0.]
# [0. 0. 0. 1.]]
print(arr_eye)


# In[10]:


arr_eye2 = np.eye(4, 5)

#[[1. 0. 0. 0. 0.]
# [0. 1. 0. 0. 0.]
# [0. 0. 1. 0. 0.]
# [0. 0. 0. 1. 0.]]
print(arr_eye2)


# In[11]:


# Indexing and slicing

#[ 5 -2  0]
#[[3 4]]
print(arr1[2:5])  # half-open interval: start included, stop excluded
print(arr4[1:2, 1:3])


# In[12]:


# Universal functions

#sqrt :
# [[1.41421356 2.23606798 0.        ]
#  [3.31662479 1.73205081 2.        ]]
#exp :
# [[7.38905610e+00 1.48413159e+02 1.00000000e+00]
#  [5.98741417e+04 2.00855369e+01 5.45981500e+01]]
print("sqrt : \n", np.sqrt(arr4))
print("exp : \n", np.exp(arr4))


# In[13]:


arr2 = np.array(list2)  # rebinds arr2; the arange result above is discarded
new_arr = np.maximum(arr1, arr2)

#[ 2  4  5 -2  1 -7]
print(new_arr)


# In[14]:


# ReLU: keep positive values, replace negative values with 0
new_arr = np.maximum(0, arr1)

#[1 3 5 0 0 0]
print(new_arr)


# In[15]:

#(array([[0.41421356, 0.23606798, 0.        ],
#       [0.31662479, 0.73205081, 0.        ]]), array([[1., 2., 0.],
#       [3., 1., 2.]]))
print(np.modf(np.sqrt(arr4)))  # splits into (fractional parts, integer parts)


# In[16]:


new_arr1 = np.where(arr2 > 0, 'True', 'False')  # condition ? x : y, element-wise

#['True' 'True' 'False' 'False' 'True' 'False']
print(new_arr1)


# In[17]:

#[-7 -3  1  2  4]
#[ 0  2  3  4  5 11]
print(np.unique(arr2))
print(np.unique(arr4))


# In[18]:


# Saving arrays to files and loading them back

np.save('myarr', arr2)  # np.save appends the .npy suffix -> myarr.npy


# In[19]:


new_arr2 = np.load('myarr.npy')

#[ 2  4 -3 -7  1 -7]
print(new_arr2)


# In[20]:


np.savez('myarrzip', a1=arr1, a2=arr2, a3=arr3)

# An .npz archive keeps a file handle open until it is closed, so read it
# inside a with-block instead of leaving the handle dangling.
#[ 1  3  5 -2  0 -9]
with np.load('myarrzip.npz') as arr:
    print(arr['a1'])


# In[21]:


# Linear algebra / matrices
# Stacking matrices

arr5 = np.array(list3)
arr6 = np.array(list4)

#[[ 2  5  0  3 -1  8]
# [11  3  4  9 -3  9]]
#[[ 2  5  0]
# [11  3  4]
# [ 3 -1  8]
# [ 9 -3  9]]
print(np.hstack([arr5, arr6]))
print(np.vstack([arr5, arr6]))


# In[22]:


# Matrix product: (3, 2) dot (2, 3) -> (3, 3)

arr6 = np.array(list4).reshape(3, 2)

#[[ 3 -1]
# [ 8  9]
# [-3  9]]
#[[ -5  12  -4]
# [115  67  36]
# [ 93  12  36]]
print(arr6)
print(arr6.dot(arr5))


# In[23]:

#[[ 3  8 -3]
# [-1  9  9]]
print(np.transpose(arr6))  # transpose
# coding: utf-8

# pandas data-cleaning walkthrough: load a spreadsheet, drop sparse rows,
# fill missing values, remove out-of-range and duplicate rows, then shuffle
# and sample. The "#..." blocks before each print show the output of one run.

# In[1]:


import pandas as pd
import numpy as np


# In[2]:


# read_excel already returns a DataFrame; no extra wrapping is needed.
data = pd.read_excel('originalData.xlsx')

#          date   hour  pressure  wind_direction  temperature
#0   2016-07-01    0.0    1000.4           225.0         26.4
#1   2016-07-01    NaN       NaN             NaN          NaN
#2   2016-07-01    6.0     998.9           212.0         31.7
#3   2016-07-01  235.0     998.7           244.0          NaN
#4   2016-07-01   12.0     999.7           222.0          NaN
#5   2016-07-01   15.0    1000.0           102.0          NaN
#6   2016-07-01    NaN     998.8           202.0         26.0
#7   2016-07-01    NaN    1000.2           334.0         25.5
#8   2016-07-01    NaN    1000.2           334.0         25.5
#9   2016-07-02    3.0    1002.4            46.0         30.0
#10  2016-07-02    6.0    1001.3            37.0         29.3
#11  2016-07-02    9.0    1001.9           345.0         25.9
#12  2016-07-02   12.0    1003.6           113.0         25.1
#13  2016-07-02   12.0    1003.6           113.0         25.1
#14  2016-07-02   15.0    1002.4           138.0         25.3
#             hour     pressure  wind_direction  temperature
#count   11.000000    14.000000       14.000000    11.000000
#mean    29.545455  1000.864286      190.500000    26.890909
#std     68.313049     1.685963      102.932951     2.311473
#min      0.000000   998.700000       37.000000    25.100000
#25%      6.000000   999.775000      113.000000    25.400000
#50%     12.000000  1000.300000      207.000000    25.900000
#75%     13.500000  1002.275000      239.250000    27.850000
#max    235.000000  1003.600000      345.000000    31.700000
print(data)
print(data.describe())


# In[3]:

#RangeIndex(start=0, stop=15, step=1)
#Index(['date', 'hour', 'pressure', 'wind_direction', 'temperature'], dtype='object')
print(data.index)
print(data.columns)


# In[4]:

#         date   hour  pressure  wind_direction  temperature
#0  2016-07-01    0.0    1000.4           225.0         26.4
#1  2016-07-01    NaN       NaN             NaN          NaN
#2  2016-07-01    6.0     998.9           212.0         31.7
#3  2016-07-01  235.0     998.7           244.0          NaN
#4  2016-07-01   12.0     999.7           222.0          NaN
#5  2016-07-01   15.0    1000.0           102.0          NaN
#          date  hour  pressure  wind_direction  temperature
#9   2016-07-02   3.0    1002.4            46.0         30.0
#10  2016-07-02   6.0    1001.3            37.0         29.3
#11  2016-07-02   9.0    1001.9           345.0         25.9
#12  2016-07-02  12.0    1003.6           113.0         25.1
#13  2016-07-02  12.0    1003.6           113.0         25.1
#14  2016-07-02  15.0    1002.4           138.0         25.3
print(data.head(6))
print(data.tail(6))


# In[5]:


# 1. Drop sparse rows: thresh=3 keeps only rows that have at least
#    3 non-missing values.
data.dropna(axis=0, thresh=3, inplace=True)
data.reset_index(drop=True, inplace=True)

#          date   hour  pressure  wind_direction  temperature
#0   2016-07-01    0.0    1000.4           225.0         26.4
#1   2016-07-01    6.0     998.9           212.0         31.7
#2   2016-07-01  235.0     998.7           244.0          NaN
#3   2016-07-01   12.0     999.7           222.0          NaN
#4   2016-07-01   15.0    1000.0           102.0          NaN
#5   2016-07-01    NaN     998.8           202.0         26.0
#6   2016-07-01    NaN    1000.2           334.0         25.5
#7   2016-07-01    NaN    1000.2           334.0         25.5
#8   2016-07-02    3.0    1002.4            46.0         30.0
#9   2016-07-02    6.0    1001.3            37.0         29.3
#10  2016-07-02    9.0    1001.9           345.0         25.9
#11  2016-07-02   12.0    1003.6           113.0         25.1
#12  2016-07-02   12.0    1003.6           113.0         25.1
#13  2016-07-02   15.0    1002.4           138.0         25.3
print(data)


# In[6]:


# 2. Fill missing values: hour -> 10, temperature -> 25.5
data.fillna({'hour': 10, 'temperature': 25.5}, inplace=True)

#          date   hour  pressure  wind_direction  temperature
#0   2016-07-01    0.0    1000.4           225.0         26.4
#1   2016-07-01    6.0     998.9           212.0         31.7
#2   2016-07-01  235.0     998.7           244.0         25.5
#3   2016-07-01   12.0     999.7           222.0         25.5
#4   2016-07-01   15.0    1000.0           102.0         25.5
#5   2016-07-01   10.0     998.8           202.0         26.0
#6   2016-07-01   10.0    1000.2           334.0         25.5
#7   2016-07-01   10.0    1000.2           334.0         25.5
#8   2016-07-02    3.0    1002.4            46.0         30.0
#9   2016-07-02    6.0    1001.3            37.0         29.3
#10  2016-07-02    9.0    1001.9           345.0         25.9
#11  2016-07-02   12.0    1003.6           113.0         25.1
#12  2016-07-02   12.0    1003.6           113.0         25.1
#13  2016-07-02   15.0    1002.4           138.0         25.3
print(data)


# In[7]:


# 3. Drop rows whose hour is greater than 24.
#    A boolean mask inspects every row; the earlier index loop over
#    range(data.index.max()) stopped one short and never checked the last row.
hour_too_large = data['hour'] > 24

# Emit one message per removed row, as the row-by-row version did.
for _ in range(int(hour_too_large.sum())):
    print('hour > 24, deleted')

data.drop(index=data.loc[hour_too_large].index, inplace=True)
data.reset_index(drop=True, inplace=True)

#hour > 24, deleted
#          date  hour  pressure  wind_direction  temperature
#0   2016-07-01   0.0    1000.4           225.0         26.4
#1   2016-07-01   6.0     998.9           212.0         31.7
#2   2016-07-01  12.0     999.7           222.0         25.5
#3   2016-07-01  15.0    1000.0           102.0         25.5
#4   2016-07-01  10.0     998.8           202.0         26.0
#5   2016-07-01  10.0    1000.2           334.0         25.5
#6   2016-07-01  10.0    1000.2           334.0         25.5
#7   2016-07-02   3.0    1002.4            46.0         30.0
#8   2016-07-02   6.0    1001.3            37.0         29.3
#9   2016-07-02   9.0    1001.9           345.0         25.9
#10  2016-07-02  12.0    1003.6           113.0         25.1
#11  2016-07-02  12.0    1003.6           113.0         25.1
#12  2016-07-02  15.0    1002.4           138.0         25.3
print(data)


# In[8]:


# 4. Drop duplicated rows, keeping the first occurrence
#    (keep='last' keeps the last one, keep=False drops every copy).
data.drop_duplicates(keep='first', inplace=True)
data.reset_index(drop=True, inplace=True)

#          date  hour  pressure  wind_direction  temperature
#0   2016-07-01   0.0    1000.4           225.0         26.4
#1   2016-07-01   6.0     998.9           212.0         31.7
#2   2016-07-01  12.0     999.7           222.0         25.5
#3   2016-07-01  15.0    1000.0           102.0         25.5
#4   2016-07-01  10.0     998.8           202.0         26.0
#5   2016-07-01  10.0    1000.2           334.0         25.5
#6   2016-07-02   3.0    1002.4            46.0         30.0
#7   2016-07-02   6.0    1001.3            37.0         29.3
#8   2016-07-02   9.0    1001.9           345.0         25.9
#9   2016-07-02  12.0    1003.6           113.0         25.1
#10  2016-07-02  15.0    1002.4           138.0         25.3
print(data)


# In[9]:


# 5. Shuffle the rows: build a random permutation of the row positions
randnum = np.random.permutation(data.index.size)

#[ 4  0 10  3  1  5  8  9  7  2  6]
print(randnum)


# In[10]:


data2 = data.take(randnum)  # reorder rows by the permuted positions

#          date  hour  pressure  wind_direction  temperature
#4   2016-07-01  10.0     998.8           202.0         26.0
#0   2016-07-01   0.0    1000.4           225.0         26.4
#10  2016-07-02  15.0    1002.4           138.0         25.3
#3   2016-07-01  15.0    1000.0           102.0         25.5
#1   2016-07-01   6.0     998.9           212.0         31.7
#5   2016-07-01  10.0    1000.2           334.0         25.5
#8   2016-07-02   9.0    1001.9           345.0         25.9
#9   2016-07-02  12.0    1003.6           113.0         25.1
#7   2016-07-02   6.0    1001.3            37.0         29.3
#2   2016-07-01  12.0     999.7           222.0         25.5
#6   2016-07-02   3.0    1002.4            46.0         30.0
print(data2)


# In[11]:


# 6. Random sampling: pick 8 rows
data3 = data.sample(8)

#         date  hour  pressure  wind_direction  temperature
#4  2016-07-01  10.0     998.8           202.0         26.0
#0  2016-07-01   0.0    1000.4           225.0         26.4
#2  2016-07-01  12.0     999.7           222.0         25.5
#1  2016-07-01   6.0     998.9           212.0         31.7
#5  2016-07-01  10.0    1000.2           334.0         25.5
#9  2016-07-02  12.0    1003.6           113.0         25.1
#7  2016-07-02   6.0    1001.3            37.0         29.3
#8  2016-07-02   9.0    1001.9           345.0         25.9
print(data3)
data3.to_csv('data3.csv')
# coding: utf-8

# Matplotlib walkthrough: line plot, scatter plot, bar chart, 3D surface
# and a figure with several subplots.

# In[1]:


import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401  (older Matplotlib needs this import to register the '3d' projection)


# In[2]:


# 1. Line plot: y = ax + b, plus a sine curve on the same axes
x = np.linspace(1, 21, 20)
y = 2 * x + 3
y2 = np.sin(x)

plt.plot(x, y, 'm^:', x, y2)  # 'm^:' = magenta, triangle markers, dotted line

plt.show()


# In[3]:


# 2. Scatter plot
n = 1024
x = np.random.normal(0, 1, n)  # 1024 samples from a normal distribution
y = np.random.normal(0, 1, n)

plt.scatter(x, y, s=np.random.rand(n) * 50, c=np.random.rand(n), alpha=0.7)

plt.show()


# In[4]:


# 3. Bar chart: y1 drawn upwards, y2 mirrored downwards
n = 10
x = np.arange(n)
y1 = (1 - x / float(n)) * np.random.uniform(0.5, 1.0, n)
y2 = (1 - x / float(n)) * np.random.uniform(0.5, 1.0, n)

plt.bar(x, y1, facecolor='red', edgecolor='white')
plt.bar(x, -y2, facecolor='blue', edgecolor='black')

# Label every bar with its value; dedicated loop names keep the data arrays
# from being shadowed.
for bar_x, bar_height in zip(x, y1):
    plt.text(bar_x, bar_height + 0.1, '%0.2f' % bar_height, ha='center', va='bottom')

for bar_x, bar_height in zip(x, -y2):
    plt.text(bar_x, bar_height - 0.1, '%0.2f' % bar_height, ha='center', va='bottom')

plt.ylim(-1.5, 1.5)

plt.show()


# In[5]:


# 4. 3D surface
fig = plt.figure(figsize=(12, 8))
# Create the 3D axes through the figure: on current Matplotlib, Axes3D(fig)
# no longer attaches the axes to the figure, which leaves the plot blank.
ax = fig.add_subplot(111, projection='3d')
x = np.arange(-4, 4, 0.25)
y = np.arange(-4, 4, 0.25)

x, y = np.meshgrid(x, y)
#[[-4.   -3.75 -3.5  ...  3.25  3.5   3.75]
# [-4.   -3.75 -3.5  ...  3.25  3.5   3.75]
# [-4.   -3.75 -3.5  ...  3.25  3.5   3.75]
# ...
# [-4.   -3.75 -3.5  ...  3.25  3.5   3.75]
# [-4.   -3.75 -3.5  ...  3.25  3.5   3.75]
# [-4.   -3.75 -3.5  ...  3.25  3.5   3.75]]
print(x)
#[[-4.   -4.   -4.   ... -4.   -4.   -4.  ]
# [-3.75 -3.75 -3.75 ... -3.75 -3.75 -3.75]
# [-3.5  -3.5  -3.5  ... -3.5  -3.5  -3.5 ]
# ...
# [ 3.25  3.25  3.25 ...  3.25  3.25  3.25]
# [ 3.5   3.5   3.5  ...  3.5   3.5   3.5 ]
# [ 3.75  3.75  3.75 ...  3.75  3.75  3.75]]
print(y)

z = np.sin(np.sqrt(x**2 + y**2))

ax.plot_surface(x, y, z, cmap=plt.get_cmap('autumn'))

plt.show()


# In[6]:


# 5. Several plots in one figure (2 x 2 grid)
x = np.linspace(0, 5, 5)
y1 = x**2
y2 = 2 * x
y3 = np.sin(x)
y4 = np.cos(x)

ax1 = plt.subplot(221)
plt.plot(x, y1)
ax2 = plt.subplot(2, 2, 2)
plt.plot(x, y2)
ax3 = plt.subplot(223)
plt.plot(x, y3)
ax4 = plt.subplot(2, 2, 4)
# The fourth panel stays empty: the y4 plot call below is disabled.
#plt.plot(x, y4)

plt.show()
KNN分類算法的分類預測過程
對於一個需要預測的輸入向量x,只需要在訓練數據集中尋找k個與向量x最近的向量的集合,然後把x的類標預測爲這k個樣本中類標數最多的那一類。
# coding: utf-8

# In[1]:


from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn import datasets


# In[2]:


# Handwritten digits, supervised learning
# 1. Sample set: a batch of handwritten-digit images with labels (0-9), 10 classes.
#    There are 1797 samples, shipped in sklearn's datasets module.
#    Every sample consists of an image and a target:
#    the image is an 8*8 picture of a handwritten digit 0-9,
#    the target is the class of the image (digit 0-9).
# 2. Split into a training set and a test set
# 3. Pick an algorithm and build the model: KNN
# 4. Train the model
# 5. Predict and validate
# 6. Model optimisation (SVM, decision tree)
# 7. Save the model (.model, load, predict)
# 8. Create several new handwritten images and let the model recognise them
sample_data = datasets.load_digits()
images = sample_data.data
labels = sample_data.target


# In[3]:


# Split into training and test sets (10% held out for testing).
# No random_state is given, so the split differs from run to run.
train_data, test_data, train_labels, test_labels = train_test_split(
    images, labels, test_size=0.1)


# In[4]:


# Choose the model
model_knn = KNeighborsClassifier(n_neighbors=4, algorithm='auto', weights='distance')


# In[5]:


# Train the model
model_knn.fit(train_data, train_labels)
#KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
#           metric_params=None, n_jobs=1, n_neighbors=4, p=2,
#           weights='distance')

# In[6]:


# Predict and validate
pred = model_knn.predict(test_data)
print("pred : \n", pred)
print("test_labels : \n", test_labels)
#pred :
# [3 4 1 4 4 0 0 8 2 9 8 9 6 1 3 3 7 8 5 1 3 2 1 2 7 4 8 5 7 1 0 2 4 0 7 3 1
# 5 3 4 6 2 5 1 6 3 4 5 4 9 3 6 5 0 0 4 5 2 0 7 7 6 5 1 2 9 9 2 7 6 3 2 3 8
# 6 7 6 4 0 2 2 8 8 8 5 0 2 0 4 2 2 0 6 6 6 0 9 8 9 5 3 8 5 7 9 6 3 0 3 9 5
# 1 0 9 6 7 0 1 5 3 0 3 4 9 2 3 8 2 2 5 7 2 6 2 7 3 1 4 5 9 9 6 6 9 7 1 3 7
# 1 9 8 6 9 9 6 5 0 5 6 9 7 7 5 0 3 8 5 9 2 0 9 3 1 2 9 3 7 6 9 6]
#test_labels :
# [3 4 1 4 4 0 0 8 2 9 8 9 6 1 3 3 7 8 5 1 3 2 1 2 7 4 8 5 7 1 0 2 4 0 7 3 1
# 5 3 4 6 2 5 1 6 3 4 5 4 9 3 6 5 0 0 4 5 2 0 7 7 6 5 1 2 9 9 2 7 6 3 2 3 8
# 6 7 6 4 0 2 2 8 8 8 5 0 2 0 4 2 2 0 6 6 6 0 9 8 7 5 3 8 5 7 9 6 3 0 3 9 5
# 1 0 9 6 7 0 1 5 3 0 3 4 9 2 3 8 2 2 5 7 2 6 2 7 3 1 4 5 9 9 6 6 9 7 1 3 7
# 1 9 8 6 9 9 6 5 0 5 6 9 7 7 5 0 3 8 5 9 2 0 9 3 1 2 9 3 7 6 9 6]

# In[7]:


# Check the accuracy. accuracy_score takes (y_true, y_pred): the ground-truth
# labels go first, then the predictions.
acc = accuracy_score(test_labels, pred)
print("Accuracy rate : %.3f" % acc)
#Accuracy rate : 0.994
# coding: utf-8

# In[1]:


"""
Handwritten digit recognition with a CNN.

One epoch takes roughly 80 s (see the training log further down).
"""
import os

import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K


# In[2]:


# Initial hyper-parameters
batch_size = 128   # number of images fed to the model per batch (60000 training images)
num_classes = 10   # classes 0 - 9
epochs = 12        # number of passes over the training set

img_rows, img_cols = 28, 28   # images are 28 * 28 pixels


# In[3]:


# Load the data
(x_train, y_train), (x_test, y_test) = mnist.load_data()   # slow on the first run

# The channel axis position depends on the backend (theano / tensorflow).
# Colour images have 3 RGB channels, greyscale images have 1 channel.
if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)   # (60000, 1, 28, 28)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    # Must be spelled input_shape: the first Conv2D layer below reads this
    # name, and a misspelling here raises NameError on channels_first backends.
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)


# In[4]:


# Data preparation

# Images: convert to float32 and scale pixel values into [0, 1]
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

#x_train shape:  (60000, 28, 28, 1)
#60000 train samples
#10000 test samples
print('x_train shape: ', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Labels: one-hot encode
# 5 -> [0000010000]    2 -> [0010000000]
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)


# In[5]:


# 1. Choose the model
model = Sequential()   # layers are stacked one after another


# In[6]:


# 2. Build the network layers
# CNN parameters: weights (the kernels), kernel size, number of kernels,
# pooling size, stride, dropout rate
model.add(Conv2D(32,
                 kernel_size=(3, 3),
                 activation='relu',
                 input_shape=input_shape))   # convolution layer 1

model.add(Conv2D(64,
                 (3, 3),
                 activation='relu'))   # convolution layer 2

model.add(MaxPooling2D(pool_size=(2, 2)))   # pooling; strides default to pool_size

model.add(Dropout(0.25))   # guards against over-fitting to the training set

model.add(Flatten())   # flatten to a 1-D vector

model.add(Dense(128,
                activation='relu'))   # fully connected layer

model.add(Dropout(0.5))   # drop 50% of the units

model.add(Dense(num_classes,
                activation='softmax'))   # fully connected, multi-class output


# In[7]:


# 3. Compile
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])


# In[8]:


# 4. Train
model.fit(x_train,
          y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))   # start training

#Train on 60000 samples, validate on 10000 samples
#Epoch 1/12
#60000/60000 [==============================] - 79s 1ms/step - loss: 0.2630 - acc: 0.9195 - val_loss: 0.0574 - val_acc: 0.9825
#Epoch 2/12
#60000/60000 [==============================] - 77s 1ms/step - loss: 0.0900 - acc: 0.9730 - val_loss: 0.0437 - val_acc: 0.9855
#Epoch 3/12
#60000/60000 [==============================] - 78s 1ms/step - loss: 0.0663 - acc: 0.9810 - val_loss: 0.0387 - val_acc: 0.9874
#Epoch 4/12
#60000/60000 [==============================] - 76s 1ms/step - loss: 0.0555 - acc: 0.9836 - val_loss: 0.0321 - val_acc: 0.9881
#Epoch 5/12
#60000/60000 [==============================] - 76s 1ms/step - loss: 0.0462 - acc: 0.9862 - val_loss: 0.0287 - val_acc: 0.9907
#Epoch 6/12
#60000/60000 [==============================] - 81s 1ms/step - loss: 0.0418 - acc: 0.9873 - val_loss: 0.0318 - val_acc: 0.9893
#Epoch 7/12
#60000/60000 [==============================] - 81s 1ms/step - loss: 0.0364 - acc: 0.9885 - val_loss: 0.0291 - val_acc: 0.9907
#Epoch 8/12
#60000/60000 [==============================] - 80s 1ms/step - loss: 0.0338 - acc: 0.9898 - val_loss: 0.0260 - val_acc: 0.9922
#Epoch 9/12
#60000/60000 [==============================] - 80s 1ms/step - loss: 0.0319 - acc: 0.9903 - val_loss: 0.0266 - val_acc: 0.9918
#Epoch 10/12
#60000/60000 [==============================] - 79s 1ms/step - loss: 0.0290 - acc: 0.9908 - val_loss: 0.0271 - val_acc: 0.9919
#Epoch 11/12
#60000/60000 [==============================] - 79s 1ms/step - loss: 0.0281 - acc: 0.9911 - val_loss: 0.0247 - val_acc: 0.9928
#Epoch 12/12
#60000/60000 [==============================] - 82s 1ms/step - loss: 0.0256 - acc: 0.9920 - val_loss: 0.0251 - val_acc: 0.9926
#<keras.callbacks.History at 0x182f48b128>


# In[9]:


# 5. Evaluate on the test set
score = model.evaluate(x_test, y_test, verbose=0)

#Test loss:  0.025120523367086936
#Test accuracy:  0.9926
print('Test loss: ', score[0])
print('Test accuracy: ', score[1])


# In[10]:


# Save the model. The path is built with os.path.join rather than a
# hand-written '.\model\...' literal: that literal relies on invalid escape
# sequences and on the Windows separator. On Windows the resulting path is
# unchanged.
# NOTE(review): recent Keras releases may require a .keras or .h5 suffix -
# confirm against the installed version before changing the file name.
model_path = os.path.join('.', 'model', 'HandwritingRecUsingCNN.model')
os.makedirs(os.path.dirname(model_path), exist_ok=True)   # the target directory must exist
model.save(model_path)