The following example uses TensorFlow to implement a simple 3-layer introductory deep-learning program. The program has the following features:
1. It is based on the well-known MNIST handwritten-digit dataset: http://yann.lecun.com/exdb/mnist/
2. An exponentially decaying learning rate, so that the rate shrinks with the number of training steps and the model becomes more stable in the later stages of training (a short sketch of this schedule follows the network description below).
3. L2 regularization, which keeps the individual weights small and helps avoid overfitting.
4. A moving-average model, which improves the model's accuracy on the validation data (see the sketch after the program listing).
The network has 3 layers in total: an input layer with 784 nodes, a hidden layer with 500 nodes, and an output layer with 10 nodes.
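Before the full program, here is what the decay schedule of feature 2 looks like in isolation. This is a minimal plain-Python sketch, not part of the program; the decay_steps value of 550 is an assumption based on the standard 55,000-example MNIST training split divided by the batch size of 100.

LEARNING_RATE_BASE = 0.8
LEARNING_RATE_DECAY = 0.99
DECAY_STEPS = 550.0  # assumed: 55000 training examples / batch size 100

# tf.train.exponential_decay (non-staircase mode) computes
#   base * decay_rate ** (global_step / decay_steps)
for step in (0, 1000, 10000, 20000, 30000):
    lr = LEARNING_RATE_BASE * LEARNING_RATE_DECAY ** (step / DECAY_STEPS)
    print("step %5d: learning rate %.4f" % (step, lr))

The full program is below.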
# Import modules
import tensorflow as tf
import datetime
import numpy as np

# tensorflow.examples.tutorials.mnist has been deprecated
#from tensorflow.examples.tutorials.mnist import input_data
from tensorflow.contrib.learn.python.learn.datasets import mnist
from tensorflow.contrib.layers import l2_regularizer

# Silence the AVX2 instruction-set warning
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Silence the mnist.read_data_sets deprecation warning
import logging

class WarningFilter(logging.Filter):
    def filter(self, record):
        msg = record.getMessage()
        tf_warning = 'datasets' in msg
        return not tf_warning

logger = logging.getLogger('tensorflow')
logger.addFilter(WarningFilter())

# Network structure: 784 input features, one hidden layer of 500 nodes,
# and an output layer of 10 nodes
INPUT_NODE = 784
OUTPUT_NODE = 10
LAYER1_NODE = 500

# Mini-batch size of 100 for stochastic gradient descent, 30000 training steps
BATCH_SIZE = 100
TRAINING_STEPS = 30000

# Exponentially decaying learning rate
LEARNING_RATE_BASE = 0.8
LEARNING_RATE_DECAY = 0.99

# L2 regularization
REGULARIZATION_RATE = 0.0001

# Moving-average model
MOVING_AVERAGE_DECAY = 0.99

validation_accuracy_rate_list = []
test_accuracy_rate_list = []

# Forward pass; when avg_class is given, the moving-average (shadow) values
# of the parameters are used instead of the raw ones
def inference(input_tensor, avg_class, weights1, biase1, weights2, biase2):
    if avg_class is None:
        layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biase1)
        return tf.matmul(layer1, weights2) + biase2
    else:
        layer1 = tf.nn.relu(tf.matmul(input_tensor, avg_class.average(weights1))
                            + avg_class.average(biase1))
        return tf.matmul(layer1, avg_class.average(weights2)) + avg_class.average(biase2)

# Training process
def train(mnist_datasets):
    # Define the inputs
    x = tf.placeholder(dtype=tf.float32, shape=[None, INPUT_NODE])
    y_ = tf.placeholder(dtype=tf.float32, shape=[None, OUTPUT_NODE])

    # Define the trainable parameters
    weights1 = tf.Variable(tf.truncated_normal(shape=[INPUT_NODE, LAYER1_NODE], mean=0.0, stddev=0.1))
    biase1 = tf.Variable(tf.constant(value=0.1, dtype=tf.float32, shape=[LAYER1_NODE]))
    weights2 = tf.Variable(tf.truncated_normal(shape=[LAYER1_NODE, OUTPUT_NODE], mean=0.0, stddev=0.1))
    biase2 = tf.Variable(tf.constant(value=0.1, dtype=tf.float32, shape=[OUTPUT_NODE]))

    # Forward pass for training; the moving-average model is not used here,
    # so avg_class is None
    y = inference(x, None, weights1, biase1, weights2, biase2)

    # Records the number of training steps; set trainable=False so the
    # optimizer does not touch it
    global_step = tf.Variable(initial_value=0, trainable=False)

    # Create the moving-average model, used for evaluation
    variable_averages = tf.train.ExponentialMovingAverage(decay=MOVING_AVERAGE_DECAY,
                                                          num_updates=global_step)
    # Apply the moving average to all trainable variables, so every one of
    # them gets a shadow variable
    variable_averages_ops = variable_averages.apply(tf.trainable_variables())
    # Forward pass used for evaluation, based on the shadow variables
    average_y = inference(x, variable_averages, weights1, biase1, weights2, biase2)

    # Cross-entropy loss
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tf.argmax(y_, 1), logits=y)
    cross_entropy_mean = tf.reduce_mean(cross_entropy)

    # L2 regularization loss on the two weight matrices
    regularizer = l2_regularizer(REGULARIZATION_RATE)
    regularization = regularizer(weights1) + regularizer(weights2)

    # Total loss
    loss = cross_entropy_mean + regularization

    # Exponentially decaying learning rate
    learning_rate = tf.train.exponential_decay(learning_rate=LEARNING_RATE_BASE,
                                               global_step=global_step,
                                               decay_steps=mnist_datasets.train.num_examples / BATCH_SIZE,
                                               decay_rate=LEARNING_RATE_DECAY)

    # Stochastic gradient descent on the total loss
    train_step = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\
        .minimize(loss=loss, global_step=global_step)

    # After each forward pass we must both update the parameters by
    # backpropagation and update the shadow variables of the moving-average
    # model; equivalent to train_op = tf.group(train_step, variable_averages_ops)
    with tf.control_dependencies([train_step, variable_averages_ops]):
        train_op = tf.no_op(name='train')

    # Evaluation op: compute the accuracy
    correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(x=correct_prediction, dtype=tf.float32))

    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        validate_feed = {x: mnist_datasets.validation.images,
                         y_: mnist_datasets.validation.labels}
        test_feed = {x: mnist_datasets.test.images,
                     y_: mnist_datasets.test.labels}
        for i in range(TRAINING_STEPS):
            # Every 1000 steps, evaluate the model on the validation and test data
            if i % 1000 == 0:
                validate_accuracy_rate = sess.run(accuracy, feed_dict=validate_feed)
                print("%s: After %d training step(s), validation accuracy "
                      "using average model is %g" % (datetime.datetime.now(), i, validate_accuracy_rate))
                test_accuracy_rate = sess.run(accuracy, feed_dict=test_feed)
                print("%s: After %d training step(s), test accuracy "
                      "using average model is %g" % (datetime.datetime.now(), i, test_accuracy_rate))
                validation_accuracy_rate_list.append(validate_accuracy_rate)
                test_accuracy_rate_list.append(test_accuracy_rate)
            # Fetch the next training batch and run one training step
            xs, ys = mnist_datasets.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={x: xs, y_: ys})

# Main entry point
def main(argv=None):
    mnist_datasets = mnist.read_data_sets(train_dir='MNIST_data/', one_hot=True)
    train(mnist_datasets)
    print("validation accuracy rate list:", validation_accuracy_rate_list)
    print("test accuracy rate list:", test_accuracy_rate_list)

# Module entry point
if __name__ == '__main__':
    tf.app.run()
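One detail in the listing worth unpacking is the moving-average model used for evaluation: variable_averages.apply() creates a shadow copy of every trainable variable, and each training step nudges the shadow toward the current value. The following is a minimal pure-Python sketch of the update rule that tf.train.ExponentialMovingAverage uses when num_updates is supplied; it is illustrative only, not part of the program above.

def shadow_update(shadow, variable, step, decay=0.99):
    # With num_updates set, TF uses the smaller of the nominal decay and
    # (1 + step) / (10 + step), so the shadow moves faster early in training
    effective_decay = min(decay, (1.0 + step) / (10.0 + step))
    return effective_decay * shadow + (1.0 - effective_decay) * variable

# Early on, the shadow tracks the variable closely (effective decay 0.1)...
print(shadow_update(shadow=0.0, variable=1.0, step=0))      # -> 0.9
# ...while late in training it changes slowly (effective decay 0.99)
print(shadow_update(shadow=0.0, variable=1.0, step=10000))  # -> 0.01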
Every 1000 steps, the model is evaluated on the validation and test data, and the two accuracy curves are plotted, with the blue curve showing validation accuracy and the dark-red curve showing test accuracy. The plot makes it easy to see that, by introducing the moving-average model, the model achieves better accuracy on the validation data.
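The plotting code is not part of the listing above; a minimal matplotlib sketch that would draw these two curves from the lists collected during training might look like this (the exact figure styling is an assumption):

import matplotlib.pyplot as plt

# Assumes validation_accuracy_rate_list and test_accuracy_rate_list were
# filled by train() above; one entry was recorded every 1000 steps
steps = [i * 1000 for i in range(len(validation_accuracy_rate_list))]
plt.plot(steps, validation_accuracy_rate_list, color='blue', label='validation accuracy')
plt.plot(steps, test_accuracy_rate_list, color='darkred', label='test accuracy')
plt.xlabel('training steps')
plt.ylabel('accuracy')
plt.legend()
plt.show()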
Further, the following code computes the correlation coefficient between the two accuracy series:
import numpy as np
import math

# Validation accuracies recorded every 1000 steps
x = np.array([0.1748, 0.9764, 0.9816, 0.9834, 0.982, 0.984, 0.9838, 0.9842,
              0.9846, 0.985, 0.9848, 0.9854, 0.9854, 0.9838, 0.9846, 0.9838,
              0.9848, 0.9844, 0.9846, 0.9858, 0.9846, 0.9848, 0.9852, 0.9844,
              0.9846, 0.9848, 0.9852, 0.9846, 0.9852, 0.9854])
# Test accuracies recorded every 1000 steps
y = np.array([0.1839, 0.9751, 0.9796, 0.9807, 0.9813, 0.9825, 0.983, 0.983,
              0.983, 0.9829, 0.9836, 0.9831, 0.9828, 0.9832, 0.9828, 0.9829,
              0.9836, 0.9835, 0.9838, 0.9833, 0.9833, 0.9833, 0.9833, 0.9838,
              0.9835, 0.9838, 0.9829, 0.9836, 0.9834, 0.984])

# Pearson correlation coefficient
def computeCorrelation(x, y):
    xBar = np.mean(x)
    yBar = np.mean(y)
    SSR = 0.0
    varX = 0.0
    varY = 0.0
    for i in range(0, len(x)):
        diffXXbar = x[i] - xBar
        difYYbar = y[i] - yBar
        SSR += diffXXbar * difYYbar
        varX += diffXXbar ** 2
        varY += difYYbar ** 2
    SST = math.sqrt(varX * varY)
    return SSR / SST

# R-squared of a polynomial fit of the given degree
def polyfit(x, y, degree):
    results = {}
    coeffs = np.polyfit(x, y, degree)
    results['polynomial'] = coeffs.tolist()
    p = np.poly1d(coeffs)
    yhat = p(x)
    ybar = np.sum(y) / len(y)
    ssreg = np.sum((yhat - ybar) ** 2)
    sstot = np.sum((y - ybar) ** 2)
    results['determination'] = ssreg / sstot
    return results

r = computeCorrelation(x, y)
print("r:", r)
print("r^2:", r * r)
print(polyfit(x, y, 1)['determination'])
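As a sanity check, NumPy's built-in Pearson correlation gives the same r without the manual loop:

# Cross-check with NumPy's built-in Pearson correlation (same x and y as above)
print("np.corrcoef r:", np.corrcoef(x, y)[0, 1])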
The results show that the correlation coefficient between the two exceeds 0.9999, which means that on the MNIST problem the model's performance on the validation data can be used to judge the model's quality. Of course, this holds only for the MNIST dataset; other problems still require case-by-case analysis.
C:\Users\Administrator\Anaconda3\python.exe D:/tensorflow-study/sample.py
r: 0.9999913306679183
r^2: 0.999982661410994
0.9999826614109977