tensorflow學習筆記-bili莫煩

bilibili莫煩tensorflow視頻教程學習筆記

1.初次使用Tensorflow實現一元線性迴歸

# Silence TensorFlow's C++ INFO and WARNING messages. The variable is set
# before tensorflow is imported so that it is already in place at import time.
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import numpy as np
import tensorflow as tf

# Create the dataset: 100 random x values and targets on the line y = 2x + 5.
x_data = np.random.rand(100).astype(np.float32)
y_data = x_data * 2 + 5

### create tensorflow structure Start
# Weight: a single value because there is only one input variable,
# initialised uniformly between -1.0 and 1.0.
Weights = tf.Variable(tf.random_uniform([1], -1.0, 1.0))
# Bias, i.e. the intercept b, initialised to zero.
biases = tf.Variable(tf.zeros([1]))
# Predicted y (y hat).
y = Weights * x_data + biases
# Loss: mean of the squared errors.
loss = tf.reduce_mean(tf.square(y - y_data))
# Plain gradient descent with learning rate alpha = 0.5.
optimizer = tf.train.GradientDescentOptimizer(0.5)
# Training op: one optimizer step on the loss.
train = optimizer.minimize(loss)
# Op that initialises all variables.
init = tf.global_variables_initializer()
### create tensorflow structure End

# Run inside a context manager so the session is closed (and its resources
# released) when training finishes; the original never closed it.
with tf.Session() as sess:
    # Running the initializer is required before the variables can be used.
    sess.run(init)
    # range(201) gives steps 0..200.
    for step in range(201):
        # One training step.
        sess.run(train)
        # Print Weights and biases every 20 steps to watch them converge.
        if step % 20 == 0:
            print(step, sess.run(Weights), sess.run(biases))

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 解釋:

　　TF_CPP_MIN_LOG_LEVEL = 0：0爲默認值，輸出全部的信息，包含info,warning,error,fatal四種級別

　　TF_CPP_MIN_LOG_LEVEL = 1：1表示屏蔽info，只顯示warning及以上級別

　　TF_CPP_MIN_LOG_LEVEL = 2：2表示屏蔽info和warning，顯示error和fatal（最常用的取值）

　　TF_CPP_MIN_LOG_LEVEL = 3：3表示只顯示fatal

2.Tensorflow基礎基本流程

# Import TensorFlow (the original note says an installed GPU build uses the
# GPU by default).
import tensorflow as tf

# Two constant 2x2 matrices.
left_rows = [[3, 3], [2, 4]]
right_rows = [[1, 2], [5, 5]]
matrix1 = tf.constant(left_rows)
matrix2 = tf.constant(right_rows)
# Matrix multiplication, the counterpart of np.dot(mat1, mat2).
product = tf.matmul(matrix1, matrix2)
# Variable initializer op.
init = tf.global_variables_initializer()

# A session opened with `with` is closed automatically afterwards.
with tf.Session() as sess:
    # Run the initializer.
    sess.run(init)
    # Evaluate the product and print it.
    result = sess.run(product)
    print(result)

3.tensorflow基礎變量、常量、傳值

import tensorflow as tf

# A variable used as a counter, and the constant increment.
state = tf.Variable(0, name='counter')
one = tf.constant(1)

# state + one ...
new_value = tf.add(state, one)
# ... assigned back to state.
update = tf.assign(state, new_value)

# Initializer for all global variables.
init = tf.global_variables_initializer()

with tf.Session() as sess:
    # Initialisation must run before the variable is read or updated.
    sess.run(init)
    # Apply the update three times, printing the counter after each one.
    for _ in range(3):
        sess.run(update)
        print(sess.run(state))

4.使用placeholder

import tensorflow as tf

# Two float32 placeholders; their values are supplied when the graph is run.
input1 = tf.placeholder(tf.float32)
input2 = tf.placeholder(tf.float32)
# Matrix product of input1 and input2.
output = tf.matmul(input1, input2)

with tf.Session() as sess:
    # Feed a 1x2 and a 2x1 matrix while running `output`.
    fed_values = {input1: [[2.0, 3.0]], input2: [[4.0], [2.0]]}
    print(sess.run(output, feed_dict=fed_values))

5.定義一個層(Layer)

import tensorflow as tf


def add_layer(inputs, in_size, out_size, activation_function=None):
    """Build one fully connected layer and return its output tensor.

    Args:
        inputs: output of the previous layer.
        in_size: number of nodes in the previous layer.
        out_size: number of nodes in this layer.
        activation_function: callable applied to the affine result; None
            (the default) keeps the layer linear, i.e. f(x) = x.

    Returns:
        The layer's output tensor.
    """
    # Weight matrix of size in_size x out_size, filled with random normal values.
    layer_weights = tf.Variable(tf.random_normal([in_size, out_size]))
    # Bias row of size 1 x out_size, starting at 0.1.
    layer_biases = tf.Variable(tf.zeros([1, out_size]) + 0.1)
    # z = inputs . W + b
    pre_activation = tf.matmul(inputs, layer_weights) + layer_biases
    # Without an activation function the layer stays linear.
    if activation_function is None:
        return pre_activation
    return activation_function(pre_activation)

6.手動建立一個簡單的神經網絡

（包含一個輸入層、一個隱藏層、一個輸出層）

import numpy as np
import tensorflow as tf


# Add one layer to the network.
def add_layer(inputs, in_size, out_size, activation_function=None):
    """Build one fully connected layer and return its output tensor.

    Creates an in_size x out_size weight matrix (random normal) and a
    1 x out_size bias row (0.1), computes inputs . W + b, and applies
    activation_function to it unless that is None.
    """
    layer_weights = tf.Variable(tf.random_normal([in_size, out_size]))
    layer_biases = tf.Variable(tf.zeros([1, out_size]) + 0.1)
    pre_activation = tf.matmul(inputs, layer_weights) + layer_biases
    if activation_function is None:
        return pre_activation
    return activation_function(pre_activation)


### Prepare the data
# Build x_data: 300 evenly spaced values from -1 to 1, with an added axis so
# that it becomes a 300x1 matrix.
x_data = np.linspace(-1, 1, 300)[:, np.newaxis]
# Noise with the same shape as x_data: mean 0 and standard deviation 0.05
# (the second argument of np.random.normal is the standard deviation, not
# the variance).
noise = np.random.normal(0, 0.05, x_data.shape)
# Targets follow y = x^2 - 0.5, plus the noise.
y_data = np.square(x_data) - 0.5 + noise
# Placeholders through which x_data and y_data are fed.
xs = tf.placeholder(tf.float32, [None, 1])
ys = tf.placeholder(tf.float32, [None, 1])
# Hidden layer: input xs with one node, 10 nodes in this layer, ReLU activation.
l1 = add_layer(xs, 1, 10, activation_function=tf.nn.relu)
# Output layer: 10 nodes in, 1 node out, linear.
prediction = add_layer(l1, 10, 1, activation_function=None)
# Loss: squared error summed per sample, then averaged over the samples.
loss = tf.reduce_mean(tf.reduce_sum(tf.square(ys - prediction), reduction_indices=[1]))
# Minimise the loss with gradient descent, learning rate 0.01.
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
# Initializer for all global variables.
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    # The same full dataset is fed on every step, so build the feed once.
    feed = {xs: x_data, ys: y_data}
    # range(10001) gives steps 0..10000; the counter is a named variable
    # because it is actually used (the original used `_`).
    for step in range(10001):
        sess.run(train_step, feed_dict=feed)
        # Print the loss every 50 steps to check that it is decreasing.
        if step % 50 == 0:
            print(sess.run(loss, feed_dict=feed))

7.使用matplotlib可視化擬合狀況、Loss曲線

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt


# Add one layer to the network.
def add_layer(inputs, in_size, out_size, activation_function=None):
    """Build one fully connected layer and return its output tensor.

    Creates an in_size x out_size weight matrix (random normal) and a
    1 x out_size bias row (0.1), computes inputs . W + b, and applies
    activation_function to it unless that is None.
    """
    layer_weights = tf.Variable(tf.random_normal([in_size, out_size]))
    layer_biases = tf.Variable(tf.zeros([1, out_size]) + 0.1)
    pre_activation = tf.matmul(inputs, layer_weights) + layer_biases
    if activation_function is None:
        return pre_activation
    return activation_function(pre_activation)


### Prepare the data
# Build x_data: 300 evenly spaced values from -1 to 1, with an added axis so
# that it becomes a 300x1 matrix.
x_data = np.linspace(-1, 1, 300)[:, np.newaxis]
# Noise with the same shape as x_data: mean 0 and standard deviation 0.05.
noise = np.random.normal(0, 0.05, x_data.shape)
# Targets follow y = x^2 - 0.5, plus the noise.
y_data = np.square(x_data) - 0.5 + noise
# Placeholders through which x_data and y_data are fed.
xs = tf.placeholder(tf.float32, [None, 1])
ys = tf.placeholder(tf.float32, [None, 1])
# Hidden layer: input xs with one node, 10 nodes in this layer, ReLU activation.
l1 = add_layer(xs, 1, 10, activation_function=tf.nn.relu)
# Output layer.
prediction = add_layer(l1, 10, 1, activation_function=None)
# Loss: squared error summed per sample, then averaged over the samples.
loss = tf.reduce_mean(tf.reduce_sum(tf.square(ys - prediction), reduction_indices=[1]))
# Minimise the loss with gradient descent, learning rate 0.1.
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)
# Initializer for all global variables.
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    # Figure with two stacked subplots; arguments are (rows, columns, index).
    fig = plt.figure()
    # First subplot: the data and the fitted curve.
    a1 = fig.add_subplot(2, 1, 1)
    # Scatter plot of the raw data.
    plt.scatter(x_data, y_data)
    plt.xlabel('x_data')
    plt.ylabel('y_data')
    # Axis limits could be set explicitly with plt.xlim(-2, 2) and
    # plt.ylim(-1, 1), or in one call with plt.axis([-2, 2, -1, 1]);
    # 'tight' shrinks the axes to the content without extra margins.
    plt.axis('tight')

    # Second subplot: the loss curve. Axes.set configures several properties
    # at once, much like the individual plt calls.
    a2 = fig.add_subplot(2, 1, 2)
    a2.set(xlim=(0, 10000), ylim=(0.0, 0.02), xlabel='Iterations', ylabel='Loss')
    # Interactive mode, so show() does not block.
    plt.ion()
    # show() is needed to display the figure.
    plt.show()

    loss_list = []
    index_list = []
    # Handles of the lines drawn on the previous refresh; None until the
    # first refresh. Tracking them explicitly replaces the original
    # try/except-pass, which relied on a swallowed NameError and also hid
    # the failure of Axes.lines.remove() on newer Matplotlib releases.
    lines_in_a1 = None
    lines_in_a2 = None
    feed = {xs: x_data, ys: y_data}
    # range(10001) gives steps 0..10000.
    for step in range(10001):
        sess.run(train_step, feed_dict=feed)
        # Record the loss and refresh both plots every 50 steps.
        if step % 50 == 0:
            index_list.append(step)
            loss_list.append(sess.run(loss, feed_dict=feed))
            # Remove the previously drawn lines so they do not pile up.
            if lines_in_a1 is not None:
                lines_in_a1[0].remove()
                lines_in_a2[0].remove()
            prediction_value = sess.run(prediction, feed_dict={xs: x_data})
            # Fitted curve in subplot a1: yellow, width 5.
            lines_in_a1 = a1.plot(x_data, prediction_value, 'y-', lw=5)
            # Loss curve in subplot a2: red, width 3.
            lines_in_a2 = a2.plot(index_list, loss_list, 'r-', lw=3)
            # Short pause so the GUI can redraw; the original notes it
            # stalls without it.
            plt.pause(0.1)

注意：如果在PyCharm中運行上述代碼時不能展示動態圖片刷新，則需要進入File->Settings，搜索Python Scientific，然後去掉右側的對勾（默認是勾選的），再點擊Apply、OK即可。

8.常用優化器Optimizers

# Alternative optimizers. Each assignment replaces the previous train_step,
# so keep only the one you want to use.

# Plain gradient descent.
train_step = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(loss)
# Adam.
train_step = tf.train.AdamOptimizer(
    learning_rate=0.01, beta1=0.9, beta2=0.999, epsilon=1e-8
).minimize(loss)
# Gradient descent with momentum.
train_step = tf.train.MomentumOptimizer(learning_rate=0.01, momentum=0.9).minimize(loss)
# RMSProp.
train_step = tf.train.RMSPropOptimizer(learning_rate=0.01).minimize(loss)

其中Adam效果比較好，但都可以嘗試使用。

9.使用tensorboard繪網絡結構圖

import numpy as np
import tensorflow as tf


def add_layer(inputs, in_size, out_size, activation_function=None):
    """Build one fully connected layer, grouped under TensorBoard name scopes.

    Args:
        inputs: output of the previous layer.
        in_size: number of nodes in the previous layer.
        out_size: number of nodes in this layer.
        activation_function: callable applied to the affine result; None
            keeps the layer linear.

    Returns:
        The layer's output tensor.
    """
    # Outer box: every call of this function creates one 'Layer' scope.
    with tf.name_scope('Layer'):
        # Inner box for the weights (the scope name was misspelled 'Wights').
        with tf.name_scope('Weights'):
            Weights = tf.Variable(tf.random_normal([in_size, out_size]))
        # Inner box for the biases.
        with tf.name_scope('Biases'):
            biases = tf.Variable(tf.zeros([1, out_size]) + 0.1)
        # Inner box for z = inputs . W + b.
        with tf.name_scope('Wx_plus_b'):
            Wx_plus_b = tf.matmul(inputs, Weights) + biases
        if activation_function is None:
            outputs = Wx_plus_b
        else:
            outputs = activation_function(Wx_plus_b)
        return outputs


# Prepare the data.
x_data = np.linspace(-1, 1, 300)[:, np.newaxis]
noise = np.random.normal(0, 0.05, x_data.shape)
y_data = np.square(x_data) - 0.5 + noise

# TensorBoard: an outer box named 'inputs' holding x_input and y_input.
with tf.name_scope('inputs'):
    xs = tf.placeholder(tf.float32, [None, 1], name='x_input')
    ys = tf.placeholder(tf.float32, [None, 1], name='y_input')

l1 = add_layer(xs, 1, 10, activation_function=tf.nn.relu)
prediction = add_layer(l1, 10, 1, activation_function=None)

# 'Loss' box: contains the ops that make up the loss (sub, square, sum, mean).
with tf.name_scope("Loss"):
    loss = tf.reduce_mean(tf.reduce_sum(tf.square(ys - prediction), reduction_indices=[1]))

# 'train' box: contains the gradient-descent and weight-update ops.
with tf.name_scope('train'):
    train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    # Write the graph into the 'logs/' directory as an events.out.tfevents.* file.
    writer = tf.summary.FileWriter('logs/')
    try:
        writer.add_graph(sess.graph)
        sess.run(init)
    finally:
        # close() flushes pending events to disk; the original never
        # closed the writer.
        writer.close()

注意：在運行代碼後，在logs文件夾下生成 events.out.tfevents.1561191707.06P2GHW85CAH236 文件。

然後進入windows cmd，進入logs的上層文件夾，使用tensorboard --logdir logs即可打開web服務，然後複製給出的url地址進行訪問。如圖：

10.其他可視化，例如Weight、bias、loss等

import numpy as np
import tensorflow as tf


def add_layer(inputs, in_size, out_size, n_layer, activation_function=None):
    """Build one fully connected layer and record histograms of its parameters.

    Args:
        inputs: output of the previous layer.
        in_size: number of nodes in the previous layer.
        out_size: number of nodes in this layer.
        n_layer: layer number, used to build the 'layer_<n>' summary prefix.
        activation_function: callable applied to the affine result; None
            keeps the layer linear.

    Returns:
        The layer's output tensor.
    """
    layer_name = 'layer_%d' % n_layer
    # Outer box: every call of this function creates one 'Layer' scope.
    with tf.name_scope('Layer'):
        # Inner box for the weights (the scope name was misspelled 'Wights').
        with tf.name_scope('Weights'):
            Weights = tf.Variable(tf.random_normal([in_size, out_size]), name='W')
            tf.summary.histogram(layer_name + '/weights', Weights)
        # Inner box for the biases.
        with tf.name_scope('Biases'):
            biases = tf.Variable(tf.zeros([1, out_size]) + 0.1, name='B')
            tf.summary.histogram(layer_name + '/biases', biases)
        # Inner box for z = inputs . W + b.
        with tf.name_scope('Wx_plus_b'):
            Wx_plus_b = tf.matmul(inputs, Weights) + biases
        if activation_function is None:
            outputs = Wx_plus_b
        else:
            outputs = activation_function(Wx_plus_b)
        return outputs


# Prepare the data.
x_data = np.linspace(-1, 1, 300)[:, np.newaxis]
noise = np.random.normal(0, 0.05, x_data.shape)
y_data = np.square(x_data) - 0.5 + noise

# TensorBoard: an outer box named 'inputs' holding x_input and y_input.
with tf.name_scope('inputs'):
    xs = tf.placeholder(tf.float32, [None, 1], name='x_input')
    ys = tf.placeholder(tf.float32, [None, 1], name='y_input')

l1 = add_layer(xs, 1, 10, 1, activation_function=tf.nn.relu)
prediction = add_layer(l1, 10, 1, 2, activation_function=None)

# 'Loss' box: contains the ops that make up the loss (sub, square, sum, mean),
# plus a scalar summary of the loss.
with tf.name_scope("Loss"):
    loss = tf.reduce_mean(tf.reduce_sum(tf.square(ys - prediction), reduction_indices=[1]))
    tf.summary.scalar('Loss', loss)

# 'train' box: contains the gradient-descent and weight-update ops.
with tf.name_scope('train'):
    train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

init = tf.global_variables_initializer()
# Single op that evaluates every summary defined above.
merged = tf.summary.merge_all()

with tf.Session() as sess:
    # Write the graph into the 'logs/' directory as an events.out.tfevents.* file.
    writer = tf.summary.FileWriter('logs/')
    try:
        writer.add_graph(sess.graph)
        sess.run(init)
        feed = {xs: x_data, ys: y_data}
        # range(10001) gives steps 0..10000; the counter is named because it
        # is used as the summary step (the original used `_`).
        for step in range(10001):
            sess.run(train_step, feed_dict=feed)
            # Every 50 steps, record the merged summaries at this step.
            if step % 50 == 0:
                result = sess.run(merged, feed_dict=feed)
                writer.add_summary(result, step)
    finally:
        # close() flushes pending events to disk; the original never
        # closed the writer.
        writer.close()

11.使用tensorflow進行Mnist分類

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Load MNIST from the 'MNIST_data' directory with one-hot labels.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)


# Add one layer to the network.
def add_layer(inputs, in_size, out_size, activation_function=None):
    """Build one fully connected layer and return its output tensor.

    Creates an in_size x out_size weight matrix (random normal) and a
    1 x out_size bias row (0.1), computes inputs . W + b, and applies
    activation_function to it unless that is None.
    """
    layer_weights = tf.Variable(tf.random_normal([in_size, out_size]))
    layer_biases = tf.Variable(tf.zeros([1, out_size]) + 0.1)
    pre_activation = tf.matmul(inputs, layer_weights) + layer_biases
    if activation_function is None:
        return pre_activation
    return activation_function(pre_activation)


# Measure accuracy.
def compute_accuracy(v_xs, v_ys):
    """Return the fraction of samples whose predicted class matches the label.

    Uses the module-level `sess`, `prediction` and `xs`.

    Args:
        v_xs: input samples fed to the `xs` placeholder.
        v_ys: one-hot labels, one row per sample.

    Returns:
        The accuracy as a float32 scalar.
    """
    # Local import: only tensorflow is imported at the top of this script.
    import numpy as np

    # Run the prediction layer once on v_xs.
    y_pre = sess.run(prediction, feed_dict={xs: v_xs})
    # Compare the arg-max class per row with the label's class. This is done
    # in NumPy because y_pre is already an array; the original created new
    # tf.equal/tf.argmax/tf.reduce_mean ops on every call, so the graph kept
    # growing during training.
    predicted_classes = np.argmax(y_pre, axis=1)
    label_classes = np.argmax(v_ys, axis=1)
    return np.mean(predicted_classes == label_classes, dtype=np.float32)


# define placeholder for inputs on network
xs = tf.placeholder(tf.float32, [None, 784])  # each handwritten digit image is 28*28 pixels
ys = tf.placeholder(tf.float32, [None, 10])  # one-hot label, 1*10 per sample

# Output layer only: m*784 inputs map to m*10 outputs, where m = 100 is the
# batch size used below.
prediction = add_layer(xs, 784, 10, activation_function=tf.nn.softmax)
# Cross-entropy loss for one sample: g(x) = -sum_j y_j * log(y_hat_j), y = ys.
# Example: y = [0,1,0,0,0,0,0,0,0,0] (the digit 1) and
# y_hat = [0.05,0.81,0.05,0.003,0.012,0.043,0.012,0.009,0.006,0.005] give
# g(x) = -log(0.81) ~= 0.2107 (tf.log is the natural logarithm).
# -tf.reduce_sum(ys * tf.log(prediction), reduction_indices=[1]) is g(x) per
# sample; tf.reduce_mean averages it over the batch: (1/m) * sum_{i=1..m} g(x_i).
cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys * tf.log(prediction), reduction_indices=[1]))
# Minimise cross_entropy with gradient descent, learning rate 0.5.
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

# The context manager closes the session when training ends (the original
# never closed it); `sess` is still a module-level name, as compute_accuracy
# expects.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # range(10001) gives steps 0..10000.
    for i in range(10001):
        # Mini-batch SGD with batch_size = 100.
        batch_x, batch_y = mnist.train.next_batch(100)
        # One training step.
        sess.run(train_step, feed_dict={xs: batch_x, ys: batch_y})
        # Print the accuracy every 50 steps.
        if i % 50 == 0:
            # Accuracy on the current training batch.
            print('TRAIN acc:', compute_accuracy(batch_x, batch_y))
            # Accuracy on the test set.
            print('TEST acc:', compute_accuracy(mnist.test.images, mnist.test.labels))

重點：關注代碼中交叉熵損失函數的使用。多分類時的交叉熵損失函數爲 $L(\hat{y}, y) = -\sum_{j=1}^{k} y_j \log \hat{y}_j$，成本函數爲 $J = \frac{1}{m} \sum_{i=1}^{m} L(\hat{y}^{(i)}, y^{(i)})$。

12.使用Dropout避免過擬合

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Load MNIST from the 'MNIST_data' directory with one-hot labels.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)


# Add one layer to the network.
def add_layer(inputs, in_size, out_size, activation_function=None, keep_prob=1):
    """Build one fully connected layer with dropout and return its output.

    Args:
        inputs: output of the previous layer.
        in_size: number of nodes in the previous layer.
        out_size: number of nodes in this layer.
        activation_function: callable applied after dropout; None keeps the
            layer linear.
        keep_prob: passed straight to tf.nn.dropout; the default 1 keeps
            everything, and the dropped share is 1 - keep_prob.

    Returns:
        The layer's output tensor.
    """
    layer_weights = tf.Variable(tf.random_normal([in_size, out_size]))
    layer_biases = tf.Variable(tf.zeros([1, out_size]) + 0.1)
    pre_activation = tf.matmul(inputs, layer_weights) + layer_biases
    # Dropout is applied to the affine result, before the activation.
    dropped = tf.nn.dropout(pre_activation, keep_prob)
    if activation_function is None:
        return dropped
    return activation_function(dropped)


# Measure accuracy.
def compute_accuracy(v_xs, v_ys):
    """Return the fraction of samples whose predicted class matches the label.

    Uses the module-level `sess`, `prediction` and `xs`.

    Args:
        v_xs: input samples fed to the `xs` placeholder.
        v_ys: one-hot labels, one row per sample.

    Returns:
        The accuracy as a float32 scalar.
    """
    # Local import: only tensorflow is imported at the top of this script.
    import numpy as np

    # Run the prediction layer once on v_xs.
    y_pre = sess.run(prediction, feed_dict={xs: v_xs})
    # Compare the arg-max class per row with the label's class. This is done
    # in NumPy because y_pre is already an array; the original created new
    # tf.equal/tf.argmax/tf.reduce_mean ops on every call, so the graph kept
    # growing.
    predicted_classes = np.argmax(y_pre, axis=1)
    label_classes = np.argmax(v_ys, axis=1)
    return np.mean(predicted_classes == label_classes, dtype=np.float32)


# define placeholder for inputs on network
xs = tf.placeholder(tf.float32, [None, 784])  # each handwritten digit image is 28*28 pixels
ys = tf.placeholder(tf.float32, [None, 10])  # one-hot label, 1*10 per sample

# Dropout keep probability. It defaults to 1.0 (no dropout), so evaluation
# runs that do not feed it use the full network; training feeds 0.7. The
# original hard-coded 0.7, which left dropout switched on while the train and
# test losses were being measured.
keep_prob = tf.placeholder_with_default(1.0, shape=())

# Hidden layer with 50 nodes; 30% dropout during training against overfitting.
l1 = add_layer(xs, 784, 50, activation_function=tf.nn.tanh, keep_prob=keep_prob)
# Output layer: m*50 inputs map to m*10 outputs, where m is the number of
# samples fed (100 per training batch).
prediction = add_layer(l1, 50, 10, activation_function=tf.nn.softmax)
# Cross-entropy loss for one sample: g(x) = -sum_j y_j * log(y_hat_j), y = ys.
# Example: y = [0,1,0,0,0,0,0,0,0,0] (the digit 1) and
# y_hat = [0.05,0.81,0.05,0.003,0.012,0.043,0.012,0.009,0.006,0.005] give
# g(x) = -log(0.81) ~= 0.2107 (tf.log is the natural logarithm).
# -tf.reduce_sum(ys * tf.log(prediction), reduction_indices=[1]) is g(x) per
# sample; tf.reduce_mean averages it over the batch: (1/m) * sum_{i=1..m} g(x_i).
cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys * tf.log(prediction), reduction_indices=[1]))
tf.summary.scalar('Loss', cross_entropy)
# Minimise cross_entropy with gradient descent, learning rate 0.5.
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

# The context manager closes the session when training ends.
with tf.Session() as sess:
    # Two writers on the same graph: their curves overlap in TensorBoard, so
    # the train and test losses can be compared to spot overfitting.
    train_writer = tf.summary.FileWriter('logs/train', sess.graph)
    test_writer = tf.summary.FileWriter('logs/test', sess.graph)
    merged = tf.summary.merge_all()
    try:
        sess.run(tf.global_variables_initializer())
        # range(20001) gives steps 0..20000.
        for i in range(20001):
            # Mini-batch SGD with batch_size = 100.
            batch_x, batch_y = mnist.train.next_batch(100)
            # One training step, with dropout enabled.
            sess.run(train_step, feed_dict={xs: batch_x, ys: batch_y, keep_prob: 0.7})
            # Every 50 steps record the loss on the full train and test sets
            # (keep_prob is not fed, so dropout is off).
            if i % 50 == 0:
                train_res = sess.run(merged, feed_dict={xs: mnist.train.images, ys: mnist.train.labels})
                test_res = sess.run(merged, feed_dict={xs: mnist.test.images, ys: mnist.test.labels})
                train_writer.add_summary(train_res, i)
                test_writer.add_summary(test_res, i)
    finally:
        # Flush and close both event files; the original never closed them.
        train_writer.close()
        test_writer.close()

重點：在建立每一個層時，如果需要Dropout，就給它一個keep_prob，然後使用tf.nn.dropout(result,keep_prob)來執行Dropout。

13.tensorflow中使用卷積網絡分類Mnist

import os

# Set the log level before tensorflow is imported, as the first example in
# these notes does; the original set it only after the import.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Load MNIST from the 'MNIST_data' directory with one-hot labels.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)


# Add one layer to the network.
def add_layer(inputs, in_size, out_size, activation_function=None, keep_prob=1):
    """Build one fully connected layer with dropout and return its output.

    Args:
        inputs: output of the previous layer.
        in_size: number of nodes in the previous layer.
        out_size: number of nodes in this layer.
        activation_function: callable applied after dropout; None keeps the
            layer linear.
        keep_prob: passed straight to tf.nn.dropout; the default 1 keeps
            everything, and the dropped share is 1 - keep_prob.

    Returns:
        The layer's output tensor.
    """
    Weights = tf.Variable(tf.random_normal([in_size, out_size]))
    biases = tf.Variable(tf.zeros([1, out_size]) + 0.1)
    Wx_plus_b = tf.matmul(inputs, Weights) + biases
    # Dropout is applied to the affine result, before the activation.
    Wx_plus_b = tf.nn.dropout(Wx_plus_b, keep_prob)
    if activation_function is None:
        outputs = Wx_plus_b
    else:
        outputs = activation_function(Wx_plus_b)
    return outputs


# Measure accuracy.
def compute_accuracy(v_xs, v_ys):
    """Return the fraction of samples whose predicted class matches the label.

    Uses the module-level `sess`, `prediction` and `xs`.

    Args:
        v_xs: input samples fed to the `xs` placeholder.
        v_ys: one-hot labels, one row per sample.

    Returns:
        The accuracy as a float32 scalar.
    """
    # Local import: numpy is not imported at the top of this script.
    import numpy as np

    # Run the prediction layer once on v_xs.
    y_pre = sess.run(prediction, feed_dict={xs: v_xs})
    # argmax along axis 1 gives the index of the largest value in each row.
    # The comparison is done in NumPy because y_pre is already an array; the
    # original created new tf.equal/tf.argmax/tf.reduce_mean ops on every
    # call, so the graph kept growing during training.
    predicted_classes = np.argmax(y_pre, axis=1)
    label_classes = np.argmax(v_ys, axis=1)
    return np.mean(predicted_classes == label_classes, dtype=np.float32)


# Create a weight variable of the given shape.
def weight_variable(shape):
    """Return a variable of `shape` initialised from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))


# Create a bias variable of the given shape.
def bias_variable(shape):
    """Return a variable of `shape` with every element initialised to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))


# 2-D convolution layer: calls tf.nn.conv2d directly, where x is the input,
# W the kernel tensor and strides = [1, y_step, x_step, 1].
# padding takes 'SAME' (pad) or 'VALID' (no padding).
def conv2d(x, W):
    """Convolve x with kernel W using stride 1 in both directions and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')


# Max-pooling layer: ksize = [1, y_size, x_size, 1]; strides and padding have
# the same meaning as for the convolution.
def max_pool_2x2(x):
    """Apply 2x2 max pooling with stride 2 and 'SAME' padding to x."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')


# define placeholder for inputs on network
xs = tf.placeholder(tf.float32, [None, 784])  # each handwritten digit image is 28*28 pixels
ys = tf.placeholder(tf.float32, [None, 10])  # one-hot label, 1*10 per sample
# Reshape the flat rows into images: [-1, 28, 28, 1], where -1 is the number
# of samples fed (100 per training batch), 28*28 the image size and 1 the
# channel count.
x_data = tf.reshape(xs, [-1, 28, 28, 1])

###### Network definition, loosely following the LeNet structure ######
### conv1
# Kernel tensor [5, 5, 1, 16]: 5*5 kernel size, 1 input channel, 16 kernels.
W_conv1 = weight_variable([5, 5, 1, 16])
# conv1 biases.
b_conv1 = bias_variable([16])
# conv1 output with ReLU activation.
h_conv1 = tf.nn.relu(conv2d(x_data, W_conv1) + b_conv1)
# Pooling layer.
h_pool1 = max_pool_2x2(h_conv1)

### conv2, set up like conv1: 16 input channels, 32 kernels.
W_conv2 = weight_variable([5, 5, 16, 32])
b_conv2 = bias_variable([32])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
# The pooled output feeds the fully connected layers, so flatten each
# 7*7*32 block into a vector of 1568 values.
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 32])
# To check the shape (expected (100, 1568) for a batch of 100), evaluate
# tf.shape(h_pool2_flat).

### fc1 with 200 nodes: weight matrix 1568*200 and 200 biases.
W_fc1 = weight_variable([7 * 7 * 32, 200])
b_fc1 = bias_variable([200])
# fc1 output with ReLU activation.
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
# Dropout on h_fc1 is left disabled in this example.

### fc2, set up like fc1.
W_fc2 = weight_variable([200, 100])
b_fc2 = bias_variable([100])
h_fc2 = tf.nn.relu(tf.matmul(h_fc1, W_fc2) + b_fc2)

### Output layer; it uses a different activation: softmax for multi-class output.
w_fc3 = weight_variable([100, 10])
b_fc3 = bias_variable([10])
prediction = tf.nn.softmax(tf.matmul(h_fc2, w_fc3) + b_fc3)

# Cross-entropy loss, averaged over the batch.
cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys * tf.log(prediction), reduction_indices=[1]))
tf.summary.scalar('Loss', cross_entropy)
# Minimise cross_entropy with Adam, learning rate 0.0001.
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

# The context manager closes the session when training ends; `sess` is still
# a module-level name, as compute_accuracy expects.
with tf.Session() as sess:
    # Two writers on the same graph: their curves overlap in TensorBoard, so
    # the train and test losses can be compared to spot overfitting.
    train_writer = tf.summary.FileWriter('logs2/train', sess.graph)
    test_writer = tf.summary.FileWriter('logs2/test', sess.graph)
    merged = tf.summary.merge_all()
    try:
        sess.run(tf.global_variables_initializer())
        # range(20001) gives steps 0..20000.
        for i in range(20001):
            # Mini-batch training with batch_size = 100.
            batch_x, batch_y = mnist.train.next_batch(100)
            # One training step.
            sess.run(train_step, feed_dict={xs: batch_x, ys: batch_y})
            # Every 100 steps record both losses and print the test accuracy.
            if i % 100 == 0:
                train_res = sess.run(merged, feed_dict={xs: batch_x, ys: batch_y})
                test_res = sess.run(merged, feed_dict={xs: mnist.test.images, ys: mnist.test.labels})
                train_writer.add_summary(train_res, i)
                test_writer.add_summary(test_res, i)
                print('Acc on loop ', i, ':', compute_accuracy(mnist.test.images, mnist.test.labels))
    finally:
        # Flush and close both event files; the original never closed them.
        train_writer.close()
        test_writer.close()

在tensorflow 1.14.0下的代碼如下（API更改較多）：

# -*- coding:utf-8 -*-
__author__ = 'Leo.Z'

import os

# Set the log level before tensorflow is imported, as the first example in
# these notes does; the original set it only after the import.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Load MNIST from the 'MNIST_data' directory with one-hot labels.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)


# Add one layer to the network.
def add_layer(inputs, in_size, out_size, activation_function=None, keep_prob=1):
    """Build one fully connected layer with dropout and return its output.

    Args:
        inputs: output of the previous layer.
        in_size: number of nodes in the previous layer.
        out_size: number of nodes in this layer.
        activation_function: callable applied after dropout; None keeps the
            layer linear.
        keep_prob: share of units to keep; the default 1 keeps everything,
            and the dropped share is 1 - keep_prob.

    Returns:
        The layer's output tensor.
    """
    # tf.random.normal replaces the deprecated tf.random_normal alias, in
    # line with the other TensorFlow 1.14 names used in this version.
    Weights = tf.Variable(tf.random.normal([in_size, out_size]))
    biases = tf.Variable(tf.zeros([1, out_size]) + 0.1)
    Wx_plus_b = tf.matmul(inputs, Weights) + biases
    # Dropout on the affine result, before the activation. TensorFlow 1.14
    # deprecates the keep_prob argument of tf.nn.dropout in favour of
    # rate = 1 - keep_prob.
    Wx_plus_b = tf.nn.dropout(Wx_plus_b, rate=1 - keep_prob)
    if activation_function is None:
        outputs = Wx_plus_b
    else:
        outputs = activation_function(Wx_plus_b)
    return outputs


# Measure accuracy.
def compute_accuracy(v_xs, v_ys):
    """Return the fraction of samples whose predicted class matches the label.

    Uses the module-level `sess`, `prediction` and `xs`.

    Args:
        v_xs: input samples fed to the `xs` placeholder.
        v_ys: one-hot labels, one row per sample.

    Returns:
        The accuracy as a float32 scalar.
    """
    # Local import: numpy is not imported at the top of this script.
    import numpy as np

    # Run the prediction layer once on v_xs.
    y_pre = sess.run(prediction, feed_dict={xs: v_xs})
    # argmax along axis 1 gives the index of the largest value in each row.
    # The comparison is done in NumPy because y_pre is already an array; the
    # original created new tf.equal/tf.argmax/tf.reduce_mean ops on every
    # call, so the graph kept growing during training.
    predicted_classes = np.argmax(y_pre, axis=1)
    label_classes = np.argmax(v_ys, axis=1)
    return np.mean(predicted_classes == label_classes, dtype=np.float32)


# Create a weight variable of the given shape.
def weight_variable(shape):
    """Return a variable of `shape` initialised from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.random.truncated_normal(shape, stddev=0.1))


# Create a bias variable of the given shape.
def bias_variable(shape):
    """Return a variable of `shape` with every element initialised to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))


# 2-D convolution layer: calls tf.nn.conv2d directly, where x is the input,
# W the kernel tensor and strides = [1, y_step, x_step, 1].
# padding takes 'SAME' (pad) or 'VALID' (no padding).
def conv2d(x, W):
    """Convolve x with kernel W using stride 1 in both directions and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')


# Max-pooling layer: ksize = [1, y_size, x_size, 1]; strides and padding have
# the same meaning as for the convolution.
def max_pool_2x2(x):
    """Apply 2x2 max pooling with stride 2 and 'SAME' padding to x."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool2d(x, ksize=window, strides=step, padding='SAME')


# define placeholder for inputs on network
xs = tf.compat.v1.placeholder(tf.float32, [None, 784])  # each handwritten digit image is 28*28 pixels
ys = tf.compat.v1.placeholder(tf.float32, [None, 10])  # one-hot label, 1*10 per sample
# Reshape the flat rows into images: [-1, 28, 28, 1], where -1 is the number
# of samples fed (100 per training batch), 28*28 the image size and 1 the
# channel count.
x_data = tf.reshape(xs, [-1, 28, 28, 1])

###### Network definition, loosely following the LeNet structure ######
### conv1
# Kernel tensor [5, 5, 1, 16]: 5*5 kernel size, 1 input channel, 16 kernels.
W_conv1 = weight_variable([5, 5, 1, 16])
# conv1 biases.
b_conv1 = bias_variable([16])
# conv1 output with ReLU activation.
h_conv1 = tf.nn.relu(conv2d(x_data, W_conv1) + b_conv1)
# Pooling layer.
h_pool1 = max_pool_2x2(h_conv1)

### conv2, set up like conv1: 16 input channels, 32 kernels.
W_conv2 = weight_variable([5, 5, 16, 32])
b_conv2 = bias_variable([32])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
# The pooled output feeds the fully connected layers, so flatten each
# 7*7*32 block into a vector of 1568 values.
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 32])
# To check the shape (expected (100, 1568) for a batch of 100), evaluate
# tf.shape(h_pool2_flat).

### fc1 with 200 nodes: weight matrix 1568*200 and 200 biases.
W_fc1 = weight_variable([7 * 7 * 32, 200])
b_fc1 = bias_variable([200])
# fc1 output with ReLU activation.
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
# Dropout on h_fc1 is left disabled in this example.

### fc2, set up like fc1.
W_fc2 = weight_variable([200, 100])
b_fc2 = bias_variable([100])
h_fc2 = tf.nn.relu(tf.matmul(h_fc1, W_fc2) + b_fc2)

### Output layer; it uses a different activation: softmax for multi-class output.
w_fc3 = weight_variable([100, 10])
b_fc3 = bias_variable([10])
prediction = tf.nn.softmax(tf.matmul(h_fc2, w_fc3) + b_fc3)

# Cross-entropy loss, averaged over the batch. `axis` replaces the deprecated
# `reduction_indices` argument, in line with the other 1.14 API updates here.
cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys * tf.math.log(prediction), axis=[1]))
tf.compat.v1.summary.scalar('Loss', cross_entropy)
# Minimise cross_entropy with Adam, learning rate 0.0001.
train_step = tf.compat.v1.train.AdamOptimizer(1e-4).minimize(cross_entropy)

# The context manager closes the session when training ends; `sess` is still
# a module-level name, as compute_accuracy expects.
with tf.compat.v1.Session() as sess:
    # Two writers on the same graph: their curves overlap in TensorBoard, so
    # the train and test losses can be compared to spot overfitting.
    train_writer = tf.compat.v1.summary.FileWriter('logs2/train', sess.graph)
    test_writer = tf.compat.v1.summary.FileWriter('logs2/test', sess.graph)
    merged = tf.compat.v1.summary.merge_all()
    try:
        sess.run(tf.compat.v1.global_variables_initializer())
        # range(20001) gives steps 0..20000.
        for i in range(20001):
            # Mini-batch training with batch_size = 100.
            batch_x, batch_y = mnist.train.next_batch(100)
            # One training step.
            sess.run(train_step, feed_dict={xs: batch_x, ys: batch_y})
            # Every 100 steps record both losses and print the test accuracy.
            if i % 100 == 0:
                train_res = sess.run(merged, feed_dict={xs: batch_x, ys: batch_y})
                test_res = sess.run(merged, feed_dict={xs: mnist.test.images, ys: mnist.test.labels})
                train_writer.add_summary(train_res, i)
                test_writer.add_summary(test_res, i)
                print('Acc on loop ', i, ':', compute_accuracy(mnist.test.images, mnist.test.labels))
    finally:
        # Flush and close both event files; the original never closed them.
        train_writer.close()
        test_writer.close()

14.使用tensorflow的Saver存放模型參數

import os

import tensorflow as tf

# Parameters to store. The variable names are the keys under which the values
# are saved, so the loading script must use the same names.
W = tf.Variable([[1, 2, 3], [4, 5, 6]], dtype=tf.float32, name='wrights')
b = tf.Variable([1, 2, 3], dtype=tf.float32, name='biases')

init = tf.global_variables_initializer()
saver = tf.train.Saver()

# Make sure the target directory exists before saving. NOTE(review):
# Saver.save can fail with "Parent directory ... doesn't exist" when it is
# missing; creating it up front is harmless either way.
if not os.path.isdir("my_net"):
    os.makedirs("my_net")

with tf.Session() as sess:
    sess.run(init)
    # Write the checkpoint and report the path that was used.
    save_path = saver.save(sess, "my_net/save_net.ckpt")
    print('save_path: ', save_path)

15.使用Saver載入模型參數

import tensorflow as tf
import numpy as np

# Re-create W and b as they were when saved. Their contents do not matter,
# but shape and dtype must match the saved values.
W = tf.Variable(tf.zeros([2, 3]), dtype=tf.float32, name='wrights')
# NumPy works for the initial value as well, e.g.
# tf.Variable(np.zeros((2, 3)), dtype=tf.float32, name='wrights').
b = tf.Variable(tf.zeros(3), dtype=tf.float32, name='biases')

init = tf.global_variables_initializer()
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(init)
    # Load the stored values over the freshly initialised variables.
    checkpoint_path = "my_net/save_net.ckpt"
    saver.restore(sess, checkpoint_path)
    restored_w = sess.run(W)
    print('Weights:', restored_w)
    restored_b = sess.run(b)
    print('biased:', restored_b)

16.結束語

當擼完深度學習基礎理論，不知道如何選擇和使用繁多的框架時，真真感覺方得一P。無意間在bilibili發現了莫煩的tensorflow教程，雖然這門視頻課程的示例都非常簡單，但也足夠讓我初窺其貌，以至於又有了前進的方向，從而在框架的學習上不再迷茫。在此感謝莫煩同學的無私奉獻（^。^）。33歲還在路上的程序猿記於深夜......爲終身學習這個偉大目標加油.....