[tensorflow] The official TensorFlow example versus a traditional BP network -- the importance of hyperparameter initialization

First, prepare the MNIST dataset (downloadable from the web) and input.py.

The contents of input.py are as follows:

# -*- coding: utf-8 -*-

# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Functions for downloading and reading MNIST data."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import gzip
import os
import tempfile

import numpy
from six.moves import urllib
from six.moves import xrange  # pylint: disable=redefined-builtin

import tensorflow as tf
from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets

test_soft.py, the official example, is as follows (URL):

# -*- coding: utf-8 -*-
""" Created on Wed Nov 22 13:24:22 2017 @author: fc """
#from tensorflow.examples.tutorials.mnist import input_data #mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
batchsize=100
import tensorflow as tf import input_data def train(): mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) sess = tf.InteractiveSession() x = tf.placeholder("float", [None, 784],name='x-input') y_ = tf.placeholder("float", [None,10],name='y-input') w = tf.Variable(tf.zeros([784,10])) b = tf.Variable(tf.zeros([10])) y = tf.nn.softmax(tf.matmul(x,w) + b) cross_entropy = -tf.reduce_sum(y_*tf.log(y)) train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy) init = tf.global_variables_initializer() sess.run(init) for i in range(1000): batch_xs, batch_ys = mnist.train.next_batch(batchsize) sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys}) correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float")) print (sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels})) if __name__ == '__main__': train() 
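Note that this single-layer softmax model initializes w and b to all zeros. With only one weight layer there is no hidden-unit symmetry to break, so zero initialization works fine here; as the experiment further below shows, that casualness about initialization does not carry over to a multi-layer network.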

Note that in this line of code, the argument passed in is the actual location of the MNIST data on your machine:

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
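For example (the path below is purely illustrative; substitute the directory that actually holds the four MNIST .gz files):

# Hypothetical path for illustration only -- use your own data directory.
mnist = input_data.read_data_sets("/home/fc/data/MNIST_data/", one_hot=True)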

The result of running it:

[screenshot of the printed test accuracy]

test_nn.py, a traditional BP (backpropagation) neural network:

# -*- coding: utf-8 -*-
""" Created on Wed Dec 13 23:11:09 2017 @author: fc """
#from tensorflow.examples.tutorials.mnist import input_data #mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
batchsize=64
import tensorflow as tf from tensorflow.examples.tutorials.mnist import input_data mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) def train(init_bias): sess = tf.InteractiveSession() #---------------------------------------初始化網絡結構-------------------------------------
    x = tf.placeholder("float", [None, 784],name='x-input') y_ = tf.placeholder("float", [None,10],name='y-input') W1 = tf.Variable(tf.random_uniform([784,100],-0.5+init_bias,0.5+init_bias)) b1 = tf.Variable(tf.random_uniform([100],-0.5+init_bias,0.5+init_bias)) u1 = tf.matmul(x,W1) + b1 y1 = tf.nn.sigmoid(u1) # y1=u1
    W2 = tf.Variable(tf.random_uniform([100,10],-0.5+init_bias,0.5+init_bias)) b2 = tf.Variable(tf.random_uniform([10],-0.5+init_bias,0.5+init_bias)) y = tf.nn.sigmoid(tf.matmul(y1,W2) + b2) #---------------------------------------設置網絡的訓練方式-------------------------------------
    mse = tf.reduce_sum(tf.square(y-y_))#mse # train_step = tf.train.GradientDescentOptimizer(0.02).minimize(mse)
    train_step = tf.train.AdamOptimizer(0.001).minimize(mse) correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float")) init = tf.global_variables_initializer() sess.run(init) #---------------------------------------開始訓練-------------------------------------
    for i in range(1001): batch_xs, batch_ys = mnist.train.next_batch(batchsize) sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys}) print ('權重初始化範圍[%.1f,%.1f],1000次訓練事後的準確率'
           %(init_bias-0.5,init_bias+0.5),sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels})) if __name__ == '__main__': init_bias=-0.6#權重的初始化時的偏置量
    for i in range(11): init_bias+=0.1 train(init_bias)
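The main block sweeps init_bias from -0.5 to 0.5 in steps of 0.1, so the interval from which all weights and biases are drawn slides from (-1.0, 0.0) up to (0.0, 1.0), with each setting trained for 1000 steps before its test accuracy is printed.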

When I first wrote this program, the weight-initialization call was:

W1 = tf.Variable(tf.random_uniform([784,100],0,1.0))

The weights were uniformly distributed in (0.0, 1.0), and the trained network performed terribly, at roughly 0.1 accuracy (10%), i.e. chance level for 10 classes.

After much checking I confirmed the problem was here; changing the distribution to (-0.5, 0.5) improved the training accuracy enormously.
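In other words, the corrected line is the init_bias = 0 case of test_nn.py above:

# Zero-centered uniform initialization, i.e. init_bias = 0.
W1 = tf.Variable(tf.random_uniform([784,100],-0.5,0.5))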

I have not fully worked out the reason yet; a plausible explanation is that with all-positive weights, each hidden pre-activation is the sum of 784 non-negative terms, so the sigmoids start out deeply saturated and backpropagate almost no gradient.
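A minimal NumPy sketch (my own illustration, not part of the original program) of how the two ranges affect the first layer's pre-activations and sigmoid gradients:

import numpy as np

np.random.seed(0)
x = np.random.uniform(0.0, 1.0, size=784)               # one synthetic "image": pixels in [0, 1]

w_pos  = np.random.uniform(0.0, 1.0, size=(784, 100))   # all-positive init, the failing case
w_cent = np.random.uniform(-0.5, 0.5, size=(784, 100))  # zero-centered init, the fix

u_pos  = x @ w_pos    # pre-activations around 784 * 0.5 * 0.5, i.e. ~200
u_cent = x @ w_cent   # zero-mean pre-activations, standard deviation of a few

def sigmoid(u):
    return 1.0 / (1.0 + np.exp(-u))

# The backprop factor through a sigmoid is s*(1-s); near zero means no learning.
grad_pos  = sigmoid(u_pos)  * (1.0 - sigmoid(u_pos))
grad_cent = sigmoid(u_cent) * (1.0 - sigmoid(u_cent))
print("mean |u|:              all-positive %.1f, centered %.2f"
      % (np.abs(u_pos).mean(), np.abs(u_cent).mean()))
print("mean sigmoid gradient: all-positive %.2e, centered %.2e"
      % (grad_pos.mean(), grad_cent.mean()))

In the all-positive case the sigmoid outputs are pinned near 1 and the gradient factor underflows toward zero, which is consistent with the observed chance-level accuracy.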

The output of test_nn.py is as follows:

[screenshots of the accuracy printed for each initialization interval]

This shows that different hyperparameter (here, weight) initializations have an enormous effect on the network.
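Going beyond the original post: the standard remedy is to scale the initialization range by the layer's fan-in and fan-out, the Glorot/Xavier rule. A minimal sketch, assuming the same TF 1.x API used above (this is the conventional fix, not something the post itself tried):

import numpy as np
import tensorflow as tf

def glorot_uniform(fan_in, fan_out):
    # Glorot/Xavier uniform rule: limit = sqrt(6 / (fan_in + fan_out)).
    limit = np.sqrt(6.0 / (fan_in + fan_out))
    return tf.random_uniform([fan_in, fan_out], -limit, limit)

W1 = tf.Variable(glorot_uniform(784, 100))   # zero-centered, scale matched to the layer
W2 = tf.Variable(glorot_uniform(100, 10))

Because the range is zero-centered and shrinks with layer width, the pre-activations start in the sigmoid's sensitive region regardless of layer size.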
