Classic CNN Network Architectures

LeNet-5 Network Architecture

def inference(input_tensor,train,regularizer):

    #Layer 1: convolutional layer. Filter size 5x5, depth 6, no zero padding, stride 1.
    #Size change: 32x32x1 -> 28x28x6
    with tf.variable_scope('layer1-conv1'):
        conv1_weights = tf.get_variable('weight',[5,5,1,6],initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv1_biases = tf.get_variable('bias',[6],initializer=tf.constant_initializer(0.0))
        conv1 = tf.nn.conv2d(input_tensor,conv1_weights,strides=[1,1,1,1],padding='VALID')
        relu1 = tf.nn.relu(tf.nn.bias_add(conv1,conv1_biases))

    #Layer 2: pooling layer. Filter size 2x2, zero padding (SAME), stride 2.
    #Size change: 28x28x6 -> 14x14x6
    with tf.name_scope('layer2-pool1'):
        pool1 = tf.nn.max_pool(relu1,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')

    #Layer 3: convolutional layer. Filter size 5x5, depth 16, no zero padding, stride 1.
    #Size change: 14x14x6 -> 10x10x16
    with tf.variable_scope('layer3-conv2'):
        conv2_weights = tf.get_variable('weight',[5,5,6,16],initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv2_biases = tf.get_variable('bias',[16],initializer=tf.constant_initializer(0.0))
        conv2 = tf.nn.conv2d(pool1,conv2_weights,strides=[1,1,1,1],padding='VALID')
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2,conv2_biases))

    #Layer 4: pooling layer. Filter size 2x2, zero padding (SAME), stride 2.
    #Size change: 10x10x16 -> 5x5x16
    with tf.variable_scope('layer4-pool2'):
        pool2 = tf.nn.max_pool(relu2,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')

    #Convert the output of the fourth (pooling) layer into the input format of the fifth (fully connected) layer.
    #The fourth layer outputs a 5x5x16 feature map, but the fully connected layer expects a vector, so each image's
    #5x5x16 feature map is flattened into a vector of length 5x5x16=400.
    #For example, with a batch of 64 images, the pooling output has shape (64,5,5,16); after flattening, nodes=5x5x16=400 and the shape becomes (64,400).
    pool_shape = pool2.get_shape().as_list()
    nodes = pool_shape[1]*pool_shape[2]*pool_shape[3]
    reshaped = tf.reshape(pool2,[-1,nodes])

    #Layer 5: fully connected layer, nodes=5x5x16=400, i.e. a 400->120 fully connected mapping.
    #Size change: with a batch of 64 samples, 64x400 -> 64x120.
    #During training, dropout is applied. Dropout randomly sets part of the layer's outputs to 0, which helps avoid overfitting.
    #This follows the same idea that simpler models overfit less, and complements regularization, which limits the weight
    #magnitudes so the model cannot fit arbitrary noise in the training data.
    #In the final training run of this article dropout was not used (the train flag was passed as False), because training
    #and testing were written together rather than separated, but you are encouraged to try it.
    with tf.variable_scope('layer5-fc1'):
        fc1_weights = tf.get_variable('weight',[nodes,120],initializer=tf.truncated_normal_initializer(stddev=0.1))
        if regularizer != None:
            tf.add_to_collection('losses',regularizer(fc1_weights))
        fc1_biases = tf.get_variable('bias',[120],initializer=tf.constant_initializer(0.1))
        fc1 = tf.nn.relu(tf.matmul(reshaped,fc1_weights) + fc1_biases)
        if train:
            fc1 = tf.nn.dropout(fc1,0.5)

    #Layer 6: fully connected layer, 120->84.
    #Size change: with a batch of 64 samples, 64x120 -> 64x84.
    with tf.variable_scope('layer6-fc2'):
        fc2_weights = tf.get_variable('weight',[120,84],initializer=tf.truncated_normal_initializer(stddev=0.1))
        if regularizer != None:
            tf.add_to_collection('losses',regularizer(fc2_weights))
        fc2_biases = tf.get_variable('bias',[84],initializer=tf.truncated_normal_initializer(stddev=0.1))
        fc2 = tf.nn.relu(tf.matmul(fc1,fc2_weights) + fc2_biases)
        if train:
            fc2 = tf.nn.dropout(fc2,0.5)

    #Layer 7: fully connected layer (an approximation of the original output layer), 84->10.
    #Size change: with a batch of 64 samples, 64x84 -> 64x10. Finally, passing the 64x10 matrix through softmax yields,
    #for each of the 64 images, the probability of belonging to each digit class, i.e. the final classification result.
    with tf.variable_scope('layer7-fc3'):
        fc3_weights = tf.get_variable('weight',[84,10],initializer=tf.truncated_normal_initializer(stddev=0.1))
        if regularizer != None:
            tf.add_to_collection('losses',regularizer(fc3_weights))
        fc3_biases = tf.get_variable('bias',[10],initializer=tf.truncated_normal_initializer(stddev=0.1))
        logit = tf.matmul(fc2,fc3_weights) + fc3_biases
    return logit
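The function above assumes `import tensorflow as tf` (TensorFlow 1.x). A minimal usage sketch follows; the placeholder shape and the regularization scale are assumptions for illustration, not part of the original article.

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 32, 32, 1], name='x-input')   # LeNet-5 expects 32x32 single-channel images
regularizer = tf.contrib.layers.l2_regularizer(0.0001)              # assumed L2 regularization scale
logits = inference(x, train=False, regularizer=regularizer)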

AlexNet

def inference(images):
  """
  Build an AlexNet model.

  """
  parameters = []
  # Layer 1: convolutional layer conv1
  with tf.name_scope('conv1') as scope:
    kernel = tf.Variable(tf.truncated_normal([11, 11, 3, 96], dtype=tf.float32,
                                             stddev=1e-1), name='weights')
    conv = tf.nn.conv2d(images, kernel, [1, 4, 4, 1], padding='SAME')
    biases = tf.Variable(tf.constant(0.0, shape=[96], dtype=tf.float32),
                         trainable=True, name='biases')
    bias = tf.nn.bias_add(conv, biases)
    conv1 = tf.nn.relu(bias, name=scope)
    print_activations(conv1)
    parameters += [kernel, biases]


  # Layer 2: pooling layer pool1
    pool1 = tf.nn.max_pool(conv1,
                         ksize=[1, 3, 3, 1],
                         strides=[1, 2, 2, 1],
                         padding='VALID',
                         name='pool1')
    print_activations(pool1)

  # Layer 3: convolutional layer conv2
  with tf.name_scope('conv2') as scope:
    kernel = tf.Variable(tf.truncated_normal([5, 5, 96, 256], dtype=tf.float32,
                                             stddev=1e-1), name='weights')
    conv = tf.nn.conv2d(pool1, kernel, [1, 1, 1, 1], padding='SAME')
    biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32),
                         trainable=True, name='biases')
    bias = tf.nn.bias_add(conv, biases)
    conv2 = tf.nn.relu(bias, name=scope)
    parameters += [kernel, biases]
    print_activations(conv2)

  # Layer 4: pooling layer pool2
    pool2 = tf.nn.max_pool(conv2,
                         ksize=[1, 3, 3, 1],
                         strides=[1, 2, 2, 1],
                         padding='VALID',
                         name='pool2')
    print_activations(pool2)

  # Layer 5: convolutional layer conv3
  with tf.name_scope('conv3') as scope:
    kernel = tf.Variable(tf.truncated_normal([3, 3, 256, 384],
                                             dtype=tf.float32,
                                             stddev=1e-1), name='weights')
    conv = tf.nn.conv2d(pool2, kernel, [1, 1, 1, 1], padding='SAME')
    biases = tf.Variable(tf.constant(0.0, shape=[384], dtype=tf.float32),
                         trainable=True, name='biases')
    bias = tf.nn.bias_add(conv, biases)
    conv3 = tf.nn.relu(bias, name=scope)
    parameters += [kernel, biases]
    print_activations(conv3)

  # Layer 6: convolutional layer conv4
  with tf.name_scope('conv4') as scope:
    kernel = tf.Variable(tf.truncated_normal([3, 3, 384, 384],
                                             dtype=tf.float32,
                                             stddev=1e-1), name='weights')
    conv = tf.nn.conv2d(conv3, kernel, [1, 1, 1, 1], padding='SAME')
    biases = tf.Variable(tf.constant(0.0, shape=[384], dtype=tf.float32),
                         trainable=True, name='biases')
    bias = tf.nn.bias_add(conv, biases)
    conv4 = tf.nn.relu(bias, name=scope)
    parameters += [kernel, biases]
    print_activations(conv4)

  # Layer 7: convolutional layer conv5
  with tf.name_scope('conv5') as scope:
    kernel = tf.Variable(tf.truncated_normal([3, 3, 384, 256],
                                             dtype=tf.float32,
                                             stddev=1e-1), name='weights')
    conv = tf.nn.conv2d(conv4, kernel, [1, 1, 1, 1], padding='SAME')
    biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32),
                         trainable=True, name='biases')
    bias = tf.nn.bias_add(conv, biases)
    conv5 = tf.nn.relu(bias, name=scope)
    parameters += [kernel, biases]
    print_activations(conv5)

  # Layer 8: pooling layer pool5
    pool5 = tf.nn.max_pool(conv5,
                         ksize=[1, 3, 3, 1],
                         strides=[1, 2, 2, 1],
                         padding='VALID',
                         name='pool5')
    print_activations(pool5)

    return pool5, parameters
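The function above calls a print_activations helper that is not included in this excerpt, and it assumes `import tensorflow as tf`. A minimal sketch, assuming the helper only logs each op's name and output shape, is:

import tensorflow as tf

def print_activations(t):
    # Print the name of the op producing tensor t and the tensor's static shape.
    print(t.op.name, ' ', t.get_shape().as_list())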

VGG

import tensorflow as tf
import numpy as np
from scipy.misc import imread, imresize
from imagenet_classes import class_names


class vgg16:
    def __init__(self, imgs, weights=None, sess=None):
        self.imgs = imgs
        self.convlayers()
        self.fc_layers()
        self.probs = tf.nn.softmax(self.fc3l)
        if weights is not None and sess is not None:
            self.load_weights(weights, sess)


    def convlayers(self):
        self.parameters = []

        # zero-mean input
        with tf.name_scope('preprocess') as scope:
            mean = tf.constant([123.68, 116.779, 103.939], dtype=tf.float32, shape=[1, 1, 1, 3], name='img_mean')
            images = self.imgs-mean

        # conv1_1
        with tf.name_scope('conv1_1') as scope:
            kernel = tf.Variable(tf.truncated_normal([3, 3, 3, 64], dtype=tf.float32,
                                                     stddev=1e-1), name='weights')
            conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
            biases = tf.Variable(tf.constant(0.0, shape=[64], dtype=tf.float32),
                                 trainable=True, name='biases')
            out = tf.nn.bias_add(conv, biases)
            self.conv1_1 = tf.nn.relu(out, name=scope)
            self.parameters += [kernel, biases]

        # conv1_2
        with tf.name_scope('conv1_2') as scope:
            kernel = tf.Variable(tf.truncated_normal([3, 3, 64, 64], dtype=tf.float32,
                                                     stddev=1e-1), name='weights')
            conv = tf.nn.conv2d(self.conv1_1, kernel, [1, 1, 1, 1], padding='SAME')
            biases = tf.Variable(tf.constant(0.0, shape=[64], dtype=tf.float32),
                                 trainable=True, name='biases')
            out = tf.nn.bias_add(conv, biases)
            self.conv1_2 = tf.nn.relu(out, name=scope)
            self.parameters += [kernel, biases]

        # pool1
        self.pool1 = tf.nn.max_pool(self.conv1_2,
                               ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1],
                               padding='SAME',
                               name='pool1')

        # conv2_1
        with tf.name_scope('conv2_1') as scope:
            kernel = tf.Variable(tf.truncated_normal([3, 3, 64, 128], dtype=tf.float32,
                                                     stddev=1e-1), name='weights')
            conv = tf.nn.conv2d(self.pool1, kernel, [1, 1, 1, 1], padding='SAME')
            biases = tf.Variable(tf.constant(0.0, shape=[128], dtype=tf.float32),
                                 trainable=True, name='biases')
            out = tf.nn.bias_add(conv, biases)
            self.conv2_1 = tf.nn.relu(out, name=scope)
            self.parameters += [kernel, biases]

        # conv2_2
        with tf.name_scope('conv2_2') as scope:
            kernel = tf.Variable(tf.truncated_normal([3, 3, 128, 128], dtype=tf.float32,
                                                     stddev=1e-1), name='weights')
            conv = tf.nn.conv2d(self.conv2_1, kernel, [1, 1, 1, 1], padding='SAME')
            biases = tf.Variable(tf.constant(0.0, shape=[128], dtype=tf.float32),
                                 trainable=True, name='biases')
            out = tf.nn.bias_add(conv, biases)
            self.conv2_2 = tf.nn.relu(out, name=scope)
            self.parameters += [kernel, biases]

        # pool2
        self.pool2 = tf.nn.max_pool(self.conv2_2,
                               ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1],
                               padding='SAME',
                               name='pool2')

        # conv3_1
        with tf.name_scope('conv3_1') as scope:
            kernel = tf.Variable(tf.truncated_normal([3, 3, 128, 256], dtype=tf.float32,
                                                     stddev=1e-1), name='weights')
            conv = tf.nn.conv2d(self.pool2, kernel, [1, 1, 1, 1], padding='SAME')
            biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32),
                                 trainable=True, name='biases')
            out = tf.nn.bias_add(conv, biases)
            self.conv3_1 = tf.nn.relu(out, name=scope)
            self.parameters += [kernel, biases]

        # conv3_2
        with tf.name_scope('conv3_2') as scope:
            kernel = tf.Variable(tf.truncated_normal([3, 3, 256, 256], dtype=tf.float32,
                                                     stddev=1e-1), name='weights')
            conv = tf.nn.conv2d(self.conv3_1, kernel, [1, 1, 1, 1], padding='SAME')
            biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32),
                                 trainable=True, name='biases')
            out = tf.nn.bias_add(conv, biases)
            self.conv3_2 = tf.nn.relu(out, name=scope)
            self.parameters += [kernel, biases]

        # conv3_3
        with tf.name_scope('conv3_3') as scope:
            kernel = tf.Variable(tf.truncated_normal([3, 3, 256, 256], dtype=tf.float32,
                                                     stddev=1e-1), name='weights')
            conv = tf.nn.conv2d(self.conv3_2, kernel, [1, 1, 1, 1], padding='SAME')
            biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32),
                                 trainable=True, name='biases')
            out = tf.nn.bias_add(conv, biases)
            self.conv3_3 = tf.nn.relu(out, name=scope)
            self.parameters += [kernel, biases]

        # pool3
        self.pool3 = tf.nn.max_pool(self.conv3_3,
                               ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1],
                               padding='SAME',
                               name='pool3')

        # conv4_1
        with tf.name_scope('conv4_1') as scope:
            kernel = tf.Variable(tf.truncated_normal([3, 3, 256, 512], dtype=tf.float32,
                                                     stddev=1e-1), name='weights')
            conv = tf.nn.conv2d(self.pool3, kernel, [1, 1, 1, 1], padding='SAME')
            biases = tf.Variable(tf.constant(0.0, shape=[512], dtype=tf.float32),
                                 trainable=True, name='biases')
            out = tf.nn.bias_add(conv, biases)
            self.conv4_1 = tf.nn.relu(out, name=scope)
            self.parameters += [kernel, biases]

        # conv4_2
        with tf.name_scope('conv4_2') as scope:
            kernel = tf.Variable(tf.truncated_normal([3, 3, 512, 512], dtype=tf.float32,
                                                     stddev=1e-1), name='weights')
            conv = tf.nn.conv2d(self.conv4_1, kernel, [1, 1, 1, 1], padding='SAME')
            biases = tf.Variable(tf.constant(0.0, shape=[512], dtype=tf.float32),
                                 trainable=True, name='biases')
            out = tf.nn.bias_add(conv, biases)
            self.conv4_2 = tf.nn.relu(out, name=scope)
            self.parameters += [kernel, biases]

        # conv4_3
        with tf.name_scope('conv4_3') as scope:
            kernel = tf.Variable(tf.truncated_normal([3, 3, 512, 512], dtype=tf.float32,
                                                     stddev=1e-1), name='weights')
            conv = tf.nn.conv2d(self.conv4_2, kernel, [1, 1, 1, 1], padding='SAME')
            biases = tf.Variable(tf.constant(0.0, shape=[512], dtype=tf.float32),
                                 trainable=True, name='biases')
            out = tf.nn.bias_add(conv, biases)
            self.conv4_3 = tf.nn.relu(out, name=scope)
            self.parameters += [kernel, biases]

        # pool4
        self.pool4 = tf.nn.max_pool(self.conv4_3,
                               ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1],
                               padding='SAME',
                               name='pool4')

        # conv5_1
        with tf.name_scope('conv5_1') as scope:
            kernel = tf.Variable(tf.truncated_normal([3, 3, 512, 512], dtype=tf.float32,
                                                     stddev=1e-1), name='weights')
            conv = tf.nn.conv2d(self.pool4, kernel, [1, 1, 1, 1], padding='SAME')
            biases = tf.Variable(tf.constant(0.0, shape=[512], dtype=tf.float32),
                                 trainable=True, name='biases')
            out = tf.nn.bias_add(conv, biases)
            self.conv5_1 = tf.nn.relu(out, name=scope)
            self.parameters += [kernel, biases]

        # conv5_2
        with tf.name_scope('conv5_2') as scope:
            kernel = tf.Variable(tf.truncated_normal([3, 3, 512, 512], dtype=tf.float32,
                                                     stddev=1e-1), name='weights')
            conv = tf.nn.conv2d(self.conv5_1, kernel, [1, 1, 1, 1], padding='SAME')
            biases = tf.Variable(tf.constant(0.0, shape=[512], dtype=tf.float32),
                                 trainable=True, name='biases')
            out = tf.nn.bias_add(conv, biases)
            self.conv5_2 = tf.nn.relu(out, name=scope)
            self.parameters += [kernel, biases]

        # conv5_3
        with tf.name_scope('conv5_3') as scope:
            kernel = tf.Variable(tf.truncated_normal([3, 3, 512, 512], dtype=tf.float32,
                                                     stddev=1e-1), name='weights')
            conv = tf.nn.conv2d(self.conv5_2, kernel, [1, 1, 1, 1], padding='SAME')
            biases = tf.Variable(tf.constant(0.0, shape=[512], dtype=tf.float32),
                                 trainable=True, name='biases')
            out = tf.nn.bias_add(conv, biases)
            self.conv5_3 = tf.nn.relu(out, name=scope)
            self.parameters += [kernel, biases]

        # pool5
        self.pool5 = tf.nn.max_pool(self.conv5_3,
                               ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1],
                               padding='SAME',
                               name='pool5')

    def fc_layers(self):
        # fc1
        with tf.name_scope('fc1') as scope:
            shape = int(np.prod(self.pool5.get_shape()[1:]))
            fc1w = tf.Variable(tf.truncated_normal([shape, 4096],
                                                         dtype=tf.float32,
                                                         stddev=1e-1), name='weights')
            fc1b = tf.Variable(tf.constant(1.0, shape=[4096], dtype=tf.float32),
                                 trainable=True, name='biases')
            pool5_flat = tf.reshape(self.pool5, [-1, shape])
            fc1l = tf.nn.bias_add(tf.matmul(pool5_flat, fc1w), fc1b)
            self.fc1 = tf.nn.relu(fc1l)
            self.parameters += [fc1w, fc1b]

        # fc2
        with tf.name_scope('fc2') as scope:
            fc2w = tf.Variable(tf.truncated_normal([4096, 4096],
                                                         dtype=tf.float32,
                                                         stddev=1e-1), name='weights')
            fc2b = tf.Variable(tf.constant(1.0, shape=[4096], dtype=tf.float32),
                                 trainable=True, name='biases')
            fc2l = tf.nn.bias_add(tf.matmul(self.fc1, fc2w), fc2b)
            self.fc2 = tf.nn.relu(fc2l)
            self.parameters += [fc2w, fc2b]

        # fc3
        with tf.name_scope('fc3') as scope:
            fc3w = tf.Variable(tf.truncated_normal([4096, 1000],
                                                         dtype=tf.float32,
                                                         stddev=1e-1), name='weights')
            fc3b = tf.Variable(tf.constant(1.0, shape=[1000], dtype=tf.float32),
                                 trainable=True, name='biases')
            self.fc3l = tf.nn.bias_add(tf.matmul(self.fc2, fc3w), fc3b)
            self.parameters += [fc3w, fc3b]

    def load_weights(self, weight_file, sess):
        weights = np.load(weight_file)
        keys = sorted(weights.keys())
        for i, k in enumerate(keys):
            print (i, k, np.shape(weights[k]))
            sess.run(self.parameters[i].assign(weights[k]))

if __name__ == '__main__':
    sess = tf.Session()
    imgs = tf.placeholder(tf.float32, [None, 224, 224, 3])
    vgg = vgg16(imgs, 'vgg16_weights.npz', sess)

    img1 = imread('laska.png', mode='RGB')
    img1 = imresize(img1, (224, 224))

    prob = sess.run(vgg.probs, feed_dict={vgg.imgs: [img1]})[0]
    preds = (np.argsort(prob)[::-1])[0:5]
    for p in preds:
        print( class_names[p], prob[p])

GoogLeNet
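The Inception V3 code below uses TF-Slim and a `trunc_normal` helper without showing the corresponding imports. A minimal sketch of the assumed setup, following the conventions of the standard slim implementation, is:

import tensorflow as tf
slim = tf.contrib.slim
# Helper assumed by the code below: a truncated-normal initializer with the given standard deviation.
trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)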

######## Define a function that generates default arguments for the ops used throughout the network ########
# Defaults: the convolution activation function, weight initialization scheme, normalizer, etc.
def inception_v3_arg_scope(weight_decay=0.00004,  # weight decay for the L2 regularizer
                           stddev=0.1,  # default standard deviation 0.1
                           batch_norm_var_collection='moving_vars'):

    batch_norm_params = {  # parameter dictionary for batch normalization
      'decay': 0.9997,  # decay coefficient for the moving averages
      'epsilon': 0.001,
      'updates_collections': tf.GraphKeys.UPDATE_OPS,
      'variables_collections': {
          'beta': None,
          'gamma': None,
          'moving_mean': [batch_norm_var_collection],
          'moving_variance': [batch_norm_var_collection],
      }
    }

    # After slim.arg_scope is applied, these parameters no longer have to be repeated for every layer;
    # they only need to be set explicitly where they differ from the defaults.
    with slim.arg_scope([slim.conv2d, slim.fully_connected],  # automatically supply default arguments
                        weights_regularizer=slim.l2_regularizer(weight_decay)):  # applies to both slim.conv2d and slim.fully_connected
        # Nest another slim.arg_scope that sets defaults for several slim.conv2d arguments
        with slim.arg_scope(
            [slim.conv2d],
            weights_initializer=trunc_normal(stddev),  # weight initializer
            activation_fn=tf.nn.relu,  # activation function
            normalizer_fn=slim.batch_norm,  # normalizer
            normalizer_params=batch_norm_params) as sc:  # normalizer parameters set to the batch_norm_params defined above
            return sc  # return the fully configured scope


######## Define the function that builds the convolutional part of the Inception V3 network ########
def inception_v3_base(inputs, scope=None):
  '''
  Args:
  inputs: the input tensor
  scope: the scope that carries the function's default arguments
  '''
  end_points = {}  # dictionary storing important intermediate nodes for later use

  with tf.variable_scope(scope, 'InceptionV3', [inputs]):
    with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],  # set defaults for these three ops
                        stride=1, padding='VALID'):
      # The actual Inception V3 structure starts here, beginning with the plain (non-Inception-Module) convolutions.
      # 299 x 299 x 3
      # Arguments: the input tensor, the number of output channels, the kernel size, then stride and padding mode
      net = slim.conv2d(inputs, 32, [3, 3], stride=2, scope='Conv2d_1a_3x3')  # create a convolutional layer directly with slim.conv2d
      # 149 x 149 x 32
      '''
      Thanks to slim and slim.arg_scope, a convolutional layer can be defined in a single line of code,
      which is far more convenient than the several lines per layer needed in AlexNet or the dedicated
      layer-building function used in VGGNet.
      '''
      net = slim.conv2d(net, 32, [3, 3], scope='Conv2d_2a_3x3')
      # 147 x 147 x 32
      net = slim.conv2d(net, 64, [3, 3], padding='SAME', scope='Conv2d_2b_3x3')
      # 147 x 147 x 64
      net = slim.max_pool2d(net, [3, 3], stride=2, scope='MaxPool_3a_3x3')
      # 73 x 73 x 64
      net = slim.conv2d(net, 80, [1, 1], scope='Conv2d_3b_1x1')
      # 73 x 73 x 80.
      net = slim.conv2d(net, 192, [3, 3], scope='Conv2d_4a_3x3')
      # 71 x 71 x 192.
      net = slim.max_pool2d(net, [3, 3], stride=2, scope='MaxPool_5a_3x3')
      # 35 x 35 x 192.

    # The code above contains 5 convolutional layers and 2 pooling layers; it compresses the spatial size
    # of the image and extracts increasingly abstract image features.

    '''
    Next come three consecutive groups of Inception modules. Each group contains several Inception
    Modules; this part is the heart of Inception V3. Within a group, the Inception Modules share a very
    similar structure but differ in some details.
    '''
    # Inception blocks
    with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],  # defaults for all module groups
                        stride=1, padding='SAME'):  # stride 1 for all convolution, max pooling and average pooling layers
      # mixed: 35 x 35 x 256.
      # The first module group contains three structurally similar Inception Modules.
      with tf.variable_scope('Mixed_5b'):  # name of the first Inception Module; it has four branches
        with tf.variable_scope('Branch_0'):  # branch 1: a 64-channel 1x1 convolution
          branch_0 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):  # branch 2: a 48-channel 1x1 convolution followed by a 64-channel 5x5 convolution
          branch_1 = slim.conv2d(net, 48, [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, 64, [5, 5], scope='Conv2d_0b_5x5')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0c_3x3')
        with tf.variable_scope('Branch_3'):  # branch 4: 3x3 average pooling followed by a 32-channel 1x1 convolution
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, 32, [1, 1], scope='Conv2d_0b_1x1')
        net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)  # concatenate the four branch outputs along the channel (3rd) dimension

      '''
      Because every layer here uses stride 1 and SAME padding, the spatial size does not shrink, but the
      number of channels grows. The four branches contribute 64+64+96+32=256 channels, so the output
      tensor is 35x35x256. All Inception Modules in the first group keep the 35x35 spatial size, but the
      last two change the number of output channels.
      '''

      # mixed_1: 35 x 35 x 288.
      with tf.variable_scope('Mixed_5c'):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, 48, [1, 1], scope='Conv2d_0b_1x1')
          branch_1 = slim.conv2d(branch_1, 64, [5, 5], scope='Conv_1_0c_5x5')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0c_3x3')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
        net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)

      # mixed_2: 35 x 35 x 288.
      with tf.variable_scope('Mixed_5d'):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, 48, [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, 64, [5, 5], scope='Conv2d_0b_5x5')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0c_3x3')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
        net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)

      # The second Inception module group. Its second through fifth Inception Modules share a similar structure.
      # mixed_3: 17 x 17 x 768.
      with tf.variable_scope('Mixed_6a'):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, 384, [3, 3], stride=2,
                                 padding='VALID', scope='Conv2d_1a_1x1')  # the feature map is downsampled
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, 96, [3, 3], scope='Conv2d_0b_3x3')
          branch_1 = slim.conv2d(branch_1, 96, [3, 3], stride=2,
                                 padding='VALID', scope='Conv2d_1a_1x1')  # the feature map is downsampled
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
                                     scope='MaxPool_1a_3x3')
        net = tf.concat([branch_0, branch_1, branch_2], 3)  # the output size settles at 17 x 17 x 768

      # mixed4: 17 x 17 x 768.
      with tf.variable_scope('Mixed_6b'):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, 128, [1, 7], scope='Conv2d_0b_1x7')  # chaining a 1x7 and a 7x1 convolution approximates a 7x7 convolution with fewer parameters, reducing overfitting
          branch_1 = slim.conv2d(branch_1, 192, [7, 1], scope='Conv2d_0c_7x1')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1')  # the 7x7 convolution is factorized repeatedly
          branch_2 = slim.conv2d(branch_2, 128, [7, 1], scope='Conv2d_0b_7x1')
          branch_2 = slim.conv2d(branch_2, 128, [1, 7], scope='Conv2d_0c_1x7')
          branch_2 = slim.conv2d(branch_2, 128, [7, 1], scope='Conv2d_0d_7x1')
          branch_2 = slim.conv2d(branch_2, 192, [1, 7], scope='Conv2d_0e_1x7')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, 192, [1, 1], scope='Conv2d_0b_1x1')
        net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)

      # mixed_5: 17 x 17 x 768.
      with tf.variable_scope('Mixed_6c'):
        with tf.variable_scope('Branch_0'):
          '''
          Every time the network passes through an Inception module, even if the output size stays the
          same, the features are effectively refined once more; the rich convolutions and non-linearities
          contribute a great deal to the network's performance.
          '''
          branch_0 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, 160, [1, 7], scope='Conv2d_0b_1x7')
          branch_1 = slim.conv2d(branch_1, 192, [7, 1], scope='Conv2d_0c_7x1')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, 160, [7, 1], scope='Conv2d_0b_7x1')
          branch_2 = slim.conv2d(branch_2, 160, [1, 7], scope='Conv2d_0c_1x7')
          branch_2 = slim.conv2d(branch_2, 160, [7, 1], scope='Conv2d_0d_7x1')
          branch_2 = slim.conv2d(branch_2, 192, [1, 7], scope='Conv2d_0e_1x7')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, 192, [1, 1], scope='Conv2d_0b_1x1')
        net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)
      # mixed_6: 17 x 17 x 768.
      with tf.variable_scope('Mixed_6d'):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, 160, [1, 7], scope='Conv2d_0b_1x7')
          branch_1 = slim.conv2d(branch_1, 192, [7, 1], scope='Conv2d_0c_7x1')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, 160, [7, 1], scope='Conv2d_0b_7x1')
          branch_2 = slim.conv2d(branch_2, 160, [1, 7], scope='Conv2d_0c_1x7')
          branch_2 = slim.conv2d(branch_2, 160, [7, 1], scope='Conv2d_0d_7x1')
          branch_2 = slim.conv2d(branch_2, 192, [1, 7], scope='Conv2d_0e_1x7')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, 192, [1, 1], scope='Conv2d_0b_1x1')
        net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)

      # mixed_7: 17 x 17 x 768.
      with tf.variable_scope('Mixed_6e'):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, 192, [1, 7], scope='Conv2d_0b_1x7')
          branch_1 = slim.conv2d(branch_1, 192, [7, 1], scope='Conv2d_0c_7x1')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, 192, [7, 1], scope='Conv2d_0b_7x1')
          branch_2 = slim.conv2d(branch_2, 192, [1, 7], scope='Conv2d_0c_1x7')
          branch_2 = slim.conv2d(branch_2, 192, [7, 1], scope='Conv2d_0d_7x1')
          branch_2 = slim.conv2d(branch_2, 192, [1, 7], scope='Conv2d_0e_1x7')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, 192, [1, 1], scope='Conv2d_0b_1x1')
        net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points['Mixed_6e'] = net  # store Mixed_6e in end_points; it feeds the Auxiliary Classifier

      # The third Inception module group contains three Inception modules.
      # mixed_8: 8 x 8 x 1280.
      with tf.variable_scope('Mixed_7a'):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1')
          branch_0 = slim.conv2d(branch_0, 320, [3, 3], stride=2,
                                 padding='VALID', scope='Conv2d_1a_3x3')  # downsample the feature map
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, 192, [1, 7], scope='Conv2d_0b_1x7')
          branch_1 = slim.conv2d(branch_1, 192, [7, 1], scope='Conv2d_0c_7x1')
          branch_1 = slim.conv2d(branch_1, 192, [3, 3], stride=2,
                                 padding='VALID', scope='Conv2d_1a_3x3')
        with tf.variable_scope('Branch_2'):  # the pooling layer does not change the number of output channels
          branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
                                     scope='MaxPool_1a_3x3')
        net = tf.concat([branch_0, branch_1, branch_2], 3)  # the spatial size shrinks while the channel count grows; the total tensor size keeps decreasing
      # mixed_9: 8 x 8 x 2048.
      with tf.variable_scope('Mixed_7b'):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, 320, [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, 384, [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = tf.concat([
              slim.conv2d(branch_1, 384, [1, 3], scope='Conv2d_0b_1x3'),
              slim.conv2d(branch_1, 384, [3, 1], scope='Conv2d_0b_3x1')], 3)
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, 448, [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(
              branch_2, 384, [3, 3], scope='Conv2d_0b_3x3')
          branch_2 = tf.concat([
              slim.conv2d(branch_2, 384, [1, 3], scope='Conv2d_0c_1x3'),
              slim.conv2d(branch_2, 384, [3, 1], scope='Conv2d_0d_3x1')], 3)
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, 192, [1, 1], scope='Conv2d_0b_1x1')
        net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)  # the number of output channels grows to 2048

      # mixed_10: 8 x 8 x 2048.
      with tf.variable_scope('Mixed_7c'):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, 320, [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, 384, [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = tf.concat([
              slim.conv2d(branch_1, 384, [1, 3], scope='Conv2d_0b_1x3'),
              slim.conv2d(branch_1, 384, [3, 1], scope='Conv2d_0c_3x1')], 3)
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, 448, [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(
              branch_2, 384, [3, 3], scope='Conv2d_0b_3x3')
          branch_2 = tf.concat([
              slim.conv2d(branch_2, 384, [1, 3], scope='Conv2d_0c_1x3'),
              slim.conv2d(branch_2, 384, [3, 1], scope='Conv2d_0d_3x1')], 3)
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, 192, [1, 1], scope='Conv2d_0b_1x1')
        net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)

      # This completes the core (convolutional) part of the Inception V3 network.
      '''
      An important design principle of the Inception networks is that the feature map keeps shrinking:
      each Inception module group simplifies the spatial structure while converting spatial information
      into higher-level, more abstract feature information, i.e. it moves information from the spatial
      dimensions into the channel dimension, which also reduces the amount of computation. Each Inception
      Module combines a simple feature abstraction (branch 1), two more complex feature abstractions
      (branches 2 and 3) and a simplified pooling branch (branch 4) - four different levels of feature
      abstraction and transformation - so the network can selectively keep higher-order features at
      different levels and maximize its expressive power.
      '''
      return net, end_points


######## Global average pooling, Softmax, and the Auxiliary Logits ########
def inception_v3(inputs,
                 num_classes=1000,  # number of classes to predict (number of categories in the dataset)
                 is_training=True,  # whether this is the training phase; Batch Normalization and Dropout are only enabled during training
                 dropout_keep_prob=0.8,  # fraction of nodes kept by dropout
                 prediction_fn=slim.softmax,  # the function used for the final classification
                 spatial_squeeze=True,  # whether to squeeze the output (remove dimensions of size 1, e.g. 5x3x1 becomes 5x3)
                 reuse=None,  # whether to reuse the network and its Variables
                 scope='InceptionV3'):  # scope that carries the function's default arguments

  with tf.variable_scope(scope, 'InceptionV3', [inputs, num_classes],  # define default argument values
                         reuse=reuse) as scope:
    with slim.arg_scope([slim.batch_norm, slim.dropout],  # define default flag values
                        is_training=is_training):
      # Get the final layer's output net and the dictionary of important nodes end_points
      net, end_points = inception_v3_base(inputs, scope=scope)  # build the convolutional part of the network with the function defined above

      # The Auxiliary Head logits act as an auxiliary classification node and help the final prediction considerably
      with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                          stride=1, padding='SAME'):  # set the stride of convolution, max pooling and average pooling to 1
        aux_logits = end_points['Mixed_6e']  # fetch Mixed_6e from end_points
        with tf.variable_scope('AuxLogits'):
          aux_logits = slim.avg_pool2d(
              aux_logits, [5, 5], stride=3, padding='VALID',  # average pooling after Mixed_6e shrinks the feature map
              scope='AvgPool_1a_5x5')
          aux_logits = slim.conv2d(aux_logits, 128, [1, 1],  # 1x1 convolution
                                   scope='Conv2d_1b_1x1')

          # Shape of feature map before the final layer.
          aux_logits = slim.conv2d(
              aux_logits, 768, [5, 5],
              weights_initializer=trunc_normal(0.01),  # reset the weight initializer to a normal distribution with stddev 0.01
              padding='VALID', scope='Conv2d_2a_5x5')
          aux_logits = slim.conv2d(
              aux_logits, num_classes, [1, 1], activation_fn=None,
              normalizer_fn=None, weights_initializer=trunc_normal(0.001),  # the output becomes 1 x 1 x 1000
              scope='Conv2d_2b_1x1')
          if spatial_squeeze:  # tf.squeeze removes the two leading dimensions of size 1
            aux_logits = tf.squeeze(aux_logits, [1, 2], name='SpatialSqueeze')
          end_points['AuxLogits'] = aux_logits  # store the auxiliary classifier's output aux_logits in end_points

        # The regular classification prediction logic
        # Final pooling and prediction
        with tf.variable_scope('Logits'):
          net = slim.avg_pool2d(net, [8, 8], padding='VALID',
                                scope='AvgPool_1a_8x8')
          # 1 x 1 x 2048
          net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b')
          end_points['PreLogits'] = net
          # 2048
          logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,  # 1000 output channels
                               normalizer_fn=None, scope='Conv2d_1c_1x1')  # activation function and normalizer set to None
          if spatial_squeeze:  # tf.squeeze removes the dimensions of size 1 from the output tensor
            logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')
          # 1000
        end_points['Logits'] = logits
        end_points['Predictions'] = prediction_fn(logits, scope='Predictions')  # Softmax produces the final class predictions
  return logits, end_points  # return logits and end_points, which also contains the auxiliary node
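A minimal usage sketch, assuming 299x299 RGB inputs (the standard Inception V3 input size); building the graph inside the arg scope makes the defaults defined above apply to every layer:

images = tf.placeholder(tf.float32, [None, 299, 299, 3])
with slim.arg_scope(inception_v3_arg_scope()):
    logits, end_points = inception_v3(images, num_classes=1000, is_training=False)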

ResNet
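The ResNet class below relies on numpy, six and TensorFlow's moving-average helper without showing the imports; assuming it follows the layout of the TensorFlow models' resnet_model.py, the missing imports would be:

import numpy as np
import six
import tensorflow as tf
from tensorflow.python.training import moving_averages  # used for the BN moving averages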

class ResNet(object):

  def __init__(self, hps, images, labels, mode):
    self.hps = hps
    self._images = images
    self.labels = labels
    self.mode = mode

    self._extra_train_ops = []

  # Build the model graph
  def build_graph(self):
    # Create the global step
    self.global_step = tf.contrib.framework.get_or_create_global_step()
    # Build the ResNet model
    self._build_model()
    # Build the training op
    if self.mode == 'train':
        self._build_train_op()
    # Merge all summaries
    self.summaries = tf.summary.merge_all()


  # Build the model
  def _build_model(self):
    with tf.variable_scope('init'):
        x = self._images
        """First convolutional layer (3, 3x3/1, 16)"""
        x = self._conv('init_conv', x, 3, 3, 16, self._stride_arr(1))

    # Residual network parameters
    strides = [1, 2, 2]
    # Whether to apply the activation before the residual branch
    activate_before_residual = [True, False, False]
    if self.hps.use_bottleneck:
      # Bottleneck residual unit
      res_func = self._bottleneck_residual
      # Number of channels
      filters = [16, 64, 128, 256]
    else:
      # Standard residual unit
      res_func = self._residual
      # Number of channels
      filters = [16, 16, 32, 64]

    # First group
    with tf.variable_scope('unit_1_0'):
        x = res_func(x, filters[0], filters[1], 
                   self._stride_arr(strides[0]),
                   activate_before_residual[0])
    for i in six.moves.range(1, self.hps.num_residual_units):
        with tf.variable_scope('unit_1_%d' % i):
            x = res_func(x, filters[1], filters[1], self._stride_arr(1), False)

    # Second group
    with tf.variable_scope('unit_2_0'):
        x = res_func(x, filters[1], filters[2], 
                   self._stride_arr(strides[1]),
                   activate_before_residual[1])
    for i in six.moves.range(1, self.hps.num_residual_units):
        with tf.variable_scope('unit_2_%d' % i):
            x = res_func(x, filters[2], filters[2], self._stride_arr(1), False)
        
    # Third group
    with tf.variable_scope('unit_3_0'):
        x = res_func(x, filters[2], filters[3], self._stride_arr(strides[2]),
                   activate_before_residual[2])
    for i in six.moves.range(1, self.hps.num_residual_units):
        with tf.variable_scope('unit_3_%d' % i):
            x = res_func(x, filters[3], filters[3], self._stride_arr(1), False)

    # Global average pooling layer
    with tf.variable_scope('unit_last'):
        x = self._batch_norm('final_bn', x)
        x = self._relu(x, self.hps.relu_leakiness)
        x = self._global_avg_pool(x)

    # Fully connected layer + Softmax
    with tf.variable_scope('logit'):
        logits = self._fully_connected(x, self.hps.num_classes)
        self.predictions = tf.nn.softmax(logits)

    # Build the loss function
    with tf.variable_scope('costs'):
      # Cross entropy
      xent = tf.nn.softmax_cross_entropy_with_logits(
          logits=logits, labels=self.labels)
      # Average over the batch
      self.cost = tf.reduce_mean(xent, name='xent')
      # L2 regularization (weight decay)
      self.cost += self._decay()
      # Add a cost summary for TensorBoard
      tf.summary.scalar('cost', self.cost)

  # Build the training op
  def _build_train_op(self):
    # Learning rate
    self.lrn_rate = tf.constant(self.hps.lrn_rate, tf.float32)
    tf.summary.scalar('learning_rate', self.lrn_rate)

    # Compute the gradients of the trainable variables
    trainable_variables = tf.trainable_variables()
    grads = tf.gradients(self.cost, trainable_variables)

    # Choose the optimizer
    if self.hps.optimizer == 'sgd':
        optimizer = tf.train.GradientDescentOptimizer(self.lrn_rate)
    elif self.hps.optimizer == 'mom':
        optimizer = tf.train.MomentumOptimizer(self.lrn_rate, 0.9)

    # Gradient update op
    apply_op = optimizer.apply_gradients(
                        zip(grads, trainable_variables),
                        global_step=self.global_step, 
                        name='train_step')

    # Merge with the BN update ops
    train_ops = [apply_op] + self._extra_train_ops
    # Group everything into a single training op
    self.train_op = tf.group(*train_ops)


  # Convert a scalar stride into the 4-element stride array required by tf.nn.conv2d
  def _stride_arr(self, stride):    
    return [1, stride, stride, 1]

  # Residual unit
  def _residual(self, x, in_filter, out_filter, stride, activate_before_residual=False):
    # Whether to apply BN and ReLU before taking the shortcut branch
    if activate_before_residual:
      with tf.variable_scope('shared_activation'):
        # Apply BN and ReLU first
        x = self._batch_norm('init_bn', x)
        x = self._relu(x, self.hps.relu_leakiness)
        # Take the shortcut (identity) branch
        orig_x = x
    else:
      with tf.variable_scope('residual_only_activation'):
        # Take the shortcut (identity) branch
        orig_x = x
        # Then apply BN and ReLU
        x = self._batch_norm('init_bn', x)
        x = self._relu(x, self.hps.relu_leakiness)

    # First sub-layer
    with tf.variable_scope('sub1'):
      # 3x3 convolution with the given stride, channels in_filter -> out_filter
      x = self._conv('conv1', x, 3, in_filter, out_filter, stride)

    # Second sub-layer
    with tf.variable_scope('sub2'):
      # BN and ReLU activation
      x = self._batch_norm('bn2', x)
      x = self._relu(x, self.hps.relu_leakiness)
      # 3x3 convolution with stride 1, channel count unchanged (out_filter)
      x = self._conv('conv2', x, 3, out_filter, out_filter, [1, 1, 1, 1])

    # Merge with the shortcut branch
    with tf.variable_scope('sub_add'):
      # When the number of channels changes
      if in_filter != out_filter:
        # Average pooling, no padding
        orig_x = tf.nn.avg_pool(orig_x, stride, stride, 'VALID')
        # Zero-pad the channels (pad the 4th dimension symmetrically)
        orig_x = tf.pad(orig_x, 
                        [[0, 0], 
                         [0, 0], 
                         [0, 0],
                         [(out_filter-in_filter)//2, (out_filter-in_filter)//2]
                        ])
      # Add the shortcut
      x += orig_x

    tf.logging.debug('image after unit %s', x.get_shape())
    return x

  # Bottleneck residual unit
  def _bottleneck_residual(self, x, in_filter, out_filter, stride,
                           activate_before_residual=False):
    # Whether to apply BN and ReLU before taking the shortcut branch
    if activate_before_residual:
      with tf.variable_scope('common_bn_relu'):
        # Apply BN and ReLU first
        x = self._batch_norm('init_bn', x)
        x = self._relu(x, self.hps.relu_leakiness)
        # Take the shortcut (identity) branch
        orig_x = x
    else:
      with tf.variable_scope('residual_bn_relu'):
        # Take the shortcut (identity) branch
        orig_x = x
        # Then apply BN and ReLU
        x = self._batch_norm('init_bn', x)
        x = self._relu(x, self.hps.relu_leakiness)

    # First sub-layer
    with tf.variable_scope('sub1'):
      # 1x1 convolution with the given stride, channels in_filter -> out_filter/4
      x = self._conv('conv1', x, 1, in_filter, out_filter/4, stride)

    # Second sub-layer
    with tf.variable_scope('sub2'):
      # BN and ReLU activation
      x = self._batch_norm('bn2', x)
      x = self._relu(x, self.hps.relu_leakiness)
      # 3x3 convolution with stride 1, channel count unchanged (out_filter/4)
      x = self._conv('conv2', x, 3, out_filter/4, out_filter/4, [1, 1, 1, 1])

    # Third sub-layer
    with tf.variable_scope('sub3'):
      # BN and ReLU activation
      x = self._batch_norm('bn3', x)
      x = self._relu(x, self.hps.relu_leakiness)
      # 1x1 convolution with stride 1, channels out_filter/4 -> out_filter
      x = self._conv('conv3', x, 1, out_filter/4, out_filter, [1, 1, 1, 1])

    # Merge with the shortcut branch
    with tf.variable_scope('sub_add'):
      # When the number of channels changes
      if in_filter != out_filter:
        # 1x1 projection convolution with the given stride, channels in_filter -> out_filter
        orig_x = self._conv('project', orig_x, 1, in_filter, out_filter, stride)

      # Add the shortcut
      x += orig_x

    tf.logging.info('image after unit %s', x.get_shape())
    return x


  # Batch Normalization
  # ((x-mean)/var)*gamma+beta
  def _batch_norm(self, name, x):
    with tf.variable_scope(name):
      # Number of input channels
      params_shape = [x.get_shape()[-1]]
      # offset
      beta = tf.get_variable('beta', 
                             params_shape, 
                             tf.float32,
                             initializer=tf.constant_initializer(0.0, tf.float32))
      # scale
      gamma = tf.get_variable('gamma', 
                              params_shape, 
                              tf.float32,
                              initializer=tf.constant_initializer(1.0, tf.float32))

      if self.mode == 'train':
        # Compute the mean and variance for each channel
        mean, variance = tf.nn.moments(x, [0, 1, 2], name='moments')
        # Create the moving mean and variance used at test time
        moving_mean = tf.get_variable('moving_mean', 
                                      params_shape, tf.float32,
                                      initializer=tf.constant_initializer(0.0, tf.float32),
                                      trainable=False)
        moving_variance = tf.get_variable('moving_variance', 
                                          params_shape, tf.float32,
                                          initializer=tf.constant_initializer(1.0, tf.float32),
                                          trainable=False)
        # Add the moving-average update ops for the batch mean and variance:
        # moving_mean = moving_mean * decay + mean * (1 - decay)
        # moving_variance = moving_variance * decay + variance * (1 - decay)
        self._extra_train_ops.append(moving_averages.assign_moving_average(
                                                        moving_mean, mean, 0.9))
        self._extra_train_ops.append(moving_averages.assign_moving_average(
                                                        moving_variance, variance, 0.9))
      else:
        # Use the moving mean and variance accumulated during training
        mean = tf.get_variable('moving_mean', 
                               params_shape, tf.float32,
                               initializer=tf.constant_initializer(0.0, tf.float32),
                               trainable=False)
        variance = tf.get_variable('moving_variance', 
                                   params_shape, tf.float32,
                                   initializer=tf.constant_initializer(1.0, tf.float32),
                                   trainable=False)
        # Add histogram summaries
        tf.summary.histogram(mean.op.name, mean)
        tf.summary.histogram(variance.op.name, variance)

      # BN layer: ((x-mean)/var)*gamma+beta
      y = tf.nn.batch_normalization(x, mean, variance, beta, gamma, 0.001)
      y.set_shape(x.get_shape())
      return y


  # Weight decay, i.e. the L2-regularization loss
  def _decay(self):
    costs = []
    # Iterate over all trainable variables
    for var in tf.trainable_variables():
      # Only include variables whose name contains 'DW'
      if var.op.name.find(r'DW') > 0:
        costs.append(tf.nn.l2_loss(var))
    # Sum them and multiply by the decay factor
    return tf.multiply(self.hps.weight_decay_rate, tf.add_n(costs))

  # 2D convolution
  def _conv(self, name, x, filter_size, in_filters, out_filters, strides):
    with tf.variable_scope(name):
      n = filter_size * filter_size * out_filters
      # Get or create the convolution kernel, initialized from a normal distribution
      kernel = tf.get_variable(
              'DW', 
              [filter_size, filter_size, in_filters, out_filters],
              tf.float32, 
              initializer=tf.random_normal_initializer(stddev=np.sqrt(2.0/n)))
      # Apply the convolution
      return tf.nn.conv2d(x, kernel, strides, padding='SAME')

  # Leaky ReLU activation; with leakiness 0 it is a standard ReLU
  def _relu(self, x, leakiness=0.0):
    return tf.where(tf.less(x, 0.0), leakiness * x, x, name='leaky_relu')

  # Fully connected layer, the last layer of the network
  def _fully_connected(self, x, out_dim):
    # Reshape the input into a 2D tensor of shape [N, -1]
    x = tf.reshape(x, [self.hps.batch_size, -1])
    # Weights w, uniformly initialized in [-sqrt(3/dim), sqrt(3/dim)]*factor
    w = tf.get_variable('DW', [x.get_shape()[1], out_dim],
                        initializer=tf.uniform_unit_scaling_initializer(factor=1.0))
    # Bias b, initialized to 0
    b = tf.get_variable('biases', [out_dim], initializer=tf.constant_initializer())
    # Compute x*w+b
    return tf.nn.xw_plus_b(x, w, b)

  # Global average pooling
  def _global_avg_pool(self, x):
    assert x.get_shape().ndims == 4
    # Average over the height and width dimensions, shrinking WxH to 1x1
    return tf.reduce_mean(x, [1, 2])
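A minimal sketch of how the class might be driven; the HParams field names follow the self.hps.* attributes used above, while the concrete values, input size and batch size are assumptions for illustration:

import collections

HParams = collections.namedtuple('HParams',
                                 ['batch_size', 'num_classes', 'lrn_rate', 'num_residual_units',
                                  'use_bottleneck', 'weight_decay_rate', 'relu_leakiness', 'optimizer'])

hps = HParams(batch_size=128, num_classes=10, lrn_rate=0.1, num_residual_units=5,
              use_bottleneck=False, weight_decay_rate=0.0002, relu_leakiness=0.1, optimizer='mom')

images = tf.placeholder(tf.float32, [128, 32, 32, 3])   # assumed CIFAR-10-sized input batch
labels = tf.placeholder(tf.float32, [128, 10])          # one-hot labels
model = ResNet(hps, images, labels, mode='train')
model.build_graph()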