References
TensorFlow Machine Learning Cookbook
Reading the binary CIFAR-10 dataset with TensorFlow
TensorFlow official documentation
Notes on tf.transpose
Notes on tf.slice
Introduction to the CIFAR-10/CIFAR-100 datasets
Notes on tf.train.shuffle_batch
Notes on Python urllib urlretrieve
Implementing learning rate decay in TensorFlow
The cross-entropy loss function
tf.nn.local_response_normalization (local response normalization)
Building a CNN on the CIFAR-10 binary dataset with TensorFlow
# More Advanced CNN Model: CIFAR-10
# ---------------------------------------
#
# In this example, we will download the CIFAR-10 images
# and build a CNN model with dropout and regularization.
#
# CIFAR is composed of 50k train and 10k test
# images that are 32x32.

import os
import sys
import tarfile
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from six.moves import urllib
from tensorflow.python.framework import ops

ops.reset_default_graph()

# Change the working directory
abspath = os.path.abspath(__file__)  # absolute path of this file
# E:\GitHub\TF_Cookbook\08_Convolutional_Neural_Networks\03_CNN_CIFAR10\ostest.py
dname = os.path.dirname(abspath)  # folder containing this file
# E:\GitHub\TF_Cookbook\08_Convolutional_Neural_Networks\03_CNN_CIFAR10
os.chdir(dname)  # switch the working directory to that folder

# Start a graph session
sess = tf.Session()

# Set model hyperparameters
batch_size = 128  # batch size
data_dir = 'temp'  # data directory
output_every = 50  # print the training loss every 50 generations
generations = 20000  # number of training generations
eval_every = 500  # evaluate on a test batch every 500 generations
image_height = 32  # image height
image_width = 32  # image width
crop_height = 24  # cropped image height
crop_width = 24  # cropped image width
num_channels = 3  # number of color channels
num_targets = 10  # number of target classes
extract_folder = 'cifar-10-batches-bin'

# Exponential learning rate decay parameters
learning_rate = 0.1  # initial learning rate
lr_decay = 0.1  # learning rate decay factor
num_gens_to_wait = 250.  # decay the learning rate every 250 generations

# Extract model parameters
image_vec_length = image_height * image_width * num_channels  # length of one image flattened into a vector
record_length = 1 + image_vec_length  # ( + 1 for the 0-9 label)

# Load data
data_dir = 'temp'
if not os.path.exists(data_dir):  # does the temp folder exist under the current directory?
    os.makedirs(data_dir)  # if not, create it

# CIFAR-10 download URL
cifar10_url = 'http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz'

# Check whether the archive already exists; download it if not
data_file = os.path.join(data_dir, 'cifar-10-binary.tar.gz')
# temp\cifar-10-binary.tar.gz
if os.path.isfile(data_file):
    pass
else:
    # Callback triggered when the connection to the server is made and after each data block
    # finishes transferring; we use it to display the current download progress.
    # block_num: blocks downloaded so far, block_size: size of a block, total_size: total file size
    def progress(block_num, block_size, total_size):
        progress_info = [cifar10_url, float(block_num * block_size) / float(total_size) * 100.0]
        print('\r Downloading {} - {:.2f}%'.format(*progress_info), end="")

    # urlretrieve(url, filename=None, reporthook=None, data=None)
    # filename is the local path to save to (if omitted, urllib writes to a temporary file).
    # reporthook is a callback triggered when the connection is made and after each data block
    # is transferred, which we use here to show download progress.
    # data is the data to POST to the server. The call returns a (filename, headers) tuple,
    # where filename is the local path and headers is the server's response headers.
    # Here url=cifar10_url, filename=data_file, reporthook=progress.
    filepath, _ = urllib.request.urlretrieve(cifar10_url, data_file, progress)
    # Extract the archive
    tarfile.open(filepath, 'r:gz').extractall(data_dir)
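After extraction, the six binary batch files should sit under temp/cifar-10-batches-bin. The following check is not part of the original script; it is a minimal optional sketch, assuming the constants above, that confirms the files are in place before the input pipeline is built:

# Optional sanity check (illustrative only): verify the extracted CIFAR-10 binaries exist
expected_files = ['data_batch_{}.bin'.format(i) for i in range(1, 6)] + ['test_batch.bin']
extract_dir = os.path.join('temp', 'cifar-10-batches-bin')
missing = [f for f in expected_files if not os.path.isfile(os.path.join(extract_dir, f))]
print('All CIFAR-10 binaries present.' if not missing else 'Missing: {}'.format(missing))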
# Define CIFAR reader
def read_cifar_files(filename_queue, distort_images=True):
    reader = tf.FixedLengthRecordReader(record_bytes=record_length)
    # Fixed-length record reader; record_length is one image record, i.e. 1 + 32*32*3 bytes
    key, record_string = reader.read(filename_queue)
    # tf.FixedLengthRecordReader.read returns a (key, value) pair
    record_bytes = tf.decode_raw(record_string, tf.uint8)
    # The raw record is a string; decode_raw converts it to uint8 values
    image_label = tf.cast(tf.slice(record_bytes, [0], [1]), tf.int32)
    # See the tf.slice reference: take one element starting at index 0 and cast it to int32.
    # This element holds the image label.

    # Extract image
    image_extracted = tf.reshape(tf.slice(record_bytes, [1], [image_vec_length]),
                                 [num_channels, image_height, image_width])
    # The image data starts at index 1, matching the storage layout of this dataset.
    # In the CIFAR-10 binary format:
    """The first byte is the label of the first image, a number in the range 0-9.
    The next 3072 bytes are the pixel values of the image: the first 1024 bytes are the
    red channel, the next 1024 the green, and the last 1024 the blue. The values are
    stored in row-major order, so the first 32 bytes are the red channel values of the
    first row of the image. Each file contains 10000 such 3073-byte "rows" of images,
    with nothing delimiting the rows, so each file is exactly 30730000 bytes long."""

    # Reshape image
    image_uint8image = tf.transpose(image_extracted, [1, 2, 0])
    # See the tf.transpose reference: converts the [channel, image_height, image_width]
    # layout into [image_height, image_width, channel].
    reshaped_image = tf.cast(image_uint8image, tf.float32)

    # Crop or pad the image to the target size
    final_image = tf.image.resize_image_with_crop_or_pad(reshaped_image, crop_width, crop_height)

    if distort_images:
        # Randomly flip the image horizontally and change its brightness and contrast
        final_image = tf.image.random_flip_left_right(final_image)
        final_image = tf.image.random_brightness(final_image, max_delta=63)
        final_image = tf.image.random_contrast(final_image, lower=0.2, upper=1.8)

    # Standardize the image
    """Linearly scales `image` to have zero mean and unit norm.
    This op computes `(x - mean) / adjusted_stddev`, where `mean` is the average of all
    values in image, and `adjusted_stddev = max(stddev, 1.0/sqrt(image.NumElements()))`.
    `stddev` is the standard deviation of all values in `image`. It is capped away from
    zero to protect against division by 0 when handling uniform images."""
    final_image = tf.image.per_image_standardization(final_image)
    return (final_image, image_label)


# Create a CIFAR image pipeline from reader
def input_pipeline(batch_size, train_logical=True):
    # The train_logical flag selects the training or the test files
    if train_logical:
        files = [os.path.join(data_dir, extract_folder, 'data_batch_{}.bin'.format(i)) for i in range(1, 6)]
        # data_dir = 'temp'
        # extract_folder = 'cifar-10-batches-bin'
    else:
        files = [os.path.join(data_dir, extract_folder, 'test_batch.bin')]
    filename_queue = tf.train.string_input_producer(files)
    image, label = read_cifar_files(filename_queue)

    # min_after_dequeue defines how big a buffer we will randomly sample
    # from -- bigger means better shuffling but slower start up and more
    # memory used.
    # capacity must be larger than min_after_dequeue and the amount larger
    # determines the maximum we will prefetch. Recommendation:
    # min_after_dequeue + (num_threads + a small safety margin) * batch_size
    min_after_dequeue = 5000
    capacity = min_after_dequeue + 3 * batch_size
    example_batch, label_batch = tf.train.shuffle_batch([image, label],
                                                        batch_size=batch_size,
                                                        capacity=capacity,
                                                        min_after_dequeue=min_after_dequeue)

    return (example_batch, label_batch)
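The record layout handled by the reader is easy to verify outside of the TensorFlow queue. Below is a small illustrative sketch (the path to data_batch_1.bin is assumed from the download step above and is not part of the original pipeline) that decodes the first 3073-byte record with plain NumPy, mirroring the slice, reshape, and transpose done in read_cifar_files:

# Illustrative only: decode one raw CIFAR-10 record with NumPy
record_bytes_per_image = 1 + 32 * 32 * 3  # 1 label byte + 3072 pixel bytes per record
# Path assumed from the download/extract step above; adjust if your layout differs.
with open('temp/cifar-10-batches-bin/data_batch_1.bin', 'rb') as f:
    record = np.frombuffer(f.read(record_bytes_per_image), dtype=np.uint8)

label = int(record[0])                  # a value in the range 0-9
pixels = record[1:].reshape(3, 32, 32)  # [channel, height, width], as stored on disk
image = pixels.transpose(1, 2, 0)       # [height, width, channel], same as tf.transpose(..., [1, 2, 0])
print(label, image.shape)               # prints the label and (32, 32, 3)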
# Define the model architecture, this will return logits from images
def cifar_cnn_model(input_images, batch_size, train_logical=True):
    # Truncated-normal variable initializer
    def truncated_normal_var(name, shape, dtype):
        return (tf.get_variable(name=name, shape=shape, dtype=dtype,
                                initializer=tf.truncated_normal_initializer(stddev=0.05)))

    # Zero variable initializer
    def zero_var(name, shape, dtype):
        return (tf.get_variable(name=name, shape=shape, dtype=dtype,
                                initializer=tf.constant_initializer(0.0)))

    # First Convolutional Layer
    with tf.variable_scope('conv1') as scope:
        # Conv_kernel is 5x5 for all 3 colors and we will create 64 features
        conv1_kernel = truncated_normal_var(name='conv_kernel1', shape=[5, 5, 3, 64], dtype=tf.float32)
        # We convolve across the image with a stride size of 1
        conv1 = tf.nn.conv2d(input_images, conv1_kernel, [1, 1, 1, 1], padding='SAME')
        # Initialize and add the bias term
        conv1_bias = zero_var(name='conv_bias1', shape=[64], dtype=tf.float32)
        conv1_add_bias = tf.nn.bias_add(conv1, conv1_bias)
        # ReLU element wise
        relu_conv1 = tf.nn.relu(conv1_add_bias)

    # Max Pooling
    pool1 = tf.nn.max_pool(relu_conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                           padding='SAME', name='pool_layer1')

    # Local Response Normalization (parameters from paper)
    # http://blog.csdn.net/mao_xiao_feng/article/details/53488271
    # paper: http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks
    norm1 = tf.nn.lrn(pool1, depth_radius=5, bias=2.0, alpha=1e-3, beta=0.75, name='norm1')

    # Second Convolutional Layer
    with tf.variable_scope('conv2') as scope:
        # Conv kernel is 5x5, across all prior 64 features and we create 64 more features
        conv2_kernel = truncated_normal_var(name='conv_kernel2', shape=[5, 5, 64, 64], dtype=tf.float32)
        # Convolve filter across prior output with stride size of 1
        conv2 = tf.nn.conv2d(norm1, conv2_kernel, [1, 1, 1, 1], padding='SAME')
        # Initialize and add the bias
        conv2_bias = zero_var(name='conv_bias2', shape=[64], dtype=tf.float32)
        conv2_add_bias = tf.nn.bias_add(conv2, conv2_bias)
        # ReLU element wise
        relu_conv2 = tf.nn.relu(conv2_add_bias)

    # Max Pooling
    pool2 = tf.nn.max_pool(relu_conv2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                           padding='SAME', name='pool_layer2')

    # Local Response Normalization (parameters from paper)
    norm2 = tf.nn.lrn(pool2, depth_radius=5, bias=2.0, alpha=1e-3, beta=0.75, name='norm2')

    # Reshape output into a single matrix for multiplication for the fully connected layers
    reshaped_output = tf.reshape(norm2, [batch_size, -1])
    reshaped_dim = reshaped_output.get_shape()[1].value

    # First Fully Connected Layer
    with tf.variable_scope('full1') as scope:
        # Fully connected layer will have 384 outputs.
        full_weight1 = truncated_normal_var(name='full_mult1', shape=[reshaped_dim, 384], dtype=tf.float32)
        full_bias1 = zero_var(name='full_bias1', shape=[384], dtype=tf.float32)
        full_layer1 = tf.nn.relu(tf.add(tf.matmul(reshaped_output, full_weight1), full_bias1))

    # Second Fully Connected Layer
    with tf.variable_scope('full2') as scope:
        # Second fully connected layer has 192 outputs.
        full_weight2 = truncated_normal_var(name='full_mult2', shape=[384, 192], dtype=tf.float32)
        full_bias2 = zero_var(name='full_bias2', shape=[192], dtype=tf.float32)
        full_layer2 = tf.nn.relu(tf.add(tf.matmul(full_layer1, full_weight2), full_bias2))

    # Final Fully Connected Layer -> 10 categories for output (num_targets)
    with tf.variable_scope('full3') as scope:
        # Final fully connected layer has 10 (num_targets) outputs.
        full_weight3 = truncated_normal_var(name='full_mult3', shape=[192, num_targets], dtype=tf.float32)
        full_bias3 = zero_var(name='full_bias3', shape=[num_targets], dtype=tf.float32)
        final_output = tf.add(tf.matmul(full_layer2, full_weight3), full_bias3)

    return (final_output)


# Loss function
def cifar_loss(logits, targets):
    # Get rid of extra dimensions and cast targets into integers
    targets = tf.squeeze(tf.cast(targets, tf.int32))
    # Calculate cross entropy from logits and targets
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=targets)
    # Take the average loss across batch size
    cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
    return (cross_entropy_mean)


# Train step
def train_step(loss_value, generation_num):
    # Our learning rate is an exponential decay after we wait a fair number of generations
    model_learning_rate = tf.train.exponential_decay(learning_rate, generation_num,
                                                     num_gens_to_wait, lr_decay, staircase=True)
    # Create optimizer
    my_optimizer = tf.train.GradientDescentOptimizer(model_learning_rate)
    # Initialize train step
    train_step = my_optimizer.minimize(loss_value)
    return (train_step)


# Accuracy function
def accuracy_of_batch(logits, targets):
    # Get rid of extra dimensions and make sure targets are integers
    targets = tf.squeeze(tf.cast(targets, tf.int32))
    # Get predicted values by finding which logit is the greatest
    batch_predictions = tf.cast(tf.argmax(logits, 1), tf.int32)
    # Check if they are equal across the batch
    predicted_correctly = tf.equal(batch_predictions, targets)
    # Average the 1's and 0's (True's and False's) across the batch size
    accuracy = tf.reduce_mean(tf.cast(predicted_correctly, tf.float32))
    return (accuracy)
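With staircase=True, the schedule built in train_step multiplies the learning rate by lr_decay once every num_gens_to_wait generations, i.e. lr * lr_decay ** floor(step / num_gens_to_wait). A minimal pure-Python sketch of that same formula (not part of the training graph, shown only to make the steps visible) for the hyperparameters above:

# Staircase exponential decay: lr * decay ** floor(step / decay_steps)
def decayed_lr(step, lr=0.1, decay=0.1, decay_steps=250):
    return lr * decay ** (step // decay_steps)

for step in (0, 249, 250, 499, 500, 1000):
    print(step, decayed_lr(step))
# generations 0-249 train at 0.1, 250-499 at 0.01, 500-749 at 0.001, and so on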
# Initialize the data pipeline
images, targets = input_pipeline(batch_size, train_logical=True)
# Get batch test images and targets from pipeline
test_images, test_targets = input_pipeline(batch_size, train_logical=False)

# Declare Model
print('Creating the CIFAR10 Model.')
with tf.variable_scope('model_definition') as scope:
    # Declare the training network model
    model_output = cifar_cnn_model(images, batch_size)
    # This is very important: we must tell the scope to reuse variables.
    # Otherwise, when we declare the test network model it would create a new set of
    # random variables, and the test batches would be evaluated with untrained weights,
    # which would distort the evaluation results.
    scope.reuse_variables()
    test_output = cifar_cnn_model(test_images, batch_size)

# Declare loss function
print('Declare Loss Function.')
loss = cifar_loss(model_output, targets)

# Create accuracy function
accuracy = accuracy_of_batch(test_output, test_targets)

# Create training operations
print('Creating the Training Operation.')
generation_num = tf.Variable(0, trainable=False)
train_op = train_step(loss, generation_num)

# Initialize Variables
print('Initializing the Variables.')
init = tf.global_variables_initializer()
sess.run(init)

# Initialize queue (this queue feeds the model, so no placeholders are necessary)
tf.train.start_queue_runners(sess=sess)
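The scope.reuse_variables() call above is what lets the test network share the weights the training network created through tf.get_variable, instead of allocating a second set of random weights. A short self-contained sketch of the same mechanism (tf is already imported above; the 'demo' scope and tiny_layer helper are illustrative, not part of the CIFAR model):

# Illustrative only: variable sharing with tf.variable_scope reuse
def tiny_layer(x):
    # Builds its weight with tf.get_variable, like the helpers inside cifar_cnn_model
    w = tf.get_variable('w', shape=[1], initializer=tf.ones_initializer())
    return x * w

with tf.variable_scope('demo') as scope:
    train_out = tiny_layer(tf.constant([2.0]))  # creates the variable demo/w
    scope.reuse_variables()                     # later tf.get_variable calls reuse existing variables
    test_out = tiny_layer(tf.constant([3.0]))   # reuses demo/w; no new weights are created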
# Train CIFAR Model
print('Starting Training')
train_loss = []
test_accuracy = []
for i in range(generations):
    _, loss_value = sess.run([train_op, loss])

    if (i + 1) % output_every == 0:
        train_loss.append(loss_value)
        output = 'Generation {}: Loss = {:.5f}'.format((i + 1), loss_value)
        print(output)

    if (i + 1) % eval_every == 0:
        [temp_accuracy] = sess.run([accuracy])
        test_accuracy.append(temp_accuracy)
        acc_output = ' --- Test Accuracy = {:.2f}%.'.format(100. * temp_accuracy)
        print(acc_output)
# Print loss and accuracy
# Matplotlib code to plot the loss and accuracies
eval_indices = range(0, generations, eval_every)
output_indices = range(0, generations, output_every)

# Plot loss over time
plt.plot(output_indices, train_loss, 'k-')
plt.title('Softmax Loss per Generation')
plt.xlabel('Generation')
plt.ylabel('Softmax Loss')
plt.show()

# Plot accuracy over time
plt.plot(eval_indices, test_accuracy, 'k-')
plt.title('Test Accuracy')
plt.xlabel('Generation')
plt.ylabel('Accuracy')
plt.show()