In a previous demo, we used a conditional GAN to generate images of handwritten digits. So beyond generating digits, what else can we do with neural networks?
In this case study, we use a neural network to colorize Pokémon line art.
Step 1: Import the required libraries
```python
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf

tf.enable_eager_execution()

import numpy as np
import pandas as pd
import os
import time
import matplotlib.pyplot as plt
from IPython.display import clear_output
```
Training the Pokémon colorization model needs a fairly large amount of GPU memory. To make sure the model runs smoothly on an RTX 2070, we cap GPU memory usage at 90% to avoid out-of-memory errors.
```python
config = tf.compat.v1.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.9
session = tf.compat.v1.Session(config=config)
```
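If you are on a TF 2.x setup without the v1 compat layer (an assumption; this post itself uses the v1 Session API above), a rough equivalent is to cap the GPU with a virtual device configuration. The `memory_limit` below is a hypothetical value of roughly 90% of an 8 GB RTX 2070:

```python
# Hypothetical TF 2.x alternative: cap GPU memory instead of using a v1 Session.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    tf.config.experimental.set_virtual_device_configuration(
        gpus[0],
        [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=7200)])
```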
Define the constants we will use.
```python
BUFFER_SIZE = 400
BATCH_SIZE = 1
IMG_WIDTH = 256
IMG_HEIGHT = 256
PATH = 'dataset/'
OUTPUT_CHANNELS = 3
LAMBDA = 100
EPOCHS = 10
```
Step 2: Define the functions we need
The image-loading function reads an image file through TensorFlow's IO API and places it into a tensor object for convenient later use.
```python
def load(image_file):
    image = tf.io.read_file(image_file)
    image = tf.image.decode_jpeg(image)

    # Each file holds two images side by side: the left half is the
    # line art (input) and the right half is the colored target.
    w = tf.shape(image)[1]
    w = w // 2
    input_image = image[:, :w, :]
    real_image = image[:, w:, :]

    input_image = tf.cast(input_image, tf.float32)
    real_image = tf.cast(real_image, tf.float32)
    return input_image, real_image
```
A function to convert a tensor object into a numpy object
During training, we will visualize some results and intermediate images. TensorFlow tensors cannot be used directly in matplotlib, so we need a function that converts a tensor into a numpy array.
```python
def tensor_to_array(tensor1):
    return tensor1.numpy()
```
Step 3: Data visualization
Let's first look at what our training data looks like. Each data image is split into two halves: the left half is the line art, which we use as the input, and the right half is the colored image, which we use as the training target. Let's load one image with the load function defined above:
```python
input, real = load(PATH + 'train/114.jpg')

plt.figure()
plt.imshow(tensor_to_array(input) / 255.0)
plt.figure()
plt.imshow(tensor_to_array(real) / 255.0)
```
Step 4: Data augmentation
Since we do not have much training data, we use data augmentation to increase the number of samples, so that even a small dataset can achieve good results.
咱們採起以下的數據加強方案:測試
- 圖片縮放, 將輸入數據的圖片縮放到咱們指定的圖片的大小
- 隨機裁剪
- 數據歸一化
- 左右翻轉
```python
def resize(input_image, real_image, height, width):
    input_image = tf.image.resize(input_image, [height, width],
                                  method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    real_image = tf.image.resize(real_image, [height, width],
                                 method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    return input_image, real_image
```
```python
def random_crop(input_image, real_image):
    stacked_image = tf.stack([input_image, real_image], axis=0)
    cropped_image = tf.image.random_crop(stacked_image,
                                         size=[2, IMG_HEIGHT, IMG_WIDTH, 3])
    return cropped_image[0], cropped_image[1]
```
Normalization maps the pixel values from [0, 255] to [-1, 1]; it is called by the data loaders below:

```python
def normalize(input_image, real_image):
    input_image = (input_image / 127.5) - 1
    real_image = (real_image / 127.5) - 1
    return input_image, real_image
```
We wrap the augmentation steps above into a single function: the images are first resized to 286×286 and then randomly cropped back to 256×256, and the left-right flip happens at random.
```python
@tf.function()
def random_jitter(input_image, real_image):
    input_image, real_image = resize(input_image, real_image, 286, 286)
    input_image, real_image = random_crop(input_image, real_image)

    if tf.random.uniform(()) > 0.5:
        input_image = tf.image.flip_left_right(input_image)
        real_image = tf.image.flip_left_right(real_image)

    return input_image, real_image
```
The effect of data augmentation:
```python
plt.figure(figsize=(6, 6))
for i in range(4):
    input_image, real_image = random_jitter(input, real)
    plt.subplot(2, 2, i + 1)
    plt.imshow(tensor_to_array(input_image) / 255.0)
    plt.axis('off')
plt.show()
```
Step 5: Preparing the training data
Define the loading functions for the training and test data.
```python
def load_image_train(image_file):
    input_image, real_image = load(image_file)
    input_image, real_image = random_jitter(input_image, real_image)
    input_image, real_image = normalize(input_image, real_image)
    return input_image, real_image
```
```python
def load_image_test(image_file):
    input_image, real_image = load(image_file)
    input_image, real_image = resize(input_image, real_image,
                                     IMG_HEIGHT, IMG_WIDTH)
    input_image, real_image = normalize(input_image, real_image)
    return input_image, real_image
```
We use TensorFlow's Dataset API to load the training and test data and to define our dataset objects.
```python
train_dataset = tf.data.Dataset.list_files(PATH + 'train/*.jpg')
train_dataset = train_dataset.map(load_image_train,
                                  num_parallel_calls=tf.data.experimental.AUTOTUNE)
train_dataset = train_dataset.cache().shuffle(BUFFER_SIZE)
train_dataset = train_dataset.batch(BATCH_SIZE)
```
```python
test_dataset = tf.data.Dataset.list_files(PATH + 'test/*.jpg')
test_dataset = test_dataset.map(load_image_test)
test_dataset = test_dataset.batch(BATCH_SIZE)
```
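Optionally (my addition, not part of the original pipeline), you can add prefetching so that data preprocessing overlaps with training:

```python
# Optional: overlap preprocessing with training.
train_dataset = train_dataset.prefetch(tf.data.experimental.AUTOTUNE)
```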
Step 6: Define the model
For Pokémon colorization we again train a GAN, but compared with the conditional GAN that generated handwritten digits last time, this model is considerably more complex. Let's first look at the overall structure of the generator and the discriminator.
The generator network
The generator uses the basic U-Net architecture. Each block in the encoding stage is convolution -> batch normalization -> LeakyReLU; each block in the decoding stage is transposed convolution -> batch normalization -> Dropout or ReLU, where the first three decoder blocks use Dropout and the remaining ones use ReLU. The output of every encoder block is also connected to its corresponding decoder block; see U-Net's skip connections for details.
Define the encoder block
```python
def downsample(filters, size, apply_batchnorm=True):
    initializer = tf.random_normal_initializer(0., 0.02)

    result = tf.keras.Sequential()
    result.add(tf.keras.layers.Conv2D(filters, size, strides=2, padding='same',
                                      kernel_initializer=initializer,
                                      use_bias=False))
    if apply_batchnorm:
        result.add(tf.keras.layers.BatchNormalization())
    result.add(tf.keras.layers.LeakyReLU())

    return result

down_model = downsample(3, 4)
```
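As a quick sanity check (my addition, assuming the `input` image loaded in Step 3 is 256×256), a stride-2 encoder block should halve the spatial dimensions:

```python
# A stride-2 conv halves height and width.
down_result = down_model(tf.expand_dims(input, 0))
print(down_result.shape)  # expected: (1, 128, 128, 3) for a 256x256 input
```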
Define the decoder block
```python
def upsample(filters, size, apply_dropout=False):
    initializer = tf.random_normal_initializer(0., 0.02)

    result = tf.keras.Sequential()
    result.add(tf.keras.layers.Conv2DTranspose(filters, size, strides=2,
                                               padding='same',
                                               kernel_initializer=initializer,
                                               use_bias=False))
    result.add(tf.keras.layers.BatchNormalization())
    if apply_dropout:
        result.add(tf.keras.layers.Dropout(0.5))
    result.add(tf.keras.layers.ReLU())

    return result

up_model = upsample(3, 4)
```
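And the matching check for the decoder block (again my addition, reusing `down_result` from the sketch above): a stride-2 transposed convolution should double the dimensions back:

```python
# A stride-2 transposed conv doubles height and width.
up_result = up_model(down_result)
print(up_result.shape)  # expected: (1, 256, 256, 3)
```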
Define the generator model
```python
def Generator():
    down_stack = [
        downsample(64, 4, apply_batchnorm=False),  # (bs, 128, 128, 64)
        downsample(128, 4),                        # (bs, 64, 64, 128)
        downsample(256, 4),                        # (bs, 32, 32, 256)
        downsample(512, 4),                        # (bs, 16, 16, 512)
        downsample(512, 4),                        # (bs, 8, 8, 512)
        downsample(512, 4),                        # (bs, 4, 4, 512)
        downsample(512, 4),                        # (bs, 2, 2, 512)
        downsample(512, 4),                        # (bs, 1, 1, 512)
    ]

    up_stack = [
        upsample(512, 4, apply_dropout=True),      # (bs, 2, 2, 1024)
        upsample(512, 4, apply_dropout=True),      # (bs, 4, 4, 1024)
        upsample(512, 4, apply_dropout=True),      # (bs, 8, 8, 1024)
        upsample(512, 4),                          # (bs, 16, 16, 1024)
        upsample(256, 4),                          # (bs, 32, 32, 512)
        upsample(128, 4),                          # (bs, 64, 64, 256)
        upsample(64, 4),                           # (bs, 128, 128, 128)
    ]

    initializer = tf.random_normal_initializer(0., 0.02)
    last = tf.keras.layers.Conv2DTranspose(OUTPUT_CHANNELS, 4, strides=2,
                                           padding='same',
                                           kernel_initializer=initializer,
                                           activation='tanh')  # (bs, 256, 256, 3)

    concat = tf.keras.layers.Concatenate()

    inputs = tf.keras.layers.Input(shape=[None, None, 3])
    x = inputs

    # Encoder: keep each block's output for the skip connections.
    skips = []
    for down in down_stack:
        x = down(x)
        skips.append(x)
    skips = reversed(skips[:-1])

    # Decoder: upsample and concatenate with the matching encoder output.
    for up, skip in zip(up_stack, skips):
        x = up(x)
        x = concat([x, skip])

    x = last(x)

    return tf.keras.Model(inputs=inputs, outputs=x)

generator = Generator()
```
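To see what the untrained generator produces (a hedged sketch of mine, reusing the raw `input` image from Step 3), feed it through and rescale the tanh output from [-1, 1] back to [0, 1] for display:

```python
gen_output = generator(input[tf.newaxis, ...], training=False)
print(gen_output.shape)  # expected: (1, 256, 256, 3)
plt.imshow(tensor_to_array(gen_output[0, ...]) * 0.5 + 0.5)
plt.show()
```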
The discriminator network
For the discriminator we use a PatchGAN, also known as a Markovian discriminator. Many traditional CNN-based classifiers add a fully connected layer at the end and output a single verdict. A PatchGAN is different: it consists entirely of convolutional layers, and its final output is an N×N matrix whose mean is taken as the real/fake output. Intuitively, each element of the output matrix has a receptive field covering one patch of the original image, which is why this kind of GAN is called a PatchGAN.
Each block in the PatchGAN consists of convolution -> batch normalization -> LeakyReLU.
In our model, the final output has shape (batch size, 30, 30, 1), where the 1 is the channel dimension.
Each element of the 30×30 output corresponds to a 70×70 region of the original image. For the detailed structure, see the pix2pix paper (Isola et al., 2017).
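You can verify the 70×70 number with a small receptive-field calculation (a sketch I am adding, not from the original post). Walking the discriminator's conv stack backwards with rf_in = rf_out × stride + (kernel − stride):

```python
# Discriminator conv stack as (kernel_size, stride):
# three stride-2 downsample blocks, then two stride-1 convs.
layers = [(4, 2), (4, 2), (4, 2), (4, 1), (4, 1)]

rf = 1  # start from a single output unit
for k, s in reversed(layers):
    rf = rf * s + (k - s)
print(rf)  # 70 -- each output element sees a 70x70 input region
```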
```python
def Discriminator():
    initializer = tf.random_normal_initializer(0., 0.02)

    inp = tf.keras.layers.Input(shape=[None, None, 3], name='input_image')
    tar = tf.keras.layers.Input(shape=[None, None, 3], name='target_image')

    x = tf.keras.layers.concatenate([inp, tar])  # (batch size, 256, 256, channels*2)

    down1 = downsample(64, 4, False)(x)  # (batch size, 128, 128, 64)
    down2 = downsample(128, 4)(down1)    # (batch size, 64, 64, 128)
    down3 = downsample(256, 4)(down2)    # (batch size, 32, 32, 256)

    zero_pad1 = tf.keras.layers.ZeroPadding2D()(down3)  # (batch size, 34, 34, 256)
    conv = tf.keras.layers.Conv2D(512, 4, strides=1,
                                  kernel_initializer=initializer,
                                  use_bias=False)(zero_pad1)  # (batch size, 31, 31, 512)
    batchnorm1 = tf.keras.layers.BatchNormalization()(conv)
    leaky_relu = tf.keras.layers.LeakyReLU()(batchnorm1)

    zero_pad2 = tf.keras.layers.ZeroPadding2D()(leaky_relu)  # (batch size, 33, 33, 512)
    last = tf.keras.layers.Conv2D(1, 4, strides=1,
                                  kernel_initializer=initializer)(zero_pad2)  # (batch size, 30, 30, 1)

    return tf.keras.Model(inputs=[inp, tar], outputs=last)

discriminator = Discriminator()
```
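A quick way to confirm the (batch size, 30, 30, 1) output shape (an added sketch, not in the original post) is to run the discriminator on random tensors:

```python
# Feed a dummy 256x256 image pair through the discriminator.
sample_inp = tf.random.normal([1, 256, 256, 3])
sample_tar = tf.random.normal([1, 256, 256, 3])
print(discriminator([sample_inp, sample_tar], training=False).shape)  # (1, 30, 30, 1)
```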
Step 7: Define the loss functions and optimizers
Both networks use binary cross-entropy on logits as the base GAN loss:
```python
loss_object = tf.keras.losses.BinaryCrossentropy(from_logits=True)
```
The discriminator loss is the sum of the loss on real images (labeled 1) and the loss on generated images (labeled 0):
```python
def discriminator_loss(disc_real_output, disc_generated_output):
    real_loss = loss_object(tf.ones_like(disc_real_output), disc_real_output)
    generated_loss = loss_object(tf.zeros_like(disc_generated_output),
                                 disc_generated_output)
    total_disc_loss = real_loss + generated_loss
    return total_disc_loss
```
The generator loss combines the adversarial loss with an L1 reconstruction term weighted by LAMBDA:

```python
def generator_loss(disc_generated_output, gen_output, target):
    gan_loss = loss_object(tf.ones_like(disc_generated_output),
                           disc_generated_output)
    # The L1 term pulls the generated image towards the target.
    l1_loss = tf.reduce_mean(tf.abs(target - gen_output))
    total_gen_loss = gan_loss + (LAMBDA * l1_loss)
    return total_gen_loss
```
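For reference, this is the pix2pix generator objective (Isola et al., 2017), with λ corresponding to LAMBDA = 100:

$$\mathcal{L}_G = \mathcal{L}_{cGAN}(G, D) + \lambda\,\mathbb{E}_{x,y}\big[\lVert y - G(x)\rVert_1\big]$$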
```python
generator_optimizer = tf.keras.optimizers.Adam(2e-4, beta_1=0.5)
discriminator_optimizer = tf.keras.optimizers.Adam(2e-4, beta_1=0.5)
```
Step 8: Define checkpointing
Since training takes quite a long time, we save intermediate training state so it can be loaded later to resume training.
```python
checkpoint = tf.train.Checkpoint(generator_optimizer=generator_optimizer,
                                 discriminator_optimizer=discriminator_optimizer,
                                 generator=generator,
                                 discriminator=discriminator)
```
If we saved results from a previous training run, we load them and then use the restored model to colorize our test data.
```python
def generate_images(model, test_input, tar):
    prediction = model(test_input, training=True)
    plt.figure(figsize=(15, 15))

    display_list = [test_input[0], tar[0], prediction[0]]
    title = ['Input', 'Target', 'Predicted']

    for i in range(3):
        plt.subplot(1, 3, i + 1)
        plt.title(title[i])
        # Map the [-1, 1] images back to [0, 1] for display.
        plt.imshow(tensor_to_array(display_list[i]) * 0.5 + 0.5)
        plt.axis('off')
    plt.show()
```
```python
ckpt_manager = tf.train.CheckpointManager(checkpoint, "./", max_to_keep=2)

if ckpt_manager.latest_checkpoint:
    checkpoint.restore(ckpt_manager.latest_checkpoint)

for inp, tar in test_dataset.take(20):
    generate_images(generator, inp, tar)
```
Step 9: Training
During training, we output the first test image so you can see how each epoch changes the prediction and share in the fun. We save the training state every 20 epochs.
```python
@tf.function
def train_step(input_image, target):
    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        gen_output = generator(input_image, training=True)

        disc_real_output = discriminator([input_image, target], training=True)
        disc_generated_output = discriminator([input_image, gen_output],
                                              training=True)

        gen_loss = generator_loss(disc_generated_output, gen_output, target)
        disc_loss = discriminator_loss(disc_real_output, disc_generated_output)

    generator_gradients = gen_tape.gradient(gen_loss,
                                            generator.trainable_variables)
    discriminator_gradients = disc_tape.gradient(disc_loss,
                                                 discriminator.trainable_variables)

    generator_optimizer.apply_gradients(zip(generator_gradients,
                                            generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(discriminator_gradients,
                                                discriminator.trainable_variables))
```
```python
def fit(train_ds, epochs, test_ds):
    for epoch in range(epochs):
        start = time.time()

        for input_image, target in train_ds:
            train_step(input_image, target)

        clear_output(wait=True)
        for example_input, example_target in test_ds.take(1):
            generate_images(generator, example_input, example_target)

        if (epoch + 1) % 20 == 0:
            ckpt_save_path = ckpt_manager.save()
            print('Saved checkpoint for epoch {} at {}\n'.format(
                epoch + 1, ckpt_save_path))

        print('Time taken for epoch {}: {:.2f} sec\n'.format(
            epoch + 1, time.time() - start))
```
```python
fit(train_dataset, EPOCHS, test_dataset)
```
Time taken for epoch 8: 51.33 sec.
Step 10: Colorize the test data and see how we did
```python
for input, target in test_dataset.take(20):
    generate_images(generator, input, target)
```
矩池雲 has now published a "Pokémon colorization" image. 矩池雲 is dedicated to building a world-leading open AI compute platform; if you are interested, you can try it out through the "Jupyter 教程 Demo" image on the 矩池雲 website.