Each CIFAR-10 image is 32×32, while AlexNet expects a 224×224 input (some sources say 227×227, which corresponds to applying zero padding of size 2 to a 224×224 image). One approach is therefore to resize the CIFAR-10 images to 224×224.
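The two conventions agree on what matters: with an 11×11 kernel and stride 4, a 224×224 input padded by 2 and a raw 227×227 input both produce a 55×55 feature map in the first convolution. A quick check:

```python
# Output size of the first conv layer: floor((n + 2*pad - kernel) / stride) + 1
print((224 + 2 * 2 - 11) // 4 + 1)  # 224 input, padding 2  -> 55
print((227 - 11) // 4 + 1)          # 227 input, no padding -> 55
```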
The problem is that once CIFAR-10 is resized to 224×224, even 32 GB of RAM cannot hold all of the data: the normalization step (dividing every pixel by 255) promotes the uint8 array to float, and an OOM (out of memory) error occurs.
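A quick back-of-the-envelope calculation makes this concrete (a sketch; actual peak usage is higher still, because NumPy allocates temporaries during the conversion):

```python
# 60,000 CIFAR-10 images (train + test) resized to 224x224x3
n_images = 60_000
pixels = 224 * 224 * 3

uint8_gb = n_images * pixels / 1024 ** 3  # ~8.4 GB as uint8
float32_gb = uint8_gb * 4                 # ~33.6 GB once cast to float32 -- over 32 GB
print('uint8: %.1f GB, float32: %.1f GB' % (uint8_gb, float32_gb))
```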
There are two ways around this, both shown in full below:
1) Make the resize part of the model, e.g. add a layer that resizes one batch of images at a time; the 32×32 CIFAR-10 data can then still be loaded into memory in its entirety;
2) The general-purpose approach: load only part of the data into memory at a time, and fetch the rest on demand.
Note: the AlexNet used in this post has the same structure as the one in PyTorch; see AlexNet in pytorch/vision.
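For reference, the convolutional stack of torchvision's AlexNet looks roughly like this (paraphrased from memory of pytorch/vision; consult the linked source for the exact code):

```python
import torch.nn as nn

# Feature extractor of AlexNet as defined in pytorch/vision (paraphrased)
features = nn.Sequential(
    nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Conv2d(64, 192, kernel_size=5, padding=2),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Conv2d(192, 384, kernel_size=3, padding=1),
    nn.ReLU(inplace=True),
    nn.Conv2d(384, 256, kernel_size=3, padding=1),
    nn.ReLU(inplace=True),
    nn.Conv2d(256, 256, kernel_size=3, padding=1),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=3, stride=2),
)
```

The Keras models below mirror this: ZeroPadding2D(2) followed by a 'valid' 11×11/4 convolution reproduces Conv2d(..., padding=2), and the filter counts (64, 192, 384, 256, 256) match.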
Approach 1): resize inside the model. A Lambda layer scales each batch up on the fly, so only the original 32×32 data ever sits in memory:

```python
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.python.keras import backend as K

K.clear_session()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True  # allocate GPU memory on demand instead of grabbing it all
K.set_session(tf.Session(config=config))

# Hyperparameters
learning_rate = 0.001  # the 'adam' string below uses Adam's default lr, which is also 0.001
epochs = 120
batch_size = 32
drop_rate = 0.5    # assumed; the original snippet left drop_rate undefined
num_classes = 10   # CIFAR-10

cifar10 = tf.keras.datasets.cifar10
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# At 32x32 the full dataset easily fits in memory, so normalizing in place is fine
x_train = x_train.astype(np.float32) / 255
x_test = x_test.astype(np.float32) / 255

model = tf.keras.models.Sequential([
    # Lambda layer that resizes the input; each side is scaled up 7x (32 -> 224).
    # K.resize_images uses nearest-neighbour interpolation; to use a different
    # interpolation you would have to modify the source of K.resize_images
    # (but see the alternative below).
    layers.Lambda(lambda img: K.resize_images(img, 7, 7, data_format='channels_last'),
                  input_shape=(32, 32, 3)),
    layers.ZeroPadding2D(padding=(2, 2)),
    layers.Conv2D(64, (11, 11), strides=(4, 4), padding='valid',
                  activation='relu', kernel_initializer='he_uniform'),
    layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
    layers.Conv2D(192, (5, 5), strides=(1, 1), padding='same',
                  activation='relu', kernel_initializer='he_uniform'),
    layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
    layers.Conv2D(384, (3, 3), strides=(1, 1), padding='same',
                  activation='relu', kernel_initializer='he_uniform'),
    layers.Conv2D(256, (3, 3), strides=(1, 1), padding='same',
                  activation='relu', kernel_initializer='he_uniform'),
    layers.Conv2D(256, (3, 3), strides=(1, 1), padding='same',
                  activation='relu', kernel_initializer='he_uniform'),
    layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
    layers.Flatten(),
    layers.Dense(4096, activation='relu', kernel_initializer='he_uniform'),
    layers.Dropout(drop_rate),
    layers.Dense(4096, activation='relu', kernel_initializer='he_uniform'),
    layers.Dropout(drop_rate),
    layers.Dense(num_classes, activation='softmax', kernel_initializer='he_uniform')
])
model.summary()

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# The original snippet passed validation_data=(x_val, y_val) without ever defining
# the split; holding out 10% of the training data serves the same purpose
model.fit(x_train, y_train,
          epochs=epochs,
          batch_size=batch_size,
          verbose=2,
          validation_split=0.1)
```
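If you want a different interpolation without editing the backend source, one alternative is to call the TF 1.x tf.image API directly inside the Lambda. A minimal sketch (my own substitution, not part of the original post):

```python
import tensorflow as tf
from tensorflow.keras import layers

# Drop-in replacement for the K.resize_images Lambda above:
# tf.image.resize_images (TF 1.x) supports bilinear, bicubic, and other methods
resize_layer = layers.Lambda(
    lambda img: tf.image.resize_images(img, size=(224, 224),
                                       method=tf.image.ResizeMethod.BILINEAR),
    input_shape=(32, 32, 3))
```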
Approach 2): resize once, save the result to an HDF5 file, and stream it batch by batch with keras.utils.Sequence, so only one batch is held in memory at a time:

```python
import os
import time

import cv2
import h5py
import numpy as np
import tensorflow as tf
from sklearn.model_selection import StratifiedShuffleSplit
from tensorflow.keras import layers
from tensorflow.keras.utils import Sequence
from tensorflow.python.keras import backend as K


class CIFAR10Sequence(Sequence):
    def __init__(self, x_set, y_set, batch_size):
        """
        :param x_set: h5py dataset of images
        :param y_set: h5py dataset of labels
        :param batch_size: int
        """
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size

    def __len__(self):
        return int(np.ceil(len(self.x) / float(self.batch_size)))

    def __getitem__(self, idx):
        # h5py datasets support fast slice reads, so only this batch is loaded
        batch_x = self.x[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_y = self.y[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_x = batch_x.astype(np.float32) / 255
        return batch_x, batch_y


def _resized_data():
    """
    Resize CIFAR-10 to [224, 224, 3] and save it to 'data/cifar-10.h5'.
    :return: None
    """
    cifar10 = tf.keras.datasets.cifar10
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()

    start_time = time.time()
    x_train = np.array([cv2.resize(img, (224, 224), interpolation=cv2.INTER_CUBIC)
                        for img in x_train])
    x_test = np.array([cv2.resize(img, (224, 224), interpolation=cv2.INTER_CUBIC)
                       for img in x_test])

    # Stratified 10% validation split
    sss = StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=32)
    for train_index, val_index in sss.split(x_train, y_train):
        print("TRAIN:", train_index, "VAL:", val_index)
        x_train, x_val = x_train[train_index], x_train[val_index]
        y_train, y_val = y_train[train_index], y_train[val_index]
    end_time = time.time()
    print('Time consumed by resizing: ', (end_time - start_time))

    # Write everything to a single HDF5 file
    filename = 'data/cifar-10.h5'
    h5f = h5py.File(filename, 'w')
    h5f.create_dataset('x_train', data=x_train)
    h5f.create_dataset('y_train', data=y_train)
    h5f.create_dataset('x_val', data=x_val)
    h5f.create_dataset('y_val', data=y_val)
    h5f.create_dataset('x_test', data=x_test)
    h5f.create_dataset('y_test', data=y_test)
    h5f.close()


def load_resized_data(filename='data/cifar-10.h5'):
    if not os.path.exists(filename):
        _resized_data()
    # Do not close the h5 file here, or the datasets become unreadable.
    # Opening the file does NOT load the data into memory: h5py datasets
    # are read lazily, and slice reads are fast.
    h5f = h5py.File(filename, 'r')
    x_train = h5f['x_train']
    y_train = h5f['y_train']
    x_val = h5f['x_val']
    y_val = h5f['y_val']
    x_test = h5f['x_test']
    y_test = h5f['y_test']
    return (x_train, y_train), (x_val, y_val), (x_test, y_test)


K.clear_session()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True  # allocate GPU memory on demand instead of grabbing it all
K.set_session(tf.Session(config=config))

# Hyperparameters
learning_rate = 0.001  # the 'adam' string below uses Adam's default lr, which is also 0.001
epochs = 120
batch_size = 32
drop_rate = 0.5    # assumed; the original snippet left drop_rate undefined
num_classes = 10   # CIFAR-10

(x_train, y_train), (x_val, y_val), (x_test, y_test) = load_resized_data()
# The validation and test splits are small enough to normalize in memory;
# x_val/x_test are h5py datasets, so read them out with [:] first
x_val = x_val[:].astype(np.float32) / 255
x_test = x_test[:].astype(np.float32) / 255

model = tf.keras.models.Sequential([
    layers.ZeroPadding2D(padding=(2, 2), input_shape=(224, 224, 3)),
    layers.Conv2D(64, (11, 11), strides=(4, 4), padding='valid',
                  activation='relu', kernel_initializer='he_uniform'),
    layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
    layers.Conv2D(192, (5, 5), strides=(1, 1), padding='same',
                  activation='relu', kernel_initializer='he_uniform'),
    layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
    layers.Conv2D(384, (3, 3), strides=(1, 1), padding='same',
                  activation='relu', kernel_initializer='he_uniform'),
    layers.Conv2D(256, (3, 3), strides=(1, 1), padding='same',
                  activation='relu', kernel_initializer='he_uniform'),
    layers.Conv2D(256, (3, 3), strides=(1, 1), padding='same',
                  activation='relu', kernel_initializer='he_uniform'),
    layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
    layers.Flatten(),
    layers.Dense(4096, activation='relu', kernel_initializer='he_uniform'),
    layers.Dropout(drop_rate),
    layers.Dense(4096, activation='relu', kernel_initializer='he_uniform'),
    layers.Dropout(drop_rate),
    layers.Dense(num_classes, activation='softmax', kernel_initializer='he_uniform')
])
model.summary()

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# shuffle defaults to True: after each epoch the batch indices handed to
# CIFAR10Sequence are drawn in random order rather than sequentially, i.e.
# shuffling happens at the batch level; the sample order within a batch stays fixed
model.fit_generator(CIFAR10Sequence(x_train, y_train, batch_size=batch_size),
                    # steps_per_epoch=int(np.ceil(len(x_train) / batch_size)),
                    epochs=epochs,
                    verbose=2,
                    callbacks=None,
                    validation_data=(x_val, y_val[:]))
```
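Once training finishes, the test split can be evaluated the same way. A minimal sketch reusing the variables from the script above (x_test was already normalized in memory, so a plain evaluate() suffices; wrapping the raw h5py datasets in CIFAR10Sequence would stream it batch by batch instead):

```python
# y_test is still an h5py dataset; [:] reads the labels into memory
loss, acc = model.evaluate(x_test, y_test[:], batch_size=batch_size, verbose=0)
print('test loss: %.4f, test accuracy: %.4f' % (loss, acc))
```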
References:
class CIFAR10Sequence(Sequence)
keras.utils.Sequence()
AlexNet in pytorch/vision