文章目錄

引入

AlexNet $\color{red}^{[1]}$ 與Lenet的設計理念很類似，但也有顯著的區別 $\color{red}^{[2]}$ ：
1）包含 $\mathbf{8}$ 層變換，其中有 $5$ 層卷積和 $2$ 層全鏈接隱藏層，以及一個全鏈接輸出層：
1.1）第一層卷積窗口形狀爲 $\mathbf{11 \times 11}$ ，能夠適用於尺寸更大的圖像；
1.2）第二層卷積窗口形狀爲 $\mathbf{5 \times 5}$ ，其他爲 $\times 3$ ；
1.3）第1、2、五個卷積層以後都使用了窗口形狀爲 $\mathbf{3 \times 3}$ 、步幅爲 $2$ 的最大池化層；
1.4）使用更多的卷積通道數；
1.5）最後一個卷積層是兩個輸出個數爲4096的全鏈接層。
2）使用更簡單的ReLU激活函數：
2.1）ReLU函數計算簡單，例如無需求冪運算；
2.2）ReLU函數在不一樣的參數初始化方法下，模型都更容易訓練。
3）經過丟棄法來控制全鏈接層的模型複雜度。
4）引入了大量的圖像增廣，如翻轉、裁剪和顏色變化等，從而進一步擴大數據集，並緩解過擬合。
html

1 模型構建

""" @author: Inki @contact: inki.yinji@qq.com @version: Created in 2020 1218, last modified in 2020 1218. """

import time
import torch
import torchvision
from torch import nn, optim
from util.SimpleTool import load_data_fashion_mnist


class AlexNet(nn.Module):

    def __init__(self):
        super(AlexNet, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=96, kernel_size=11, stride=4),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(3, 2),
            nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(3, 2)
        )
        self.fc = nn.Sequential(
            nn.Linear(256 * 5 * 5, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 10),
        )
	
	    def forward(self, img):
        """ The forward function. """
        feature = self.conv(img)
        output = self.fc(feature.view(img.shape[0], -1))
        return output


if __name__ == '__main__':
    temp_net = AlexNet()
    print(temp_net)

輸出以下：python

AlexNet(
  (conv): Sequential(
    (0): Conv2d(1, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=6400, out_features=4096, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.5, inplace=False)
    (6): Linear(in_features=4096, out_features=10, bias=True)
  )
)

2 讀取數據

Alex中使用的ImageNet數據集，可是該訓練數據集訓練時間較長，這裏仍然使用Fashion-MNIST數據集來測試。
讀取數據時，額外將圖像擴大到AlexNet中的 $224 \times 224$ ：web

if __name__ == '__main__':
    temp_batch_size = 128
    temp_resize = 224
    temp_tr_iter, temp_te_iter = load_data_fashion_mnist(temp_batch_size, resize=temp_resize)

3 模型訓練

訓練函數與Lenet一致：app

def train(net, tr_iter, te_iter, batch_size, optimizer,
          loss=nn.CrossEntropyLoss(),
          device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
          num_epochs=100):
    """ The train function. """
    net = net.to(device)
    temp_batch_count = 0
    print("Training on", device)
    for epoch in range(num_epochs):
        temp_tr_loss_sum, temp_tr_acc_sum, temp_num, temp_start_time = 0., 0., 0, time.time()
        for x, y in tr_iter:
            x = x.to(device)
            y = y.to(device)
            temp_y_pred = net(x)
            temp_loss = loss(temp_y_pred, y)
            optimizer.zero_grad()
            temp_loss.backward()
            optimizer.step()
            temp_tr_loss_sum += temp_loss.cpu().item()
            temp_tr_acc_sum += (temp_y_pred.argmax(dim=1) == y).sum().cpu().item()
            temp_num += y.shape[0]
            temp_batch_count += 1
        test_acc = evaluate_accuracy(te_iter, net)
        print("Epoch %d, loss %.4f, training acc %.3f, test ass %.3f, time %.1f s" %
              (epoch + 1, temp_tr_loss_sum / temp_batch_count, temp_tr_acc_sum / temp_num, test_acc,
               time.time() - temp_start_time))


def evaluate_accuracy(data_iter, net, device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')):
    """ The evaluate function, and the performance measure is accuracy. """
    ret_acc, temp_num = 0., 0
    with torch.no_grad():
        for x, y in data_iter:
            net.eval() # The evaluate mode, and the dropout is closed.
            ret_acc += (net(x.to(device)).argmax(dim=1) == y.to(device)).float().sum().cpu().item()
            net.train()
            temp_num += y.shape[0]

    return ret_acc / temp_num

內存用了窩8個G，CPU拉滿，難頂。。ide

Training on cpu
Epoch 1, loss 0.6334, training acc 0.756, test ass 0.846, time 1856.7 s

完整代碼

""" @author: Inki @contact: inki.yinji@qq.com @version: Created in 2020 1218, last modified in 2020 1218. """

import time
import torch
import torchvision
from torch import nn, optim
from util.SimpleTool import load_data_fashion_mnist


class AlexNet(nn.Module):

    def __init__(self):
        super(AlexNet, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=96, kernel_size=11, stride=4),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(3, 2),
            nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(3, 2)
        )
        self.fc = nn.Sequential(
            nn.Linear(256 * 5 * 5, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 10),
        )

    def forward(self, img):
        """ The forward function. """
        feature = self.conv(img)
        output = self.fc(feature.view(img.shape[0], -1))
        return output


def train(net, tr_iter, te_iter, batch_size, optimizer,
          loss=nn.CrossEntropyLoss(),
          device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
          num_epochs=100):
    """ The train function. """
    net = net.to(device)
    temp_batch_count = 0
    print("Training on", device)
    for epoch in range(num_epochs):
        temp_tr_loss_sum, temp_tr_acc_sum, temp_num, temp_start_time = 0., 0., 0, time.time()
        for x, y in tr_iter:
            x = x.to(device)
            y = y.to(device)
            temp_y_pred = net(x)
            temp_loss = loss(temp_y_pred, y)
            optimizer.zero_grad()
            temp_loss.backward()
            optimizer.step()
            temp_tr_loss_sum += temp_loss.cpu().item()
            temp_tr_acc_sum += (temp_y_pred.argmax(dim=1) == y).sum().cpu().item()
            temp_num += y.shape[0]
            temp_batch_count += 1
        test_acc = evaluate_accuracy(te_iter, net)
        print("Epoch %d, loss %.4f, training acc %.3f, test ass %.3f, time %.1f s" %
              (epoch + 1, temp_tr_loss_sum / temp_batch_count, temp_tr_acc_sum / temp_num, test_acc,
               time.time() - temp_start_time))


def evaluate_accuracy(data_iter, net, device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')):
    """ The evaluate function, and the performance measure is accuracy. """
    ret_acc, temp_num = 0., 0
    with torch.no_grad():
        for x, y in data_iter:
            net.eval() # The evaluate mode, and the dropout is closed.
            ret_acc += (net(x.to(device)).argmax(dim=1) == y.to(device)).float().sum().cpu().item()
            net.train()
            temp_num += y.shape[0]

    return ret_acc / temp_num


if __name__ == '__main__':
    temp_batch_size = 128
    temp_resize = 224
    temp_lr = 0.001
    temp_num_epochs = 5
    temp_tr_iter, temp_te_iter = load_data_fashion_mnist(temp_batch_size, resize=temp_resize)
    temp_net = AlexNet()
    temp_optimizer = optim.Adam(temp_net.parameters(), lr=temp_lr)
    train(temp_net, temp_tr_iter, temp_te_iter, temp_batch_size, temp_optimizer, num_epochs=temp_num_epochs)

支持代碼

uitl.SimpleTool

def load_data_fashion_mnist(batch_size=10, root='D:/Data/Datasets/FashionMNIST', resize=None):
    """ Download the fashion mnist dataset and then load into memory. """
    trans = []
    if resize:
        trans.append(transforms.Resize(size=resize))
    trans.append(transforms.ToTensor())

    transform = transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True, download=True, transform=transform)
    mnist_test = torchvision.datasets.FashionMNIST(root=root, train=False, download=True, transform=transform)
    if sys.platform.startswith('win'):
        num_workers = 0
    else:
        num_workers = cpu_count()
    train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return train_iter, test_iter

注：
[1] Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). Imagenet classification with deep convolutional neural networks. In Advances in neural information processing systems (pp. 1097-1105).
[2] 李沐、Aston Zhang等老師的這本《動手學深度學習》一書。svg

本文同步分享在博客「因吉」（CSDN）。
若有侵權，請聯繫 support@oschina.cn 刪除。
本文參與「OSC源創計劃」，歡迎正在閱讀的你也加入，一塊兒分享。函數

深度學習 (二十一)：卷積神經網絡之AlexNet模型