引入
AlexNet$\color{red}^{[1]}$與LeNet的設計理念很類似,但也有顯著的區別$\color{red}^{[2]}$:
1)包含$\mathbf{8}$層變換,其中有$5$層卷積和$2$層全鏈接隱藏層,以及一個全鏈接輸出層:
1.1)第一層卷積窗口形狀爲$\mathbf{11 \times 11}$,能夠適用於尺寸更大的圖像;
1.2)第二層卷積窗口形狀爲$\mathbf{5 \times 5}$,其他爲$3 \times 3$;
1.3)第1、2、5個卷積層以後都使用了窗口形狀爲$\mathbf{3 \times 3}$、步幅爲$2$的最大池化層;
1.4)使用更多的卷積通道數;
1.5)最後一個卷積層以後是兩個輸出個數爲4096的全鏈接層。
2)使用更簡單的ReLU激活函數:
2.1)ReLU函數計算簡單,例如無需求冪運算;
2.2)ReLU函數在不一樣的參數初始化方法下,模型都更容易訓練。
3)通過丟棄法來控制全鏈接層的模型複雜度。
4)引入了大量的圖像增廣,如翻轉、裁剪和顏色變化等,從而進一步擴大數據集,並緩解過擬合。
1 模型構建
""" @author: Inki @contact: inki.yinji@qq.com @version: Created in 2020 1218, last modified in 2020 1218. """ import time import torch import torchvision from torch import nn, optim from util.SimpleTool import load_data_fashion_mnist class AlexNet(nn.Module): def __init__(self): super(AlexNet, self).__init__() self.conv = nn.Sequential( nn.Conv2d(in_channels=1, out_channels=96, kernel_size=11, stride=4), nn.ReLU(), nn.MaxPool2d(kernel_size=3, stride=2), nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2), nn.ReLU(), nn.MaxPool2d(3, 2), nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1), nn.ReLU(), nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1), nn.ReLU(), nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1), nn.ReLU(), nn.MaxPool2d(3, 2) ) self.fc = nn.Sequential( nn.Linear(256 * 5 * 5, 4096), nn.ReLU(), nn.Dropout(0.5), nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(0.5), nn.Linear(4096, 10), ) def forward(self, img): """ The forward function. """ feature = self.conv(img) output = self.fc(feature.view(img.shape[0], -1)) return output if __name__ == '__main__': temp_net = AlexNet() print(temp_net)
輸出如下:
AlexNet( (conv): Sequential( (0): Conv2d(1, 96, kernel_size=(11, 11), stride=(4, 4)) (1): ReLU() (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False) (3): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2)) (4): ReLU() (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False) (6): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (7): ReLU() (8): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (9): ReLU() (10): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (11): ReLU() (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False) ) (fc): Sequential( (0): Linear(in_features=6400, out_features=4096, bias=True) (1): ReLU() (2): Dropout(p=0.5, inplace=False) (3): Linear(in_features=4096, out_features=4096, bias=True) (4): ReLU() (5): Dropout(p=0.5, inplace=False) (6): Linear(in_features=4096, out_features=10, bias=True) ) )
2 讀取數據
AlexNet使用的是ImageNet數據集,但該數據集的訓練時間較長,這裏仍然使用Fashion-MNIST數據集來測試。
讀取數據時,額外將圖像擴大到AlexNet使用的$224 \times 224$:
if __name__ == '__main__':
    # Mini-batch size, and the side length every image is resized to
    # before being fed to the network.
    temp_batch_size, temp_resize = 128, 224
    temp_tr_iter, temp_te_iter = load_data_fashion_mnist(
        temp_batch_size,
        resize=temp_resize,
    )
3 模型訓練
訓練函數與LeNet一致:
def train(net, tr_iter, te_iter, batch_size, optimizer,
          loss=nn.CrossEntropyLoss(),
          device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
          num_epochs=100):
    """
    The train function.

    After every epoch one line is printed with the mean training loss per
    batch, the training accuracy, the test accuracy and the elapsed time.

    :param net: The model to train; it is moved to ``device``.
    :param tr_iter: Iterable yielding ``(x, y)`` training batches.
    :param te_iter: Iterable yielding ``(x, y)`` test batches.
    :param batch_size: Not used by this function; kept so existing calls still work.
    :param optimizer: Optimizer already bound to the parameters of ``net``.
    :param loss: Callable taking ``(prediction, target)`` and returning a scalar tensor.
    :param device: Device on which training and evaluation run.
    :param num_epochs: Number of passes over ``tr_iter``.
    """
    net = net.to(device)
    # Make sure layers such as dropout are active from the very first epoch.
    net.train()
    print("Training on", device)
    for epoch in range(num_epochs):
        # Every statistic, including the batch counter, restarts with the
        # epoch; otherwise the reported mean loss would be divided by the
        # batches of all previous epochs as well.
        temp_tr_loss_sum, temp_tr_acc_sum, temp_num, temp_batch_count = 0., 0., 0, 0
        temp_start_time = time.time()
        for x, y in tr_iter:
            x = x.to(device)
            y = y.to(device)
            temp_y_pred = net(x)
            temp_loss = loss(temp_y_pred, y)
            optimizer.zero_grad()
            temp_loss.backward()
            optimizer.step()
            temp_tr_loss_sum += temp_loss.item()
            temp_tr_acc_sum += (temp_y_pred.argmax(dim=1) == y).sum().item()
            temp_num += y.shape[0]
            temp_batch_count += 1
        # Evaluate on the same device the model was trained on.
        test_acc = evaluate_accuracy(te_iter, net, device)
        print("Epoch %d, loss %.4f, training acc %.3f, test ass %.3f, time %.1f s"
              % (epoch + 1, temp_tr_loss_sum / temp_batch_count, temp_tr_acc_sum / temp_num,
                 test_acc, time.time() - temp_start_time))


def evaluate_accuracy(data_iter, net,
                      device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')):
    """
    The evaluate function, and the performance measure is accuracy.

    :param data_iter: Iterable yielding ``(x, y)`` batches.
    :param net: The model to evaluate; it must already live on ``device``.
    :param device: Device the batches are moved to before the forward pass.
    :return: Fraction of samples whose arg-max prediction equals the label.
    """
    ret_acc, temp_num = 0., 0
    # Remember the current mode so that it can be restored afterwards.
    was_training = net.training
    net.eval()  # The evaluate mode, and the dropout is closed.
    try:
        with torch.no_grad():
            for x, y in data_iter:
                temp_y_pred = net(x.to(device)).argmax(dim=1)
                ret_acc += (temp_y_pred == y.to(device)).float().sum().item()
                temp_num += y.shape[0]
    finally:
        net.train(was_training)
    return ret_acc / temp_num
內存用了我8個G,CPU拉滿,難頂。。
Training on cpu Epoch 1, loss 0.6334, training acc 0.756, test ass 0.846, time 1856.7 s
完整代碼
""" @author: Inki @contact: inki.yinji@qq.com @version: Created in 2020 1218, last modified in 2020 1218. """ import time import torch import torchvision from torch import nn, optim from util.SimpleTool import load_data_fashion_mnist class AlexNet(nn.Module): def __init__(self): super(AlexNet, self).__init__() self.conv = nn.Sequential( nn.Conv2d(in_channels=1, out_channels=96, kernel_size=11, stride=4), nn.ReLU(), nn.MaxPool2d(kernel_size=3, stride=2), nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2), nn.ReLU(), nn.MaxPool2d(3, 2), nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1), nn.ReLU(), nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1), nn.ReLU(), nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1), nn.ReLU(), nn.MaxPool2d(3, 2) ) self.fc = nn.Sequential( nn.Linear(256 * 5 * 5, 4096), nn.ReLU(), nn.Dropout(0.5), nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(0.5), nn.Linear(4096, 10), ) def forward(self, img): """ The forward function. """ feature = self.conv(img) output = self.fc(feature.view(img.shape[0], -1)) return output def train(net, tr_iter, te_iter, batch_size, optimizer, loss=nn.CrossEntropyLoss(), device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'), num_epochs=100): """ The train function. 
""" net = net.to(device) temp_batch_count = 0 print("Training on", device) for epoch in range(num_epochs): temp_tr_loss_sum, temp_tr_acc_sum, temp_num, temp_start_time = 0., 0., 0, time.time() for x, y in tr_iter: x = x.to(device) y = y.to(device) temp_y_pred = net(x) temp_loss = loss(temp_y_pred, y) optimizer.zero_grad() temp_loss.backward() optimizer.step() temp_tr_loss_sum += temp_loss.cpu().item() temp_tr_acc_sum += (temp_y_pred.argmax(dim=1) == y).sum().cpu().item() temp_num += y.shape[0] temp_batch_count += 1 test_acc = evaluate_accuracy(te_iter, net) print("Epoch %d, loss %.4f, training acc %.3f, test ass %.3f, time %.1f s" % (epoch + 1, temp_tr_loss_sum / temp_batch_count, temp_tr_acc_sum / temp_num, test_acc, time.time() - temp_start_time)) def evaluate_accuracy(data_iter, net, device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')): """ The evaluate function, and the performance measure is accuracy. """ ret_acc, temp_num = 0., 0 with torch.no_grad(): for x, y in data_iter: net.eval() # The evaluate mode, and the dropout is closed. ret_acc += (net(x.to(device)).argmax(dim=1) == y.to(device)).float().sum().cpu().item() net.train() temp_num += y.shape[0] return ret_acc / temp_num if __name__ == '__main__': temp_batch_size = 128 temp_resize = 224 temp_lr = 0.001 temp_num_epochs = 5 temp_tr_iter, temp_te_iter = load_data_fashion_mnist(temp_batch_size, resize=temp_resize) temp_net = AlexNet() temp_optimizer = optim.Adam(temp_net.parameters(), lr=temp_lr) train(temp_net, temp_tr_iter, temp_te_iter, temp_batch_size, temp_optimizer, num_epochs=temp_num_epochs)
支持代碼
util.SimpleTool
def load_data_fashion_mnist(batch_size=10, root='D:/Data/Datasets/FashionMNIST', resize=None):
    """
    Build shuffled-train / ordered-test data loaders for Fashion-MNIST.

    Both splits are created with ``download=True`` under ``root``.

    :param batch_size: Number of samples per batch for both loaders.
    :param root: Directory handed to the dataset class as its root.
    :param resize: If truthy, images are resized to this size before being
        converted to tensors.
    :return: The tuple ``(train_iter, test_iter)``.
    """
    # NOTE(review): `transforms`, `sys` and `cpu_count` are not imported in the
    # visible snippet -- presumably torchvision.transforms, the stdlib sys
    # module and a CPU-count helper; confirm against the module header.
    steps = [transforms.Resize(size=resize)] if resize else []
    steps.append(transforms.ToTensor())
    pipeline = transforms.Compose(steps)

    def _make_split(is_train):
        # Same root and transform for both splits; only the train flag differs.
        return torchvision.datasets.FashionMNIST(
            root=root, train=is_train, download=True, transform=pipeline)

    train_set = _make_split(True)
    test_set = _make_split(False)

    # No worker processes on Windows; one per reported CPU elsewhere.
    worker_total = 0 if sys.platform.startswith('win') else cpu_count()

    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=batch_size, shuffle=True, num_workers=worker_total)
    test_loader = torch.utils.data.DataLoader(
        test_set, batch_size=batch_size, shuffle=False, num_workers=worker_total)
    return train_loader, test_loader
注:
[1] Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). Imagenet classification with deep convolutional neural networks. In Advances in neural information processing systems (pp. 1097-1105).
[2] 李沐、Aston Zhang等老師的《動手學深度學習》一書。
本文同步分享在 博客「因吉」(CSDN)。
若有侵權,請聯繫 support@oschina.cn 刪除。
本文參與「OSC源創計劃」,歡迎正在閱讀的你也加入,一起分享。