The purpose of this article is to show how to perform handwritten digit recognition with PyTorch.
1 Import libraries and define parameters
```python
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Define some parameters
BATCH_SIZE = 64
EPOCHS = 10
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
```
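As a quick sanity check (an addition, not part of the original post), you can confirm which device the run will use and that tensors can be moved to it:

```python
print(DEVICE)  # cuda if a GPU is visible, otherwise cpu

# .to(DEVICE) is a no-op on CPU and a host-to-device copy on GPU
x = torch.zeros(2, 1, 28, 28).to(DEVICE)
print(x.device)
```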
2 Prepare the data
Use the datasets that ship with PyTorch.
```python
# Image preprocessing
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))  # mean and std of MNIST
])

# Training set
train_set = datasets.MNIST('data', train=True, transform=transform, download=True)
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)

# Test set (no need to shuffle it)
test_set = datasets.MNIST('data', train=False, transform=transform, download=True)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False)
```
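Before building the model it can help to pull one batch and check its shape and statistics. This sketch is an added sanity check, not part of the original post:

```python
# Fetch a single batch from the training loader
images, labels = next(iter(train_loader))
print(images.shape)  # torch.Size([64, 1, 28, 28])
print(labels.shape)  # torch.Size([64])

# After Normalize((0.1307,), (0.3081,)) the pixels are roughly zero-mean, unit-std
print(images.mean().item(), images.std().item())
```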
3 Prepare the model
```python
# Build the model
class ConvNet(nn.Module):
    # Input images have shape (batch, 1, 28, 28)
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, (3, 3))   # 1 input channel, 10 output channels, 3x3 kernel
        self.conv2 = nn.Conv2d(10, 32, (3, 3))
        self.fc1 = nn.Linear(12 * 12 * 32, 100)
        self.fc2 = nn.Linear(100, 10)

    def forward(self, x):
        x = self.conv1(x)             # (batch, 10, 26, 26)
        x = F.relu(x)
        x = self.conv2(x)             # (batch, 32, 24, 24)
        x = F.relu(x)
        x = F.max_pool2d(x, (2, 2))   # (batch, 32, 12, 12)
        x = x.view(x.size(0), -1)     # flatten to (batch, 12*12*32)
        x = self.fc1(x)               # (batch, 100)
        x = F.relu(x)
        x = self.fc2(x)               # (batch, 10)
        out = F.log_softmax(x, dim=1) # softmax then log, numerically more stable
        return out
```
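To verify the shape annotations in the comments above, you can run a dummy forward pass on CPU. This is an added sanity check, not part of the original post:

```python
# Instantiate the model and push a fake batch of 4 images through it
model_check = ConvNet()
dummy = torch.randn(4, 1, 28, 28)
out = model_check(dummy)
print(out.shape)             # torch.Size([4, 10])

# Each row holds log-probabilities, so exponentiating and summing gives 1
print(out.exp().sum(dim=1))  # tensor([1., 1., 1., 1.])
```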
4 Training
```python
# Define the model and optimizer
model = ConvNet().to(DEVICE)  # move the model to the GPU (if available)
optimizer = torch.optim.Adam(model.parameters())

# Training function: runs one epoch
def train(model, device, train_loader, optimizer, epoch):
    model.train()  # training mode: enables BatchNorm, Dropout, etc.
    for batch_idx, (data, target) in enumerate(train_loader):  # one batch at a time
        data, target = data.to(device), target.to(device)      # move the batch to the GPU
        output = model(data)
        loss = F.nll_loss(output, target)  # CrossEntropyLoss = log_softmax + NLLLoss
        optimizer.zero_grad()  # clear all gradients
        loss.backward()        # backpropagate to compute gradients for all parameters
        optimizer.step()       # take one step along the negative gradient
        if (batch_idx + 1) % 234 == 0:
            print('Train Epoch: {} [{}/{} ({:.1f}%)]\tLoss: {:.6f}'.format(
                epoch, (batch_idx + 1) * len(data), len(train_loader.dataset),
                100. * (batch_idx + 1) / len(train_loader), loss.item()))

# Test function
def test(model, device, test_loader):
    model.eval()  # eval mode: disables BatchNorm, Dropout, etc.
    test_loss = 0
    correct = 0
    with torch.no_grad():  # no gradient tracking needed
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up the batch losses
            pred = output.max(1, keepdim=True)[1]  # index of the largest log-probability
            # equivalent to: pred = torch.argmax(output, dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    # len(test_loader) is the number of batches; len(test_loader.dataset) is the number of samples
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.1f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

# Start training
for epoch in range(1, EPOCHS + 1):
    train(model, DEVICE, train_loader, optimizer, epoch)
    test(model, DEVICE, test_loader)
```
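A natural follow-up is persisting the trained weights and predicting a single image. This is a minimal sketch added here, not part of the original post; the file name mnist_cnn.pt is an arbitrary choice:

```python
# Save only the parameters (the recommended way to checkpoint a model)
torch.save(model.state_dict(), "mnist_cnn.pt")

# Restore into a fresh instance of the same architecture
restored = ConvNet().to(DEVICE)
restored.load_state_dict(torch.load("mnist_cnn.pt", map_location=DEVICE))
restored.eval()

# Predict one test image; unsqueeze(0) adds the batch dimension
img, label = test_set[0]
with torch.no_grad():
    pred = restored(img.unsqueeze(0).to(DEVICE)).argmax(dim=1).item()
print('predicted:', pred, 'actual:', label)
```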
Note that torch.max() has two usages (a short demo follows the list):

- Passing just a tensor returns the global maximum;
- torch.max(a, dim, [keepdim]) returns a tuple; the first element holds the maximum values, the second the indices (same effect as argmax);
- see https://pytorch.org/docs/stable/torch.html?highlight=max#torch.max for details;
- output.max() above behaves like torch.max(); the only difference is that the tensor doesn't need to be passed in, since it is the method's receiver.
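A small demonstration of the two usages (an added illustration, not in the original):

```python
a = torch.tensor([[1., 5., 3.],
                  [7., 2., 9.]])

print(torch.max(a))  # tensor(9.) -- the global maximum

values, indices = torch.max(a, dim=1)  # row-wise maximum
print(values)   # tensor([5., 9.])
print(indices)  # tensor([1, 2]) -- same as torch.argmax(a, dim=1)

print(a.max(1, keepdim=True)[1])  # the method form used in test() above
```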
The final results are as follows:
5 Summary
- Task workflow: prepare the data, prepare the model, train
- How to train with PyTorch's built-in datasets
- A custom model must implement the forward function
- What model.train() and model.eval() do
- The two ways of computing cross-entropy on the final layer x are equivalent: CrossEntropyLoss = log_softmax + nll_loss (see the sketch after this list)
- torch.max() has two usages with different return values
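The equivalence in the cross-entropy bullet can be checked numerically; this sketch is an addition, not part of the original post:

```python
import torch
import torch.nn.functional as F

logits = torch.randn(4, 10)          # raw scores for a batch of 4 samples
target = torch.tensor([3, 0, 9, 1])  # ground-truth class indices

loss_a = F.cross_entropy(logits, target)
loss_b = F.nll_loss(F.log_softmax(logits, dim=1), target)
print(torch.allclose(loss_a, loss_b))  # True
```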