PyTorch is an open-source third-party library from Facebook whose purpose is to let you build deep learning models and validate ideas quickly.
First, import the required third-party libraries:
```python
import torch
from torch import nn, optim
from matplotlib import pyplot as plt
from torch.utils.data import DataLoader
from torch.autograd import Variable
import numpy as np
from torchvision import datasets, transforms
```
```python
train_dataset = datasets.MNIST(root='./', train=True,
                               transform=transforms.ToTensor(), download=True)
test_dataset = datasets.MNIST(root='./', train=False,
                              transform=transforms.ToTensor(), download=True)
```
Older versions of PyTorch may raise an error when running this code; for the author, upgrading to 1.7.1 made it run successfully.
```python
# Batch size
batch_size = 64
# Load the training set
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
# Load the test set
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)
```
Since the data comes in with shape $$[batch\_size, 1, 28, 28]$$, we need to flatten it into a two-dimensional shape $$[batch\_size, 784]$$ for training and prediction.
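As a quick sanity check (an illustrative snippet, not part of the original code), we can pull one batch from the loader and flatten it:

```python
# Fetch a single batch and inspect its shape before and after flattening
images, labels = next(iter(train_loader))
print(images.shape)   # torch.Size([64, 1, 28, 28])
flat = images.reshape((images.shape[0], -1))
print(flat.shape)     # torch.Size([64, 784])
```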
```python
# Define the network structure
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(784, 10)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        # Flatten [batch_size, 1, 28, 28] into [batch_size, 784]
        inputs = x.reshape((x.shape[0], -1))
        x = self.fc1(inputs)
        out = self.softmax(x)
        return out
```
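The training and test functions below refer to `model`, `MSE_loss`, and `optimizer`. A minimal setup consistent with those names might look like this (a sketch; it assumes a CUDA-capable GPU, which the `.cuda()` calls below require):

```python
# Instantiate the network on the GPU
model = Net().cuda()
# Mean squared error loss, matching the one-hot targets built in train()
MSE_loss = nn.MSELoss()
# Plain SGD; alternatives are discussed at the end of this section
optimizer = optim.SGD(model.parameters(), lr=0.1)
```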
```python
def train():
    # Make sure layers such as Dropout are active during training
    model.train()
    for i, data in enumerate(train_loader):
        # Get a batch of data and labels
        inputs, labels = data
        inputs = inputs.cuda()
        labels = labels.cuda()
        # Get the model's predictions
        out = model(inputs)
        # Convert the labels to one-hot:
        # scatter_(dim, index, src) writes the values of src into this tensor
        # at the indices given by index along dimension dim (the indexing rule
        # mirrors the one described for gather())
        labels = labels.reshape((-1, 1))
        one_hot = torch.zeros(inputs.shape[0], 10).cuda().scatter(1, labels, 1)
        # Compute the loss; mse_loss requires both arguments to have the same shape
        loss = MSE_loss(out, one_hot)
        # Zero the gradients
        optimizer.zero_grad()
        # Backpropagate
        loss.backward()
        # Update the parameters
        optimizer.step()
```
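To make the scatter-based one-hot conversion concrete, here is a small standalone example (the values are made up for illustration):

```python
# Three labels, reshaped to [3, 1] so each row holds the column to set
labels = torch.tensor([2, 0, 1]).reshape((-1, 1))
# Write 1 into column labels[i] of row i
one_hot = torch.zeros(3, 3).scatter(1, labels, 1)
print(one_hot)
# tensor([[0., 0., 1.],
#         [1., 0., 0.],
#         [0., 1., 0.]])
```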
```python
def test():
    # Switch to evaluation mode so that layers such as Dropout are disabled
    model.eval()
    correct = 0
    for i, data in enumerate(test_loader):
        # Get a batch of data and labels
        inputs, labels = data
        inputs = inputs.cuda()
        labels = labels.cuda()
        # Get the model's predictions
        out = model(inputs)
        # The predicted class is the index of the largest output
        _, predicted = torch.max(out, 1)
        correct += (predicted == labels).sum()
    print("Test acc:{0}".format(correct.item() / len(test_dataset)))
```
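Neither function is invoked above; a typical driver loop (a sketch, with the epoch count chosen arbitrarily) would be:

```python
for epoch in range(10):
    print("epoch:", epoch)
    train()
    test()
```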
Dropout randomly deactivates neurons; its parameter p is the probability that each neuron is dropped.
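Dropout only takes effect in training mode; in evaluation mode nn.Dropout passes its input through unchanged (which is why test() above switches to model.eval()). A small illustration, with made-up values:

```python
drop = nn.Dropout(p=0.5)
x = torch.ones(1, 8)

drop.train()     # training mode: each element is zeroed with probability 0.5,
print(drop(x))   # and survivors are scaled by 1/(1-p) = 2, e.g. [[2., 0., 2., ...]]

drop.eval()      # evaluation mode: dropout is a no-op
print(drop(x))   # tensor([[1., 1., 1., 1., 1., 1., 1., 1.]])
```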
```python
# Define the network structure
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.layer1 = nn.Sequential(nn.Linear(784, 500), nn.Dropout(p=0.5), nn.Tanh())
        self.layer2 = nn.Sequential(nn.Linear(500, 200), nn.Dropout(p=0.5), nn.Tanh())
        self.layer3 = nn.Sequential(nn.Linear(200, 10), nn.Softmax(dim=1))

    def forward(self, x):
        inputs = x.reshape((x.shape[0], -1))
        x = self.layer1(inputs)
        x = self.layer2(x)
        out = self.layer3(x)
        return out
```
```python
# Define the cost function (the variable keeps its earlier name,
# but it now holds cross-entropy rather than MSE)
MSE_loss = nn.CrossEntropyLoss()
```
Besides the cross-entropy loss, PyTorch defines many other loss functions, which are not listed one by one here.
The cross-entropy loss expects its first argument to have shape [batch_size, C] (C is the number of classes) and its second argument to have shape [batch_size] (a one-dimensional vector of class indices), so when using it the one-hot conversion in train() should be removed and the raw labels passed directly. Note also that nn.CrossEntropyLoss applies LogSoftmax internally, so strictly speaking the final nn.Softmax layer in the network above becomes redundant.
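A short standalone example of the shapes it expects (the tensors are made up for illustration):

```python
criterion = nn.CrossEntropyLoss()
out = torch.randn(4, 10)              # [batch_size, C]: raw scores for 4 samples
labels = torch.tensor([3, 8, 0, 1])   # [batch_size]: class indices (dtype long)
loss = criterion(out, labels)
print(loss.item())
```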
```python
optimizer = optim.SGD(model.parameters(), lr=0.1)
# Or switch to Adam (note: Adam is usually run with a much
# smaller learning rate, e.g. 0.001)
optimizer = optim.Adam(model.parameters(), lr=0.1)
```
```python
optimizer = optim.SGD(model.parameters(), lr=0.1, weight_decay=0.001)
```
This adds L2 regularization, where weight_decay is the penalty coefficient.
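For SGD, weight_decay adds $$\lambda \cdot w$$ to each parameter's gradient, which matches differentiating a penalty of $$\frac{\lambda}{2}\|w\|^2$$ added to the loss. An equivalent manual version inside train() would look like this (a sketch only; passing weight_decay to the optimizer is the recommended way):

```python
# Manually adding the L2 penalty that weight_decay=0.001 implements
l2_lambda = 0.001
l2_penalty = sum((p ** 2).sum() for p in model.parameters())
loss = MSE_loss(out, one_hot) + (l2_lambda / 2) * l2_penalty
```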