```python
import torch
from torch import nn
from torch.nn import init
import numpy as np
import sys
sys.path.append('..')
import d2lzh_pytorch as d2l
import torchvision
import torchvision.transforms as transforms
```
```python
# Same dataset and batch size as in the previous section
batch_size = 256
mnist_train = torchvision.datasets.FashionMNIST(root='~/Datasets/FashionMNIST', download=True, train=True, transform=transforms.ToTensor())
mnist_test = torchvision.datasets.FashionMNIST(root='~/Datasets/FashionMNIST', download=True, train=False, transform=transforms.ToTensor())

if sys.platform.startswith('win'):
    num_worker = 0  # 0 means no extra worker processes are used to speed up data loading
else:
    num_worker = 4
train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_worker)
test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_worker)
```
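As a quick sanity check (not part of the original code), we can peek at one batch to confirm the shapes the loaders return:

```python
# Illustrative check of the shapes returned by train_iter
X, y = next(iter(train_iter))
print(X.shape)  # torch.Size([256, 1, 28, 28])
print(y.shape)  # torch.Size([256])
```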
The output layer of softmax regression is a fully connected layer, so a single linear module is all we need. Since each batch of samples X returned by the data loader has shape (batch_size, 1, 28, 28), we first use view() to reshape X to (batch_size, 784) before feeding it into the fully connected layer.
```python
num_inputs = 784
num_outputs = 10

class LinearNet(nn.Module):
    def __init__(self, num_inputs, num_outputs):
        super(LinearNet, self).__init__()
        self.linear = nn.Linear(num_inputs, num_outputs)

    def forward(self, x):
        y = self.linear(x.view(x.shape[0], -1))
        return y

net = LinearNet(num_inputs, num_outputs)
```
```python
# Define the reshaping step as its own FlattenLayer module
class FlattenLayer(nn.Module):
    def __init__(self):
        super(FlattenLayer, self).__init__()

    def forward(self, x):
        return x.view(x.shape[0], -1)
```
```python
from collections import OrderedDict

net = nn.Sequential(
    OrderedDict([
        ('flatten', FlattenLayer()),
        ('linear', nn.Linear(num_inputs, num_outputs))
    ])
)  # in the earlier linear regression example, num_outputs was 1
```
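As an illustrative check (not in the original), passing a random batch through net confirms that the flatten-plus-linear pipeline maps (batch_size, 1, 28, 28) inputs to (batch_size, 10) outputs:

```python
# Sketch: verify the output shape of the Sequential model with a random batch
dummy = torch.rand(256, 1, 28, 28)
print(net(dummy).shape)  # torch.Size([256, 10])
```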
```python
init.normal_(net.linear.weight, mean=0, std=0.01)
init.constant_(net.linear.bias, val=0)
```
```
Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)
```
```python
print(net)
```
```
Sequential(
  (flatten): FlattenLayer()
  (linear): Linear(in_features=784, out_features=10, bias=True)
)
```
```python
# PyTorch provides a function that combines the softmax computation and the cross-entropy loss
loss = nn.CrossEntropyLoss()
```
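For intuition (a minimal sketch with made-up logits, not part of the original), CrossEntropyLoss is equivalent to applying log_softmax followed by NLLLoss:

```python
# Sketch: CrossEntropyLoss == log_softmax + NLLLoss on toy logits
logits = torch.randn(3, 10)       # hypothetical scores for 3 samples, 10 classes
labels = torch.tensor([1, 4, 9])  # hypothetical true class indices
ce = nn.CrossEntropyLoss()(logits, labels)
nll = nn.NLLLoss()(torch.log_softmax(logits, dim=1), labels)
print(torch.allclose(ce, nll))    # True
```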
```python
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)
```
```python
def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
        n += y.shape[0]
    return acc_sum / n
```
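As a quick illustration (not in the original code), evaluating the randomly initialized model should give an accuracy close to 0.1, since there are 10 classes:

```python
# Sketch: accuracy of the untrained model should be near random guessing (≈0.1)
print(evaluate_accuracy(test_iter, net))
```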
```python
num_epochs, lr = 5, 0.1

def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, optimizer=None):
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()

            # zero the gradients
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()

            l.backward()
            if optimizer is None:
                # as in the previous section: no optimizer passed, use the hand-written SGD
                d2l.sgd(params, lr, batch_size)
            else:
                # optimizer is not None: the case used in this concise implementation
                optimizer.step()

            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))
```
```python
train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, optimizer)
```
```
epoch 1, loss 0.0031, train acc 0.749, test acc 0.765
epoch 2, loss 0.0022, train acc 0.813, test acc 0.808
epoch 3, loss 0.0021, train acc 0.826, test acc 0.818
epoch 4, loss 0.0020, train acc 0.832, test acc 0.816
epoch 5, loss 0.0019, train acc 0.837, test acc 0.821
```
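After training, a short prediction sketch (illustrative, not from the original) compares the model's predicted class indices with the true labels on one test batch:

```python
# Sketch: compare predicted vs. true class indices on one test batch
X, y = next(iter(test_iter))
y_pred = net(X).argmax(dim=1)
print(y_pred[:10])  # predicted class indices for the first 10 images
print(y[:10])       # true class indices
```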