pytorch基礎學習（一）

時間 2019-11-09

標籤 pytorch 基礎學習简体版

原文原文鏈接

　　在煉丹師的路上越走越遠，開始入手pytorch框架的學習，越煉越熟吧。

1. 張量的創建和操作

　　創建未初始化的矩陣，並對其進行初始化

a = torch.empty(5, 3)    # create a 5x3 tensor without initializing its contents
nn.init.zeros_(a)        # fill a with 0 in place
nn.init.constant_(a, 3)  # fill a with the constant 3 in place
nn.init.uniform_(a)      # fill a in place with samples from a uniform distribution

　　隨機數矩陣

torch.rand(5, 3)              # 5x3 tensor of uniform random numbers in [0, 1)
torch.rand_like(m)            # uniform random tensor with the same size as m
torch.randn(3, 3)             # 3x3 tensor drawn from the standard normal distribution (mean=0, variance=1)
torch.randint(1, 10, (3, 3))  # 3x3 integer tensor with values in [1, 10) -- the upper bound is exclusive

　　tensor類型和形狀

a = torch.Tensor([1, 2, 3])   # build a Tensor from a Python list
a = torch.eye(3, 4)           # 3x4 matrix with ones on the main diagonal
a = torch.ones(3, 4)
b = torch.ones_like(a)        # tensor of ones with the same size as a
a = torch.zeros(5, 3, dtype=torch.long)
a.dtype  # inspect the data type -> torch.int64
# Common dtypes (alias in parentheses):
#   32-bit float:   torch.float32 (torch.float)  -- the default floating point type
#   64-bit integer: torch.int64   (torch.long)
#   32-bit integer: torch.int32   (torch.int)
#   16-bit integer: torch.int16   (torch.short)
#   64-bit float:   torch.float64 (torch.double)

a.size()                      # torch.Size([5, 3])
h, w = a.size()               # torch.Size unpacks like a tuple: h=5, w=3

a.view(3, 5)        # reshape a to 3x5
a.view(-1, 15)      # -1 lets PyTorch infer that dimension, giving 1x15 here

　　tensor和numpy（array）的相互轉換

b = a.numpy()    # Tensor -> numpy array

c = np.ones((3,3))
d = torch.from_numpy(c)  # numpy array -> Tensor

2. 張量的操作

　　索引：支持numpy的常用索引和切片操作

　　加法：和numpy類似的廣播原則

# Indexing
x = torch.rand(5, 3)
y = torch.rand(5, 3)
y[1:, 2]       # slicing combined with integer indexing
y[y > 0.5]     # boolean-mask ("fancy") indexing

# Addition (same broadcasting rules as numpy)
result = x + y
result = torch.add(x, y)
y.add_(x)      # modifies y in place
# Methods whose name ends with "_" change the tensor in place,
# e.g. x.copy_(y) and x.t_() both modify x.

result = torch.empty(5, 3)
torch.add(x, y, out=result)  # the tensor passed as out must already exist

# A one-element tensor can be turned into a Python number with .item()
x = torch.ones(3, 4)
y = torch.sum(x)
print(y.item())     # 12.0 (a Python float); item() takes no arguments

　　cuda Tensor: pytorch支持GPU操作，可以在GPU上創建tensor，通過to()方法可以在CPU和GPU之間轉換tensor

if torch.cuda.is_available():
    device = torch.device("cuda")
    y = torch.ones_like(x, device=device)   # create the tensor directly on the GPU
    x = x.to(device)  # move x from the CPU to the GPU
    z = x+y
    print(z.to("cpu", torch.double))  # move back to the CPU and change the dtype in the same call

3. 自動求導（Autograd）

　　在pytorch搭建的神經網絡中，Tensor和Function是最主要的兩個類，二者一起組成了一個有向無環圖。前向傳播時，Function對tensor的值進行運算；反向傳播時，需要計算Function的導數來更新參數tensor，pytorch爲我們自動實現了求導。每一個tensor都有一個requires_grad屬性，若tensor.requires_grad=True，則計算圖會記錄對該tensor的所有操作，當進行backward時，pytorch就會自動計算其導數值，並保存在tensor的grad屬性中。

x = torch.ones(2, 2, requires_grad=True)  # requires_grad=True: backward() will compute gradients for x
y = x + 2
# Attributes of a tensor taking part in autograd:
#   y.requires_grad   whether autograd tracks y; inherited from x, so True here
#   y.grad            gradient value stored by backward()
#   y.grad_fn         the Function that produced y from x, e.g. grad_fn=<AddBackward0>
#
# Ways to stop tracking:
#   tensor.detach()                                  returns a tensor detached from the computation history
#   with torch.no_grad(): ...                        operations inside the block are not recorded
#   with torch.set_grad_enabled(phase == 'train'): ...   records only when the condition is True
#
# tensor.backward() runs back-propagation and computes the gradients. When the
# tensor holds a single value no argument is needed; otherwise a gradient
# tensor of the same shape must be passed.

backward:

# out is a scalar, so backward() is called without an argument
x = torch.ones(2, 2, requires_grad=True) 
y = x+2
z = y*y*3
out = z.mean()
out.backward()
print(x.grad)     # tensor([[4.5000, 4.5000],[4.5000, 4.5000]]) -- d(out)/dx = 6*(x+2)/4 = 4.5 at x=1
print(y.grad)     # None

backward計算過程以下：

# y is not a scalar, so backward() needs a gradient argument of the same shape
x = torch.ones(2, 2, requires_grad=True) 
y = 2*x+2
y.backward(torch.tensor([[1,1],[1,1]], dtype=torch.float))  # can be read as tensor([1, 1, 1, 1]) * dy/dx
print(x.grad)    # tensor([[2.,2.],[2.,2.]])


# y is not a scalar, so backward() needs a gradient argument; here the entries act as per-element weights
x = torch.ones(2, 2, requires_grad=True) 
y = 2*x+2
y.backward(torch.tensor([[1,0.1],[1,0.1]], dtype=torch.float))  # can be read as tensor([1, 0.1, 1, 0.1]) * dy/dx
print(x.grad)    # tensor([[2.0000,0.2000],[2.0000,0.2000]])

4. 神經網絡(Neural Networks)

　　訓練神經網絡的典型步驟以下：

　  定義神經網絡(權重參數)
    在數據集上進行迭代
    前向傳播，神經網絡逐層計算輸入的數據
    計算loss（神經網絡的計算值和正確值的距離）
    計算梯度，反向傳遞到神經網絡中，更新神經網絡的權重(weight = weight - learning_rate * gradient)

4.1 定義神經網絡

　　定義一個神經網絡，須要繼承torch.nn.Module, 並實現初始化方法和前向傳播。下面代碼爲AlexNet的實現(經過三種方式定義網絡結構)：

#coding:utf-8

#pytorch implementation of AlexNet

import torch.nn as nn
import torch.nn.functional as F
import torch


class AlexNet(nn.Module):
    """AlexNet built from two nn.Sequential stacks: `features` and `classifier`.

    The flatten step in forward() assumes the feature extractor ends with a
    256 x 6 x 6 map, which is what a 3 x 227 x 227 input produces.
    """

    def __init__(self):
        super().__init__()

        # MaxPool2d is left at its defaults (padding=0, dilation=1,
        # ceil_mode=False), so output sizes are rounded down.
        feature_layers = [
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.features = nn.Sequential(*feature_layers)

        classifier_layers = [
            nn.Dropout(p=0.5),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, 1000),
        ]
        self.classifier = nn.Sequential(*classifier_layers)

    def forward(self, x):
        """Return the 1000-way class scores for a batch of images."""
        feature_maps = self.features(x)
        flattened = feature_maps.view(-1, 256 * 6 * 6)
        return self.classifier(flattened)
        
        
class AlexNet1(nn.Module):
    """AlexNet with every convolution stage kept in its own nn.Sequential.

    Stages conv1..conv5 are applied in order, the result is flattened to
    256 * 6 * 6 features and passed to `classifier`.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 11x11 conv with stride 4, then 3x3/2 max pooling.
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        # Stage 2: 5x5 conv, then 3x3/2 max pooling.
        self.conv2 = nn.Sequential(
            nn.Conv2d(64, 192, kernel_size=5, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        # Stages 3 and 4: 3x3 convs without pooling.
        self.conv3 = nn.Sequential(
            nn.Conv2d(192, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
        )
        self.conv4 = nn.Sequential(
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
        )
        # Stage 5: 3x3 conv, then 2x2/2 max pooling.
        self.conv5 = nn.Sequential(
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, 1000),
        )

    def forward(self, x):
        """Return the 1000-way class scores for a batch of images."""
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5):
            x = stage(x)
        flattened = x.view(-1, 256 * 6 * 6)
        return self.classifier(flattened)

class AlexNet2(nn.Module):
    """AlexNet with every layer stored as an individual attribute.

    forward() wires the layers together by hand; one Dropout module is shared
    by both dropout positions in the classifier part.
    """

    def __init__(self):
        super().__init__()
        # Convolutional part. MaxPool2d keeps its defaults (padding=0,
        # dilation=1, ceil_mode=False), so output sizes are rounded down.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2)
        self.relu1 = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.conv2 = nn.Conv2d(64, 192, kernel_size=5, stride=1, padding=2)
        self.relu2 = nn.ReLU(inplace=True)
        self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.conv3 = nn.Conv2d(192, 384, kernel_size=3, stride=1, padding=1)
        self.relu3 = nn.ReLU(inplace=True)
        self.conv4 = nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1)
        self.relu4 = nn.ReLU(inplace=True)
        self.conv5 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.relu5 = nn.ReLU(inplace=True)
        self.pool5 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Fully connected part.
        self.dropout = nn.Dropout(p=0.5)
        self.linear1 = nn.Linear(256 * 6 * 6, 4096)
        self.relu6 = nn.ReLU(inplace=True)
        self.linear2 = nn.Linear(4096, 4096)
        self.relu7 = nn.ReLU(inplace=True)
        self.linear3 = nn.Linear(4096, 1000)

    def forward(self, x):
        """Return the 1000-way class scores for a batch of images."""
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.pool1(x)

        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool2(x)

        x = self.conv3(x)
        x = self.relu3(x)

        x = self.conv4(x)
        x = self.relu4(x)

        x = self.conv5(x)
        x = self.relu5(x)
        x = self.pool5(x)

        x = x.view(-1, 256 * 6 * 6)

        # dropout -> linear1 -> relu -> dropout -> linear2 -> relu -> linear3
        x = self.dropout(x)
        x = self.linear1(x)
        x = self.relu6(x)
        x = self.dropout(x)
        x = self.linear2(x)
        x = self.relu7(x)
        return self.linear3(x)

if __name__ == "__main__":
    # Swap in AlexNet() or AlexNet1() here to exercise the other two definitions.
    net = AlexNet2()
    # One random 3-channel 227x227 image; the network takes a 4-D batch.
    batch = torch.randn(1, 3, 227, 227)
    output = net(batch)
    print(output)

AlexNet

4.2 定義loss函數

　　pytorch中經常使用的loss函數有：

　　nn.L1Loss()　　

# nn.L1Loss(): mean of the absolute differences between prediction and target
x = torch.tensor([[1,1],[2,2]], dtype=torch.float)  # the loss is computed on float tensors
y = torch.tensor([[3,3],[4,4]], dtype=torch.float)
criterion = nn.L1Loss()
loss = criterion(x, y)   # (|3-1| + |3-1| + |4-2| + |4-2|) / 4 = 2.0
print(loss.item()) # 2.0

　　nn.SmoothL1Loss(size_average=None, reduce=True,reduction='mean')

　　#reduce爲False時，返回向量，即整個batch中每一個樣本的loss

　　# reduce 默認爲True，返回標量， size_average爲True時返回batch_loss的平均值，爲False時返回batch_loss的和（size_average廢棄，由reduction取代）

# nn.SmoothL1Loss(): quadratic (0.5 * d**2) where |d| < 1, linear (|d| - 0.5) elsewhere
x = torch.tensor([[1,1],[2,2]], dtype=torch.float)  # the loss is computed on float tensors
y = torch.tensor([[1.5,1.5],[4,4]], dtype=torch.float)
criterion = nn.SmoothL1Loss()
# (0.5*(1.5-1)**2 + 0.5*(1.5-1)**2 + (|4-2|-0.5) + (|4-2|-0.5)) / 4
#   = (0.125 + 0.125 + 1.5 + 1.5) / 4 = 0.8125
loss = criterion(x, y)
print(loss.item()) # 0.8125

　　nn.MSELoss()

# nn.MSELoss(): mean of the squared differences (L2 loss)
x = torch.tensor([[1,1],[2,2]], dtype=torch.float)  # the loss is computed on float tensors
y = torch.tensor([[3,3],[4,4]], dtype=torch.float)
criterion = nn.MSELoss()
loss = criterion(x, y)   # ((3-1)**2 + (3-1)**2 + (4-2)**2 + (4-2)**2) / 4 = 4.0
print(loss.item()) # 4.0

　　nn.NLLLoss() : 負對數似然損失函數（Negative Log Likelihood）

　　　　和CrossEntropyLoss()的惟一區別是，不會對輸入值進行softmax計算。（所以model計算輸出時，最後一層須要加上LogSoftmax）

　　　　　　　　　　　　　　　　　　　　　　　　　　　　（假如x=[1, 2, 3], class=0; 則f=x[0]=1）

# nn.NLLLoss(): negative log likelihood loss; it expects log-probabilities,
# so LogSoftmax is applied to the raw scores first
input = torch.randn(3, 5, requires_grad=True)
target = torch.empty(3, dtype=torch.long).random_(5)  # class indices, must be torch.long
criterion = nn.NLLLoss()
loss = criterion(nn.LogSoftmax(dim=1)(input), target)
print(loss.item())

　　nn.CrossEntropyLoss() 交叉熵函數
　　nn.LogSoftmax()和nn.NLLLoss()結合體：會對輸入值使用softmax，再進行計算（所以model計算輸出時不須要進行softmax）

　　　　（參考： https://www.cnblogs.com/marsggbo/p/10401215.html）

# Fixed scores (3 samples x 5 classes) so the result can be compared with the
# LogSoftmax + NLLLoss and numpy versions. A random alternative would be:
#   input = torch.randn(3, 5, requires_grad=True)
#   target = torch.empty(3, dtype=torch.long).random_(5)
loss = nn.CrossEntropyLoss()
input = torch.tensor(
    [
        [-0.7678, 0.2773, -0.9249, 1.4503, 0.5256],
        [-0.8529, -1.4283, -0.3284, 1.8608, -0.3206],
        [0.1201, -0.7239, 0.6798, -0.8335, -2.1710],
    ],
    requires_grad=True,
)

# One class index per input row (torch.long, not one-hot).
target = torch.tensor([0, 0, 1], dtype=torch.long)
output = loss(input, target)
print(output.item())


# The same loss computed in two explicit steps: LogSoftmax followed by NLLLoss.
input = torch.tensor(
    [
        [-0.7678, 0.2773, -0.9249, 1.4503, 0.5256],
        [-0.8529, -1.4283, -0.3284, 1.8608, -0.3206],
        [0.1201, -0.7239, 0.6798, -0.8335, -2.1710],
    ],
    requires_grad=True,
)
target = torch.tensor([0, 0, 1], dtype=torch.long)  # class indices, must be torch.long
log_softmax = nn.LogSoftmax(dim=1)
nll = nn.NLLLoss()
sft = log_softmax(input)
nls = nll(sft, target)

　　　numpy實現交叉熵函數

def label_encoder(target, nclass):
    """Return the one-hot encoding of integer class labels.

    target is a 1-D numpy array of class indices; the result is a float array
    of shape (len(target), nclass) with a single 1 per row. The encoded matrix
    is also printed.
    """
    n_samples = target.shape[0]
    label = np.zeros((n_samples, nclass))
    # Set column target[i] of row i for every row at once.
    label[np.arange(n_samples), target] = 1
    print(label)
    return label

def cross_entropy_loss(pred, target):
    """Return the mean cross-entropy loss of raw scores against class labels.

    Args:
        pred: 2-D numpy array of raw scores, one row per sample and one
            column per class.
        target: 1-D numpy array of integer class indices, one per row of pred.

    Returns:
        The loss averaged over the batch. The log-softmax matrix and the loss
        are also printed.
    """
    target = label_encoder(target, pred.shape[1])  # one-hot encode the labels
    # Log-softmax via the log-sum-exp trick: subtracting each row's maximum
    # does not change the result mathematically but keeps np.exp from
    # overflowing when the scores are large.
    shifted = pred - np.max(pred, axis=1, keepdims=True)
    log_sft = shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))
    print(log_sft)
    loss = -np.sum(log_sft * target) / pred.shape[0]  # average over the batch
    print(loss)
    return loss
    
if __name__ == "__main__":
    # Same scores and labels as the torch example: 3 samples, 5 classes.
    scores = np.array([
        [-0.7678, 0.2773, -0.9249, 1.4503, 0.5256],
        [-0.8529, -1.4283, -0.3284, 1.8608, -0.3206],
        [0.1201, -0.7239, 0.6798, -0.8335, -2.1710],
    ])
    labels = np.array([0, 0, 1])
    loss = cross_entropy_loss(scores, labels)

numpy實現交叉熵

　　nn.BCELoss() 二分類時的交叉熵(Binary cross entropy),

　　　　nn.CrossEntropyLoss()的特例,即分類限定爲二分類，label必須爲0,1; 模型輸出最後一層須要用sigmoid函數

# Binary cross entropy on probabilities: the raw scores go through a sigmoid first.
input = torch.randn(5, 1, requires_grad=True)
target = torch.empty(5, 1).random_(2)   # labels are 0.0 or 1.0
sigmoid = nn.Sigmoid()
pre = sigmoid(input)
criterion = nn.BCELoss()
loss = criterion(pre, target)
print(loss.item())

　　nn.BCEWithLogitsLoss(): 將nn.sigmoid()和nn.BCELoss()結合

# BCEWithLogitsLoss takes the raw scores directly; no separate sigmoid call is needed.
input = torch.randn(5, 1, requires_grad=True)
target = torch.empty(5, 1).random_(2)   # labels are 0.0 or 1.0
criterion = nn.BCEWithLogitsLoss()
loss = criterion(input, target)
print(loss.item())

4.3 定義優化器

　　經過loss函數計算出網絡的預測值和真實值之間的loss後，loss.backward()能將梯度反向傳播，須要根據梯度來更新網絡的權重係數。優化器能幫咱們實現權重係數的更新。

　　不採用優化器：

# Update the weights by hand, without an optimizer
net = nn.Linear(2, 2)   # minimal stand-in model: 2 input features -> 2 class scores
criterion = nn.CrossEntropyLoss()
input = torch.randn(5, 2, requires_grad=True)
# CrossEntropyLoss needs class indices: dtype long, shape (N,) -- not a float (N, 1) tensor
target = torch.empty(5, dtype=torch.long).random_(2)
pre = net(input)
loss = criterion(pre, target)
net.zero_grad()
loss.backward()
learning_rate = 0.01
with torch.no_grad():   # the update itself must not be recorded by autograd
    for f in net.parameters():
        f.sub_(f.grad * learning_rate)  # weight = weight - learning_rate * gradient

　　採用優化器：

# Update the weights with an optimizer
import torch.optim as optim
net = nn.Linear(2, 2)   # minimal stand-in model: 2 input features -> 2 class scores
optimizer = optim.SGD(net.parameters(), lr=0.01)

criterion = nn.CrossEntropyLoss()
input = torch.randn(5, 2, requires_grad=True)
# CrossEntropyLoss needs class indices: dtype long, shape (N,) -- not a float (N, 1) tensor
target = torch.empty(5, dtype=torch.long).random_(2)
optimizer.zero_grad()
pre = net(input)
loss = criterion(pre, target)
loss.backward()
optimizer.step()    # apply the update to the weights

4.4 定義DataLoader

　　進行網絡訓練時，可以一次性導入所有數據（BGD, batch gradient descent），可以一次導入一條數據（SGD, stochastic gradient descent），還可以一次導入部分數據（MBGD, mini-batch gradient descent），目前MBGD是較爲常用的方法。而且一般情況下，無特殊說明時，論文裏提及的SGD，指的都是以MBGD的方式進行數據導入和網絡訓練。

　　採用pytorch自帶的數據集：

# Use a dataset that ships with torchvision
import torchvision
import torchvision.transforms as transforms  # the module is named "transforms" (plural)
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5),(0.5, 0.5, 0.5))
    ]
    )
trainset = torchvision.datasets.CIFAR10(root="./data", train=True, download=True, transform = transform)  # downloads the dataset if needed
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4, shuffle=True, num_workers=2)             # every mini-batch holds 4 samples

　　採用自定義的數據集：

class RandomDataset(Dataset):
    """Dataset of `length` random vectors, each with `size` features."""

    def __init__(self, size, length):
        # All samples are drawn once up front from a standard normal distribution.
        self.data = torch.randn(length, size)
        self.len = length

    def __len__(self):
        """Return the number of samples."""
        return self.len

    def __getitem__(self, index):
        """Return the sample stored at position `index`."""
        sample = self.data[index]
        return sample

# input_size, data_size and batch_size are not defined in this snippet; they
# are expected to be set by the surrounding script.
rand_loader = DataLoader(dataset=RandomDataset(input_size, data_size),
                         batch_size=batch_size, shuffle=True)

自定義數據集一

class MyDataset(Dataset):
    """Image dataset described by an annotation CSV.

    The CSV is read with its first column as the index and must provide a
    'path' column (image file path) and a 'species' column (integer label).

    Args:
        root_dir: dataset root directory; stored but not used by this class.
        annotations_file: path of the annotation CSV.
        transform: optional callable applied to the loaded PIL image.
    """

    def __init__(self, root_dir, annotations_file, transform=None):

        self.root_dir = root_dir
        self.annotations_file = annotations_file
        self.transform = transform

        if not os.path.isfile(self.annotations_file):
            # Only a warning: pd.read_csv below reports the actual failure.
            print(self.annotations_file + ' does not exist!')
        self.file_info = pd.read_csv(annotations_file, index_col=0)
        self.size = len(self.file_info)

    def __len__(self):
        """Return the number of annotated images."""
        return self.size

    def __getitem__(self, idx):
        """Return {'image': ..., 'species': int} for row idx, or None if the image file is missing."""
        # Positional lookup for both columns, so path and label always come
        # from the same row even when the CSV index is not 0..n-1.
        row = self.file_info.iloc[idx]
        image_path = row['path']
        if not os.path.isfile(image_path):
            print(image_path + '  does not exist!')
            return None

        image = Image.open(image_path).convert('RGB')
        label_species = int(row['species'])

        sample = {'image': image, 'species': label_species}
        if self.transform:
            sample['image'] = self.transform(image)
        return sample

# Training pipeline: resize to 600x600, take a random 500x500 crop, flip
# horizontally at random, then convert to a tensor.
train_transforms = transforms.Compose([transforms.Resize((600, 600)),
                       transforms.RandomCrop(500),
                                       transforms.RandomHorizontalFlip(),
                                       transforms.ToTensor(),
                                       ])
# Validation pipeline: resize to 500x500 and convert to a tensor.
val_transforms = transforms.Compose([transforms.Resize((500, 500)),
                                     transforms.ToTensor()
                                     ])

# ROOT_DIR, TRAIN_DIR, VAL_DIR, TRAIN_ANNO and VAL_ANNO are not defined in
# this snippet; they are expected to be set by the surrounding script.
train_dataset = MyDataset(root_dir= ROOT_DIR + TRAIN_DIR,
                          annotations_file= TRAIN_ANNO,
                          transform=train_transforms)

test_dataset = MyDataset(root_dir= ROOT_DIR + VAL_DIR,
                         annotations_file= VAL_ANNO,
                         transform=val_transforms)

# The training loader shuffles and batches 8 samples; the test loader keeps DataLoader's defaults.
train_loader = DataLoader(dataset=train_dataset, batch_size=8, shuffle=True)
test_loader = DataLoader(dataset=test_dataset)

自定義數據集二

4.5 神經網絡訓練：

　　準備好數據集，定義好模型，loss函數，優化器，數據迭代器，即可以進行網絡訓練了。下面是一個簡單的圖片分類器，將圖片分紅三類：兔子，老鼠，雞

　　A. 準備數據集的label文件：

import pandas as pd
import os
from PIL import Image

# ROOTS = '../Dataset/'
ROOTS = '/home/ai/project/data/project_I/Dataset/'
PHASE = ['train', 'val']
SPECIES = ['rabbits', 'rats', 'chickens']  # the class index is the position in this list: [0, 1, 2]

DATA_info = {'train': {'path': [], 'species': []},
             'val': {'path': [], 'species': []}
             }
for p in PHASE:
    for label, s in enumerate(SPECIES):
        DATA_DIR = os.path.join(ROOTS, p, s)

        for item in os.listdir(DATA_DIR):
            image_path = os.path.join(DATA_DIR, item)
            try:
                # Opening the file is only a readability check; the context
                # manager closes the handle again so none are leaked.
                with Image.open(image_path):
                    pass
            except OSError:
                # Not a readable image: leave it out of the annotation file.
                continue
            DATA_info[p]['path'].append(image_path)
            DATA_info[p]['species'].append(label)

    # One annotation CSV per phase, written to the current directory.
    ANNOTATION = pd.DataFrame(DATA_info[p])
    ANNOTATION.to_csv('Species_%s_annotation.csv' % p)
    print('Species_%s_annotation file is saved.' % p)

生成label文件

　　生成的label文件格式以下，包括圖片路徑，以及對應的分類（0，1， 2依次表明'rabbits', 'rats', 'chickens'）

　　B. 定義網絡結構：

　　　　能夠本身搭建全新的網絡結構，也能夠採用成熟的網絡架構，如AlexNet，VGG，GoogleNet, Resnet等。下面分別展現了自定義和借用Resnet網絡結構：

import torch.nn as nn
import torchvision
import torch.nn.functional as F


class Net(nn.Module):
    """Small CNN that classifies an image into three species.

    Three conv -> max-pool -> ReLU stages are followed by two fully connected
    layers. fc1 is sized for 3 x 500 x 500 inputs (the size the training
    transforms produce); the spatial size then evolves as
    500 -> conv1 498 -> pool 249 -> conv2 247 -> pool 123 -> conv3 121 -> pool(3) 40.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 3, 3)
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)
        self.relu1 = nn.ReLU(inplace=True)

        self.conv2 = nn.Conv2d(3, 6, 3)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)
        self.relu2 = nn.ReLU(inplace=True)

        self.conv3 = nn.Conv2d(6, 12, 3)

        # 12 channels x 40 x 40 after the third stage (see class docstring).
        self.fc1 = nn.Linear(12 * 40 * 40, 150)
        self.relu3 = nn.ReLU(inplace=True)

        self.drop = nn.Dropout2d()  # kept for compatibility; forward() uses F.dropout

        self.fc2 = nn.Linear(150, 3)
        # self.softmax1 = nn.Softmax(dim=1)

    def forward(self, x):
        """Return the raw class scores, shape (batch, 3), for a batch of 3 x 500 x 500 images."""
        x = self.conv1(x)
        x = self.maxpool1(x)
        x = self.relu1(x)

        x = self.conv2(x)
        x = self.maxpool2(x)
        x = self.relu2(x)

        # Apply pooling and ReLU through the functional API; nn.MaxPool2d and
        # nn.ReLU are module classes and cannot be called on a tensor directly.
        x = F.relu(F.max_pool2d(self.conv3(x), kernel_size=3))

        # Flatten per sample, so a wrong input size fails in fc1 with a clear
        # shape error instead of silently mixing samples.
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.relu3(x)

        x = F.dropout(x, training=self.training)

        # No softmax here: the scores are left raw for the loss function.
        x_species = self.fc2(x)

        return x_species

自定義網絡結構

import torch.nn as nn
import torchvision
import torch.nn.functional as F
from torchvision import models


class Net(nn.Module):
    """Three-class classifier head on top of a backbone model.

    All children of `model` except the last one are reused as the feature
    extractor. The flatten step and fc1 assume it emits 512 values per sample.
    """

    def __init__(self, model):
        super().__init__()

        # Drop the backbone's final child and keep the rest as one Sequential.
        backbone_children = list(model.children())
        self.resnet18_layer = nn.Sequential(*backbone_children[:-1])

        self.fc1 = nn.Linear(512, 150)
        self.relu3 = nn.ReLU(inplace=True)

        self.fc2 = nn.Linear(150, 3)
        self.softmax1 = nn.Softmax(dim=1)  # defined but not applied in forward()

    def forward(self, x):
        """Return the raw class scores, shape (batch, 3)."""
        features = self.resnet18_layer(x)
        flat = features.view(-1, 512)
        hidden = self.relu3(self.fc1(flat))
        # The scores are returned without softmax.
        return self.fc2(hidden)

採用Resnet

　　C. 定義訓練主函數：

　　訓練網絡時，可以對自定義的網絡從頭訓練，也可以採用在ImageNet上預訓練好的網絡進行finetune，這裏採用預訓練好的ResNet18。

import os
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
import torch
from Species_Network import *
from torchvision.transforms import transforms
from PIL import Image
import pandas as pd
import random
from torch import optim
from torch.optim import lr_scheduler
import copy

ROOT_DIR = '../Dataset/'
TRAIN_DIR = 'train/'
VAL_DIR = 'val/'
TRAIN_ANNO = 'Species_train_annotation.csv'
VAL_ANNO = 'Species_val_annotation.csv'
CLASSES = ['Mammals', 'Birds']
SPECIES = ['rabbits', 'rats', 'chickens']

class MyDataset(torch.utils.data.Dataset):
    """Image dataset described by an annotation CSV.

    The CSV is read with its first column as the index and must provide a
    'path' column (image file path) and a 'species' column (integer label).

    Args:
        root_dir: dataset root directory; stored but not used by this class.
        annotations_file: path of the annotation CSV.
        transform: optional callable applied to the loaded PIL image.
    """

    def __init__(self, root_dir, annotations_file, transform=None):

        self.root_dir = root_dir
        self.annotations_file = annotations_file
        self.transform = transform

        if not os.path.isfile(self.annotations_file):
            # Only a warning: pd.read_csv below reports the actual failure.
            print(self.annotations_file + ' does not exist!')
        self.file_info = pd.read_csv(annotations_file, index_col=0)
        self.size = len(self.file_info)

    def __len__(self):
        """Return the number of annotated images."""
        return self.size

    def __getitem__(self, idx):
        """Return {'image': ..., 'species': int} for row idx, or None if the image file is missing."""
        # Positional lookup for both columns, so path and label always come
        # from the same row even when the CSV index is not 0..n-1.
        row = self.file_info.iloc[idx]
        image_path = row['path']
        if not os.path.isfile(image_path):
            print(image_path + '  does not exist!')
            return None

        image = Image.open(image_path).convert('RGB')
        label_species = int(row['species'])

        sample = {'image': image, 'species': label_species}
        if self.transform:
            sample['image'] = self.transform(image)
        return sample

# Training pipeline: resize to 500x500, flip horizontally at random, convert to a tensor.
train_transforms = transforms.Compose([transforms.Resize((500, 500)),
                                       transforms.RandomHorizontalFlip(),
                                       transforms.ToTensor(),
                                       ])
# Validation pipeline: resize to 500x500 and convert to a tensor.
val_transforms = transforms.Compose([transforms.Resize((500, 500)),
                                     transforms.ToTensor()
                                     ])

train_dataset = MyDataset(root_dir= ROOT_DIR + TRAIN_DIR,
                          annotations_file= TRAIN_ANNO,
                          transform=train_transforms)

test_dataset = MyDataset(root_dir= ROOT_DIR + VAL_DIR,
                         annotations_file= VAL_ANNO,
                         transform=val_transforms)

# The training loader shuffles and batches 128 samples; the test loader keeps DataLoader's defaults.
train_loader = DataLoader(dataset=train_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(dataset=test_dataset)
# train_model and visualize_model look the loaders up by phase name.
data_loaders = {'train': train_loader, 'val': test_loader}

# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

def visualize_dataset():
    """Show one randomly chosen training sample.

    Prints the dataset size, then the sample's index, image tensor shape and
    species name, and displays the image with matplotlib. Reads the
    module-level train_dataset, train_loader and SPECIES.
    """
    print(len(train_dataset))
    # randrange excludes the upper bound; random.randint(0, n) can return n,
    # which is one past the last valid index.
    idx = random.randrange(len(train_dataset))
    sample = train_loader.dataset[idx]
    print(idx, sample['image'].shape, SPECIES[sample['species']])
    img = sample['image']
    plt.imshow(transforms.ToPILImage()(img))
    plt.show()
visualize_dataset()

def train_model(model, criterion, optimizer, scheduler, num_epochs=50):
    """Train and validate model, keeping the weights with the best validation accuracy.

    Every epoch runs a 'train' phase and a 'val' phase over the module-level
    data_loaders dict, moving each batch to the module-level device. The best
    weights are loaded back into the model and saved to 'best_model.pt'.

    Args:
        model: network whose output can be viewed as (batch, 3) class scores.
        criterion: loss called as criterion(scores, labels).
        optimizer: optimizer over the model's parameters.
        scheduler: learning-rate scheduler stepped once per epoch after the
            training phase; may be None.
        num_epochs: number of epochs to run.

    Returns:
        (model, Loss_list, Accuracy_list_species). Both dicts map 'train' and
        'val' to a list with one float per epoch; accuracies are percentages.
    """
    Loss_list = {'train': [], 'val': []}
    Accuracy_list_species = {'train': [], 'val': []}

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-*' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            dataset_size = len(data_loaders[phase].dataset)
            running_loss = 0.0
            corrects_species = 0

            for data in data_loaders[phase]:
                inputs = data['image'].to(device)
                labels_species = data['species'].to(device)
                optimizer.zero_grad()

                # Gradients are only tracked during the training phase.
                with torch.set_grad_enabled(phase == 'train'):
                    x_species = model(inputs)
                    x_species = x_species.view(-1, 3)

                    _, preds_species = torch.max(x_species, 1)

                    loss = criterion(x_species, labels_species)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # loss is a batch mean, so weight it by the batch size.
                running_loss += loss.item() * inputs.size(0)

                # .item() keeps the counter a plain int, so the statistics
                # below are Python floats that can be plotted directly.
                corrects_species += torch.sum(preds_species == labels_species).item()

            # Advance the learning-rate schedule once per epoch, after the
            # optimizer updates of the training phase.
            if phase == 'train' and scheduler is not None:
                scheduler.step()

            epoch_loss = running_loss / dataset_size
            Loss_list[phase].append(epoch_loss)

            epoch_acc_species = corrects_species / dataset_size
            epoch_acc = epoch_acc_species

            Accuracy_list_species[phase].append(100 * epoch_acc_species)
            print('{} Loss: {:.4f}  Acc_species: {:.2%}'.format(phase, epoch_loss,epoch_acc_species))

            if phase == 'val' and epoch_acc > best_acc:

                best_acc = epoch_acc_species
                best_model_wts = copy.deepcopy(model.state_dict())
                print('Best val species Acc: {:.2%}'.format(best_acc))

    model.load_state_dict(best_model_wts)
    torch.save(model.state_dict(), 'best_model.pt')
    print('Best val species Acc: {:.2%}'.format(best_acc))
    return model, Loss_list,Accuracy_list_species

NUM_EPOCHS = 100

network = Net().to(device)
optimizer = optim.SGD(network.parameters(), lr=0.005, momentum=0.9, weight_decay=1e-6)  # weight_decay: L2 regularization penalty
criterion = nn.CrossEntropyLoss()
# Configured to multiply the LR by 0.1 after every single epoch.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)
model, Loss_list, Accuracy_list_species = train_model(network, criterion, optimizer, exp_lr_scheduler, num_epochs=NUM_EPOCHS)

# The x axis follows the number of recorded epochs instead of a hard-coded 100.
x = range(len(Loss_list["val"]))
y1 = Loss_list["val"]
y2 = Loss_list["train"]

plt.plot(x, y1, color="r", linestyle="-", marker="o", linewidth=1, label="val")
plt.plot(x, y2, color="b", linestyle="-", marker="o", linewidth=1, label="train")
plt.legend()
plt.title('train and val loss vs. epoches')
plt.ylabel('loss')
plt.savefig("train and val loss vs epoches.jpg")
plt.close('all')  # close the loss figure

# float() accepts plain numbers as well as zero-dimensional tensors (also on
# the GPU), so the values can always be handed to matplotlib.
y5 = [float(v) for v in Accuracy_list_species["train"]]
y6 = [float(v) for v in Accuracy_list_species["val"]]
plt.plot(x, y5, color="r", linestyle="-", marker=".", linewidth=1, label="train")
plt.plot(x, y6, color="b", linestyle="-", marker=".", linewidth=1, label="val")
plt.legend()
plt.title('train and val Species acc vs. epoches')
plt.ylabel('Species accuracy')
plt.savefig("train and val Species acc vs epoches.jpg")
plt.close('all')

######################################## Visualization ##################################
def visualize_model(model):
    """Show every validation image with its predicted and ground-truth species.

    Reads the module-level data_loaders, device and SPECIES. The image is
    displayed with squeeze(0), so the 'val' loader is expected to yield one
    sample per batch.
    """
    model.eval()
    with torch.no_grad():
        for i, data in enumerate(data_loaders['val']):
            inputs = data['image']
            labels_species = data['species'].to(device)

            x_species = model(inputs.to(device))
            # One row per sample and one column per species; a fixed width of
            # 2 cannot hold the scores of the three classes.
            x_species = x_species.view(-1, len(SPECIES))
            _, preds_species = torch.max(x_species, 1)

            print(inputs.shape)
            plt.imshow(transforms.ToPILImage()(inputs.squeeze(0)))
            plt.title('predicted species: {}\n ground-truth species:{}'.format(SPECIES[preds_species],SPECIES[labels_species]))
            plt.show()

visualize_model(model)

自定義網絡訓練

import os
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
import torch
from Spe_Network import *
from torchvision.transforms import transforms
from PIL import Image
import pandas as pd
import random
from torch import optim
from torch.optim import lr_scheduler
import copy
from torchvision import models
import logging

logging.basicConfig(level=logging.DEBUG, filename="train.log", filemode="a+")

ROOT_DIR = '/home/ai/project/data/project_I/Dataset/'  
TRAIN_DIR = 'train/'
VAL_DIR = 'val/'
TRAIN_ANNO = 'Species_train_annotation.csv'
VAL_ANNO = 'Species_val_annotation.csv'
CLASSES = ['Mammals', 'Birds']
SPECIES = ['rabbits', 'rats', 'chickens']

class MyDataset(Dataset):
    """Image dataset described by an annotation CSV.

    The CSV is read with its first column as the index and must provide a
    'path' column (image file path) and a 'species' column (integer label).

    Args:
        root_dir: dataset root directory; stored but not used by this class.
        annotations_file: path of the annotation CSV.
        transform: optional callable applied to the loaded PIL image.
    """

    def __init__(self, root_dir, annotations_file, transform=None):

        self.root_dir = root_dir
        self.annotations_file = annotations_file
        self.transform = transform

        if not os.path.isfile(self.annotations_file):
            # Only a warning: pd.read_csv below reports the actual failure.
            print(self.annotations_file + ' does not exist!')
        self.file_info = pd.read_csv(annotations_file, index_col=0)
        self.size = len(self.file_info)

    def __len__(self):
        """Return the number of annotated images."""
        return self.size

    def __getitem__(self, idx):
        """Return {'image': ..., 'species': int} for row idx, or None if the image file is missing."""
        # Positional lookup for both columns, so path and label always come
        # from the same row even when the CSV index is not 0..n-1.
        row = self.file_info.iloc[idx]
        image_path = row['path']
        if not os.path.isfile(image_path):
            print(image_path + '  does not exist!')
            return None

        image = Image.open(image_path).convert('RGB')
        label_species = int(row['species'])

        sample = {'image': image, 'species': label_species}
        if self.transform:
            sample['image'] = self.transform(image)
        return sample

# Training pipeline: resize to 600x600, take a random 500x500 crop, flip
# horizontally at random, then convert to a tensor.
train_transforms = transforms.Compose([transforms.Resize((600, 600)),
                       transforms.RandomCrop(500),
                                       transforms.RandomHorizontalFlip(),
                                       transforms.ToTensor(),
                                       ])
# Validation pipeline: resize to 500x500 and convert to a tensor.
val_transforms = transforms.Compose([transforms.Resize((500, 500)),
                                     transforms.ToTensor()
                                     ])

train_dataset = MyDataset(root_dir= ROOT_DIR + TRAIN_DIR,
                          annotations_file= TRAIN_ANNO,
                          transform=train_transforms)

test_dataset = MyDataset(root_dir= ROOT_DIR + VAL_DIR,
                         annotations_file= VAL_ANNO,
                         transform=val_transforms)

# The training loader shuffles and batches 8 samples; the test loader keeps DataLoader's defaults.
train_loader = DataLoader(dataset=train_dataset, batch_size=8, shuffle=True)
test_loader = DataLoader(dataset=test_dataset)
# train_model looks the loaders up by phase name.
data_loaders = {'train': train_loader, 'val': test_loader}

# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

def visualize_dataset():
    """Show one randomly chosen training sample.

    Prints the dataset size, then the sample's index, image tensor shape and
    species name, and displays the image with matplotlib. Reads the
    module-level train_dataset, train_loader and SPECIES.
    """
    print(len(train_dataset))
    # randrange excludes the upper bound; random.randint(0, n) can return n,
    # which is one past the last valid index.
    idx = random.randrange(len(train_dataset))
    sample = train_loader.dataset[idx]
    print(idx, sample['image'].shape, SPECIES[sample['species']])
    img = sample['image']
    plt.imshow(transforms.ToPILImage()(img))
    plt.show()
visualize_dataset()

def train_model(model, criterion, optimizer, scheduler, num_epochs=50):
    """Train ``model`` and restore the weights with the best validation accuracy.

    Every epoch runs a training pass followed by a validation pass over the
    module-level ``data_loaders`` dict (keys ``'train'`` and ``'val'``), with
    batches moved to the module-level ``device``. After the last epoch the
    best weights are loaded back into ``model`` and saved to
    ``'best_model.pt'``.

    Args:
        model: Network mapping an image batch to per-class scores.
        criterion: Loss called as ``criterion(scores, labels)``.
        optimizer: Optimizer updating ``model``'s parameters.
        scheduler: Learning-rate scheduler, stepped once per epoch after the
            training pass. Pass ``None`` to keep the learning rate fixed.
        num_epochs: Number of epochs to run.

    Returns:
        A tuple ``(model, Loss_list, Accuracy_list_species)``. Both history
        dicts have ``'train'`` and ``'val'`` lists with one Python float per
        epoch; accuracies are percentages in the range 0-100.
    """
    Loss_list = {'train': [], 'val': []}
    Accuracy_list_species = {'train': [], 'val': []}

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-*' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_training = phase == 'train'
            if is_training:
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            corrects_species = 0

            for data in data_loaders[phase]:
                inputs = data['image'].to(device)
                labels_species = data['species'].to(device)
                optimizer.zero_grad()

                # Gradients are only tracked during the training phase.
                with torch.set_grad_enabled(is_training):
                    x_species = model(inputs)
                    # One row of class scores per sample; the class count is
                    # inferred from the output instead of hard-coded.
                    x_species = x_species.view(inputs.size(0), -1)

                    _, preds_species = torch.max(x_species, 1)

                    loss = criterion(x_species, labels_species)

                    if is_training:
                        loss.backward()
                        optimizer.step()

                # Weight the batch loss by batch size so the epoch loss is a
                # per-sample mean even when the last batch is smaller.
                running_loss += loss.item() * inputs.size(0)
                # Accumulate as a Python int so the history holds plain
                # numbers rather than device tensors.
                corrects_species += torch.sum(preds_species == labels_species).item()

            # Step the schedule once per epoch, after the optimizer updates.
            if is_training and scheduler is not None:
                scheduler.step()

            dataset_size = len(data_loaders[phase].dataset)
            epoch_loss = running_loss / dataset_size
            Loss_list[phase].append(epoch_loss)

            epoch_acc_species = corrects_species / dataset_size
            Accuracy_list_species[phase].append(100 * epoch_acc_species)

            summary = '{} Loss: {:.4f}  Acc_species: {:.2%}'.format(phase, epoch_loss,epoch_acc_species)
            print(summary)
            logging.info(summary)

            # Keep a copy of the weights whenever validation accuracy improves.
            if phase == 'val' and epoch_acc_species > best_acc:
                best_acc = epoch_acc_species
                best_model_wts = copy.deepcopy(model.state_dict())
                print('Best val species Acc: {:.2%}'.format(best_acc))
                logging.info('Best val species Acc: {:.2%}'.format(best_acc))

    model.load_state_dict(best_model_wts)
    torch.save(model.state_dict(), 'best_model.pt')
    print('Best val species Acc: {:.2%}'.format(best_acc))
    logging.info('Best val species Acc: {:.2%}'.format(best_acc))
    return model, Loss_list,Accuracy_list_species
    
    
# vgg16 = models.vgg16(pretrained=True)
# Backbone: pretrained ResNet-18, handed to Net and moved to the target device.
res18 = models.resnet18(pretrained=True)
network = Net(res18).to(device)

# SGD with momentum; weight_decay is the L2 regularization penalty.
optimizer = optim.SGD(
    network.parameters(),
    lr=0.005,
    momentum=0.9,
    weight_decay=1e-4,
)
criterion = nn.CrossEntropyLoss()

# StepLR configured to multiply the LR by 0.1 on every scheduler step.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)

model, Loss_list, Accuracy_list_species = train_model(
    network,
    criterion,
    optimizer,
    exp_lr_scheduler,
    num_epochs=100,
)

# x = range(0, 100)
# y1 = Loss_list["val"]
# y2 = Loss_list["train"]

# plt.plot(x, y1, color="r", linestyle="-", marker="o", linewidth=1, label="val")
# plt.plot(x, y2, color="b", linestyle="-", marker="o", linewidth=1, label="train")
# plt.legend()
# plt.title('train and val loss vs. epoches')
# plt.ylabel('loss')
# plt.savefig("train and val loss vs epoches.jpg")
# plt.close('all')  # close figure 0

# y5 = Accuracy_list_species["train"]
# y6 = Accuracy_list_species["val"]
# plt.plot(x, y5, color="r", linestyle="-", marker=".", linewidth=1, label="train")
# plt.plot(x, y6, color="b", linestyle="-", marker=".", linewidth=1, label="val")
# plt.legend()
# plt.title('train and val Species acc vs. epoches')
# plt.ylabel('Species accuracy')
# plt.savefig("train and val Species acc vs epoches.jpg")
# plt.close('all')

######################################## Visualization ##################################
def visualize_model(model):
    """Show every validation image with its predicted and true species.

    Runs ``model`` in eval mode without gradient tracking over
    ``data_loaders['val']`` and opens one matplotlib figure per image, titled
    with the predicted and ground-truth species names from ``SPECIES``.

    Args:
        model: Trained network mapping an image batch to per-class scores.
    """
    model.eval()
    to_pil = transforms.ToPILImage()
    with torch.no_grad():
        for data in data_loaders['val']:
            inputs = data['image']
            labels_species = data['species']

            x_species = model(inputs.to(device))
            # One row of class scores per sample; the class count is inferred
            # from the output rather than hard-coded, so this agrees with
            # whatever number of classes the model was trained on.
            x_species = x_species.view(inputs.size(0), -1)
            _, preds_species = torch.max(x_species, 1)

            print(inputs.shape)
            # Walk the batch sample by sample so any batch size works, and
            # index SPECIES with plain ints instead of tensors.
            for image, pred, label in zip(inputs, preds_species.tolist(), labels_species.tolist()):
                plt.imshow(to_pil(image))
                plt.title('predicted species: {}\n ground-truth species:{}'.format(SPECIES[pred],SPECIES[label]))
                plt.show()

# visualize_model(model)