關於卷積神經網絡(CNN)的基礎知識此處就不再多說,詳細的資料請參考我在CSDN的說明:
CNN卷積神經網絡原理流程整理
如下是一個可視化展現卷積過程的網站
https://www.cs.ryerson.ca/~aharley/vis/conv/
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch import nn
from torch import optim
from torch.autograd import Variable
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
# --- MNIST data pipeline ---
# Download (if needed) and expose the digit images as float tensors in [0, 1].
train_dataset = datasets.MNIST(
    root='./data/06_MNIST/',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_dataset = datasets.MNIST(
    root='./data/06_MNIST/',
    train=False,  # held-out evaluation split
    transform=transforms.ToTensor(),
    download=True,
)

batch_size = 64  # images per mini-batch

# Shuffling decorrelates consecutive training batches.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)
# Network definition.
class CNN(nn.Module):
    """Two conv blocks + two fully-connected layers for 28x28 MNIST digits.

    Input:  [N, 1, 28, 28] float tensors.
    Output: [N, 10] raw class logits (no softmax -- see note below).
    """

    def __init__(self):
        super(CNN, self).__init__()
        # Conv2d(in_channels, out_channels, kernel_size, stride, padding).
        # A 5x5 kernel with padding=2 preserves spatial size; each 2x2
        # max-pool then halves it: 28 -> 14 -> 7.
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 32, 5, 1, 2), nn.ReLU(), nn.MaxPool2d(2, 2)
        )  # -> [N, 32, 14, 14]
        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 64, 5, 1, 2), nn.ReLU(), nn.MaxPool2d(2, 2)
        )  # -> [N, 64, 7, 7]
        self.fc1 = nn.Sequential(
            nn.Linear(64 * 7 * 7, 1000), nn.Dropout(p=0.5), nn.ReLU()
        )
        # BUGFIX: the original appended nn.Softmax(dim=1) here.
        # nn.CrossEntropyLoss applies log-softmax internally, so feeding it
        # softmax outputs double-normalizes and squashes gradients, slowing
        # training.  Return raw logits instead; torch.max(out, 1) predictions
        # are unchanged because softmax is monotonic.
        self.fc2 = nn.Linear(1000, 10)

    def forward(self, x):
        # x: [N, 1, 28, 28]
        x = self.conv1(x)
        x = self.conv2(x)          # [N, 64, 7, 7]
        x = x.view(x.size(0), -1)  # flatten to [N, 64*7*7]
        x = self.fc1(x)
        return self.fc2(x)         # logits, shape [N, 10]
# --- model, loss, and optimizer ---
LR = 0.001  # Adam learning rate
model = CNN()
crossEntropyloss = nn.CrossEntropyLoss()  # takes class scores + integer labels
optimizer = optim.Adam(model.parameters(), LR)
def train():
    """Run one epoch of training, then report accuracy on the training set."""
    model.train()  # enable dropout for the optimization pass
    for inputs, labels in train_loader:
        out = model(inputs)
        loss = crossEntropyloss(out, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # BUGFIX: the original measured training accuracy with the model still in
    # train mode (dropout active) and with autograd recording every forward
    # pass.  Switch to eval()/no_grad() so the accuracy is deterministic and
    # the pass does not build graphs it never uses.
    model.eval()
    correct = 0
    with torch.no_grad():
        for inputs, labels in train_loader:
            out = model(inputs)
            _, predictions = torch.max(out, 1)
            correct += (predictions == labels).sum()
    print("Train acc:{0}".format(correct.item() / len(train_dataset)))


def test():
    """Report accuracy on the held-out test set."""
    model.eval()  # disable dropout
    correct = 0
    with torch.no_grad():  # inference only: skip gradient bookkeeping
        for inputs, labels in test_loader:
            out = model(inputs)
            _, predictions = torch.max(out, 1)
            correct += (predictions == labels).sum()
    print("Test acc:{0}".format(correct.item() / len(test_dataset)))
if __name__ == '__main__':
    # Ten full passes over the training data, evaluating after each one.
    for epoch in range(10):
        print('epoch:', epoch)
        train()
        test()
主要的步驟與上述方法相同。
# --- CIFAR-10 data pipeline ---
CIFAR_train_dataset = datasets.CIFAR10(
    root='./data/',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
CIFAR_test_dataset = datasets.CIFAR10(
    root='./data/',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

# Inspect one sample.
imgdata, label = CIFAR_train_dataset[90]
print('label:', label)
print('imgdata類型:', type(imgdata))
# BUGFIX: the original printed the two shapes with the labels swapped --
# the train-set shape under '測試集' (test) and the test-set shape under
# '訓練集' (train).
print('訓練集', CIFAR_train_dataset.data.shape)
print('測試集', CIFAR_test_dataset.data.shape)

# Wrap the datasets in shuffling mini-batch loaders.
batch_size = 64
CIFAR_train_loader = DataLoader(dataset=CIFAR_train_dataset, batch_size=batch_size, shuffle=True)
CIFAR_test_loader = DataLoader(dataset=CIFAR_test_dataset, batch_size=batch_size, shuffle=True)
# Preview the first 12 CIFAR-10 training images with their class names.
# NOTE: requires `import matplotlib.pyplot as plt`, which the original file
# never performed -- added to the import block.
fig, ax = plt.subplots(nrows=3, ncols=4, sharex=True, sharey=True)  # shared axes across subplots
ax = ax.flatten()
for i in range(12):  # only the first 12 images
    img = CIFAR_train_dataset.data[i]  # HWC uint8 RGB array
    # cmap is ignored by imshow for RGB input; kept for parity with the original.
    ax[i].imshow(img, cmap='Greys', interpolation='nearest')
    # IDIOM: dataset.classes maps label index -> class name directly; the
    # original reverse-searched class_to_idx for the same string.
    ax[i].set_title(CIFAR_train_dataset.classes[CIFAR_train_dataset.targets[i]])
ax[0].set_xticks([])  # with shared axes, clearing one subplot clears them all
ax[0].set_yticks([])
plt.tight_layout()
plt.show()
說明:CNN網絡的卷積層數量、卷積層參數、卷積層之後的激活函數、池化層的參數都需要自己再調整,我所填寫的參數得到的結果並不是很好。
# Network definition for 32x32 RGB CIFAR-10 images.
class CNN2(nn.Module):
    """Two conv blocks + two fully-connected layers for CIFAR-10.

    Input:  [N, 3, 32, 32] float tensors.
    Output: [N, 10] raw class logits (no softmax -- see note below).
    """

    def __init__(self):
        super(CNN2, self).__init__()
        # Conv2d(in_channels, out_channels, kernel_size, stride, padding).
        # 5x5 kernel with padding=2 preserves spatial size; each 2x2 max-pool
        # halves it: 32 -> 16 -> 8.
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 32, 5, 1, 2), nn.ReLU(), nn.MaxPool2d(2, 2)
        )  # -> [N, 32, 16, 16]
        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 64, 5, 1, 2), nn.ReLU(), nn.MaxPool2d(2, 2)
        )  # -> [N, 64, 8, 8]
        self.fc1 = nn.Sequential(
            nn.Linear(64 * 8 * 8, 1000), nn.Dropout(p=0.5), nn.ReLU()
        )
        # BUGFIX: the original appended nn.Softmax(dim=1) here even though
        # nn.CrossEntropyLoss applies log-softmax internally; the double
        # softmax squashes gradients and hurts training.  Emit raw logits;
        # torch.max(out, 1) predictions are unchanged (softmax is monotonic).
        self.fc2 = nn.Linear(1000, 10)

    def forward(self, x):
        # x: [N, 3, 32, 32]
        x = self.conv1(x)
        x = self.conv2(x)          # [N, 64, 8, 8]
        x = x.view(x.size(0), -1)  # flatten to [N, 64*8*8]
        x = self.fc1(x)
        return self.fc2(x)         # logits, shape [N, 10]
# --- CIFAR-10 model, loss, and optimizer ---
LR = 0.001  # Adam learning rate
model = CNN2()
crossEntropyloss = nn.CrossEntropyLoss()  # takes class scores + integer labels
optimizer = optim.Adam(model.parameters(), LR)


def train(epoch):
    """Run one training epoch over CIFAR-10, logging loss every 10 batches."""
    model.train()  # enable dropout
    running_loss = 0.0
    batches_since_log = 0
    for i, (inputs, labels) in enumerate(CIFAR_train_loader):
        out = model(inputs)
        loss = crossEntropyloss(out, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # BUGFIX: the original accumulated the number of *correct predictions*
        # into `running_loss` and printed it under the label "loss", divided by
        # a hard-coded 640 (wrong even on the very first batch).  Accumulate
        # the actual loss and report the mean over the batches since last log.
        running_loss += loss.item()
        batches_since_log += 1
        if i % 10 == 0:  # log every 10 mini-batches (batch = 64 images)
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / batches_since_log))
            running_loss = 0.0
            batches_since_log = 0

    # Training-set accuracy, measured with dropout off and autograd disabled
    # (the original left the model in train mode and recorded gradients here).
    model.eval()
    correct = 0
    with torch.no_grad():
        for inputs, labels in CIFAR_train_loader:
            out = model(inputs)
            _, predictions = torch.max(out, 1)
            correct += (predictions == labels).sum()
    print("Train acc:{0}".format(correct.item() / len(CIFAR_train_dataset)))


def test():
    """Report accuracy on the CIFAR-10 test split."""
    model.eval()
    correct = 0
    with torch.no_grad():  # inference only
        for inputs, labels in CIFAR_test_loader:
            out = model(inputs)
            _, predictions = torch.max(out, 1)
            correct += (predictions == labels).sum()
    print("Test acc:{0}".format(correct.item() / len(CIFAR_test_dataset)))


if __name__ == '__main__':
    for epoch in range(10):
        print('epoch:', epoch)
        train(epoch)
        test()
最後結果就不展示了,要得到好的訓練結果就需要調整參數和神經網絡的結構。