參考:https://blog.csdn.net/weixin_37813036/article/details/90718310
# Preprocessing applied to every image: resize, centre-crop to 224,
# convert to a tensor, then normalise each of the three channels.
data_transform = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Training images are read from one sub-folder per class under ./data2/train/.
train_dataset = datasets.ImageFolder(
    root='./data2/train/',
    transform=data_transform,
)

# Shuffled mini-batches for training.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
)
爲方便訓練過程,需要把訓練集進行處理:把貓狗的圖片分別放在cats、dogs文件夾中,並劃分出一部分圖片作爲測試集(與下載的測試集不同)。代碼如下:
# Split the original training images 90% / 10% into a new training set and a
# held-out test set, copying cats and dogs into separate class sub-directories:
#   <base_dir>/train/cats, <base_dir>/train/dogs,
#   <base_dir>/test/cats,  <base_dir>/test/dogs

# Directory holding the original Kaggle training images (cats and dogs mixed).
original_dataset_dir = 'D:\\Code\\Python\\Kaggle-Dogs_vs_Cats_PyTorch-master\\data\\train'

# Destination root for the re-organised data set.
base_dir = 'D:\\Code\\dogvscat\\data2'

sub_dirs = ['train', 'test']
animals = ['cats', 'dogs']
# The class is taken from the file-name prefix ("cat.<n>.jpg" / "dog.<n>.jpg")
# instead of guessing a file name from a numeric index, so the split does not
# depend on how the images happen to be numbered.
prefix_to_dir = {'cat': 'cats', 'dog': 'dogs'}

# Sorted so that a fixed NumPy seed always yields the same split.
all_files = sorted(
    name for name in os.listdir(original_dataset_dir)
    if name.split('.')[0] in prefix_to_dir
)
total_num = len(all_files)  # number of usable images, cats and dogs together

random_idx = np.arange(total_num)
np.random.shuffle(random_idx)  # randomise the image order

# First 90% of the shuffled indices -> training set, remaining 10% -> test set.
split_at = int(total_num * 0.9)
train_idx = random_idx[:split_at]
test_idx = random_idx[split_at:]
numbers = [train_idx, test_idx]

for sub_dir, indices in zip(sub_dirs, numbers):
    split_dir = os.path.join(base_dir, sub_dir)

    # Create both class directories up front so the count below never hits a
    # missing directory, even if one class received no images.
    for animal in animals:
        os.makedirs(os.path.join(split_dir, animal), exist_ok=True)

    for i in indices:
        fname = all_files[i]
        animal_dir = os.path.join(split_dir, prefix_to_dir[fname.split('.')[0]])
        src = os.path.join(original_dataset_dir, fname)  # original location
        dst = os.path.join(animal_dir, fname)            # new location
        shutil.copyfile(src, dst)

    # Report how many images ended up in this split.
    image_count = sum(
        len(os.listdir(os.path.join(split_dir, animal))) for animal in animals
    )
    print(split_dir + ' total images : %d' % image_count)
# 建立模型
class Net(nn.Module):
    """Small convolutional network producing two class scores per image.

    Two 5x5 convolutions, each followed by ReLU and 2x2 max pooling, feed
    three fully connected layers. The first fully connected layer is sized
    for 16 feature maps of 53x53, which is what a 224x224 input yields.
    """

    # Spatial size per side: 224 -> conv 5x5 -> 220 -> pool 2x2 -> 110
    #                            -> conv 5x5 -> 106 -> pool 2x2 -> 53
    FLAT_FEATURES = 16 * 53 * 53

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)    # 3 input channels -> 6 maps, 5x5 kernel
        self.maxpool = nn.MaxPool2d(2, 2)  # shared 2x2 pooling with stride 2
        self.conv2 = nn.Conv2d(6, 16, 5)   # 6 maps -> 16 maps, 5x5 kernel
        self.fc1 = nn.Linear(self.FLAT_FEATURES, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 2)       # two output scores (one per class)

    def forward(self, x):
        """Return the raw (un-normalised) class scores for a batch of images."""
        x = self.maxpool(F.relu(self.conv1(x)))
        x = self.maxpool(F.relu(self.conv2(x)))
        # Flatten per sample. Keeping the batch dimension explicit means an
        # unexpected input size fails at fc1 instead of being silently folded
        # into a different batch size by view(-1, ...).
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
class Net2(nn.Module):
    """Same architecture as ``Net`` with dropout after the first two FC layers.

    Dropout is applied functionally in ``forward`` and is only active while
    the module is in training mode (``self.training``). The layer attributes
    and therefore the ``state_dict`` keys are the same as before.
    """

    # Spatial size per side: 224 -> conv 5x5 -> 220 -> pool 2x2 -> 110
    #                            -> conv 5x5 -> 106 -> pool 2x2 -> 53
    FLAT_FEATURES = 16 * 53 * 53
    # Probability of zeroing each activation during training.
    DROPOUT_P = 0.5

    def __init__(self):
        super(Net2, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)    # 3 input channels -> 6 maps, 5x5 kernel
        self.maxpool = nn.MaxPool2d(2, 2)  # shared 2x2 pooling with stride 2
        self.conv2 = nn.Conv2d(6, 16, 5)   # 6 maps -> 16 maps, 5x5 kernel
        self.fc1 = nn.Linear(self.FLAT_FEATURES, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 2)       # two output scores (one per class)

    def forward(self, x):
        """Return the raw (un-normalised) class scores for a batch of images."""
        x = self.maxpool(F.relu(self.conv1(x)))
        x = self.maxpool(F.relu(self.conv2(x)))
        # Flatten per sample; a wrong input size then fails at fc1 instead of
        # being silently folded into a different batch size.
        x = x.view(x.size(0), -1)
        # The original constructed Dropout objects in __init__ and discarded
        # them, so dropout was never applied; apply it here instead.
        x = F.dropout(F.relu(self.fc1(x)), p=self.DROPOUT_P, training=self.training)
        x = F.dropout(F.relu(self.fc2(x)), p=self.DROPOUT_P, training=self.training)
        x = self.fc3(x)
        return x
我們從conv1說起。conv1實際上就是定義一個卷積層,3,6,5分別是什麼意思?3代表的是輸入圖像的像素數組的層數,一般來說就是你輸入的圖像的通道數,比如這裏使用的小貓圖像都是彩色圖像,由R、G、B三個通道組成,所以數值爲3;6代表的是我們希望進行6次卷積(即使用6個卷積核),每一次卷積都能生成不同的特徵映射數組,用於提取小貓和小狗的6種特徵。每一個特徵映射結果最終都會被堆疊在一起形成一個多通道的輸出,再作爲下一步的輸入;5就是卷積核(過濾器)的尺寸,表示我們希望用一個5 * 5的矩陣去和圖像中相同尺寸的區域逐元素相乘再相加,形成一個值。定義好了卷積層,我們接着定義池化層。池化層所做的事說來簡單,其實就是因爲大圖片生成的像素矩陣實在太大了,我們需要用一個合理的方法在降維的同時又不失去物體特徵,所以深度學習學者們想出了一個稱爲池化的技術,說白了就是從左上角開始,每四個元素(2 * 2)合併成一個元素(這裏的MaxPool2d取四個元素中的最大值),用這一個元素去代表四個元素的值,所以圖像面積一下子降爲原來的四分之一。再往下一行,我們又一次遇見了一個卷積層:conv2,和conv1一樣,它的輸入也是一個多層像素數組,輸出也是一個多層像素數組,不同的是這一次完成的計算量更大了,我們看這裏面的參數分別是6,16,5。之所以爲6是因爲conv1的輸出層數爲6,所以這裏輸入的層數就是6;16代表conv2的輸出層數,和conv1一樣,16代表着這一次卷積操作將會學習小貓小狗的16種映射特徵,特徵越多理論上能學習的效果就越好,大家可以嘗試一下別的值,看看效果是否真的變好。conv2使用的過濾器尺寸和conv1一樣,所以不再重複。
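關於53這個數字,可以根據輸出尺寸公式 $\left\lfloor \frac{n + 2p - f}{s} \right\rfloor + 1$ 計算出來,其中 $n$ 爲輸入邊長,$p$ 爲填充,$f$ 爲卷積核(或池化窗口)邊長,$s$ 爲步長:224×224的輸入經過conv1($f=5, p=0, s=1$)變爲220,經過池化($f=2, s=2$)變爲110,再經過conv2變爲106,最後經過池化變爲53。而三個全連接層所做的事很類似,就是不斷訓練,最後輸出一個二分類數值。Net類的forward函數表示前向計算的整個過程。forward接受一個input,返回一個網絡輸出值,中間的過程就是一個調用init函數中定義的層的過程。F.relu是一個激活函數,把所有的負值轉化成零值,非負值保持不變。這次圖像識別的最後關鍵一步就是真正的循環訓練操作。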
進行訓練的代碼:
def train():
    """Train ``net`` for ``epochs`` epochs, evaluating on the test set after each.

    Uses the module-level ``net``, ``optimizer``, ``cirterion``,
    ``train_loader``, ``test_loader``, ``epochs`` and ``use_gpu``.
    Prints the per-sample average loss and the accuracy (in percent) of the
    training pass and of the test pass for every epoch.
    """
    for epoch in range(epochs):
        # net.eval() is called further down; switch back to training mode so
        # every epoch after the first also trains in training mode.
        net.train()
        running_loss = 0.0
        train_correct = 0
        train_total = 0
        for step, data in enumerate(train_loader, 0):  # step counts from 0
            inputs, labels = data
            if use_gpu:
                inputs, labels = inputs.cuda(), labels.cuda()

            optimizer.zero_grad()
            outputs = net(inputs)
            loss = cirterion(outputs, labels)
            loss.backward()
            optimizer.step()

            batch = labels.size(0)
            # Index of the largest score in each row is the predicted class.
            _, train_predicted = torch.max(outputs.detach(), 1)
            # .item() turns the count into a Python int so the accuracy
            # below is a true floating-point division.
            train_correct += (train_predicted == labels).sum().item()
            # loss is the batch mean; weight it by the batch size so that
            # dividing by the sample count yields a per-sample average.
            running_loss += loss.item() * batch
            train_total += batch

        print('train %d epoch loss: %.3f acc: %.3f ' % (
            epoch + 1, running_loss / train_total, 100.0 * train_correct / train_total))

        # Evaluate the model on the test set.
        correct = 0
        test_loss = 0.0
        test_total = 0
        net.eval()  # evaluation mode (e.g. disables dropout)
        with torch.no_grad():  # no gradients are needed for evaluation
            for data in test_loader:
                images, labels = data
                if use_gpu:
                    images, labels = images.cuda(), labels.cuda()
                outputs = net(images)
                _, predicted = torch.max(outputs, 1)
                loss = cirterion(outputs, labels)
                batch = labels.size(0)
                test_loss += loss.item() * batch
                test_total += batch
                correct += (predicted == labels).sum().item()

        print('test %d epoch loss: %.3f acc: %.3f ' % (
            epoch + 1, test_loss / test_total, 100.0 * correct / test_total))
完整的代碼如下:
# coding=utf-8
"""Dogs-vs-cats binary classification with a small CNN.

Running this script:
1. splits the original training images 90% / 10% into per-class
   ``train`` and ``test`` directories,
2. trains ``Net2`` for ``epochs`` epochs, evaluating after every epoch,
3. saves the trained model and plots the recorded loss/accuracy curves.
"""
import os
import shutil

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from matplotlib import pyplot as plt
from torch.autograd import Variable
from torch.utils.data import Dataset
from torchvision import transforms, datasets, models

# Seed NumPy so the train/test split below is reproducible.
random_state = 42
np.random.seed(random_state)

# ---------------------------------------------------------------------------
# Data set split: 90% training, 10% test, cats and dogs in separate folders.
# ---------------------------------------------------------------------------
# Directory holding the original Kaggle training images (cats and dogs mixed).
original_dataset_dir = 'D:\\Code\\Python\\Kaggle-Dogs_vs_Cats_PyTorch-master\\data\\train'

# Destination root for the re-organised data set.
base_dir = 'D:\\Code\\dogvscat\\data2'

sub_dirs = ['train', 'test']
animals = ['cats', 'dogs']
# The class is taken from the file-name prefix ("cat.<n>.jpg" / "dog.<n>.jpg")
# instead of guessing a file name from a numeric index, so the split does not
# depend on how the images happen to be numbered.
prefix_to_dir = {'cat': 'cats', 'dog': 'dogs'}

# Sorted so that the fixed seed above always yields the same split.
all_files = sorted(
    name for name in os.listdir(original_dataset_dir)
    if name.split('.')[0] in prefix_to_dir
)
total_num = len(all_files)  # number of usable images, cats and dogs together

random_idx = np.arange(total_num)
np.random.shuffle(random_idx)  # randomise the image order

# First 90% of the shuffled indices -> training set, remaining 10% -> test set.
split_at = int(total_num * 0.9)
train_idx = random_idx[:split_at]
test_idx = random_idx[split_at:]
numbers = [train_idx, test_idx]

for sub_dir, indices in zip(sub_dirs, numbers):
    split_dir = os.path.join(base_dir, sub_dir)

    # Create both class directories up front so the count below never hits a
    # missing directory, even if one class received no images.
    for animal in animals:
        os.makedirs(os.path.join(split_dir, animal), exist_ok=True)

    for i in indices:
        fname = all_files[i]
        animal_dir = os.path.join(split_dir, prefix_to_dir[fname.split('.')[0]])
        src = os.path.join(original_dataset_dir, fname)  # original location
        dst = os.path.join(animal_dir, fname)            # new location
        shutil.copyfile(src, dst)

    # Report how many images ended up in this split.
    image_count = sum(
        len(os.listdir(os.path.join(split_dir, animal))) for animal in animals
    )
    print(split_dir + ' total images : %d' % image_count)

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
random_state = 1
torch.manual_seed(random_state)           # seed the CPU random number generator
torch.cuda.manual_seed(random_state)      # seed the current GPU
torch.cuda.manual_seed_all(random_state)  # seed all GPUs
np.random.seed(random_state)
# random.seed(random_state)

epochs = 10      # number of training epochs
batch_size = 4   # mini-batch size
num_workers = 0  # number of data-loading worker processes
use_gpu = torch.cuda.is_available()
PATH = 'D:\\Code\\dogvscat\\model.pt'  # where the trained model is saved/loaded

# Preprocessing applied to every image: resize, centre-crop to 224,
# convert to a tensor, then normalise each of the three channels.
data_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_dataset = datasets.ImageFolder(root='D:\\Code\\dogvscat\\data2\\train',
                                     transform=data_transform)
print(train_dataset)
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=num_workers)

test_dataset = datasets.ImageFolder(root='D:\\Code\\dogvscat\\data2\\test',
                                    transform=data_transform)
test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          num_workers=num_workers)


# ---------------------------------------------------------------------------
# Models
# ---------------------------------------------------------------------------
class Net(nn.Module):
    """Small convolutional network producing two class scores per image.

    Two 5x5 convolutions, each followed by ReLU and 2x2 max pooling, feed
    three fully connected layers. The first fully connected layer is sized
    for 16 feature maps of 53x53, which is what a 224x224 input yields.
    """

    # Spatial size per side: 224 -> conv 5x5 -> 220 -> pool 2x2 -> 110
    #                            -> conv 5x5 -> 106 -> pool 2x2 -> 53
    FLAT_FEATURES = 16 * 53 * 53

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.maxpool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(self.FLAT_FEATURES, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 2)

    def forward(self, x):
        """Return the raw (un-normalised) class scores for a batch of images."""
        x = self.maxpool(F.relu(self.conv1(x)))
        x = self.maxpool(F.relu(self.conv2(x)))
        # Flatten per sample; a wrong input size then fails at fc1 instead of
        # being silently folded into a different batch size.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


class Net2(nn.Module):
    """Same architecture as ``Net`` with dropout after the first two FC layers.

    Dropout is applied functionally in ``forward`` and is only active while
    the module is in training mode (``self.training``). The layer attributes
    and therefore the ``state_dict`` keys are the same as before.
    """

    FLAT_FEATURES = 16 * 53 * 53
    # Probability of zeroing each activation during training.
    DROPOUT_P = 0.5

    def __init__(self):
        super(Net2, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.maxpool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(self.FLAT_FEATURES, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 2)

    def forward(self, x):
        """Return the raw (un-normalised) class scores for a batch of images."""
        x = self.maxpool(F.relu(self.conv1(x)))
        x = self.maxpool(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)
        # The original constructed Dropout objects in __init__ and discarded
        # them, so dropout was never applied; apply it here instead.
        x = F.dropout(F.relu(self.fc1(x)), p=self.DROPOUT_P, training=self.training)
        x = F.dropout(F.relu(self.fc2(x)), p=self.DROPOUT_P, training=self.training)
        x = self.fc3(x)
        return x


net = Net2()
if os.path.exists(PATH):
    # NOTE(review): this unpickles a whole model object. Only load files you
    # created yourself; recent PyTorch releases may require extra arguments
    # to load non-state-dict checkpoints - confirm for your version.
    net = torch.load(PATH)

if use_gpu:
    print('gpu is available')
    net = net.cuda()
else:
    print('gpu is unavailable')

print(net)

# Per-epoch history used for the plots at the end of training.
trainLoss = []
trainacc = []
testLoss = []
testacc = []
x = np.arange(1, epochs + 1)  # epoch numbers for the x axis

# Loss function and optimizer.
cirterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)


def train():
    """Train ``net`` for ``epochs`` epochs, evaluating on the test set after each.

    Prints and records the per-sample average loss and the accuracy (in
    percent) of the training pass and the test pass for every epoch, then
    saves the model to ``PATH`` and plots the recorded curves.
    """
    for epoch in range(epochs):
        # net.eval() is called further down; switch back to training mode so
        # dropout is active again on every epoch after the first.
        net.train()
        running_loss = 0.0
        train_correct = 0
        train_total = 0
        for step, data in enumerate(train_loader, 0):  # step counts from 0
            inputs, labels = data
            if use_gpu:
                inputs, labels = inputs.cuda(), labels.cuda()

            optimizer.zero_grad()
            outputs = net(inputs)
            loss = cirterion(outputs, labels)
            loss.backward()
            optimizer.step()

            batch = labels.size(0)
            # Index of the largest score in each row is the predicted class.
            _, train_predicted = torch.max(outputs.detach(), 1)
            # .item() turns the count into a Python int so the accuracy
            # below is a true floating-point division.
            train_correct += (train_predicted == labels).sum().item()
            # loss is the batch mean; weight it by the batch size so that
            # dividing by the sample count yields a per-sample average.
            running_loss += loss.item() * batch
            train_total += batch

        train_loss_avg = running_loss / train_total
        train_acc = 100.0 * train_correct / train_total
        print('train %d epoch loss: %.3f acc: %.3f ' % (
            epoch + 1, train_loss_avg, train_acc))

        # Evaluate the model on the test set.
        correct = 0
        test_loss = 0.0
        test_total = 0
        net.eval()  # evaluation mode: disables dropout
        with torch.no_grad():  # no gradients are needed for evaluation
            for data in test_loader:
                images, labels = data
                if use_gpu:
                    images, labels = images.cuda(), labels.cuda()
                outputs = net(images)
                _, predicted = torch.max(outputs, 1)
                loss = cirterion(outputs, labels)
                batch = labels.size(0)
                test_loss += loss.item() * batch
                test_total += batch
                correct += (predicted == labels).sum().item()

        test_loss_avg = test_loss / test_total
        test_acc = 100.0 * correct / test_total
        print('test %d epoch loss: %.3f acc: %.3f ' % (
            epoch + 1, test_loss_avg, test_acc))

        trainLoss.append(train_loss_avg)
        trainacc.append(train_acc)
        testLoss.append(test_loss_avg)
        testacc.append(test_acc)

    # Save before plotting so the model is kept even if plotting fails.
    torch.save(net, PATH)

    plt.figure(1)
    plt.title('train')
    plt.plot(x, trainacc, 'r')
    plt.plot(x, trainLoss, 'b')
    plt.show()
    plt.figure(2)
    plt.title('test')
    plt.plot(x, testacc, 'r')
    plt.plot(x, testLoss, 'b')
    plt.show()


train()
看一下某次的運行結果:
D:\anaconda\anaconda\pythonw.exe D:/Code/Python/pytorch入門與實踐/第六章_pytorch實戰指南/貓和狗二分類.py D:\Code\dogvscat\data2\train\cats total images : 11253 D:\Code\dogvscat\data2\test\dogs total images : 1253 Net( (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1)) (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1)) (fc1): Linear(in_features=44944, out_features=1024, bias=True) (fc2): Linear(in_features=1024, out_features=512, bias=True) (fc3): Linear(in_features=512, out_features=2, bias=True) ) train 1 epoch loss: 0.162 acc: 61.000 test 1 epoch loss: 0.153 acc: 66.000 train 2 epoch loss: 0.148 acc: 68.000 test 2 epoch loss: 0.143 acc: 71.000 train 3 epoch loss: 0.138 acc: 71.000 test 3 epoch loss: 0.138 acc: 72.000 train 4 epoch loss: 0.130 acc: 74.000 test 4 epoch loss: 0.137 acc: 72.000 train 5 epoch loss: 0.119 acc: 77.000 test 5 epoch loss: 0.132 acc: 74.000 train 6 epoch loss: 0.104 acc: 81.000 test 6 epoch loss: 0.129 acc: 75.000 train 7 epoch loss: 0.085 acc: 85.000 test 7 epoch loss: 0.132 acc: 75.000 train 8 epoch loss: 0.060 acc: 90.000 test 8 epoch loss: 0.146 acc: 75.000 train 9 epoch loss: 0.036 acc: 94.000 test 9 epoch loss: 0.200 acc: 74.000 train 10 epoch loss: 0.022 acc: 97.000 test 10 epoch loss: 0.207 acc: 75.000 Process finished with exit code 0
發現這個程序運行結果訓練集準確率很高,測試集準確率爲75%左右,因此Net類有點過擬合,Net2加入了Dropout來降低網絡複雜度、處理過擬合。這個程序屬於最基礎的分類算法,因此準確率並不是很高,但是我認爲初學者可以先學會這個程序,再繼續提高網絡的準確率。