版權聲明:本文爲博主原創文章,歡迎轉載,並請註明出處。聯繫方式:460356155@qq.com
CNN的層數越多,可以提取到的特徵越豐富,可是簡單地增長卷積層數,訓練時會致使梯度彌散或梯度爆炸。
何凱明2015年提出了殘差神經網絡,即ResNet,並在ILSVRC-2015的分類比賽中得到冠軍。
ResNet能夠有效地消除卷積層數增長帶來的梯度彌散或梯度爆炸問題。
ResNet的核心思想是網絡輸出分爲2部分:恆等映射(identity mapping)、殘差映射(residual mapping),即y = x + F(x),圖示以下:
ResNet經過改變學習目標,即由學習完整的輸出變爲學習殘差,解決了傳統卷積在信息傳遞時存在的信息丟失和損耗問題,經過將輸入直接繞道傳遞到輸出,保護了信息的完整性。此外學習目標的簡化也下降了學習難度。
常見的ResNet結構有:
34層的ResNet圖示以下:
pytorch實現和訓練ResNet-34的代碼以下:
1 # -*- coding:utf-8 -*- 2 3 u"""ResNet訓練學習CIFAR10""" 4 5 __author__ = 'zhengbiqing 460356155@qq.com' 6 7 8 import torch as t 9 import torchvision as tv 10 import torch.nn as nn 11 import torch.optim as optim 12 import torchvision.transforms as transforms 13 from torchvision.transforms import ToPILImage 14 import torch.backends.cudnn as cudnn 15 16 import matplotlib.pyplot as plt 17 18 import datetime 19 import argparse 20 21 22 # 樣本讀取線程數 23 WORKERS = 4 24 25 # 網絡參賽保存文件名 26 PARAS_FN = 'cifar_resnet_params.pkl' 27 28 # minist數據存放位置 29 ROOT = '/home/zbq/PycharmProjects/cifar' 30 31 # 目標函數 32 loss_func = nn.CrossEntropyLoss() 33 34 # 最優結果 35 best_acc = 0 36 37 # 記錄準確率,顯示曲線 38 global_train_acc = [] 39 global_test_acc = [] 40 41 42 ''' 43 殘差塊 44 in_channels, out_channels:殘差塊的輸入、輸出通道數 45 對第一層,in out channel都是64,其餘層則不一樣 46 對每一層,若是in out channel不一樣, stride是1,其餘層則爲2 47 ''' 48 class ResBlock(nn.Module): 49 def __init__(self, in_channels, out_channels, stride=1): 50 super(ResBlock, self).__init__() 51 52 # 殘差塊的第一個卷積 53 # 通道數變換in->out,每一層(除第一層外)的第一個block 54 # 圖片尺寸變換:stride=2時,w-3+2 / 2 + 1 = w/2,w/2 * w/2 55 # stride=1時尺寸不變,w-3+2 / 1 + 1 = w 56 self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1) 57 self.bn1 = nn.BatchNorm2d(out_channels) 58 self.relu = nn.ReLU(inplace=True) 59 60 # 殘差塊的第二個卷積 61 # 通道數、圖片尺寸均不變 62 self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1) 63 self.bn2 = nn.BatchNorm2d(out_channels) 64 65 # 殘差塊的shortcut 66 # 若是殘差塊的輸入輸出通道數不一樣,則須要變換通道數及圖片尺寸,以和residual部分相加 67 # 輸出:通道數*2 圖片尺寸/2 68 if in_channels != out_channels: 69 self.downsample = nn.Sequential( 70 nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=2), 71 nn.BatchNorm2d(out_channels) 72 ) 73 else: 74 # 通道數相同,無需作變換,在forward中identity = x 75 self.downsample = None 76 77 def forward(self, x): 78 identity = x 79 80 out = self.conv1(x) 81 out = self.bn1(out) 82 out = self.relu(out) 83 84 out = self.conv2(out) 85 out = self.bn2(out) 
86 87 if self.downsample is not None: 88 identity = self.downsample(x) 89 90 out += identity 91 out = self.relu(out) 92 93 return out 94 95 96 ''' 97 定義網絡結構 98 ''' 99 class ResNet34(nn.Module): 100 def __init__(self, block): 101 super(ResNet34, self).__init__() 102 103 # 初始卷積層核池化層 104 self.first = nn.Sequential( 105 # 卷基層1:7*7kernel,2stride,3padding,outmap:32-7+2*3 / 2 + 1,16*16 106 nn.Conv2d(3, 64, 7, 2, 3), 107 nn.BatchNorm2d(64), 108 nn.ReLU(inplace=True), 109 110 # 最大池化,3*3kernel,1stride(32的原始輸入圖片較小,再也不縮小尺寸),1padding, 111 # outmap:16-3+2*1 / 1 + 1,16*16 112 nn.MaxPool2d(3, 1, 1) 113 ) 114 115 # 第一層,通道數不變 116 self.layer1 = self.make_layer(block, 64, 64, 3, 1) 117 118 # 第二、三、4層,通道數*2,圖片尺寸/2 119 self.layer2 = self.make_layer(block, 64, 128, 4, 2) # 輸出8*8 120 self.layer3 = self.make_layer(block, 128, 256, 6, 2) # 輸出4*4 121 self.layer4 = self.make_layer(block, 256, 512, 3, 2) # 輸出2*2 122 123 self.avg_pool = nn.AvgPool2d(2) # 輸出512*1 124 self.fc = nn.Linear(512, 10) 125 126 def make_layer(self, block, in_channels, out_channels, block_num, stride): 127 layers = [] 128 129 # 每一層的第一個block,通道數可能不一樣 130 layers.append(block(in_channels, out_channels, stride)) 131 132 # 每一層的其餘block,通道數不變,圖片尺寸不變 133 for i in range(block_num - 1): 134 layers.append(block(out_channels, out_channels, 1)) 135 136 return nn.Sequential(*layers) 137 138 def forward(self, x): 139 x = self.first(x) 140 x = self.layer1(x) 141 x = self.layer2(x) 142 x = self.layer3(x) 143 x = self.layer4(x) 144 x = self.avg_pool(x) 145 146 # x.size()[0]: batch size 147 x = x.view(x.size()[0], -1) 148 x = self.fc(x) 149 150 return x 151 152 153 ''' 154 訓練並測試網絡 155 net:網絡模型 156 train_data_load:訓練數據集 157 optimizer:優化器 158 epoch:第幾回訓練迭代 159 log_interval:訓練過程當中損失函數值和準確率的打印頻率 160 ''' 161 def net_train(net, train_data_load, optimizer, epoch, log_interval): 162 net.train() 163 164 begin = datetime.datetime.now() 165 166 # 樣本總數 167 total = len(train_data_load.dataset) 168 169 # 樣本批次訓練的損失函數值的和 170 train_loss = 0 171 172 # 
識別正確的樣本數 173 ok = 0 174 175 for i, data in enumerate(train_data_load, 0): 176 img, label = data 177 img, label = img.cuda(), label.cuda() 178 179 optimizer.zero_grad() 180 181 outs = net(img) 182 loss = loss_func(outs, label) 183 loss.backward() 184 optimizer.step() 185 186 # 累加損失值和訓練樣本數 187 train_loss += loss.item() 188 189 _, predicted = t.max(outs.data, 1) 190 # 累加識別正確的樣本數 191 ok += (predicted == label).sum() 192 193 if (i + 1) % log_interval == 0: 194 # 訓練結果輸出 195 196 # 已訓練的樣本數 197 traind_total = (i + 1) * len(label) 198 199 # 準確度 200 acc = 100. * ok / traind_total 201 202 # 記錄訓練準確率以輸出變化曲線 203 global_train_acc.append(acc) 204 205 end = datetime.datetime.now() 206 print('one epoch spend: ', end - begin) 207 208 209 ''' 210 用測試集檢查準確率 211 ''' 212 def net_test(net, test_data_load, epoch): 213 net.eval() 214 215 ok = 0 216 217 for i, data in enumerate(test_data_load): 218 img, label = data 219 img, label = img.cuda(), label.cuda() 220 221 outs = net(img) 222 _, pre = t.max(outs.data, 1) 223 ok += (pre == label).sum() 224 225 acc = ok.item() * 100. 
/ (len(test_data_load.dataset)) 226 print('EPOCH:{}, ACC:{}\n'.format(epoch, acc)) 227 228 # 記錄測試準確率以輸出變化曲線 229 global_test_acc.append(acc) 230 231 # 最好準確度記錄 232 global best_acc 233 if acc > best_acc: 234 best_acc = acc 235 236 237 ''' 238 顯示數據集中一個圖片 239 ''' 240 def img_show(dataset, index): 241 classes = ('plane', 'car', 'bird', 'cat', 242 'deer', 'dog', 'frog', 'horse', 'ship', 'truck') 243 244 show = ToPILImage() 245 246 data, label = dataset[index] 247 print('img is a ', classes[label]) 248 show((data + 1) / 2).resize((100, 100)).show() 249 250 251 ''' 252 顯示訓練準確率、測試準確率變化曲線 253 ''' 254 def show_acc_curv(ratio): 255 # 訓練準確率曲線的x、y 256 train_x = list(range(len(global_train_acc))) 257 train_y = global_train_acc 258 259 # 測試準確率曲線的x、y 260 # 每ratio個訓練準確率對應一個測試準確率 261 test_x = train_x[ratio-1::ratio] 262 test_y = global_test_acc 263 264 plt.title('CIFAR10 RESNET34 ACC') 265 266 plt.plot(train_x, train_y, color='green', label='training accuracy') 267 plt.plot(test_x, test_y, color='red', label='testing accuracy') 268 269 # 顯示圖例 270 plt.legend() 271 plt.xlabel('iterations') 272 plt.ylabel('accs') 273 274 plt.show() 275 276 277 def main(): 278 # 訓練超參數設置,可經過命令行設置 279 parser = argparse.ArgumentParser(description='PyTorch CIFA10 ResNet34 Example') 280 parser.add_argument('--batch-size', type=int, default=128, metavar='N', 281 help='input batch size for training (default: 128)') 282 parser.add_argument('--test-batch-size', type=int, default=100, metavar='N', 283 help='input batch size for testing (default: 100)') 284 parser.add_argument('--epochs', type=int, default=200, metavar='N', 285 help='number of epochs to train (default: 200)') 286 parser.add_argument('--lr', type=float, default=0.1, metavar='LR', 287 help='learning rate (default: 0.1)') 288 parser.add_argument('--momentum', type=float, default=0.9, metavar='M', 289 help='SGD momentum (default: 0.9)') 290 parser.add_argument('--log-interval', type=int, default=10, metavar='N', 291 help='how many batches to wait before 
logging training status (default: 10)') 292 parser.add_argument('--no-train', action='store_true', default=False, 293 help='If train the Model') 294 parser.add_argument('--save-model', action='store_true', default=False, 295 help='For Saving the current Model') 296 args = parser.parse_args() 297 298 # 圖像數值轉換,ToTensor源碼註釋 299 """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor. 300 Converts a PIL Image or numpy.ndarray (H x W x C) in the range 301 [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0]. 302 """ 303 # 歸一化把[0.0, 1.0]變換爲[-1,1], ([0, 1] - 0.5) / 0.5 = [-1, 1] 304 transform = tv.transforms.Compose([ 305 transforms.ToTensor(), 306 transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]) 307 308 # 定義數據集 309 train_data = tv.datasets.CIFAR10(root=ROOT, train=True, download=True, transform=transform) 310 test_data = tv.datasets.CIFAR10(root=ROOT, train=False, download=False, transform=transform) 311 312 train_load = t.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True, num_workers=WORKERS) 313 test_load = t.utils.data.DataLoader(test_data, batch_size=args.test_batch_size, shuffle=False, num_workers=WORKERS) 314 315 net = ResNet34(ResBlock).cuda() 316 print(net) 317 318 # 並行計算提升運行速度 319 net = nn.DataParallel(net) 320 cudnn.benchmark = True 321 322 # 若是不訓練,直接加載保存的網絡參數進行測試集驗證 323 if args.no_train: 324 net.load_state_dict(t.load(PARAS_FN)) 325 net_test(net, test_load, 0) 326 return 327 328 optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum) 329 330 start_time = datetime.datetime.now() 331 332 for epoch in range(1, args.epochs + 1): 333 net_train(net, train_load, optimizer, epoch, args.log_interval) 334 335 # 每一個epoch結束後用測試集檢查識別準確度 336 net_test(net, test_load, epoch) 337 338 end_time = datetime.datetime.now() 339 340 global best_acc 341 print('CIFAR10 pytorch ResNet34 Train: EPOCH:{}, BATCH_SZ:{}, LR:{}, ACC:{}'.format(args.epochs, args.batch_size, args.lr, best_acc)) 342 print('train spend 
time: ', end_time - start_time) 343 344 # 每訓練一個迭代記錄的訓練準確率個數 345 ratio = len(train_data) / args.batch_size / args.log_interval 346 ratio = int(ratio) 347 348 # 顯示曲線 349 show_acc_curv(ratio) 350 351 if args.save_model: 352 t.save(net.state_dict(), PARAS_FN) 353 354 355 if __name__ == '__main__': 356 main()
運行結果:
Files already downloaded and verified
ResNet34(
(first): Sequential(
(0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace)
(3): MaxPool2d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=False)
)
(layer1): Sequential(
(0): ResBlock(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(1): ResBlock(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(2): ResBlock(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(layer2): Sequential(
(0): ResBlock(
(conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(downsample): Sequential(
(0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2))
(1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): ResBlock(
(conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(2): ResBlock(
(conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(3): ResBlock(
(conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(layer3): Sequential(
(0): ResBlock(
(conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(downsample): Sequential(
(0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2))
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): ResBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(2): ResBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(3): ResBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(4): ResBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(5): ResBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(layer4): Sequential(
(0): ResBlock(
(conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(downsample): Sequential(
(0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2))
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): ResBlock(
(conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(2): ResBlock(
(conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(avg_pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
(fc): Linear(in_features=512, out_features=10, bias=True)
)
one epoch spend: 0:00:23.971338
EPOCH:1, ACC:48.54
one epoch spend: 0:00:22.339190
EPOCH:2, ACC:61.06
......
one epoch spend: 0:00:22.023034
EPOCH:199, ACC:79.84
one epoch spend: 0:00:22.057692
EPOCH:200, ACC:79.6
CIFAR10 pytorch ResNet34 Train: EPOCH:200, BATCH_SZ:128, LR:0.1, ACC:80.19
train spend time: 1:18:40.948080
運行200個迭代,每一個迭代耗時22秒,準確率不高,只有80%。準確率變化曲線以下: