背景:基於PyTorch
的模型,想固定主分支參數,只訓練子分支,結果發如今不一樣epoch
相同的測試數據通過主分支輸出的結果不一樣。html
緣由:未固定主分支BN
層中的running_mean
和running_var
。python
解決方法:將須要固定的BN
層狀態設置爲eval
。網絡
問題示例:dom
環境:torch
:1.7.0測試
# -*- coding:utf-8 -*- import torch import torch.nn as nn import torch.nn.functional as F class Net(nn.Module): def __init__(self): super(Net, self).__init__() self.conv1 = nn.Conv2d(1, 6, 3) self.bn1 = nn.BatchNorm2d(6) self.conv2 = nn.Conv2d(6, 16, 3) self.bn2 = nn.BatchNorm2d(16) # an affine operation: y = Wx + b self.fc1 = nn.Linear(16 * 6 * 6, 120) # 6*6 from image dimension self.fc2 = nn.Linear(120, 84) self.fc3 = nn.Linear(84, 5) def forward(self, x): # Max pooling over a (2, 2) window x = F.max_pool2d(F.relu(self.bn1(self.conv1(x))), (2, 2)) # If the size is a square you can only specify a single number x = F.max_pool2d(F.relu(self.bn2(self.conv2(x))), 2) x = x.view(-1, self.num_flat_features(x)) x = F.relu(self.fc1(x)) x = F.relu(self.fc2(x)) x = self.fc3(x) return x def num_flat_features(self, x): size = x.size()[1:] # all dimensions except the batch dimension num_features = 1 for s in size: num_features *= s return num_features def print_parameter_grad_info(net): print('-------parameters requires grad info--------') for name, p in net.named_parameters(): print(f'{name}:\t{p.requires_grad}') def print_net_state_dict(net): for key, v in net.state_dict().items(): print(f'{key}') if __name__ == "__main__": net = Net() print_parameter_grad_info(net) net.requires_grad_(False) print_parameter_grad_info(net) torch.random.manual_seed(5) test_data = torch.rand(1, 1, 32