The love-hate story of PyTorch and TensorFlow: basic data types
PyTorch version: 1.6.0
TensorFlow version: 1.15.0
In earlier posts we already looked at variables in PyTorch and TensorFlow; in this section we take a closer look at trainable parameters (variables).
We will use the iris dataset that ships with sklearn to work through the details.
1. PyTorch
(1) First approach: define the parameters ourselves, without using nn.Module or nn.Sequential() to build the model.
Load the dataset and convert it to tensors:
import torch
import torch.nn.functional as F
import numpy as np
from sklearn.datasets import load_iris

iris = load_iris()
data = iris.data
target = iris.target
data = torch.from_numpy(data).float()     # shape (150, 4)
target = torch.from_numpy(target).long()  # shape (150,)
batch_size = data.shape[0]  # use the whole dataset as a single batch
dataset = torch.utils.data.TensorDataset(data, target)  # build the dataset
train_iter = torch.utils.data.DataLoader(dataset, batch_size, shuffle=True)  # how the data is fetched
Define the parameters to be trained ourselves:
classes = 3
input = 4
hidden = 10
w_0 = torch.tensor(np.random.normal(0, 0.01, (input, hidden)), dtype=torch.float)
b_0 = torch.zeros(hidden, dtype=torch.float)
w_1 = torch.tensor(np.random.normal(0, 0.01, (hidden, classes)), dtype=torch.float)
b_1 = torch.zeros(classes, dtype=torch.float)
We could pass requires_grad=True when defining the parameters to make them trainable, or set the flag afterwards like this:
params = [w_0, b_0, w_1, b_1]
for param in params:
    param.requires_grad_(requires_grad=True)
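Equivalently, the flag can be set at creation time. This is only a sketch of the first option mentioned above, not the code used in the rest of this section:

# equivalent: mark the tensors as trainable when creating them
w_0 = torch.tensor(np.random.normal(0, 0.01, (input, hidden)), dtype=torch.float, requires_grad=True)
b_0 = torch.zeros(hidden, dtype=torch.float, requires_grad=True)
w_1 = torch.tensor(np.random.normal(0, 0.01, (hidden, classes)), dtype=torch.float, requires_grad=True)
b_1 = torch.zeros(classes, dtype=torch.float, requires_grad=True)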
Define the learning rate, optimizer, loss function and network:
lr = 5
optimizer = None
criterion = torch.nn.CrossEntropyLoss()
epoch = 1000

def sgd(params, lr, batch_size):
    for param in params:
        param.data -= lr * param.grad / batch_size  # note: the update goes through param.data

def net(x):
    h = torch.matmul(x, w_0) + b_0
    h = F.relu(h)
    output = torch.matmul(h, w_1) + b_1
    # output = F.softmax(output, dim=1)
    return output
To make the parameter-update process clearer, we do not use PyTorch's built-in optimizer here but our own hand-written stochastic gradient descent.
Define the main training function:
def train(net, params, lr, train_iter):
    for i in range(1, epoch + 1):
        for x, y in train_iter:
            output = net(x)
            loss = criterion(output, y)
            # zero the gradients
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()
            loss.backward()
            if optimizer is None:
                sgd(params, lr, batch_size)
            else:
                optimizer.step()
            acc = (output.argmax(dim=1) == y).sum().item() / data.shape[0]
        print("epoch:{:03d} loss:{:.4f} acc:{:.4f}".format(i, loss.item(), acc))

train(net=net, params=params, lr=lr, train_iter=train_iter)
From this we can also see what optimizer.zero_grad() and optimizer.step() are for; a minimal sketch of the equivalent loop with a built-in optimizer follows the results below. That is the complete process of training self-defined parameters. The results:
epoch:994 loss:0.0928 acc:0.9800
epoch:995 loss:0.0927 acc:0.9800
epoch:996 loss:0.0926 acc:0.9800
epoch:997 loss:0.0926 acc:0.9800
epoch:998 loss:0.0925 acc:0.9800
epoch:999 loss:0.0925 acc:0.9800
epoch:1000 loss:0.0924 acc:0.9800
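For comparison, here is a minimal sketch (not part of the run above) of what the same inner loop collapses to once a built-in optimizer is plugged in, which is essentially what section (3) below switches to:

optimizer = torch.optim.SGD(params, lr=0.05)  # sketch only; any iterable of trainable tensors works
for x, y in train_iter:
    optimizer.zero_grad()            # clear the gradients accumulated by the previous step
    loss = criterion(net(x), y)
    loss.backward()                  # compute fresh gradients
    optimizer.step()                 # let the optimizer update the parameters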
(2) Build the model with nn.Sequential() and initialize its parameters:
Import the required packages and load the dataset:
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
import numpy as np
from sklearn.datasets import load_iris

iris = load_iris()
data = iris.data
target = iris.target
Convert to PyTorch data formats:
data = torch.from_numpy(data).float()
target = torch.from_numpy(target).long()
batch_size = data.shape[0]
dataset = torch.utils.data.TensorDataset(data, target)  # build the dataset
train_iter = torch.utils.data.DataLoader(dataset, batch_size, shuffle=True)  # how the data is fetched
Define the hyperparameters:
classes = 3
input = 4
hidden = 10
lr = 4
optimizer = None
Define the network:
net = nn.Sequential(
nn.Linear(input,hidden),
nn.ReLU(),
nn.Linear(hidden,classes),
)
Initialize the parameters:
for name, param in net.named_parameters():  # named_parameters() yields each parameter's name together with its value
    if "weight" in name:
        init.normal_(param, mean=0, std=0.01)
    if "bias" in name:
        init.zeros_(param)
The hand-written stochastic gradient descent optimizer:
def sgd(params, lr, batch_size):
    for param in params:
        param.data -= lr * param.grad / batch_size  # note: the update goes through param.data
The main training loop:
epoch = 1000
criterion = torch.nn.CrossEntropyLoss()

def train(net, lr, train_iter):
    for i in range(1, epoch + 1):
        for x, y in train_iter:
            output = net(x)
            loss = criterion(output, y)
            # zero the gradients
            if optimizer is not None:
                optimizer.zero_grad()
            else:
                for param in net.parameters():
                    if param.grad is not None:
                        param.grad.data.zero_()
            loss.backward()
            if optimizer is None:
                sgd(net.parameters(), lr, batch_size)
            else:
                optimizer.step()
            acc = (output.argmax(dim=1) == y).sum().item() / data.shape[0]
        print("epoch:{:03d} loss:{:.4f} acc:{:.4f}".format(i, loss.item(), acc))

train(net=net, lr=lr, train_iter=train_iter)
Results:
(3) Use PyTorch's built-in optimizer
We only need to set optimizer as follows:
optimizer = torch.optim.SGD(net.parameters(), lr=0.05)
Note that the learning rate here has to be much smaller than the one used above: the hand-written sgd divides the gradient by batch_size while torch.optim.SGD does not, so a comparable setting is roughly lr = 5 / 150 ≈ 0.03. The results:
(4) Build the network with nn.Module, defining and initializing the parameters ourselves
We only need to change the following:
class Net(nn.Module):
    def __init__(self, input, hidden, classes):
        super(Net, self).__init__()
        self.input = input
        self.hidden = hidden
        self.classes = classes
        self.w0 = nn.Parameter(torch.Tensor(self.input, self.hidden))
        self.b0 = nn.Parameter(torch.Tensor(self.hidden))
        self.w1 = nn.Parameter(torch.Tensor(self.hidden, self.classes))
        self.b1 = nn.Parameter(torch.Tensor(self.classes))
        self.reset_parameters()

    def reset_parameters(self):
        nn.init.normal_(self.w0)
        nn.init.constant_(self.b0, 0)
        nn.init.normal_(self.w1)
        nn.init.constant_(self.b1, 0)

    def forward(self, x):
        out = torch.matmul(x, self.w0) + self.b0
        out = F.relu(out)
        out = torch.matmul(out, self.w1) + self.b1
        return out

net = Net(input, hidden, classes)
optimizer = torch.optim.SGD(net.parameters(), lr=0.05)
Results:
(5) Build the network with nn.Module using built-in layers, and initialize their parameters
class Net(nn.Module):
    def __init__(self, input, hidden, classes):
        super(Net, self).__init__()
        self.input = input
        self.hidden = hidden
        self.classes = classes
        self.fc1 = nn.Linear(self.input, self.hidden)
        self.fc2 = nn.Linear(self.hidden, self.classes)
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        out = self.fc1(x)
        out = F.relu(out)
        out = self.fc2(out)
        return out

net = Net(input, hidden, classes)
optimizer = torch.optim.SGD(net.parameters(), lr=0.05)
Results:
In PyTorch, the default initialization of a layer's parameters happens in that layer's reset_parameters() method.
Let's look at the official implementation of the Linear layer:
# the official Linear layer
class Linear(Module):
    def __init__(self, in_features, out_features, bias=True):
        super(Linear, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.Tensor(out_features, in_features))
        if bias:
            self.bias = Parameter(torch.Tensor(out_features))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        stdv = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.uniform_(-stdv, stdv)

    def forward(self, input):
        return F.linear(input, self.weight, self.bias)

    def extra_repr(self):
        return 'in_features={}, out_features={}, bias={}'.format(
            self.in_features, self.out_features, self.bias is not None
        )
(6) Finally, some examples of getting parameter names and values out of a network
We use this network as the example:
class Net(nn.Module):
    def __init__(self, input, hidden, classes):
        super(Net, self).__init__()
        self.input = input
        self.hidden = hidden
        self.classes = classes
        self.fc1 = nn.Linear(self.input, self.hidden)
        self.fc2 = nn.Linear(self.hidden, self.classes)
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        out = self.fc1(x)
        out = F.relu(out)
        out = self.fc2(out)
        return out

net = Net(input, hidden, classes)
First, model.state_dict(): a parameter dictionary whose keys are the parameter names and whose values are the parameter tensors:
for name, value in net.state_dict().items():
    print(name, value)
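For the Net above, the keys simply follow the attribute names of the sub-modules, so we expect (a quick sanity check, not taken from the original post):

print(list(net.state_dict().keys()))
# ['fc1.weight', 'fc1.bias', 'fc2.weight', 'fc2.bias']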
Next, model.parameters(): this returns a generator, which we have already used several times; param.data and param.grad give a parameter's value and its gradient (the gradient is None before the first backward pass).
for param in net.parameters():
    print(param.data, param.grad)
Then model.named_parameters(): this returns named parameters, i.e. each parameter together with its name:
for name, param in net.named_parameters():
    print(name, param)
Finally, self.modules(): usually used during network initialization, it returns the concrete sub-modules (layers) of the network, so we can initialize different layer types such as nn.Conv2d and nn.Linear differently.
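A minimal sketch of that pattern (the per-layer-type choices here are only an illustration, not from the original post):

def init_weights(model):
    # walk every sub-module and pick an initialization per layer type
    for m in model.modules():
        if isinstance(m, nn.Linear):
            nn.init.normal_(m.weight, 0, 0.01)
            nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)

init_weights(net)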
References:
https://www.cnblogs.com/KaifengGuan/p/12332072.html
https://www.geekschool.org/2020/08/02/13455.html
https://blog.csdn.net/weixin_44058333/article/details/92691656
2. TensorFlow
Import the required packages and load the data:
import tensorflow as tf
import numpy as np
from sklearn.datasets import load_iris
from sklearn.preprocessing import OneHotEncoder

iris = load_iris()
data = iris.data
target = iris.target
Convert the labels to one-hot encoding:
oneHotEncoder = OneHotEncoder(sparse=False)
onehot_target = oneHotEncoder.fit_transform(target.reshape(-1, 1))
print(onehot_target)
Define the hyperparameters and the trainable parameters:
input = 4
hidden = 10
classes = 3
w0 = tf.Variable(tf.random.normal([input, hidden], stddev=0.01, seed=1))
b0 = tf.Variable(tf.zeros([hidden]))
w1 = tf.Variable(tf.random.normal([hidden, classes], stddev=0.01, seed=1))
b1 = tf.Variable(tf.zeros([classes]))
Define the placeholders in the computation graph:
x = tf.placeholder(tf.float32, shape=(None, input), name="x-input")      # input data
y_ = tf.placeholder(tf.float32, shape=(None, classes), name="y-input")   # ground-truth labels
Define the network, loss function and optimizer:
def net(x):
    hid = tf.add(tf.matmul(x, w0), b0)
    hid = tf.nn.relu(hid)
    out = tf.add(tf.matmul(hid, w1), b1)
    out = tf.nn.softmax(out)
    return out

y = net(x)
cross_entropy = -tf.reduce_mean(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0))
                                + (1 - y_) * tf.log(tf.clip_by_value(1 - y, 1e-10, 1.0)))
optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.05).minimize(cross_entropy)
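The loss above is a hand-rolled, element-wise cross entropy on the softmax outputs. A more standard choice in TF 1.x would be softmax_cross_entropy_with_logits_v2 on the pre-softmax values; a minimal sketch, assuming net() is changed to return the logits (i.e. the tf.nn.softmax call is removed):

logits = net(x)  # assumes net() now returns pre-softmax values
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_, logits=logits))
optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.05).minimize(cross_entropy)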
The training loop:
epoch = 1000
with tf.compat.v1.Session() as sess:                # create a session
    init_op = tf.global_variables_initializer()     # op that initializes the variables
    sess.run(init_op)
    for epoch in range(1, epoch + 1):
        sess.run(optimizer, feed_dict={x: data, y_: onehot_target})  # feed the data to the optimizer
        y_pred = sess.run(y, feed_dict={x: data})                    # compute the network outputs
        total_cross_entropy = sess.run(cross_entropy, feed_dict={y: y_pred, y_: onehot_target})  # compute the cross entropy
        pred = tf.argmax(y_pred, axis=1)                             # index of the largest value per row, i.e. the predicted class
        correct = tf.cast(tf.equal(pred, target), dtype=tf.int32)    # compare with the labels and cast bool to int
        correct = tf.reduce_sum(correct)                             # sum; without axis this sums every element
        acc = correct.eval() / data.shape[0]
        print("epoch:{} loss:{:.4f} acc:{:.4f}".format(epoch, total_cross_entropy, acc))
Results:
That said, training for 1000 epochs here feels much slower than in PyTorch.
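One likely reason (my reading, not something the post measures) is that tf.argmax / tf.cast / tf.reduce_sum are called inside the loop, so new graph ops get added and an extra correct.eval() session run happens every epoch. A sketch that builds the accuracy ops once, outside the loop:

# build the accuracy computation once as part of the graph
pred = tf.argmax(y, axis=1)
correct = tf.reduce_sum(tf.cast(tf.equal(pred, tf.argmax(y_, axis=1)), tf.float32))
acc_op = correct / tf.cast(tf.shape(x)[0], tf.float32)

with tf.compat.v1.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(1, 1001):
        # one session run per epoch: update, loss and accuracy together
        _, loss_val, acc_val = sess.run([optimizer, cross_entropy, acc_op],
                                        feed_dict={x: data, y_: onehot_target})
        print("epoch:{} loss:{:.4f} acc:{:.4f}".format(i, loss_val, acc_val))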