解析模型參數

解析模型參數

Caffe Model

caffemodel是使用protobuf進行保存的。

1. 有prototxt

依賴於caffe。

解析的時候如果有prototxt、caffemodel兩個文件,可以使用caffe提供的接口load網絡,然後再解析網絡。

weight是net.params[param_name][0].data;bias需要判斷net.params[param_name]的長度,如果大於1則有bias,是net.params[param_name][1].data,否則沒有bias參數。

#!/usr/bin/env python
"""Dump the weights and biases of every parameterised Caffe layer to a text file.

Loads the network described by MODEL_FILE with the trained values from
PRETRAIN_FILE and writes, for each entry of ``net.params``, the flattened
weight blob followed by the flattened bias blob (when the layer has one).
"""
import sys

import caffe
import numpy as np

# Print arrays in full; without this NumPy abbreviates large arrays with "...".
# sys.maxsize is the value the NumPy documentation gives for "never summarise";
# the original string 'nan' is not a number and is rejected by current NumPy.
np.set_printoptions(threshold=sys.maxsize)

# Deploy (network definition) file
MODEL_FILE = 'caffe_deploy.prototxt'
# Pre-trained Caffe model
PRETRAIN_FILE = 'caffe_iter_10000.caffemodel'

# File the parameters are written to
params_txt = 'params.txt'

# Have Caffe load the network parameters in test mode
net = caffe.Net(MODEL_FILE, PRETRAIN_FILE, caffe.TEST)

# The with-block guarantees the output is flushed and closed, even on error
# (the original ended with `pf.close`, which never actually called close()).
with open(params_txt, 'w') as pf:
    # Visit every layer that owns parameters
    for param_name in net.params.keys():
        blobs = net.params[param_name]

        # Key of this layer in net.params
        pf.write(param_name)
        pf.write('\n')

        # Weights are blob 0. The blob is multi-dimensional; flatten it with
        # ravel() rather than assigning to .shape, so the blob held by the
        # network keeps its original shape.
        pf.write('\n' + param_name + '_weight:\n\n')
        for w in blobs[0].data.ravel():
            pf.write('%ff, ' % w)

        # Bias is blob 1 and exists only when the layer has more than one blob
        if len(blobs) > 1:
            pf.write('\n\n' + param_name + '_bias:\n\n')
            for b in blobs[1].data.ravel():
                pf.write('%ff, ' % b)

        pf.write('\n\n')

參考:
http://blog.csdn.net/u011762313/article/details/49851795
http://www.javashuo.com/article/p-wyjbudks-o.html

2. 沒有prototxt

依賴於caffe.proto.caffe_pb2。

如果不存在prototxt文件,那麼就需要直接調用caffe_pb2解析caffemodel文件,成員需要參照caffe.proto文件(在caffe工程的src/caffe/proto下面)。caffe.proto使用protobuf語法,利用protoc編譯可生成caffe.pb.cc、caffe.pb.h兩個文件供C++調用。

weight參數位置:
NetParameter -> LayerParameter -> BlobProto -> data
model -> layer -> blobs[0] -> data

bias參數位置(若是有):
NetParameter -> LayerParameter -> BlobProto -> data
model -> layer -> blobs[1] -> data

以下是caffe.proto文件定義的參數:
NetParameter主要包含name, input, input_shape, layer。
LayerParameter主要包含name, type, bottom, top, blobs, param(學習率), transform_param, convolution_param等。

// Top-level description of a network. A .caffemodel file is one serialized
// NetParameter; its per-layer parameters live in the repeated `layer` field.
message NetParameter {
  optional string name = 1; // consider giving the network a name
  // DEPRECATED. See InputParameter. The input blobs to the network.
  repeated string input = 3;
  // DEPRECATED. See InputParameter. The shape of the input blobs.
  repeated BlobShape input_shape = 8;

  // 4D input dimensions -- deprecated. Use "input_shape" instead.
  // If specified, for each input blob there should be four
  // values specifying the num, channels, height and width of the input blob.
  // Thus, there should be a total of (4 * #input) numbers.
  repeated int32 input_dim = 4;

  // Whether the network will force every layer to carry out backward operation.
  // If set False, then whether to carry out backward is determined
  // automatically according to the net structure and learning rates.
  optional bool force_backward = 5 [default = false];
  // The current "state" of the network, including the phase, level, and stage.
  // Some layers may be included/excluded depending on this state and the states
  // specified in the layers' include and exclude fields.
  optional NetState state = 6;

  // Print debugging information about results while running Net::Forward,
  // Net::Backward, and Net::Update.
  optional bool debug_info = 7 [default = false];

  // The layers that make up the net. Each of their configurations, including
  // connectivity and behavior, is specified as a LayerParameter.
  repeated LayerParameter layer = 100;  // ID 100 so layers are printed last.

  // DEPRECATED: use 'layer' instead.
  // NOTE(review): presumably models saved in the old V1 format populate this
  // field instead of `layer` -- confirm before relying on `layer` alone.
  repeated V1LayerParameter layers = 2;
}

// One layer of the network: its name and type, the blobs it is connected to
// (bottom/top), its learned numeric parameters (blobs) and its type-specific
// configuration (the *_param fields).
message LayerParameter {
  optional string name = 1; // the layer name
  optional string type = 2; // the layer type
  repeated string bottom = 3; // the name of each bottom blob
  repeated string top = 4; // the name of each top blob

  // The train / test phase for computation.
  optional Phase phase = 10;

  // The amount of weight to assign each top blob in the objective.
  // Each layer assigns a default value, usually of either 0 or 1,
  // to each top blob.
  repeated float loss_weight = 5;

  // Specifies training parameters (multipliers on global learning constants,
  // and the name and other settings used for weight sharing).
  repeated ParamSpec param = 6;

  // The blobs containing the numeric parameters of the layer.
  // The parsing snippets in this article read blobs[0] as the weights and
  // blobs[1], when present, as the bias.
  repeated BlobProto blobs = 7;

  // Specifies whether to backpropagate to each bottom. If unspecified,
  // Caffe will automatically infer whether each input needs backpropagation
  // to compute parameter gradients. If set to true for some inputs,
  // backpropagation to those inputs is forced; if set false for some inputs,
  // backpropagation to those inputs is skipped.
  //
  // The size must be either 0 or equal to the number of bottoms.
  repeated bool propagate_down = 11;

  // Rules controlling whether and when a layer is included in the network,
  // based on the current NetState. You may specify a non-zero number of rules
  // to include OR exclude, but not both. If no include or exclude rules are
  // specified, the layer is always included. If the current NetState meets
  // ANY (i.e., one or more) of the specified rules, the layer is
  // included/excluded.
  repeated NetStateRule include = 8;
  repeated NetStateRule exclude = 9;

  // Parameters for data pre-processing.
  optional TransformationParameter transform_param = 100;

  // Parameters shared by loss layers.
  optional LossParameter loss_param = 101;

  // Layer type-specific parameters.
  //
  // Note: certain layers may have more than one computational engine
  // for their implementation. These layers include an Engine type and
  // engine parameter for selecting the implementation.
  // The default for the engine is set by the ENGINE switch at compile-time.
  optional AccuracyParameter accuracy_param = 102;
  optional ArgMaxParameter argmax_param = 103;
  optional BatchNormParameter batch_norm_param = 139;
  optional BiasParameter bias_param = 141;
  optional ConcatParameter concat_param = 104;
  optional ContrastiveLossParameter contrastive_loss_param = 105;
  optional ConvolutionParameter convolution_param = 106;
  optional CropParameter crop_param = 144;
  optional DataParameter data_param = 107;
  optional DropoutParameter dropout_param = 108;
  optional DummyDataParameter dummy_data_param = 109;
  optional EltwiseParameter eltwise_param = 110;
  optional ELUParameter elu_param = 140;
  optional EmbedParameter embed_param = 137;
  optional ExpParameter exp_param = 111;
  optional FlattenParameter flatten_param = 135;
  optional HDF5DataParameter hdf5_data_param = 112;
  optional HDF5OutputParameter hdf5_output_param = 113;
  optional HingeLossParameter hinge_loss_param = 114;
  optional ImageDataParameter image_data_param = 115;
  optional InfogainLossParameter infogain_loss_param = 116;
  optional InnerProductParameter inner_product_param = 117;
  optional InputParameter input_param = 143;
  optional LogParameter log_param = 134;
  optional LRNParameter lrn_param = 118;
  optional MemoryDataParameter memory_data_param = 119;
  optional MVNParameter mvn_param = 120;
  optional ParameterParameter parameter_param = 145;
  optional PoolingParameter pooling_param = 121;
  optional PowerParameter power_param = 122;
  optional PReLUParameter prelu_param = 131;
  optional PythonParameter python_param = 130;
  optional RecurrentParameter recurrent_param = 146;
  optional ReductionParameter reduction_param = 136;
  optional ReLUParameter relu_param = 123;
  optional ReshapeParameter reshape_param = 133;
  optional ScaleParameter scale_param = 142;
  optional SigmoidParameter sigmoid_param = 124;
  optional SoftmaxParameter softmax_param = 125;
  optional SPPParameter spp_param = 132;
  optional SliceParameter slice_param = 126;
  optional TanHParameter tanh_param = 127;
  optional ThresholdParameter threshold_param = 128;
  optional TileParameter tile_param = 138;
  optional WindowDataParameter window_data_param = 129;
}

BlobProto裏面則是存儲了相關的訓練參數,重要的兩個成員是shape, data:
packed = true表示採用連續存儲的方式:先寫入該字段的標籤和總字節長度,再連續記錄每一個數據;如果不用packed模式,需要在每一個數據前都聲明是data這個字段的數據,消耗空間。注意:caffe.proto使用proto2語法,repeated字段默認不使用packed模式,需要顯式聲明[packed = true];只有proto3中repeated數值字段才默認使用packed模式。

// Container for a layer's numeric parameters: `shape` holds the dimensions and
// `data` the float values. Every repeated field is declared [packed = true],
// so its values are stored as one contiguous, length-prefixed run.
message BlobProto {
  // Dimensions of the blob (read as shape.dim by the parsing snippet).
  optional BlobShape shape = 7;
  // Parameter values in single precision.
  repeated float data = 5 [packed = true];
  // NOTE(review): presumably the gradients matching `data` -- confirm against
  // the Caffe documentation.
  repeated float diff = 6 [packed = true];
  // Double-precision counterparts of `data` and `diff`.
  repeated double double_data = 8 [packed = true];
  repeated double double_diff = 9 [packed = true];
}

如下代碼能夠用來讀取caffemodel各字段:

import caffe.proto.caffe_pb2 as caffe_pb2
import pdb

# Path of the binary caffemodel to inspect
caffemodel_filename = '/home/gr/deepwork/HyperLPR/lpr.caffemodel'

model = caffe_pb2.NetParameter()

# A caffemodel is a serialized NetParameter message, so read it as bytes.
# The with-block closes the file even if parsing raises.
with open(caffemodel_filename, 'rb') as f:
    model.ParseFromString(f.read())

layers = model.layer
# print(...) with a single string argument behaves the same on Python 2 and 3.
print('name: ' + model.name)
# The original stopped here in pdb.set_trace(); that leftover breakpoint is
# removed so the script runs unattended.


def _describe_blob(blob):
    """Return '<dims>:<first value>' for a BlobProto.

    Blobs without any float data report 'n/a' instead of raising IndexError.
    """
    first = str(blob.data[0]) if len(blob.data) > 0 else 'n/a'
    return str(blob.shape.dim) + ':' + first


for layer in layers:
    print(layer.name + ':')
    if len(layer.blobs) > 0:
        # blobs[0] holds the weights
        print('\tweight filter ' + _describe_blob(layer.blobs[0]))
        # blobs[1], when present, holds the bias
        if len(layer.blobs) > 1:
            print('\tbias filter ' + _describe_blob(layer.blobs[1]))
    else:
        # Layer without learned parameters
        print('\tequal 0')

另外也能夠解析生成對應的prototxt文件。

參考:
http://www.javashuo.com/article/p-zjitugqe-c.html
http://blog.csdn.net/jiongnima/article/details/72904526
http://blog.csdn.net/seven_first/article/details/47418887#message-layerparameter
http://www.javashuo.com/article/p-fyfexlqe-o.html

3. 使用c++解析

依賴於protobuf。

一樣須要對照caffe.proto進行解析:

weight參數位置:
NetParameter -> LayerParameter -> BlobProto -> data
msg -> layer -> blobs[0] -> data

bias參數位置(若是有):
NetParameter -> LayerParameter -> BlobProto -> data
msg -> layer -> blobs[1] -> data

protoc生成的C++代碼,每一個字段都提供了同名的只讀訪問接口;如果是repeated字段,還會提供在名字前面加上mutable_的接口,返回可修改的底層容器指針,如下:
經過string name = student->name();取得學生姓名。
經過RepeatedPtrField<string>* classes = student->mutable_classes();取得全部課程名, string first_class = classes->Get(0);取得第一門課程,而提供的classes()函數就是調用Get()

// Example message used to illustrate the accessors protoc generates.
message Student {
    // Student name (singular field)
    optional string name = 1;
    // Names of the classes the student takes (repeated field)
    repeated string classes = 2;
}

對於一個repeated float data字段,會對該字段生成以下成員函數:

// Number of elements currently stored in the repeated field.
inline int data_size() const;
 // Removes every element from the field.
 inline void clear_data();
 // Field number of `data` as declared in the .proto file (data = 5).
 static const int kDataFieldNumber = 5;
 // Returns the element at the given zero-based index.
 inline float data(int index) const;
 // Overwrites the element at the given zero-based index.
 inline void set_data(int index, float value);
 // Appends a new element with the given value.
 inline void add_data(float value);
 // Read-only access to the underlying RepeatedField holding all elements.
 inline const ::google::protobuf::RepeatedField< float >& data() const;
 // Mutable access to the underlying RepeatedField holding all elements.
 inline ::google::protobuf::RepeatedField< float >* mutable_data();

提取參數代碼:

#include <stdio.h>
#include <string.h>
#include <fstream>
#include <iostream>
#include "caffe.pb.h"

using namespace std;
using namespace caffe;


int main(int argc, char* argv[]) {

    caffe::NetParameter msg;

    fstream input("/home/gr/deepwork/caffe-tensorflow/examples/mnist/lenet_iter_10000.caffemodel", ios::in | ios::binary);
    if (!msg.ParseFromIstream(&input))
    {
        cerr << "Failed to parse address book." << endl;
        return -1;
    }

    ::google::protobuf::RepeatedPtrField< LayerParameter >* layers = msg.mutable_layer();
    ::google::protobuf::RepeatedPtrField< LayerParameter >::iterator it = layers->begin();
    for (; it != layers->end(); ++it)
    {
        cout << it->name() << endl;
        cout << it->type() << endl;
        ::google::protobuf::RepeatedPtrField< BlobProto >* blobs = it->mutable_blobs();
        for (int i = 0; i < blobs->size(); ++i) {
            BlobProto blob = blobs->Get(i);
            ::google::protobuf::RepeatedField< float >* datas = blob.mutable_data();
            for (int j = 0; j < datas->size(); ++j) {
                cout << datas->Get(j) << " ";
            }
            cout << endl;
        }
    }

return 0;
}

http://blog.csdn.net/zr459927180/article/details/50904938
http://blog.csdn.net/dachao_xu/article/details/50899534

2、TensorFlow

conv2d:

if normalizer_fn is None and a biases_initializer is provided then a biases variable would be created and added the activations.

如果沒有提供normalizer_fn,而且提供了biases_initializer,就會創建並初始化biases;否則其它情況都沒有偏置。

TensorFlow若是使用高級接口slim,而不是本身定義Variable,能夠經過trainable_variables()獲取須要訓練的變量:

# Collect the variables that need training.
params = tf.trainable_variables()

# One line per variable: its index, its shape and its name.
row_template = " param {:15}: {:15} {}"
for idx, v in enumerate(params):
    shape_text = str(v.get_shape())
    print(row_template.format(idx, shape_text, v.name))

結果:

trainable_variables

trainable_variables

可以利用tf.train.Saver load訓練好的權重,之後取得每個參數:

# Restore a trained checkpoint and write every trainable variable to a text
# file: one line with the variable name, one line with its flattened values.
saver = tf.train.Saver()
params = tf.trainable_variables()

# The with-block closes the output file even if restoring the checkpoint fails.
with open('mnist_model.txt', 'w') as fp:
    with tf.Session() as sess:
        saver.restore(sess, './tmp/mnist_model.ckpt')
        for param in params:
            # Evaluate the variable; the result is used as a NumPy array below.
            v = sess.run(param)
            fp.write(param.name + '\n')
            # A text file needs a string, not the array object itself, so the
            # values are flattened and formatted explicitly.
            # %.9g keeps enough digits for float32 -- assumes the variables
            # are float32; TODO confirm.
            fp.write(', '.join('%.9g' % x for x in v.ravel()))
            fp.write('\n')
相關文章
相關標籤/搜索