一、train_val.prototxt
# Train/validation network definition (derived from Caffe's CIFAR10_quick).
# Four conv/pool stages followed by two fully connected layers; the final
# layer has 3 outputs, one per class of the custom dataset.
name: "CIFAR10_quick"

# Training data: read from LMDB, mean image subtracted.
layer {
  name: "cifar"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TRAIN
  }
  transform_param {
    # mirror: true
    # mean_file: "examples/cifar10/mean.binaryproto"
    mean_file: "myself/00b/00bmean.binaryproto"
  }
  data_param {
    # source: "examples/cifar10/cifar10_train_lmdb"
    source: "myself/00b/00b_train_lmdb"
    batch_size: 50
    backend: LMDB
  }
}

# Validation data: same mean file, separate LMDB.
layer {
  name: "cifar"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TEST
  }
  transform_param {
    # mean_file: "examples/cifar10/mean.binaryproto"
    mean_file: "myself/00b/00bmean.binaryproto"
  }
  data_param {
    # source: "examples/cifar10/cifar10_test_lmdb"
    source: "myself/00b/00b_val_lmdb"
    batch_size: 50
    backend: LMDB
  }
}

# ---- Stage 1: conv -> max pool -> ReLU ----
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  # First param block is for the weights, second for the biases.
  param { lr_mult: 1 }
  param { lr_mult: 2 }
  convolution_param {
    num_output: 32
    # pad: 1
    kernel_size: 4
    stride: 1
    weight_filler {
      type: "gaussian"
      std: 0.0001
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
# ReLU is applied in place on the pooled output. Because ReLU is monotonic,
# ReLU-after-max-pool gives the same values as max-pool-after-ReLU.
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "pool1"
  top: "pool1"
}

# ---- Stage 2: conv -> ReLU -> average pool ----
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param { lr_mult: 1 }
  param { lr_mult: 2 }
  convolution_param {
    num_output: 32
    # pad: 2
    kernel_size: 4
    stride: 1
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2"
  top: "conv2"
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: AVE
    kernel_size: 2
    stride: 2
  }
}

# ---- Stage 3: conv -> ReLU -> average pool ----
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "pool2"
  top: "conv3"
  param { lr_mult: 1 }
  param { lr_mult: 2 }
  convolution_param {
    num_output: 32
    # pad: 2
    kernel_size: 4
    stride: 1
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu3"
  type: "ReLU"
  bottom: "conv3"
  top: "conv3"
}
layer {
  name: "pool3"
  type: "Pooling"
  bottom: "conv3"
  top: "pool3"
  pooling_param {
    pool: AVE
    kernel_size: 2
    stride: 2
  }
}

# ---- Stage 4: conv -> ReLU -> average pool ----
layer {
  name: "conv4"
  type: "Convolution"
  bottom: "pool3"
  top: "conv4"
  param { lr_mult: 1 }
  param { lr_mult: 2 }
  convolution_param {
    num_output: 32
    # pad: 2
    kernel_size: 4
    stride: 1
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu4"
  type: "ReLU"
  bottom: "conv4"
  top: "conv4"
}
layer {
  name: "pool4"
  type: "Pooling"
  bottom: "conv4"
  top: "pool4"
  pooling_param {
    pool: AVE
    kernel_size: 2
    stride: 2
  }
}

# ---- Classifier ----
layer {
  name: "ip1"
  type: "InnerProduct"
  bottom: "pool4"
  top: "ip1"
  param { lr_mult: 1 }
  param { lr_mult: 2 }
  inner_product_param {
    num_output: 200
    weight_filler {
      type: "gaussian"
      std: 0.1
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "ip2"
  type: "InnerProduct"
  bottom: "ip1"
  top: "ip2"
  param { lr_mult: 1 }
  param { lr_mult: 2 }
  inner_product_param {
    # One output per class.
    num_output: 3
    weight_filler {
      type: "gaussian"
      std: 0.1
    }
    bias_filler {
      type: "constant"
    }
  }
}

# Accuracy is only evaluated during the TEST phase.
layer {
  name: "accuracy"
  type: "Accuracy"
  bottom: "ip2"
  bottom: "label"
  top: "accuracy"
  include {
    phase: TEST
  }
}
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "ip2"
  bottom: "label"
  top: "loss"
}
二、solver.prototxt
# Solver settings for training the network in myself/00b/train_val.prototxt.

# The train/test net protocol buffer definition
net: "myself/00b/train_val.prototxt"

# test_iter specifies how many forward passes each test phase carries out.
# Images evaluated per test = test_iter * (TEST batch_size of the net).
test_iter: 10
# Carry out testing every 70 training iterations.
test_interval: 70

# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.001
momentum: 0.9
weight_decay: 0.004

# The learning rate policy. With "fixed" the rate stays at base_lr for the
# whole run; gamma and stepsize below only take effect if the "step" policy
# is enabled instead.
lr_policy: "fixed"
# lr_policy: "step"
gamma: 0.1
stepsize: 100

# Display every 10 iterations
display: 10
# The maximum number of iterations
max_iter: 2000

# snapshot intermediate results (disabled; uncomment to enable)
# snapshot: 3000
# snapshot_format: HDF5
snapshot_prefix: "myself/00b/00b"

# solver mode: CPU or GPU
solver_mode: CPU
三、deploy.prototxt
# Deployment (inference-only) version of the CIFAR10_quick-style network.
# Data/label/accuracy/loss layers are replaced by an Input layer and a
# Softmax output; learnable layers keep the same names as in
# train_val.prototxt so trained weights are matched by name.
name: "CIFAR10_quick"

layer {
  name: "data"
  type: "Input"
  top: "data"
  input_param {
    # batch, channels, height, width
    # NOTE(review): 101x101 must match the image size used to build the
    # training LMDB - confirm.
    shape: { dim: 1 dim: 3 dim: 101 dim: 101 }
  }
}

# ---- Stage 1 ----
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  convolution_param {
    num_output: 32
    kernel_size: 4
    stride: 1
  }
}
# ReLU before max pooling is numerically equivalent to the
# pool-then-ReLU order used in train_val.prototxt (ReLU is monotonic).
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "conv1"
  top: "conv1"
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}

# ---- Stage 2 ----
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  convolution_param {
    num_output: 32
    kernel_size: 4
    stride: 1
  }
}
layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2"
  top: "conv2"
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    # Must be AVE to match the network that was trained; pooling layers have
    # no weights, so a MAX/AVE mismatch loads silently but changes outputs.
    pool: AVE
    kernel_size: 2
    stride: 2
  }
}

# ---- Stage 3 ----
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "pool2"
  top: "conv3"
  convolution_param {
    num_output: 32
    kernel_size: 4
    stride: 1
  }
}
layer {
  name: "relu3"
  type: "ReLU"
  bottom: "conv3"
  top: "conv3"
}
layer {
  name: "pool3"
  type: "Pooling"
  bottom: "conv3"
  top: "pool3"
  pooling_param {
    pool: AVE  # matches train_val.prototxt
    kernel_size: 2
    stride: 2
  }
}

# ---- Stage 4 ----
layer {
  name: "conv4"
  type: "Convolution"
  bottom: "pool3"
  top: "conv4"
  convolution_param {
    num_output: 32
    kernel_size: 4
    stride: 1
  }
}
layer {
  name: "relu4"
  type: "ReLU"
  bottom: "conv4"
  top: "conv4"
}
layer {
  name: "pool4"
  type: "Pooling"
  bottom: "conv4"
  top: "pool4"
  pooling_param {
    pool: AVE  # matches train_val.prototxt
    kernel_size: 2
    stride: 2
  }
}

# ---- Classifier ----
layer {
  name: "ip1"
  type: "InnerProduct"
  bottom: "pool4"
  top: "ip1"
  inner_product_param {
    num_output: 200
  }
}
layer {
  name: "ip2"
  type: "InnerProduct"
  bottom: "ip1"
  top: "ip2"
  inner_product_param {
    num_output: 3
  }
}

# Plain Softmax replaces SoftmaxWithLoss: no label is available at inference.
layer {
  # name: "loss"
  name: "prob"
  type: "Softmax"
  bottom: "ip2"
  top: "prob"
  # top: "loss"
}
參考一:
模型就用程序自帶的caffenet模型,位置在 models/bvlc_reference_caffenet/ 文件夾下,將須要的兩個配置文件複製到myfile文件夾內:
# sudo cp models/bvlc_reference_caffenet/solver.prototxt examples/myfile/
# sudo cp models/bvlc_reference_caffenet/train_val.prototxt examples/myfile/
修改train_val.prototxt,只須要修改兩個階段的data層就能夠了,其它能夠不用管。
name: "CaffeNet"

# Data layer used in the TRAIN phase.
layer {
  name: "data"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TRAIN
  }
  transform_param {
    mirror: true
    crop_size: 227
    mean_file: "examples/myfile/mean.binaryproto"
  }
  data_param {
    source: "examples/myfile/img_train_lmdb"
    batch_size: 256
    backend: LMDB
  }
}

# Data layer used in the TEST phase.
layer {
  name: "data"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TEST
  }
  transform_param {
    mirror: false
    crop_size: 227
    mean_file: "examples/myfile/mean.binaryproto"
  }
  data_param {
    source: "examples/myfile/img_test_lmdb"
    batch_size: 50
    backend: LMDB
  }
}
實際上就是修改兩個data layer的mean_file和source這兩個地方,其它都沒有變化。
修改其中的solver.prototxt
# sudo vi examples/myfile/solver.prototxt
# Network definition to train and test.
net: "examples/myfile/train_val.prototxt"

# Testing schedule: 2 forward passes per test, run every 50 iterations.
test_iter: 2
test_interval: 50

# Optimisation hyper-parameters.
base_lr: 0.001
momentum: 0.9
weight_decay: 0.005

# Step decay: multiply the learning rate by gamma every stepsize iterations.
lr_policy: "step"
gamma: 0.1
stepsize: 100

# Logging and run length.
display: 20
max_iter: 500

solver_mode: GPU
100個測試數據,batch_size爲50,所以test_iter設置爲2,就能全cover了。在訓練過程當中,調整學習率,逐步變小。
參考二:
前面作好了lmdb和均值文件,下面以Googlenet爲例修改網絡並訓練模型。
咱們將caffe-master\models下的bvlc_googlenet文件夾複製到caffe-master\examples\imagenet下。(由於咱們的lmdb和均值都在這裏,放一塊兒方便些)
打開train_val.prototxt,修改:
1.修改data層:
2.修改輸出:
因爲Googlenet有三個輸出,因此改三個地方,其餘網絡通常只有一個輸出,則改一個地方便可。
若是是微調,那麼輸出層的層名也要修改。(參數根據層名來初始化,因爲輸出改了,該層參數就不對應了,所以要更名)
# Auxiliary classifier 1 of GoogLeNet.
layer {
  name: "loss1/classifier"
  type: "InnerProduct"
  bottom: "loss1/fc"
  top: "loss1/classifier"
  # Weights
  param {
    lr_mult: 1
    decay_mult: 1
  }
  # Biases
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    # Change this to the number of classes in your dataset.
    num_output: 1000
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# Auxiliary classifier 2 of GoogLeNet.
layer {
  name: "loss2/classifier"
  type: "InnerProduct"
  bottom: "loss2/fc"
  top: "loss2/classifier"
  # Weights
  param {
    lr_mult: 1
    decay_mult: 1
  }
  # Biases
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    # Change this to the number of classes in your dataset.
    num_output: 1000
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# Main (final) classifier of GoogLeNet.
layer {
  name: "loss3/classifier"
  type: "InnerProduct"
  bottom: "pool5/7x7_s1"
  top: "loss3/classifier"
  # Weights
  param {
    lr_mult: 1
    decay_mult: 1
  }
  # Biases
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    # Change this to the number of classes in your dataset.
    num_output: 1000
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
3.打開deploy.prototxt,修改:
# Final classifier in deploy.prototxt; num_output (and, when fine-tuning,
# the layer name) must agree with train_val.prototxt.
layer {
  name: "loss3/classifier"
  type: "InnerProduct"
  bottom: "pool5/7x7_s1"
  top: "loss3/classifier"
  # Weights
  param {
    lr_mult: 1
    decay_mult: 1
  }
  # Biases
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    # Change this to the number of classes in your dataset.
    num_output: 1000
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
若是是微調,該層層名和train_val.prototxt修改一致。
接着,打開solver,修改:
# Solver settings for training GoogLeNet on a custom dataset.

# Path to the network definition - make sure it is correct.
net: "examples/imagenet/bvlc_googlenet/train_val.prototxt"

# Number of test forward passes (choose so that
# test_iter * TEST batch_size covers the test set).
test_iter: 1000
# Run a test pass every this many training iterations.
test_interval: 4000
test_initialization: false

display: 40
average_loss: 40

base_lr: 0.01
lr_policy: "step"
# Number of iterations between learning-rate changes.
stepsize: 320000
gamma: 0.96

# Total number of training iterations.
max_iter: 10000000

momentum: 0.9
weight_decay: 0.0002

snapshot: 40000
# Snapshots are written under examples/imagenet/ with names of the form
# bvlc_googlenet_iter_***.caffemodel
snapshot_prefix: "examples/imagenet/bvlc_googlenet"

solver_mode: GPU
這時,咱們回到caffe-master\examples\imagenet下,打開train_caffenet.sh,修改:
(若是是微調,在腳本里加入-weights **/**/**.caffemodel便可,即用來微調的caffemodel路徑)
#!/usr/bin/env sh
# Train GoogLeNet with the solver under examples/imagenet/bvlc_googlenet.
# Run from the Caffe root directory (paths below are relative to it).
# For fine-tuning, append: -weights path/to/pretrained.caffemodel

./build/tools/caffe train \
    -solver examples/imagenet/bvlc_googlenet/solver.prototxt \
    -gpu 0
(若是有多個GPU,可自行選擇) 而後,在caffe-master下執行該腳本便可開始訓練:$caffe-master ./examples/imagenet/train_caffenet.sh
訓練獲得的caffemodel就能夠用來作圖像分類了,此時,須要(1)獲得的labels.txt,(2)獲得的mydata_mean.binaryproto,(3)獲得的caffemodel以及已經修改過的deploy.prototxt,共四個文件,具體過程看:http://blog.csdn.net/sinat_30071459/article/details/50974695
參考三:
一、*_train_test.prototxt文件
這是訓練與測試網絡配置文件
(1)在數據層中 參數include{
phase:TRAIN/TEST
}
TRAIN與TEST不能加引號(即不能寫成 "TRAIN" / "TEST"),不然會報錯。還好提示信息裏會提示哪一行出現了問題(原文此處有錯誤信息截圖):
數字8就表明配置文件的第8行出現了錯誤
(2)卷積層和全鏈接層類似:卷積層(Convolution),全鏈接層(InnerProduct,容易翻譯成內積層)類似處有兩個【1】:都有兩個param{lr_mult:1
decay_mult:1
}
param{lr_mult: 2
decay_mult: 0
}
【2】:convolution_param{}與inner_product_param{}裏面的參數類似,甚至相同
今天有事,明天再續!
續上!
(3)平均值文件*_mean.binaryproto要放在transform_param{}裏,訓練與測試數據集放在data_param{}裏
2.*_deploy.prototxt文件
【1】*_deploy.prototxt文件的構造和*_train_test.prototxt文件的構造稍有不一樣:首先,沒有test網絡中的test模塊,只有訓練模塊。
【2】數據層的寫法和原來也有不一樣,更加簡潔:
# Legacy-style input declaration: blob name followed by its four dimensions
# (batch size, channels, height, width).
input: "data"
input_dim: 1
input_dim: 3
input_dim: 32
input_dim: 32
注意第一行 input: "data",那是數據層的名字,沒有這個的話,第一卷積層無法找到數據,我一開始沒有加這句就報錯。下面的四個參數有點相似batch_size(1,3,32,32)裏四個參數。
【3】卷積層和全鏈接層中weight_filler{}與bias_filler{}兩個參數不用再填寫,因爲這兩個參數的值由已經訓練好的模型*.caffemodel文件提供。
【4】輸出層的變化(1)沒有了test模塊測試精度(2)輸出層
*_train_test.prototxt文件:
layer {
  name: "loss"
  # Note: the type differs from the deploy version (which uses "Softmax").
  type: "SoftmaxWithLoss"
  bottom: "ip2"
  # The label input exists only here; the deploy network predicts the label,
  # so no label can be supplied to it.
  bottom: "label"
  top: "loss"
}
*_deploy.prototxt文件:
# Output layer of the deploy network: class probabilities.
layer {
  name: "prob"
  type: "Softmax"
  bottom: "ip2"
  top: "prob"
}
***注意:兩個文件中輸出層的類型不一樣,一個是SoftmaxWithLoss,另外一個是Softmax。另外爲了方便區分訓練與應用的輸出,訓練時輸出是loss,應用時輸出是prob。
三、*_solver.prototxt
# Annotated example solver configuration.

# Network definition used for training/testing.
net: "test.prototxt"

# test_iter is the number of forward passes (batches) run in each test phase.
# In the MNIST example the TEST batch size is set to 100 in the network file,
# so with test_iter: 100 a test phase processes 100 * 100 = 10000 images.
test_iter: 100

# Run one test phase after every 500 training iterations.
test_interval: 500

# Initial learning rate.
base_lr: 0.01
# Momentum (mu = 0.9).
momentum: 0.9
# Weight decay.
weight_decay: 0.0005

# The three parameters below control how the learning rate is decayed;
# see the list of policies and formulas that follows.
lr_policy: "inv"
gamma: 0.0001
power: 0.75

# The learning rate decay policy. The currently implemented learning rate
# policies are as follows:
#    - fixed: always return base_lr.
#    - step: return base_lr * gamma ^ (floor(iter / step))
#    - exp: return base_lr * gamma ^ iter
#    - inv: return base_lr * (1 + gamma * iter) ^ (- power)
#    - multistep: similar to step but it allows non uniform steps defined by
#      stepvalue
#    - poly: the effective learning rate follows a polynomial decay, to be
#      zero by the max_iter. return base_lr (1 - iter/max_iter) ^ (power)
#    - sigmoid: the effective learning rate follows a sigmod decay
#      return base_lr ( 1/(1 + exp(-gamma * (iter - stepsize))))
#
# where base_lr, max_iter, gamma, step, stepvalue and power are defined
# in the solver parameter protocol buffer, and iter is the current iteration.

# Print progress every 100 iterations.
display: 100
# Save a snapshot every 5000 iterations.
snapshot: 5000
# Snapshot prefix: more precisely, the save path plus a file-name prefix,
# because the part of the name after the prefix is fixed.
snapshot_prefix: "path_prefix"
# Run the solver on CPU or GPU.
solver_mode: GPU
批處理文件編寫:
rem Start Caffe training with the CIFAR-10 solver on all available GPUs.
rem NOTE(review): the solver file name "cifar10_slover_prototxt" looks like a
rem typo for "cifar10_solver.prototxt" - confirm against the file on disk.
F:/caffe/caffe-windows-master/bin/caffe.exe train --solver=C:/Users/Administrator/Desktop/caffe_test/cifar-10/cifar10_slover_prototxt --gpu=all
rem Keep the console window open so the training output can be read.
pause
參考四:
1.刪除輸入數據層(如:type: "Data" ... include { phase: TRAIN }),而後添加一個數據維度描述。
# Legacy-style input declaration: blob name followed by its four dimensions
# (batch size, channels, height, width).
input: "data"
input_dim: 1
input_dim: 3
input_dim: 224
input_dim: 224
force_backward: true
2.移除最後的「loss」 和「accuracy」 層,加入「prob」層。
layers { name: "prob" type: SOFTMAX bottom: "fc8" top: "prob" }
若是train_val文件中還有其餘的預處理層,就稍微複雜點。例如,在「data」層和「conv1」層之間(bottom: "data" / top: "conv1")插入了一個層來計算輸入數據的均值。
在deploy.prototxt文件中,「mean」層必須保留,只是容器改變,相應的「conv1」也要改變(bottom: "mean" / top: "conv1")。