[Object Detection Algorithm Implementation Series] Implementing Faster R-CNN with Keras (1)
In the previous article we finished the data parsing and preprocessing work; this time we build the network modules.
We use the convolutional blocks of the VGG16 model (with the last pooling layer removed) as the shared network that extracts the feature map. The code is as follows:
from keras.layers import Input, Conv2D, MaxPool2D, Flatten, Dense, TimeDistributed
from keras import backend as K
from keras_faster_rcnn import RoiPoolingConv

def base_net_vgg(input_tensor):
    if input_tensor is None:
        input_tensor = Input(shape=(None, None, 3))
    else:
        if not K.is_keras_tensor(input_tensor):
            input_tensor = Input(tensor=input_tensor, shape=(None, None, 3))
    # Build the base model (the convolutional blocks of VGG16) up to
    # block5_conv3, which is used to extract the feature map
    # Block 1
    X = Conv2D(filters=64, kernel_size=(3, 3), activation="relu",
               padding="same", name="block1_conv1")(input_tensor)
    X = Conv2D(filters=64, kernel_size=(3, 3), activation="relu",
               padding="same", name="block1_conv2")(X)
    X = MaxPool2D(pool_size=(2, 2), strides=(2, 2), name="block1_pool")(X)
    # Block 2
    X = Conv2D(filters=128, kernel_size=(3, 3), activation="relu",
               padding="same", name="block2_conv1")(X)
    X = Conv2D(filters=128, kernel_size=(3, 3), activation="relu",
               padding="same", name="block2_conv2")(X)
    X = MaxPool2D(pool_size=(2, 2), strides=(2, 2), name="block2_pool")(X)
    # Block 3
    X = Conv2D(filters=256, kernel_size=(3, 3), activation="relu",
               padding="same", name="block3_conv1")(X)
    X = Conv2D(filters=256, kernel_size=(3, 3), activation="relu",
               padding="same", name="block3_conv2")(X)
    X = Conv2D(filters=256, kernel_size=(3, 3), activation="relu",
               padding="same", name="block3_conv3")(X)
    X = MaxPool2D(pool_size=(2, 2), strides=(2, 2), name="block3_pool")(X)
    # Block 4
    X = Conv2D(filters=512, kernel_size=(3, 3), activation="relu",
               padding="same", name="block4_conv1")(X)
    X = Conv2D(filters=512, kernel_size=(3, 3), activation="relu",
               padding="same", name="block4_conv2")(X)
    X = Conv2D(filters=512, kernel_size=(3, 3), activation="relu",
               padding="same", name="block4_conv3")(X)
    X = MaxPool2D(pool_size=(2, 2), strides=(2, 2), name="block4_pool")(X)
    # Block 5 (no pooling layer here, so the overall stride stays at 16)
    X = Conv2D(filters=512, kernel_size=(3, 3), activation="relu",
               padding="same", name="block5_conv1")(X)
    X = Conv2D(filters=512, kernel_size=(3, 3), activation="relu",
               padding="same", name="block5_conv2")(X)
    X = Conv2D(filters=512, kernel_size=(3, 3), activation="relu",
               padding="same", name="block5_conv3")(X)
    return X
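As a quick sanity check (my own sketch, not part of the original module), you can wrap base_net_vgg in a Model and confirm the output stride: the four remaining 2x2 pooling layers each halve the spatial size, so the feature map is 1/16 of the input:

from keras.layers import Input
from keras.models import Model
import numpy as np

img_input = Input(shape=(None, None, 3))
feature_extractor = Model(img_input, base_net_vgg(img_input))
# A 608x800 input yields a 38x50x512 feature map (stride 16)
print(feature_extractor.predict(np.zeros((1, 608, 800, 3))).shape)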
Building the RPN network is straightforward; the code is below. For the theory behind it, see the earlier article 【目標檢測算法系列】4、Faster R-CNN算法.
def rpn_net(shared_layers, num_anchors):
    '''
    RPN network
    :param shared_layers: output of the shared layers, used as the RPN input
                          (i.e. the feature map extracted by the VGG conv blocks)
    :param num_anchors: number of anchors per feature-map position (9 here)
    :return: [X_class, X_regr, shared_layers]: classification output (binary,
             sigmoid here), regression output, and the shared layers
    '''
    X = Conv2D(512, (3, 3), padding="same", activation="relu",
               kernel_initializer="normal", name="rpn_conv1")(shared_layers)
    # Two sibling heads for the classification and regression tasks
    X_class = Conv2D(num_anchors, (1, 1), activation="sigmoid",
                     kernel_initializer="uniform", name="rpn_out_class")(X)
    X_regr = Conv2D(num_anchors * 4, (1, 1), activation="linear",
                    kernel_initializer="zero", name="rpn_out_regress")(X)
    return [X_class, X_regr, shared_layers]
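As a quick shape check (again my own sketch): with num_anchors = 9 (3 scales x 3 aspect ratios), the classification head emits 9 channels per feature-map position (one objectness score per anchor) and the regression head 4 * 9 = 36 channels (four box offsets per anchor):

from keras.layers import Input

num_anchors = 9  # 3 scales x 3 aspect ratios
img_input = Input(shape=(None, None, 3))
shared_layers = base_net_vgg(img_input)
rpn_class, rpn_regr, _ = rpn_net(shared_layers, num_anchors)
print(rpn_class.shape)  # (?, ?, ?, 9)
print(rpn_regr.shape)   # (?, ?, ?, 36)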
Keras does not ship a ready-made ROI pooling layer, so we define our own: the RoiPoolingConv class subclasses Layer and overrides the relevant methods to implement the ROI pooling logic. The code is as follows:
'''
Custom ROI pooling layer
'''
from keras.engine.topology import Layer
import keras.backend as K
import tensorflow as tf
import numpy as np

class RoiPoolingConv(Layer):
    '''
    Custom ROI pooling layer
    '''
    def __init__(self, pool_size, num_rois, **kwargs):
        self.pool_size = pool_size
        self.num_rois = num_rois
        self.dim_ordering = "tf"
        super(RoiPoolingConv, self).__init__(**kwargs)

    def build(self, input_shape):
        self.nb_channels = input_shape[0][3]
        super(RoiPoolingConv, self).build(input_shape)

    def compute_output_shape(self, input_shape):
        '''
        compute_output_shape declares the output shape of the ROI pooling layer
        '''
        # Five dimensions: [number of samples (images) in a batch,
        # number of rois per sample, roi height, roi width, channels]
        return None, self.num_rois, self.pool_size, self.pool_size, self.nb_channels

    def call(self, x, mask=None):
        '''
        call implements the actual ROI pooling logic
        '''
        # x is the layer input: [feature map, rois]
        assert(len(x) == 2)
        feature_map = x[0]  # the shared feature map
        # all input rois, shape=(batch_size, None, 4); the last dimension holds the
        # roi's coordinates on the feature map (top-left corner plus width and height)
        rois = x[1]
        roi_out_put = []
        # Note: this implementation reads rois from the first image only, i.e. it
        # assumes one image per batch, which is how this Faster R-CNN is trained
        for roi_index in range(self.num_rois):
            x = rois[0, roi_index, 0]
            y = rois[0, roi_index, 1]
            w = rois[0, roi_index, 2]
            h = rois[0, roi_index, 3]
            x = K.cast(x, 'int32')
            y = K.cast(y, 'int32')
            w = K.cast(w, 'int32')
            h = K.cast(h, 'int32')
            # Crop the roi from the feature map and resize it to the fixed pool size
            one_roi_out = tf.image.resize_images(
                feature_map[:, y:y+h, x:x+w, :], (self.pool_size, self.pool_size))
            roi_out_put.append(one_roi_out)
        roi_out_put = tf.reshape(
            roi_out_put,
            (self.num_rois, self.pool_size, self.pool_size, self.nb_channels))
        roi_out_put = tf.expand_dims(roi_out_put, axis=0)
        return roi_out_put
Let's test the RoiPoolingConv we just defined (since the layer reads rois from the first image only, we test with a batch of one):
if __name__ == '__main__':
    batch_size = 1   # the layer pools rois for a single image per batch
    img_height = 200
    img_width = 100
    n_channels = 1
    n_rois = 2
    pooled_size = 7
    feature_maps_shape = (batch_size, img_height, img_width, n_channels)
    feature_maps_tf = tf.placeholder(tf.float32, shape=feature_maps_shape)
    feature_maps_np = np.ones(feature_maps_shape, dtype='float32')
    print(f"feature_maps_np.shape = {feature_maps_np.shape}")
    roiss_tf = tf.placeholder(tf.float32, shape=(batch_size, n_rois, 4))
    # Each roi is (x, y, w, h) on the feature map
    roiss_np = np.asarray([[[50, 40, 30, 90], [0, 0, 100, 200]]], dtype='float32')
    print(f"roiss_np.shape = {roiss_np.shape}")
    # Create the ROI pooling layer
    roi_layer = RoiPoolingConv(pooled_size, n_rois)
    pooled_features = roi_layer([feature_maps_tf, roiss_tf])
    print(f"output shape of layer call = {pooled_features.shape}")
    # Run a tensorflow session
    with tf.Session() as session:
        result = session.run(pooled_features,
                             feed_dict={feature_maps_tf: feature_maps_np,
                                        roiss_tf: roiss_np})
        print(f"result.shape = {result.shape}")
def roi_classifier(shared_layers, input_rois, num_rois, nb_classes=21):
    '''
    Final detection network (ROI pooling layer plus fully connected layers) for the
    final fine-grained classification and bounding-box regression
    :param shared_layers: the base feature-extraction network (VGG conv blocks)
    :param input_rois: roi input, shape=(None, 4)
    :param num_rois: number of rois
    :param nb_classes: total number of classes to detect, including the background class
    :return: [out_class, out_regr]: final classification output and regression output
    '''
    # ROI pooling layer
    pooling_regions = 7
    roi_pool_out = RoiPoolingConv.RoiPoolingConv(pooling_regions, num_rois)([shared_layers, input_rois])
    # Fully connected layers, applied to every roi via TimeDistributed
    out = TimeDistributed(Flatten(name="flatten"))(roi_pool_out)
    out = TimeDistributed(Dense(4096, activation="relu", name="fc1"))(out)
    out = TimeDistributed(Dense(4096, activation="relu", name="fc2"))(out)
    out_class = TimeDistributed(Dense(nb_classes, activation="softmax", kernel_initializer='zero'),
                                name='dense_class_{}'.format(nb_classes))(out)
    # The regression head has no output for the background class, hence nb_classes - 1
    out_regr = TimeDistributed(Dense(4 * (nb_classes - 1), activation="linear", kernel_initializer='zero'),
                               name='dense_regress_{}'.format(nb_classes))(out)
    return [out_class, out_regr]
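All of the network modules are now in place; what remains are the loss functions. They reference a few balancing weights and an epsilon that the original code defines elsewhere in the project. The values below are an assumption on my part (they match common Keras Faster R-CNN implementations); adjust them to your own configuration:

import tensorflow as tf
from keras import backend as K
from keras.losses import categorical_crossentropy

# Balancing weights for the individual losses, plus a small constant that keeps
# the normalising denominators away from zero. These exact values are assumed.
lambda_rpn_regr = 1.0
lambda_rpn_class = 1.0
lambda_cls_class = 1.0
epsilon = 1e-4

First, the RPN regression loss: a smooth-L1 loss, masked so that only anchors marked as positive contribute, and normalised by the number of contributing entries: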
def rpn_regr_loss(num_anchors):
    '''
    Returns the loss function for RPN regression
    :param num_anchors:
    :return:
    '''
    def rpn_loss_regr_fixed_num(y_true, y_pred):
        '''
        The actual RPN regression loss
        :param y_true: the label Y built earlier for the rpn regression layer,
                       shape=(batch_size, height, width, num_anchors*4*2).
                       In the last dimension, the first 4*num_anchors entries mark
                       whether the sample is a positive one, and the last 4*num_anchors
                       entries hold the actual regression targets, so the last
                       dimension has num_anchors*4*2 entries in total
        :param y_pred: the output of the regression layer after X passes through
                       basenet + rpn, shape=(batch_size, height, width, num_anchors*4)
        :return:
        '''
        # In y_true's last dimension, the first 4*num_anchors entries flag positive
        # samples and the last 4*num_anchors entries are the regression targets
        x = y_true[:, :, :, 4 * num_anchors:] - y_pred
        x_abs = K.abs(x)
        # Smooth L1: quadratic for |x| <= 1, linear beyond
        x_bool = K.cast(K.less_equal(x_abs, 1.0), tf.float32)
        return lambda_rpn_regr * K.sum(
            y_true[:, :, :, :4 * num_anchors] * (x_bool * (0.5 * x * x) + (1 - x_bool) * (x_abs - 0.5))) / K.sum(
            epsilon + y_true[:, :, :, :4 * num_anchors])
    return rpn_loss_regr_fixed_num
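The RPN classification loss is a binary cross-entropy masked in the same way, so that anchors flagged as unusable (their overlap is neither high enough to be positive nor low enough to be negative) are ignored: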
def rpn_cls_loss(num_anchors):
    '''
    Returns the loss function for RPN classification
    :param num_anchors:
    :return:
    '''
    def rpn_loss_cls_fixed_num(y_true, y_pred):
        # In y_true's last dimension (2*num_anchors entries), the first num_anchors
        # entries mark whether the anchor is used for training at all (discarded,
        # i.e. neutral, anchors are masked out), and the last num_anchors entries
        # hold the actual positive/negative label of the anchor.
        # Note: recent Keras versions define K.binary_crossentropy(target, output),
        # so depending on your Keras version the two arguments may need swapping.
        return lambda_rpn_class * K.sum(
            y_true[:, :, :, :num_anchors] * K.binary_crossentropy(y_pred[:, :, :, :], y_true[:, :, :, num_anchors:])) \
            / K.sum(epsilon + y_true[:, :, :, :num_anchors])
    return rpn_loss_cls_fixed_num
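Finally, the classification loss of the detection head is a plain categorical cross-entropy over the softmax outputs: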
def final_cls_loss(y_true, y_pred):
    '''
    Classification loss of the final detection head; a standard multi-class
    cross-entropy over the softmax outputs
    :param y_true:
    :param y_pred:
    :return:
    '''
    return lambda_cls_class * K.mean(categorical_crossentropy(y_true[0, :, :], y_pred[0, :, :]))
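To show where these losses will eventually plug in, here is a sketch of my own (the actual training wiring comes in the next article, and the optimizer settings are assumptions):

from keras.layers import Input
from keras.models import Model
from keras.optimizers import Adam

num_anchors = 9
img_input = Input(shape=(None, None, 3))
shared_layers = base_net_vgg(img_input)
rpn_class, rpn_regr, _ = rpn_net(shared_layers, num_anchors)
model_rpn = Model(img_input, [rpn_class, rpn_regr])
model_rpn.compile(optimizer=Adam(lr=1e-5),
                  loss=[rpn_cls_loss(num_anchors), rpn_regr_loss(num_anchors)])

The detection head will be compiled the same way with final_cls_loss plus a regression loss for the refined boxes, which this article has not defined yet.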
At this point all of the independent modules of Faster R-CNN are built. Next time we will implement the connection between the RPN network and the ROI Pooling layer.
To be continued.
For the complete code of this chapter and the VOC2012 dataset (Baidu netdisk download), follow my official account AI計算機視覺工坊 and reply 【代碼】 (code) or 【數據集】 (dataset). The account publishes articles on machine learning, deep learning, computer vision and related topics from time to time; you are welcome to learn and exchange ideas with me.