【目標檢測算法實現系列】Keras實現Faster R-CNN算法(一)
【目標檢測算法實現系列】Keras實現Faster R-CNN算法(二)
在此之前,我們主要實現了相關數據的解析、預處理等準備工作,以及對應 Faster R-CNN 的相關網絡模塊搭建。接下來我們接着實現其餘部分。
在上一篇中,我們實現了一個自定義的 ROIPooling 層,這次我們看下如何建立 RPN 與 ROIPooling 層之間的聯繫。下面,我們看下如何用代碼實現:通過 RPN 網絡的輸出,來指定對應 ROIPooling 層的輸入。
def rpn_to_roi(rpn_cls_layer, rpn_regr_layer, C, use_regr=True, max_boxes=300, overlap_thresh=0.9):
    """Convert RPN outputs into ROI proposals for the ROI pooling layer.

    Every anchor at every feature-map cell is turned into a box in
    feature-map coordinates, optionally adjusted by the RPN regression
    output, clipped to the feature map, stripped of degenerate boxes and
    finally filtered with non-maximum suppression.

    :param rpn_cls_layer: RPN classification output, indexed as
        (1, rows, cols, num_anchors).
    :param rpn_regr_layer: RPN regression output, indexed as
        (1, rows, cols, 4 * num_anchors).
    :param C: config object providing ``std_scaling``, ``anchor_box_scales``,
        ``anchor_box_ratios`` and ``rpn_stride``.
    :param use_regr: when True, apply the regression deltas to the anchors.
    :param max_boxes: maximum number of boxes kept by NMS.
    :param overlap_thresh: IoU threshold handed to NMS.
    :return: the boxes selected by ``non_max_suppression_fast`` as
        (x1, y1, x2, y2) rows; an empty (0, 4) integer array when no valid
        box survives clipping.
    """
    regr_layer = rpn_regr_layer / C.std_scaling

    assert rpn_cls_layer.shape[0] == 1
    (rows, cols) = rpn_cls_layer.shape[1:3]

    # A[k, row, col, anchor]: k runs over (x1, y1, x2, y2) once the loop below
    # has finished; during the loop slots 2 and 3 temporarily hold w and h.
    A = np.zeros((4, rows, cols, rpn_cls_layer.shape[3]))

    # The cell-coordinate grid is the same for every anchor, so build it once.
    X, Y = np.meshgrid(np.arange(cols), np.arange(rows))

    curr_layer = 0
    for anchor_size in C.anchor_box_scales:
        for anchor_ratio in C.anchor_box_ratios:
            # Anchor width / height expressed in feature-map cells.
            anchor_x = (anchor_size * anchor_ratio[0]) / C.rpn_stride
            anchor_y = (anchor_size * anchor_ratio[1]) / C.rpn_stride

            A[0, :, :, curr_layer] = X - anchor_x / 2  # top-left x
            A[1, :, :, curr_layer] = Y - anchor_y / 2  # top-left y
            A[2, :, :, curr_layer] = anchor_x  # width (temporary)
            A[3, :, :, curr_layer] = anchor_y  # height (temporary)

            if use_regr:
                # Regression deltas belonging to the current anchor.
                regr = regr_layer[0, :, :, 4 * curr_layer:4 * curr_layer + 4]
                A[:, :, :, curr_layer] = apply_regr_np(A[:, :, :, curr_layer], regr)

            # Enforce a minimum size of one cell, then turn (w, h) into the
            # bottom-right corner.
            A[2, :, :, curr_layer] = np.maximum(1, A[2, :, :, curr_layer])
            A[3, :, :, curr_layer] = np.maximum(1, A[3, :, :, curr_layer])
            A[2, :, :, curr_layer] += A[0, :, :, curr_layer]
            A[3, :, :, curr_layer] += A[1, :, :, curr_layer]

            # Clip the box to the feature map.
            A[0, :, :, curr_layer] = np.maximum(0, A[0, :, :, curr_layer])
            A[1, :, :, curr_layer] = np.maximum(0, A[1, :, :, curr_layer])
            A[2, :, :, curr_layer] = np.minimum(cols - 1, A[2, :, :, curr_layer])
            A[3, :, :, curr_layer] = np.minimum(rows - 1, A[3, :, :, curr_layer])

            curr_layer += 1

    # Flatten to (total_anchor_count, 4) and a matching 1-D score vector.
    all_boxes = np.reshape(A.transpose((0, 3, 1, 2)), (4, -1)).transpose((1, 0))
    all_probs = rpn_cls_layer.transpose((0, 3, 1, 2)).reshape((-1))

    x1 = all_boxes[:, 0]
    y1 = all_boxes[:, 1]
    x2 = all_boxes[:, 2]
    y2 = all_boxes[:, 3]

    # Drop boxes that collapsed to zero or negative extent after clipping.
    degenerate = (x1 - x2 >= 0) | (y1 - y2 >= 0)
    all_boxes = all_boxes[~degenerate]
    all_probs = all_probs[~degenerate]

    # Nothing left to suppress: return an empty proposal set rather than
    # indexing into an empty NMS result.
    if len(all_boxes) == 0:
        return np.zeros((0, 4), dtype="int")

    # Keep only the boxes selected by NMS (first element of its result).
    result = non_max_suppression_fast(all_boxes, all_probs, overlap_thresh=overlap_thresh, max_boxes=max_boxes)[0]
    return result
上述代碼中調用了兩個方法:一個是 apply_regr_np 方法,用來通過 RPN 網絡迴歸層的預測值來調整 anchor 位置;另一個方法是 non_max_suppression_fast,用來對全部 anchor 進行非極大值抑制,選取出實際需要的 anchor。具體代碼實現如下:
def apply_regr_np(X, T):
    """Adjust anchor boxes with the RPN regression deltas.

    :param X: array-like indexed as (4, rows, cols) holding the anchors'
        top-left x, top-left y, width and height.
    :param T: array-like indexed as (rows, cols, 4) holding the predicted
        deltas (tx, ty, tw, th) for the same anchors.
    :return: array of shape (4, rows, cols) with the rounded, adjusted
        (x, y, w, h). If the inputs cannot be processed (wrong rank, missing
        channels, non-numeric data), the error is printed and ``X`` is
        returned unchanged as a best-effort fallback.
    """
    try:
        # Accept nested lists as well as ndarrays; without this, list inputs
        # fail on tuple indexing and silently fall through to the fallback.
        anchors = np.asarray(X)
        deltas = np.asarray(T)

        x = anchors[0, :, :]
        y = anchors[1, :, :]
        w = anchors[2, :, :]
        h = anchors[3, :, :]

        tx = deltas[:, :, 0]
        ty = deltas[:, :, 1]
        tw = deltas[:, :, 2]
        th = deltas[:, :, 3]

        # Centre of the original anchor.
        cx = x + w / 2.
        cy = y + h / 2.

        # Centre and size after applying the deltas: the centre shifts in
        # units of the anchor size, the size scales exponentially.
        cx1 = tx * w + cx
        cy1 = ty * h + cy
        w1 = np.exp(tw.astype(np.float64)) * w
        h1 = np.exp(th.astype(np.float64)) * h

        # Back to a top-left corner, rounded to whole feature-map cells.
        x1 = np.round(cx1 - w1 / 2.)
        y1 = np.round(cy1 - h1 / 2.)
        w1 = np.round(w1)
        h1 = np.round(h1)
        return np.stack([x1, y1, w1, h1])
    except (IndexError, TypeError, ValueError, FloatingPointError) as e:
        # Best-effort: report the problem and leave the anchors untouched.
        print(e)
        return X
def non_max_suppression_fast(boxes, probs, overlap_thresh=0.9, max_boxes=300):
    """Greedy non-maximum suppression over scored boxes.

    The box with the highest score is kept, every remaining box whose IoU
    with it exceeds ``overlap_thresh`` is discarded, and the process repeats
    on what is left until no boxes remain or ``max_boxes`` have been kept.

    :param boxes: array-like of shape (num_boxes, 4) with rows
        (x1, y1, x2, y2); every box must satisfy x1 < x2 and y1 < y2.
    :param probs: array-like of shape (num_boxes,) with one score per box.
    :param overlap_thresh: IoU above which a box is suppressed.
    :param max_boxes: maximum number of boxes to keep.
    :return: tuple ``(boxes, probs)`` of the kept boxes (cast to int) and
        their scores, in descending score order. Empty input yields a
        ``(0, 4)`` int array and a ``(0,)`` array so callers can always
        unpack or index the result.
    :raises AssertionError: if any box has x1 >= x2 or y1 >= y2.
    """
    if len(boxes) == 0:
        return np.zeros((0, 4), dtype="int"), np.zeros((0,))

    # Work in float from the start so the area / IoU arithmetic below never
    # runs in an integer dtype.
    boxes = np.asarray(boxes).astype("float")
    probs = np.asarray(probs)

    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]

    np.testing.assert_array_less(x1, x2)
    np.testing.assert_array_less(y1, y2)

    area = (x2 - x1) * (y2 - y1)

    # Ascending by score: the best remaining candidate is always the last one.
    idxs = np.argsort(probs)

    pick = []
    while len(idxs) > 0:
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)
        rest = idxs[:last]

        # Intersection of the picked box with every remaining candidate.
        xx1_int = np.maximum(x1[i], x1[rest])
        yy1_int = np.maximum(y1[i], y1[rest])
        xx2_int = np.minimum(x2[i], x2[rest])
        yy2_int = np.minimum(y2[i], y2[rest])
        ww_int = np.maximum(0, xx2_int - xx1_int)
        hh_int = np.maximum(0, yy2_int - yy1_int)
        area_int = ww_int * hh_int

        # Union and IoU; the epsilon guards against division by zero.
        area_union = area[i] + area[rest] - area_int
        overlap = area_int / (area_union + 1e-6)

        # Drop the picked box itself plus everything overlapping it too much.
        idxs = rest[~(overlap > overlap_thresh)]

        if len(pick) >= max_boxes:
            break

    boxes = boxes[pick].astype("int")
    probs = probs[pick]
    return boxes, probs
def calc_roi(R, img_data, C, class_mapping):
    """Build the classifier-stage training batch from RPN proposals.

    Each proposal is matched to the ground-truth box with the highest IoU.
    Proposals below ``C.classifier_min_overlap`` are dropped, those below
    ``C.classifier_max_overlap`` are labelled ``'bg'``, and the rest take the
    class of their matched box together with regression targets. A fixed
    number of ``C.num_rois`` samples is then drawn from the result.

    :param R: proposals of shape (num_rois, 4) as (x1, y1, x2, y2) rows.
    :param img_data: dict with ``'width'``, ``'height'`` and ``'bboxes'``;
        each bbox is a dict with ``'x1'``, ``'x2'``, ``'y1'``, ``'y2'`` and
        ``'class'``, for example::

            {'width': 500, 'height': 500,
             'bboxes': [{'y2': 500, 'y1': 27, 'x2': 183, 'x1': 20,
                         'class': 'person', 'difficult': False}, ...],
             'imageset': 'test',
             'filepath': './datasets/VOC2007/JPEGImages/000910.jpg'}
    :param C: config object providing ``im_size``, ``rpn_stride``,
        ``classifier_min_overlap``, ``classifier_max_overlap``,
        ``classifier_regr_std`` and ``num_rois``.
    :param class_mapping: dict mapping class name to class index. The code
        treats the last index as background and sizes the regression arrays
        for ``len(class_mapping) - 1`` classes, so ``'bg'`` is assumed to map
        to the last index -- confirm against the caller.
    :return: ``(X, Y1, Y2, IoUs)`` where ``X`` holds (x, y, w, h) per sampled
        ROI, ``Y1`` the one-hot class labels, ``Y2`` the regression label
        mask followed by the regression targets (all with a leading batch
        axis of 1), and ``IoUs`` the best IoU of every ROI kept before
        sampling. Returns ``(None, None, None, None)`` when no ROI reaches
        the minimum overlap.
    :raises ValueError: from ``np.random.choice`` when negatives are needed
        but no background ROI is available.
    """
    bboxes = img_data['bboxes']
    (width, height) = (img_data['width'], img_data['height'])
    (resized_width, resized_height) = data_generators.get_new_img_size(width, height, C.im_size)

    # Ground-truth boxes in feature-map coordinates.
    # NOTE: the column order is (x1, x2, y1, y2), not (x1, y1, x2, y2).
    gta = np.zeros((len(bboxes), 4))
    for bbox_num, bbox in enumerate(bboxes):
        gta[bbox_num, 0] = int(round(bbox['x1'] * (resized_width / float(width)) / C.rpn_stride))
        gta[bbox_num, 1] = int(round(bbox['x2'] * (resized_width / float(width)) / C.rpn_stride))
        gta[bbox_num, 2] = int(round(bbox['y1'] * (resized_height / float(height)) / C.rpn_stride))
        gta[bbox_num, 3] = int(round(bbox['y2'] * (resized_height / float(height)) / C.rpn_stride))

    x_roi = []
    y_class_num = []
    y_class_regr_coords = []
    y_class_regr_label = []
    IoUs = []

    num_classes = len(class_mapping)

    for ix in range(R.shape[0]):
        (x1, y1, x2, y2) = R[ix, :]
        x1 = int(round(x1))
        y1 = int(round(y1))
        x2 = int(round(x2))
        y2 = int(round(y2))

        # Find the ground-truth box that overlaps this ROI the most.
        best_iou = 0.0
        best_bbox = -1
        for bbox_num in range(len(bboxes)):
            curr_iou = data_generators.iou(
                [gta[bbox_num, 0], gta[bbox_num, 2], gta[bbox_num, 1], gta[bbox_num, 3]],
                [x1, y1, x2, y2])
            if curr_iou > best_iou:
                best_iou = curr_iou
                best_bbox = bbox_num

        # Too little overlap to be useful even as a background sample.
        if best_iou < C.classifier_min_overlap:
            continue

        w = x2 - x1
        h = y2 - y1
        x_roi.append([x1, y1, w, h])
        IoUs.append(best_iou)

        if C.classifier_min_overlap <= best_iou < C.classifier_max_overlap:
            cls_name = 'bg'
        elif C.classifier_max_overlap <= best_iou:
            cls_name = bboxes[best_bbox]['class']
            cxg = (gta[best_bbox, 0] + gta[best_bbox, 1]) / 2.0
            cyg = (gta[best_bbox, 2] + gta[best_bbox, 3]) / 2.0
            cx = x1 + w / 2.0
            cy = y1 + h / 2.0
            # (tx, ty, tw, th): translation / log-scale from the ROI to its
            # matched ground-truth box.
            tx = (cxg - cx) / float(w)
            ty = (cyg - cy) / float(h)
            tw = np.log((gta[best_bbox, 1] - gta[best_bbox, 0]) / float(w))
            th = np.log((gta[best_bbox, 3] - gta[best_bbox, 2]) / float(h))
        else:
            # Defensive guard; not reachable with ordinary numeric thresholds.
            print('roi = {}'.format(best_iou))
            raise RuntimeError

        # One-hot class label for the classification head.
        class_num = class_mapping[cls_name]
        class_label = num_classes * [0]
        class_label[class_num] = 1
        y_class_num.append(class_label)

        # Four regression targets per non-background class, plus a mask that
        # marks which four entries are active for this ROI.
        coords = [0] * 4 * (num_classes - 1)
        labels = [0] * 4 * (num_classes - 1)
        if cls_name != 'bg':
            label_pos = 4 * class_num
            sx, sy, sw, sh = C.classifier_regr_std
            coords[label_pos:4 + label_pos] = [sx * tx, sy * ty, sw * tw, sh * th]
            labels[label_pos:4 + label_pos] = [1, 1, 1, 1]
        y_class_regr_coords.append(coords)
        y_class_regr_label.append(labels)

    if len(x_roi) == 0:
        return None, None, None, None

    # Add the leading mini-batch axis expected by the model.
    X = np.expand_dims(np.array(x_roi), axis=0)
    Y1 = np.expand_dims(np.array(y_class_num), axis=0)
    Y2 = np.expand_dims(
        np.concatenate([np.array(y_class_regr_label), np.array(y_class_regr_coords)], axis=1),
        axis=0)

    # A 1 in the last class slot marks a background (negative) sample.
    neg_samples = np.where(Y1[0, :, -1] == 1)[0]
    pos_samples = np.where(Y1[0, :, -1] == 0)[0]

    # Use at most num_rois // 2 positives; take all of them if there are fewer.
    max_pos = C.num_rois // 2
    if len(pos_samples) < max_pos:
        selected_pos_samples = pos_samples.tolist()
    else:
        selected_pos_samples = np.random.choice(pos_samples, max_pos, replace=False).tolist()

    # Fill the rest with negatives, sampling with replacement only when there
    # are not enough distinct negatives to go round.
    num_neg = C.num_rois - len(selected_pos_samples)
    selected_neg_samples = np.random.choice(
        neg_samples, num_neg, replace=len(neg_samples) < num_neg).tolist()

    # Indices (along axis 1) of the ROIs that take part in training.
    sel_samples = selected_pos_samples + selected_neg_samples
    return X[:, sel_samples, :], Y1[:, sel_samples, :], Y2[:, sel_samples, :], IoUs
到此,所有模塊都已開發完畢,後面將進行完整的模型訓練和預測過程。
未完待續
本章相關的完整代碼以及 VOC2012 數據集的百度網盤下載,請關注我自己的公衆號「AI計算機視覺工坊」,回覆【代碼】和【數據集】獲取。本公衆號不定期推送機器學習、深度學習、計算機視覺等相關文章,歡迎大家和我一起學習、交流。