This article is mainly based on the following two blog posts, with modifications in some details.
https://blog.csdn.net/linolzhang/article/details/97833354
Part 1: Dataset Preparation
(Prepare the data for the training, validation, and test sets separately.)
1. Annotate the dataset
Most people annotate datasets with labelme, which generates one JSON file per annotated image. There are plenty of labelme tutorials online, so they are not repeated here.
Because the objects in my original images are very small, annotating them in labelme would have been imprecise, so I annotated them by hand in Photoshop first and then wrote code to convert the annotation images into labelme-format JSON files.
The result is shown in the figure below:
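The conversion code itself is not included in the source posts; the sketch below is my own reconstruction of the idea, assuming each Photoshop annotation is exported as a binary mask PNG and using the OpenCV 4 findContours signature (the function name, label, and paths are hypothetical). Note that the converter in the next step appends two extra corner points because it assumes two-point rectangle annotations; with polygon shapes like these, that append step has to be skipped.

import json
import cv2

def mask_to_labelme(mask_path, image_path, label, out_json):
    # trace each connected region of the binary mask as one polygon
    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    contours, _ = cv2.findContours((mask > 0).astype('uint8'),
                                   cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    shapes = [{'label': label,
               'points': c.reshape(-1, 2).tolist(),  # [[x, y], ...]
               'shape_type': 'polygon'}
              for c in contours if len(c) >= 3]      # a polygon needs 3+ vertices
    data = {'version': '3.16.7', 'flags': {}, 'shapes': shapes,
            'imagePath': image_path, 'imageData': None,
            'imageHeight': int(mask.shape[0]), 'imageWidth': int(mask.shape[1])}
    with open(out_json, 'w') as f:
        json.dump(data, f, indent=2)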
2. Convert these JSON files to COCO format
The following code performed this conversion successfully for me.
# -*- coding:utf-8 -*-
import os, sys
import json
import glob
import numpy as np
import PIL.Image
import PIL.ImageDraw


class MyEncoder(json.JSONEncoder):
    """JSON encoder that converts numpy types to plain Python types."""
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        elif isinstance(obj, np.floating):
            return float(obj)
        elif isinstance(obj, np.ndarray):
            return obj.tolist()
        else:
            return super(MyEncoder, self).default(obj)


class labelme2coco(object):
    def __init__(self, labelme_json=[], save_json_path='./tran.json'):
        '''
        :param labelme_json: list of paths to all labelme JSON files
        :param save_json_path: where to save the COCO JSON
        '''
        self.labelme_json = labelme_json
        self.save_json_path = save_json_path
        self.images = []
        self.categories = []
        self.annotations = []
        self.label = []
        self.annID = 1
        self.height = 0
        self.width = 0
        self.save_json()

    def data_transfer(self):
        for num, json_file in enumerate(self.labelme_json):
            with open(json_file, 'r') as fp:
                data = json.load(fp)  # load the labelme JSON
                self.images.append(self.image(data, num))
                for shapes in data['shapes']:
                    label = shapes['label']
                    if label not in self.label:
                        self.categories.append(self.categorie(label))
                        self.label.append(label)
                    points = shapes['points']
                    # the points here come from rectangle annotations and contain
                    # only two corners; expand them to four
                    points.append([points[0][0], points[1][1]])
                    points.append([points[1][0], points[0][1]])
                    self.annotations.append(self.annotation(points, label, num))
                    self.annID += 1

    def image(self, data, num):
        image = {}
        # img = utils.img_b64_to_arr(data['imageData'])  # decode the embedded image data
        # img = io.imread(data['imagePath'])             # or open the image by its path
        # height, width = img.shape[:2]
        height = data['imageHeight']
        width = data['imageWidth']
        image['height'] = height
        image['width'] = width
        image['id'] = num + 1
        image['file_name'] = data['imagePath'].split('/')[-1]
        self.height = height
        self.width = width
        return image

    def categorie(self, label):
        categorie = {}
        categorie['supercategory'] = 'Cancer'
        categorie['id'] = len(self.label) + 1  # 0 is reserved for the background
        categorie['name'] = label
        return categorie

    def annotation(self, points, label, num):
        annotation = {}
        annotation['segmentation'] = [list(np.asarray(points).flatten())]
        annotation['iscrowd'] = 0
        annotation['image_id'] = num + 1
        annotation['bbox'] = list(map(float, self.getbbox(points)))
        annotation['area'] = annotation['bbox'][2] * annotation['bbox'][3]
        annotation['category_id'] = self.getcatid(label)  # note: the source code hard-coded 1 here
        annotation['id'] = self.annID
        return annotation

    def getcatid(self, label):
        for categorie in self.categories:
            if label == categorie['name']:
                return categorie['id']
        return 1

    def getbbox(self, points):
        polygons = points
        mask = self.polygons_to_mask([self.height, self.width], polygons)
        return self.mask2box(mask)

    def mask2box(self, mask):
        '''Recover the bounding box from a mask.
        mask: an [h, w] array of 0s and 1s in which 1 marks the object; the box
        follows from the min/max row and column indices of the 1-valued pixels.
        '''
        index = np.argwhere(mask == 1)
        rows = index[:, 0]
        cols = index[:, 1]
        # top-left corner
        left_top_r = np.min(rows)  # y
        left_top_c = np.min(cols)  # x
        # bottom-right corner
        right_bottom_r = np.max(rows)
        right_bottom_c = np.max(cols)
        # [x, y, w, h], matching the COCO bbox format
        return [left_top_c, left_top_r,
                right_bottom_c - left_top_c, right_bottom_r - left_top_r]

    def polygons_to_mask(self, img_shape, polygons):
        mask = np.zeros(img_shape, dtype=np.uint8)
        mask = PIL.Image.fromarray(mask)
        xy = list(map(tuple, polygons))
        PIL.ImageDraw.Draw(mask).polygon(xy=xy, outline=1, fill=1)
        mask = np.array(mask, dtype=bool)
        return mask

    def data2coco(self):
        data_coco = {}
        data_coco['images'] = self.images
        data_coco['categories'] = self.categories
        data_coco['annotations'] = self.annotations
        return data_coco

    def save_json(self):
        self.data_transfer()
        self.data_coco = self.data2coco()
        # write the COCO JSON; indent=4 for readability
        json.dump(self.data_coco, open(self.save_json_path, 'w'),
                  indent=4, cls=MyEncoder)


if __name__ == '__main__':
    src_folder = os.path.abspath(sys.argv[1])  # folder containing the labelme JSONs
    labelme_json = glob.glob(src_folder + '/*.json')
    labelme2coco(labelme_json, sys.argv[2])
Note that this script only runs successfully when all of the required modules are installed in the same Anaconda virtual environment in which labelme was installed.
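A hypothetical way to drive it, assuming the script above is saved as labelme2coco.py (the file name, input folder, and output path here are my own choices):

# run inside the environment where labelme is installed
import glob
from labelme2coco import labelme2coco  # the script above, saved as labelme2coco.py

jsons = glob.glob('./train_jsons/*.json')  # all labelme JSONs of the training set
labelme2coco(jsons, './datasets/annotations/coco_train.json')

Equivalently, from the command line: python labelme2coco.py ./train_jsons ./datasets/annotations/coco_train.json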
Part 2: Environment Setup (Linux)
1. Create the PyTorch environment
conda create --name maskrcnn_benchmark
source activate maskrcnn_benchmark   # every module below is installed inside this virtual environment
conda install ipython
pip install ninja yacs cython matplotlib pyqt5
conda install pytorch-nightly torchvision=0.2.1 cudatoolkit=9.0
After the steps above you still need to install torch 1.0.1 offline. Because of the firewall, downloading torch online is not really feasible, and the domestic mirrors do not carry version 1.0.1. After much trial and error I found that torch 1.0.1 plus torchvision 0.2.1 plus numpy 1.17 is a working combination. The download link for torch 1.0.1 is http://download.pytorch.org/whl/cu100/torch-1.0.1-cp36-cp36m-linux_x86_64.whl; a download manager such as Thunder is recommended. Once the wheel is downloaded, cd into the directory holding it and run pip install torch-1.0.1-cp36-cp36m-linux_x86_64.whl. (My Python is 3.6; adjust the link to your version.)
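Before moving on, a quick sanity check (a sketch; run it inside the maskrcnn_benchmark environment) confirms the version combination took hold:

import torch, torchvision, numpy
print(torch.__version__, torchvision.__version__, numpy.__version__)  # expect 1.0.1 / 0.2.1 / 1.17.x
print('CUDA available:', torch.cuda.is_available())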
2. Install cocoapi and apex
export INSTALL_DIR=$PWD

# install pycocotools
git clone https://github.com/cocodataset/cocoapi.git
cd cocoapi/PythonAPI
python setup.py build_ext install

# install apex
cd $INSTALL_DIR
git clone https://github.com/NVIDIA/apex.git
cd apex
python setup.py install --cuda_ext --cpp_ext
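If both builds succeeded, the two packages should import cleanly; a minimal check:

from pycocotools import mask as mask_utils  # from cocoapi
from apex import amp                        # from apex
print('pycocotools and apex are importable')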
3. Build the model code
# install PyTorch Detection
cd $INSTALL_DIR
# maskrcnn-benchmark:
# git clone https://github.com/facebookresearch/maskrcnn-benchmark.git
git clone https://github.com/zjhuang22/maskscoring_rcnn
cd maskscoring_rcnn
python setup.py build develop
Part 3: Preparation Before Training
1. Prepare the data and pretrained models
Create a datasets directory inside the downloaded maskscoring_rcnn repo and lay out your JSON files and original images as follows:
datasets
├── annotations
│   ├── coco_train.json
│   └── coco_test.json
├── coco_train    # original images of the training set
└── coco_test     # original images of the test set
In addition, the pretrained_models directory of maskscoring_rcnn needs to hold the two pretrained models R-101.pkl and R-50.pkl. If the server has network access they are downloaded automatically before training starts; if not, download them manually and place them under pretrained_models. The author also provides Baidu Netdisk links to these models on GitHub.
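Before training it is also worth sanity-checking the converted annotations with the pycocotools installed in Part 2. A minimal sketch, assuming the annotation file name from the tree above:

from pycocotools.coco import COCO

coco = COCO('datasets/annotations/coco_train.json')
print(len(coco.getImgIds()), 'images,', len(coco.getAnnIds()), 'annotations')
print('categories:', [c['name'] for c in coco.loadCats(coco.getCatIds())])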
2. Modify the parameters
(1) Edit the config file under the maskscoring_rcnn/configs directory; pick e2e_ms_rcnn_R_50_FPN_1x.yaml and change it as follows:
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
PRETRAINED_MODELS: 'pretrained_models'
DATASETS:
  TRAIN: ("coco_train_xxx",)  # 1. training/validation set names; arbitrary, as long as they match the other config files
  TEST: ("coco_val_xxx",)
... (several lines omitted)
SOLVER:
  BASE_LR: 0.002          # base learning rate; originally 0.02
  WEIGHT_DECAY: 0.0001
  STEPS: (60000, 80000)
  MAX_ITER: 5000          # 2. maximum number of iterations; originally 90000. Scale it to your image count; a smaller value also shows results sooner.
(2) Edit the paths_catalog.py file under maskscoring_rcnn/maskrcnn_benchmark/config:
DATASETS = {
    "coco_2014_train": (
        "coco/train2014",
        "coco/annotations/instances_train2014.json",
    ),
    "coco_2014_val": ("coco/val2014", "coco/annotations/instances_val2014.json"),
    "coco_2014_minival": (
        "coco/val2014",
        "coco/annotations/instances_minival2014.json",
    ),
    "coco_2014_valminusminival": (
        "coco/val2014",
        "coco/annotations/instances_valminusminival2014.json",
    ),
    # add your own dataset paths: append these two lines after the existing entries
    "coco_train_xxx": ("coco_mydata_train", "annotations/coco_mydata_train.json"),
    "coco_val_xxx": ("coco_mydata_test", "annotations/coco_mydata_test.json"),
}
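For orientation: each tuple above is (image directory, annotation file), and DatasetCatalog.get() in the same file joins both onto DatasetCatalog.DATA_DIR ("datasets" by default), which is why the directory tree in Part 3 lives under datasets/. A simplified sketch of that resolution:

import os

DATA_DIR = 'datasets'  # DatasetCatalog.DATA_DIR
img_dir, ann_file = ("coco_mydata_train", "annotations/coco_mydata_train.json")
print(os.path.join(DATA_DIR, img_dir))   # -> datasets/coco_mydata_train
print(os.path.join(DATA_DIR, ann_file))  # -> datasets/annotations/coco_mydata_train.json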
(3) Edit the defaults.py config file under maskscoring_rcnn/maskrcnn_benchmark/config:
# Size of the smallest side of the image during training
_C.INPUT.MIN_SIZE_TRAIN = 800   # (800,) smallest side of the training images; adjust as needed
# Maximum size of the side of the image during training
_C.INPUT.MAX_SIZE_TRAIN = 1333  # largest side of the training images; adjust as needed
# Size of the smallest side of the image during testing
_C.INPUT.MIN_SIZE_TEST = 800    # smallest side of the test images; adjust as needed
# Maximum size of the side of the image during testing
_C.INPUT.MAX_SIZE_TEST = 1333   # largest side of the test images; adjust as needed
... (several lines omitted)
_C.MODEL.ROI_BOX_HEAD.NUM_CLASSES = 3  # number of classes; COCO uses 81 (80 classes + 1 for the background)
_C.SOLVER.BASE_LR = 0.005              # learning rate; default 0.001
_C.SOLVER.CHECKPOINT_PERIOD = 1000     # checkpoint interval; set as needed
_C.SOLVER.IMS_PER_BATCH = 1            # batch size; default 16
_C.TEST.IMS_PER_BATCH = 1              # test batch size; default 8
_C.OUTPUT_DIR = "weights/"             # where to save the model (create this folder yourself)
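Editing defaults.py works, but these values are only fall-backs: at runtime yacs merges the yaml file and any key/value list over them, so the same overrides can also be kept per-experiment. A sketch of the merge order, using the same calls that appear in predict.py later:

from maskrcnn_benchmark.config import cfg

cfg.merge_from_file('configs/e2e_ms_rcnn_R_50_FPN_1x.yaml')  # the yaml overrides defaults.py
cfg.merge_from_list(['SOLVER.IMS_PER_BATCH', 1,              # a key/value list overrides both
                     'MODEL.ROI_BOX_HEAD.NUM_CLASSES', 3])
print(cfg.SOLVER.IMS_PER_BATCH, cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES)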
Part 4: Start Training
From the maskscoring_rcnn directory, run:
python tools/train_net.py --config-file configs/e2e_ms_rcnn_R_50_FPN_1x.yaml
python tools/test_net.py --config-file configs/e2e_ms_rcnn_R_50_FPN_1x.yaml
The training log can be viewed in the output directory (weights/, as configured above).
1. In the corresponding yaml file under maskscoring_rcnn/configs, point the weight path at the trained model:
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "weights/model_0005000.pth"  # path to the trained model
  BACKBONE:
    CONV_BODY: "R-50-FPN"
    OUT_CHANNELS: 256
2. Modify predictor.py under maskscoring_rcnn/demo to add your class information. This file is not present in this repo's demo directory; copy it over from the demo folder of maskrcnn-benchmark.
class COCODemo(object):
    # COCO categories for pretty print
    CATEGORIES = [
        "__background",
        "cla_a",  # replace these with the classes of your own dataset
        "cla_b",
        "cla_c",
    ]
3. Create a new predict.py under maskscoring_rcnn/demo for prediction:
#!/usr/bin/env python
# coding=UTF-8
import os
import cv2
from maskrcnn_benchmark.config import cfg
from predictor import COCODemo

# 1. the modified config file
config_file = "configs/e2e_ms_rcnn_R_50_FPN_1x.yaml"

# 2. configuration
cfg.merge_from_file(config_file)               # merge the config file
cfg.merge_from_list(["MODEL.MASK_ON", True])   # switch the mask branch on
cfg.merge_from_list(["MODEL.DEVICE", "cuda"])  # or run on the CPU instead:
# cfg.merge_from_list(["MODEL.DEVICE", "cpu"])

coco_demo = COCODemo(
    cfg,
    min_image_size=800,
    confidence_threshold=0.5,  # 3. confidence threshold
)

if __name__ == '__main__':
    in_folder = './datasets/test_images/'
    out_folder = './datasets/test_images_out/'
    if not os.path.exists(out_folder):
        os.makedirs(out_folder)

    for file_name in os.listdir(in_folder):
        if not file_name.endswith(('jpg', 'png')):
            continue
        # load the image
        img_path = os.path.join(in_folder, file_name)
        image = cv2.imread(img_path)

        # method 1: get the rendered OpenCV image directly
        # predictions = coco_demo.run_on_opencv_image(image)
        # save_path = os.path.join(out_folder, file_name)
        # cv2.imwrite(save_path, predictions)

        # method 2: get the raw predictions and draw them yourself
        predictions = coco_demo.compute_prediction(image)
        top_predictions = coco_demo.select_top_predictions(predictions)

        # draw boxes, masks, and class names
        img = coco_demo.overlay_boxes(image, top_predictions)
        img = coco_demo.overlay_mask(img, top_predictions)
        img = coco_demo.overlay_class_names(img, top_predictions)
        save_path = os.path.join(out_folder, file_name)
        cv2.imwrite(save_path, img)

        # print the results
        boxes = top_predictions.bbox.numpy()
        labels = top_predictions.get_field("labels").numpy()
        scores = top_predictions.get_field("scores").numpy()
        masks = top_predictions.get_field("mask").numpy()
        for i in range(len(boxes)):
            print('box:', i, ' label:', labels[i], ' score:', scores[i])
            x1, y1, x2, y2 = [round(x) for x in boxes[i]]  # = map(int, boxes[i])
            print('x1,y1,x2,y2:', x1, y1, x2, y2)
4. Run the program.
python demo/predict.py
While it runs you may hit errors about missing files or libraries that cannot be imported; in that case copy the corresponding files over from the matching maskrcnn-benchmark folders. For details see: http://www.javashuo.com/article/p-cmaoojcw-bk.html
A screenshot of a successful run is shown below.