學習目標檢測已經有段時間了,之前都是拿着別人寫好的相關代碼(API)來用,沒有自己好好總結琢磨。想到自己之後工作時估計還是要用到,這不,再次從最基本的數據和標籤準備環節開始。
目標檢測領域常用的基本數據格式無非就是 VOC、COCO 兩種,下面就記錄一下這兩種數據格式的獲取。
VOC 數據(VOC2007)的目錄
├── Annotations 進行 detection 任務時的標籤文件,xml 形式,文件名與圖片名一一對應
├── ImageSets 包含三個子文件夾 Layout、Main、Segmentation,其中 Main 存放的是分類和檢測的數據集分割文件
├── JPEGImages 存放 .jpg 格式的圖片文件
├── SegmentationClass 存放按照 class 分割的圖片
└── SegmentationObject 存放按照 object 分割的圖片
├── Main
│ ├── train.txt 寫着用於訓練的圖片名稱, 共 2501 個
│ ├── val.txt 寫着用於驗證的圖片名稱,共 2510 個
│ ├── trainval.txt train與val的合集。共 5011 個
│ ├── test.txt 寫着用於測試的圖片名稱,共 4952 個
Annotations 下的 XML
<annotation>
<folder>VOC2012</folder>
<filename>2007_000392.jpg</filename> //文件名
<source> //圖像來源(不重要)
<database>The VOC2007 Database</database>
<annotation>PASCAL VOC2007</annotation>
<image>flickr</image>
</source>
<size> //圖像尺寸(長寬以及通道數)
<width>500</width>
<height>332</height>
<depth>3</depth>
</size>
<segmented>1</segmented> //是否用於分割(在圖像物體識別中01無所謂)
<object> //檢測到的物體
<name>horse</name> //物體類別
<pose>Right</pose> //拍攝角度
<truncated>0</truncated> //是否被截斷(0表示完整)
<difficult>0</difficult> //目標是否難以識別(0表示容易識別)
<bndbox> //bounding-box(包含左上角 xmin/ymin 和右下角 xmax/ymax 的座標)
<xmin>100</xmin>
<ymin>96</ymin>
<xmax>355</xmax>
<ymax>324</ymax>
</bndbox>
</object>
<object> //檢測到多個物體
<name>person</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>198</xmin>
<ymin>58</ymin>
<xmax>286</xmax>
<ymax>197</ymax>
</bndbox>
</object>
</annotation>
1,COCO 數據的總體 JSON 文件格式
比如 instances_train2017.json、instances_val2017.json 這兩個文件就是這種格式。
Object Instance 這種格式的文件從頭到尾按照順序分爲這幾個模塊
{
"info": info,
"licenses": [license],
"images": [image],
"annotations": [annotation],
"categories": [category]
}
下面介紹一下這個字典所包含的信息
images 數組元素的數量等同於劃入訓練集(或者測試集)的圖片的數量;
annotations 數組元素的數量等同於訓練集(或者測試集)中 bounding box 的數量;
categories數組元素的數量爲80(2017年);
>>> ann_train_file='annotations/instances_train2017.json'
>>> coco_train = COCO(ann_train_file)
loading annotations into memory...
Done (t=19.30s)
creating index...
index created!
>>> len(coco_train.dataset['categories'])
80
>>> len(coco_train.dataset['images'])
118287
>>> len(coco_train.dataset['annotations'])
860001
info 字段:數據集介紹說明
info: {
"year": int,
"version": str,
"description": str,
"contributor": str,
"url": str,
"date_created": datetime,
}
images: 圖片的信息,包括圖像名稱、圖像大小,來源等。
{
"license":3,
"file_name":"COCO_val2014_000000391895.jpg",
"coco_url":"http:\/\/mscoco.org\/images\/391895",
"height":360,"width":640,"date_captured":"2013-11-14 11:18:45",
"flickr_url":"http:\/\/farm9.staticflickr.com\/8186\/8119368305_4e622c8349_z.jpg",
"id":391895
},
annotations: 目標的一些標籤信息。
annotation{
"id": int,
"image_id": int,
"category_id": int,
"segmentation": RLE or [polygon],
"area": float,
"bbox": [x,y,width,height],
"iscrowd": 0 or 1,
}
categories:一個包含多個category實例的數組,而category結構體描述以下:
{
"id": int,
"name": str,
"supercategory": str,
}
import os
import json
import numpy as np
import glob
import shutil
from sklearn.model_selection import train_test_split
# Seed NumPy's global random generator with a fixed value.
# NOTE(review): presumably this is what keeps the train/val split further down
# reproducible, since train_test_split is called there without random_state —
# confirm against the scikit-learn version in use.
np.random.seed(41)
# Maps each labelme label name to its COCO category id.
# Id 0 stands for the background, so real classes start at 1.
classname_to_id = {"person": 1}
class Lableme2CoCo:
    """Convert labelme annotation files into one COCO "instances" dictionary.

    An instance accumulates image, annotation and category records while
    ``to_coco`` walks over labelme JSON files. Image ids and annotation ids
    are simple counters starting at 0. Category ids come from the
    module-level ``classname_to_id`` mapping.
    """

    def __init__(self):
        self.images = []
        self.annotations = []
        self.categories = []
        self.img_id = 0
        self.ann_id = 0

    def save_coco_json(self, instance, save_path):
        """Write ``instance`` to ``save_path`` as UTF-8 JSON.

        Args:
            instance: JSON-serialisable object, normally the dict returned by
                ``to_coco``.
            save_path: Destination file path; an existing file is overwritten.
        """
        # The context manager guarantees the handle is flushed and closed even
        # when serialisation fails half-way.
        with open(save_path, 'w', encoding='utf-8') as out_file:
            # indent=1 keeps the file human-readable without bloating it.
            json.dump(instance, out_file, ensure_ascii=False, indent=1)

    def to_coco(self, json_path_list):
        """Build the COCO dictionary from a list of labelme JSON paths.

        Args:
            json_path_list: Iterable of paths to labelme ``.json`` files.

        Returns:
            A dict with the COCO top-level keys ``info``, ``licenses``,
            ``images``, ``annotations`` and ``categories``.

        Raises:
            KeyError: If a shape uses a label missing from ``classname_to_id``.
            ValueError: If a shape has no points.
        """
        self._init_categories()
        for json_path in json_path_list:
            obj = self.read_jsonfile(json_path)
            self.images.append(self._image(obj, json_path))
            for shape in obj['shapes']:
                self.annotations.append(self._annotation(shape))
                self.ann_id += 1
            # Incremented only after the shapes are processed, because
            # _annotation reads self.img_id to link annotations to the image.
            self.img_id += 1
        return {
            # COCO defines "info" as an object and names the license list
            # "licenses" (plural).
            'info': {'description': 'spytensor created'},
            'licenses': ['license'],
            'images': self.images,
            'annotations': self.annotations,
            'categories': self.categories,
        }

    def _init_categories(self):
        """Fill ``self.categories`` from ``classname_to_id``.

        The list is rebuilt rather than appended to, so calling ``to_coco``
        more than once on the same instance does not duplicate categories.
        """
        self.categories = [
            {'id': class_id, 'name': class_name}
            for class_name, class_id in classname_to_id.items()
        ]

    def _image(self, obj, path):
        """Build the COCO ``image`` record for one labelme file.

        Args:
            obj: Parsed labelme JSON document.
            path: Path of the labelme JSON file; its base name with a ``.jpg``
                extension becomes ``file_name``.

        Returns:
            Dict with ``height``, ``width``, ``id`` and ``file_name``.
        """
        if obj.get('imageData'):
            # Imported lazily so labelme is only needed when the image is
            # actually embedded in the JSON.
            from labelme import utils
            img_x = utils.img_b64_to_arr(obj['imageData'])
            # [:2] works for both colour (H, W, C) and grayscale (H, W) arrays.
            h, w = img_x.shape[:2]
        else:
            # No embedded image: fall back to the size fields stored in the
            # labelme document.
            h, w = obj['imageHeight'], obj['imageWidth']
        stem = os.path.splitext(os.path.basename(path))[0]
        return {
            'height': h,
            'width': w,
            'id': self.img_id,
            # Only the extension is swapped; other ".json" substrings in the
            # name are left untouched.
            'file_name': stem + ".jpg",
        }

    def _annotation(self, shape):
        """Build the COCO ``annotation`` record for one labelme shape.

        Args:
            shape: One entry of the labelme ``shapes`` list; its ``label`` and
                ``points`` keys are read.

        Returns:
            Dict with ``id``, ``image_id``, ``category_id``, ``segmentation``,
            ``bbox``, ``iscrowd`` and ``area``.

        Raises:
            KeyError: If the label is missing from ``classname_to_id``.
        """
        points = shape['points']
        bbox = self._get_box(points)
        return {
            'id': self.ann_id,
            'image_id': self.img_id,
            'category_id': int(classname_to_id[shape['label']]),
            'segmentation': [np.asarray(points).flatten().tolist()],
            'bbox': bbox,
            'iscrowd': 0,
            # Bounding-box area (width * height) instead of a constant, so
            # size-based evaluation buckets see a meaningful value.
            'area': float(bbox[2] * bbox[3]),
        }

    def read_jsonfile(self, path):
        """Load and return the JSON document stored at ``path``."""
        with open(path, "r", encoding='utf-8') as f:
            return json.load(f)

    def _get_box(self, points):
        """Return the COCO bbox ``[x, y, width, height]`` enclosing ``points``.

        Args:
            points: Non-empty sequence of ``(x, y)`` pairs.

        Raises:
            ValueError: If ``points`` is empty.
        """
        if len(points) == 0:
            raise ValueError("cannot compute a bounding box without points")
        xs = [x for x, _ in points]
        ys = [y for _, y in points]
        # Real minima and maxima are used, so coordinates below zero still
        # yield the correct width and height.
        min_x, min_y = min(xs), min(ys)
        return [min_x, min_y, max(xs) - min_x, max(ys) - min_y]
if __name__ == '__main__':
    # Convert a folder of labelme annotations (with their .jpg images next to
    # them) into a COCO-style tree:
    #   <saved_coco_path>/coco/annotations/instances_{train,val}2017.json
    #   <saved_coco_path>/coco/images/{train,val}2017/
    labelme_path = "labelme/"
    saved_coco_path = "./"

    # Create the output directories; exist_ok avoids the check-then-create race.
    annotations_dir = os.path.join(saved_coco_path, "coco", "annotations")
    train_image_dir = os.path.join(saved_coco_path, "coco", "images", "train2017")
    val_image_dir = os.path.join(saved_coco_path, "coco", "images", "val2017")
    for out_dir in (annotations_dir, train_image_dir, val_image_dir):
        os.makedirs(out_dir, exist_ok=True)

    # Collect every labelme JSON file in the source directory.
    json_list_path = glob.glob(os.path.join(labelme_path, "*.json"))

    # Split the annotation files into training and validation sets; each set
    # gets its own image directory and its own instances_*.json file.
    train_path, val_path = train_test_split(json_list_path, test_size=0.12)
    print("train_n:", len(train_path), 'val_n:', len(val_path))

    for split_name, split_paths, image_dir in (
        ("train2017", train_path, train_image_dir),
        ("val2017", val_path, val_image_dir),
    ):
        # A fresh converter per split so image/annotation ids restart at 0.
        converter = Lableme2CoCo()
        instance = converter.to_coco(split_paths)
        converter.save_coco_json(
            instance,
            os.path.join(annotations_dir, "instances_%s.json" % split_name),
        )
        for json_file in split_paths:
            # Swap only the extension; a plain "json" -> "jpg" replace would
            # also rewrite directory or file names containing "json".
            shutil.copy(os.path.splitext(json_file)[0] + ".jpg", image_dir)
import os
import numpy as np
import codecs
import json
from glob import glob
import cv2
import shutil
from sklearn.model_selection import train_test_split
#1. Input and output locations
labelme_path = "./labelme/"  # directory holding the raw labelme annotations
saved_path = "./VOCdevkit/VOC2007/"  # root of the generated VOC dataset
#2. Create the directory layout expected for a VOC dataset
for _voc_subdir in ("Annotations", "JPEGImages/", "ImageSets/Main/"):
    _voc_target = saved_path + _voc_subdir
    if os.path.exists(_voc_target):
        continue
    os.makedirs(_voc_target)
#3. Collect the labelme files to convert, as base names without extension
files = glob(labelme_path + "*.json")
# os.path handles both "/" and "\\" separators, unlike a manual split("/").
files = [os.path.splitext(os.path.basename(i))[0] for i in files]
#4. Read every annotation and write the matching VOC xml file
for json_file_ in files:
    json_filename = labelme_path + json_file_ + ".json"
    with open(json_filename, "r", encoding="utf-8") as json_fp:
        json_file = json.load(json_fp)
    img_path = labelme_path + json_file_ + ".jpg"
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals a missing or unreadable file by returning None;
        # fail with a message that names the offending image.
        raise FileNotFoundError("Cannot read image for annotation: " + img_path)
    height, width, channels = img.shape
    with codecs.open(saved_path + "Annotations/" + json_file_ + ".xml", "w", "utf-8") as xml_file:
        xml_file.write('<annotation>\n')
        xml_file.write('\t<folder>' + 'UAV_data' + '</folder>\n')
        xml_file.write('\t<filename>' + json_file_ + ".jpg" + '</filename>\n')
        xml_file.write('\t<source>\n')
        xml_file.write('\t\t<database>The UAV autolanding</database>\n')
        xml_file.write('\t\t<annotation>UAV AutoLanding</annotation>\n')
        xml_file.write('\t\t<image>flickr</image>\n')
        xml_file.write('\t\t<flickrid>NULL</flickrid>\n')
        xml_file.write('\t</source>\n')
        xml_file.write('\t<owner>\n')
        xml_file.write('\t\t<flickrid>NULL</flickrid>\n')
        xml_file.write('\t\t<name>ChaojieZhu</name>\n')
        xml_file.write('\t</owner>\n')
        xml_file.write('\t<size>\n')
        xml_file.write('\t\t<width>' + str(width) + '</width>\n')
        xml_file.write('\t\t<height>' + str(height) + '</height>\n')
        xml_file.write('\t\t<depth>' + str(channels) + '</depth>\n')
        xml_file.write('\t</size>\n')
        xml_file.write('\t\t<segmented>0</segmented>\n')
        for multi in json_file["shapes"]:
            points = np.array(multi["points"])
            # VOC bounding boxes are integer pixel coordinates. labelme stores
            # floats, and writing "100.0" breaks readers that parse with int().
            xmin = int(round(float(points[:, 0].min())))
            xmax = int(round(float(points[:, 0].max())))
            ymin = int(round(float(points[:, 1].min())))
            ymax = int(round(float(points[:, 1].max())))
            label = multi["label"]
            if xmax <= xmin or ymax <= ymin:
                # Degenerate box with zero width or height: skip it.
                continue
            xml_file.write('\t<object>\n')
            xml_file.write('\t\t<name>' + label + '</name>\n')
            xml_file.write('\t\t<pose>Unspecified</pose>\n')
            xml_file.write('\t\t<truncated>1</truncated>\n')
            xml_file.write('\t\t<difficult>0</difficult>\n')
            xml_file.write('\t\t<bndbox>\n')
            xml_file.write('\t\t\t<xmin>' + str(xmin) + '</xmin>\n')
            xml_file.write('\t\t\t<ymin>' + str(ymin) + '</ymin>\n')
            xml_file.write('\t\t\t<xmax>' + str(xmax) + '</xmax>\n')
            xml_file.write('\t\t\t<ymax>' + str(ymax) + '</ymax>\n')
            xml_file.write('\t\t</bndbox>\n')
            xml_file.write('\t</object>\n')
            print(json_filename, xmin, ymin, xmax, ymax, label)
        xml_file.write('</annotation>')
#5. Copy every source .jpg into <saved_path>/JPEGImages/
image_files = glob(labelme_path + "*.jpg")
print("copy image files to VOC007/JPEGImages/")
_jpeg_dir = saved_path + "JPEGImages/"
for _src_image in image_files:
    shutil.copy(_src_image, _jpeg_dir)
#6. Write the ImageSets/Main split lists (one image base name per line)
txtsavepath = saved_path + "ImageSets/Main/"
# Read the annotations from where they were written (under saved_path); the
# former hard-coded "./VOC2007/Annotations/" never matched, which left the
# list empty and made the split below fail.
total_files = glob(saved_path + "Annotations/*.xml")
# Sorted so that random_state=42 yields the same split on every run,
# independent of the order in which the filesystem lists the files.
total_files = sorted(os.path.splitext(os.path.basename(i))[0] for i in total_files)
# Split before opening any output file, so a failure here leaves nothing
# half-written.
train_files, val_files = train_test_split(total_files, test_size=0.15, random_state=42)
with open(os.path.join(txtsavepath, 'trainval.txt'), 'w') as ftrainval, \
        open(os.path.join(txtsavepath, 'train.txt'), 'w') as ftrain, \
        open(os.path.join(txtsavepath, 'val.txt'), 'w') as fval, \
        open(os.path.join(txtsavepath, 'test.txt'), 'w'):
    # test.txt is created empty on purpose: no test split is produced here.
    # trainval.txt lists every annotated image.
    for file in total_files:
        ftrainval.write(file + "\n")
    # train.txt
    for file in train_files:
        ftrain.write(file + "\n")
    # val.txt
    for file in val_files:
        fval.write(file + "\n")