This post annotates and walks through dataset.py, the data-loading module that ultralytics/yolov5 uses during training. dataset.py also contains loaders used at other times (for example LoadImages and LoadWebcam, used by detect), but the focus here is on the LoadImagesAndLabels class used for training.
Mosaic augmentation
Mosaic augmentation stitches four images into one and feeds the result to the network for training; for details see the post "YOLOV4-mosaic data augmentation explained" (an analysis based on a PyTorch YOLOv4 implementation).
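To make the paste logic concrete before reading the full load_mosaic implementation below, here is a minimal, self-contained sketch of how the top-left tile lands on the 2s x 2s canvas. The gray fill value (114) and the random-center range mirror the code below; the stand-in image is fabricated for the demo:

```python
import random
import numpy as np

s = 640  # network input size, as in the code below
img4 = np.full((2 * s, 2 * s, 3), 114, dtype=np.uint8)  # 2s x 2s gray canvas
yc, xc = (int(random.uniform(s * 0.5, s * 1.5)) for _ in range(2))  # random mosaic center
img = np.zeros((s, s, 3), dtype=np.uint8)  # stand-in for a loaded image
h, w = img.shape[:2]

# top-left tile: its bottom-right corner is pinned to the mosaic center (xc, yc)
x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # region on the canvas
x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # matching crop of the image
img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]

# every box on this image is then shifted by the paste offset:
padw, padh = x1a - x1b, y1a - y1b
```

The other three tiles follow the same pattern with different corners pinned to the center, as the full code below shows.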
Rectangular training
yolov5 pads each image into a square before passing it to the network. This introduces a lot of redundant pixels and makes the network produce many meaningless candidate boxes; rectangular training reduces that redundancy and with it the number of meaningless boxes, speeding up training. Since the total stride of the yolov5 network is 32, the input only needs side lengths divisible by 32, not a full square, so rectangular training pads each image only up to the smallest rectangle whose sides are multiples of 32.
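As a concrete illustration (a sketch of the idea, not yolov5's actual code; the function name and sample shape are made up for the demo), the padded shape for a single image can be computed by scaling the long side to the input size and rounding both sides up to the nearest multiple of the stride:

```python
import math

def rect_shape(h, w, img_size=640, stride=32):
    # scale the long side to img_size, then round both sides up to a multiple of stride
    r = img_size / max(h, w)
    return (math.ceil(h * r / stride) * stride,
            math.ceil(w * r / stride) * stride)

print(rect_shape(720, 1280))  # (384, 640): 24 rows of padding instead of 280 for a square
```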
It is worth mentioning that besides rectangular training there is also rectangular inference, i.e. padding the same way at detection time, which speeds up inference and cuts inference time.
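For example, calling letterbox (defined in the code below) with auto=True pads a 720x1280 frame only to the minimum rectangle, not a full square; note that this version of letterbox pads to a multiple of 64. The dummy frame here is an assumption for the demo:

```python
import numpy as np

frame = np.zeros((720, 1280, 3), dtype=np.uint8)  # dummy frame standing in for a real photo
img, ratio, (dw, dh) = letterbox(frame, new_shape=640, auto=True)
print(img.shape)  # (384, 640, 3) instead of (640, 640, 3)
```

The full annotated source follows.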
```python
import glob
import math
import os
import random
import shutil
import time
from pathlib import Path
from threading import Thread

import cv2
import numpy as np
import torch
from PIL import Image, ExifTags
from torch.utils.data import Dataset
from tqdm import tqdm

from utils.utils import xyxy2xywh, xywh2xyxy

help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.dng']
vid_formats = ['.mov', '.avi', '.mp4', '.mpg', '.mpeg', '.m4v', '.wmv', '.mkv']

# Get orientation exif tag
for orientation in ExifTags.TAGS.keys():
    if ExifTags.TAGS[orientation] == 'Orientation':
        break


# Returns the width/height of an image, corrected by its EXIF info
def exif_size(img):
    # Returns exif-corrected PIL size
    s = img.size  # (width, height)
    try:
        rotation = dict(img._getexif().items())[orientation]
        if rotation == 6:  # rotation 270
            s = (s[1], s[0])
        elif rotation == 8:  # rotation 90
            s = (s[1], s[0])
    except:
        pass

    return s


# Build a dataloader on top of LoadImagesAndLabels
def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=False, cache=False, pad=0.0, rect=False):
    """
    Arguments:
    path: a txt file listing image paths, or a folder containing images
    imgsz: network input image size
    batch_size: batch size
    stride: maximum total downsampling stride of the network
    opt: the arguments passed to train.py; only opt.single_cls (whether the dataset is single-class) is used here
    hyp: training hyperparameters (learning rate etc.); mainly the augmentation coefficients (rotation, translation, ...) are used here
    augment: whether to apply data augmentation
    cache: whether to cache images in memory beforehand to speed up training
    pad: padding applied when computing the rectangular-training shapes
    rect: whether to use rectangular training
    """
    dataset = LoadImagesAndLabels(path, imgsz, batch_size,
                                  augment=augment,  # augment images
                                  hyp=hyp,  # augmentation hyperparameters
                                  rect=rect,  # rectangular training
                                  cache_images=cache,
                                  single_cls=opt.single_cls,
                                  stride=int(stride),
                                  pad=pad)

    batch_size = min(batch_size, len(dataset))
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             num_workers=nw,
                                             pin_memory=True,
                                             collate_fn=LoadImagesAndLabels.collate_fn)
    return dataloader, dataset


class LoadImagesAndLabels(Dataset):  # for training/testing
    def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
                 cache_images=False, single_cls=False, stride=32, pad=0.0):
        try:
            f = []
            for p in path if isinstance(path, list) else [path]:
                # path is either a txt file listing image paths or a folder containing images.
                # Use pathlib.Path to build an os-agnostic path ('/' differs between operating systems)
                p = str(Path(p))  # os-agnostic
                # parent directory of the dataset path; os.sep is the path separator (adapts to the operating system)
                parent = str(Path(p).parent) + os.sep
                # if path is a txt file listing image paths
                if os.path.isfile(p):  # file
                    with open(p, 'r') as t:
                        # read the image paths, turning relative paths into global ones
                        t = t.read().splitlines()
                        f += [x.replace('./', parent) if x.startswith('./') else x for x in t]  # local to global path
                # if path is a folder containing images
                elif os.path.isdir(p):  # folder
                    f += glob.iglob(p + os.sep + '*.*')
                else:
                    raise Exception('%s does not exist' % p)
            path = p  # *.npy dir
            # replace '/' with os.sep; os.path.splitext(x) splits a filename from its extension
            self.img_files = [x.replace('/', os.sep) for x in f if os.path.splitext(x)[-1].lower() in img_formats]
        except Exception as e:
            raise Exception('Error loading data from %s: %s\nSee %s' % (path, e, help_url))

        # number of images in the dataset
        n = len(self.img_files)
        assert n > 0, 'No images found in %s. See %s' % (path, help_url)
        # batch index of every image
        bi = np.floor(np.arange(n) / batch_size).astype(np.int)  # batch index
        # number of batches per epoch
        nb = bi[-1] + 1  # number of batches

        self.n = n  # number of images
        self.batch = bi  # batch index of image
        self.img_size = img_size  # input resolution
        self.augment = augment  # data augmentation
        self.hyp = hyp  # hyperparameters
        self.image_weights = image_weights  # weighted image sampling
        self.rect = False if image_weights else rect  # rectangular training
        self.mosaic = self.augment and not self.rect  # mosaic augmentation
        self.mosaic_border = [-img_size // 2, -img_size // 2]  # mosaic border
        self.stride = stride  # maximum total downsampling stride of the model

        # derive the dataset's label paths from the image paths
        self.label_files = [x.replace('images', 'labels').replace(os.path.splitext(x)[-1], '.txt')
                            for x in self.img_files]

        # path of the file caching the image shapes
        sp = path.replace('.txt', '') + '.shapes'  # shapefile path
        try:
            # if the shapefile exists, read it
            with open(sp, 'r') as f:  # read existing shapefile
                s = [x.split() for x in f.read().splitlines()]
                assert len(s) == n, 'Shapefile out of sync'
        except:
            # otherwise read the image shapes, then save them
            s = [exif_size(Image.open(f)) for f in tqdm(self.img_files, desc='Reading image shapes')]
            np.savetxt(sp, s, fmt='%g')  # overwrites existing (if any)

        self.shapes = np.array(s, dtype=np.float64)

        # Rectangular Training  https://github.com/ultralytics/yolov3/issues/232
        if self.rect:
            # Sort by aspect ratio
            s = self.shapes  # wh
            ar = s[:, 1] / s[:, 0]  # h/w
            # indices that sort ar from small to large
            irect = ar.argsort()
            # reorder image paths, label paths, shapes and h/w by those indices
            self.img_files = [self.img_files[i] for i in irect]
            self.label_files = [self.label_files[i] for i in irect]
            self.shapes = s[irect]  # wh
            ar = ar[irect]

            # Set training image shapes
            # initialize the shapes; nb is the number of batches per epoch
            shapes = [[1, 1]] * nb
            for i in range(nb):
                ari = ar[bi == i]
                mini, maxi = ari.min(), ari.max()
                # if the largest h/w in a batch is < 1, the batch shape is (img_size*maxi, img_size)
                if maxi < 1:
                    shapes[i] = [maxi, 1]
                # if the smallest h/w in a batch is > 1, the batch shape is (img_size, img_size/mini)
                elif mini > 1:
                    shapes[i] = [1, 1 / mini]

            self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride

        # Cache labels
        # initialize images and labels in preparation for caching them
        self.imgs = [None] * n
        self.labels = [np.zeros((0, 5), dtype=np.float32)] * n
        # whether to create a data subset, extract the detection boxes for a second-stage classifier,
        # and whether the labels are already loaded
        create_datasubset, extract_bounding_boxes, labels_loaded = False, False, False
        # counters for missing, found, empty, datasubset and duplicate labels
        nm, nf, ne, ns, nd = 0, 0, 0, 0, 0  # number missing, found, empty, datasubset, duplicate
        # path of the numpy file caching the labels
        np_labels_path = str(Path(self.label_files[0]).parent) + '.npy'  # saved labels in *.npy file
        # if labels.npy exists, load it directly and set labels_loaded=True
        if os.path.isfile(np_labels_path):
            s = np_labels_path  # print string
            x = np.load(np_labels_path, allow_pickle=True)
            if len(x) == n:
                self.labels = x
                labels_loaded = True
        else:
            s = path.replace('images', 'labels')

        # process every label file
        pbar = tqdm(self.label_files)
        for i, file in enumerate(pbar):
            # if the labels are already preloaded, just take them out
            if labels_loaded:
                l = self.labels[i]
                # np.savetxt(file, l, '%g')  # save *.txt from *.npy file
            else:
                try:
                    # read the label txt file (format: class x y w h); on failure, nm += 1
                    with open(file, 'r') as f:
                        l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)
                except:
                    nm += 1  # print('missing labels for image %s' % self.img_files[i])  # file missing
                    continue

            if l.shape[0]:
                # every label row must have five columns
                assert l.shape[1] == 5, '> 5 label columns: %s' % file
                # all label values must be >= 0
                assert (l >= 0).all(), 'negative labels: %s' % file
                # the x y w h coordinates must be normalized
                assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file
                # find duplicate rows in the labels
                if np.unique(l, axis=0).shape[0] < l.shape[0]:  # duplicate rows
                    nd += 1  # print('WARNING: duplicate rows in %s' % self.label_files[i])  # duplicate rows
                # if the dataset has only one class, force the class label to 0
                if single_cls:
                    l[:, 0] = 0  # force dataset into single-class mode
                self.labels[i] = l
                nf += 1  # file found

                # Create subdataset (a smaller dataset)
                # (not called by default)
                if create_datasubset and ns < 1E4:
                    # create the folders
                    if ns == 0:
                        create_folder(path='./datasubset')
                        os.makedirs('./datasubset/images')
                    exclude_classes = 43
                    # save the image path locally
                    if exclude_classes not in l[:, 0]:
                        ns += 1
                        # shutil.copy(src=self.img_files[i], dst='./datasubset/images/')  # copy image
                        with open('./datasubset/images.txt', 'a') as f:
                            f.write(self.img_files[i] + '\n')

                # Extract object detection boxes for a second stage classifier
                # crop the boxes out of the images and save them locally (not used by default)
                if extract_bounding_boxes:
                    p = Path(self.img_files[i])
                    img = cv2.imread(str(p))
                    h, w = img.shape[:2]
                    for j, x in enumerate(l):
                        f = '%s%sclassifier%s%g_%g_%s' % (p.parent.parent, os.sep, os.sep, x[0], j, p.name)
                        if not os.path.exists(Path(f).parent):
                            os.makedirs(Path(f).parent)  # make new output folder

                        # multiply the normalized coordinates by w, h
                        b = x[1:] * [w, h, w, h]  # box
                        b[2:] = b[2:].max()  # rectangle to square
                        b[2:] = b[2:] * 1.3 + 30  # pad
                        # xywh to xyxy
                        b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int)

                        # clip boxes that fall outside the image
                        b[[0, 2]] = np.clip(b[[0, 2]], 0, w)  # clip boxes outside of image
                        b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
                        assert cv2.imwrite(f, img[b[1]:b[3], b[0]:b[2]]), 'Failure extracting classifier boxes'
            else:
                # l.shape[0] == 0 means an empty label, so ne += 1
                ne += 1  # print('empty labels for image %s' % self.img_files[i])  # file empty
                # os.system("rm '%s' '%s'" % (self.img_files[i], self.label_files[i]))  # remove

            # progress info
            pbar.desc = 'Caching labels %s (%g found, %g missing, %g empty, %g duplicate, for %g images)' % (
                s, nf, nm, ne, nd, n)
        assert nf > 0 or n == 20288, 'No labels found in %s. See %s' % (os.path.dirname(file) + os.sep, help_url)

        # save the labels locally for faster future loading
        if not labels_loaded and n > 1000:
            print('Saving labels to %s for faster future loading' % np_labels_path)
            np.save(np_labels_path, self.labels)  # save for next time

        # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
        # caching images in memory ahead of time can speed up training
        if cache_images:  # if training
            gb = 0  # Gigabytes of cached images
            pbar = tqdm(range(len(self.img_files)), desc='Caching images')
            self.img_hw0, self.img_hw = [None] * n, [None] * n
            for i in pbar:  # max 10k images
                self.imgs[i], self.img_hw0[i], self.img_hw[i] = load_image(self, i)  # img, hw_original, hw_resized
                gb += self.imgs[i].nbytes
                pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9)

        # Detect corrupted images  https://medium.com/joelthchao/programmatically-detect-corrupted-image-8c1b2006c3d3
        detect_corrupted_images = False
        if detect_corrupted_images:
            from skimage import io  # conda install -c conda-forge scikit-image
            for file in tqdm(self.img_files, desc='Detecting corrupted images'):
                try:
                    _ = io.imread(file)
                except:
                    print('Corrupted image detected: %s' % file)

    def __len__(self):
        return len(self.img_files)

    # def __iter__(self):
    #     self.count = -1
    #     print('ran dataset iter')
    #     #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF)
    #     return self

    def __getitem__(self, index):
        # if image_weights is set, remap the index
        if self.image_weights:
            # print(index, self.indices[index])
            """
            self.indices is set in train.py and works together with the train.py code quoted below.
            image_weights are per-image sampling weights derived from the class counts in the labels;
            if image_weights=True, a new index is drawn according to those weights.
            # Update image weights (optional)
            if dataset.image_weights:
                w = model.class_weights.cpu().numpy() * (1 - maps) ** 2  # class weights
                image_weights = labels_to_image_weights(dataset.labels, nc=nc, class_weights=w)
                dataset.indices = random.choices(range(dataset.n), weights=image_weights, k=dataset.n)  # rand weighted idx
            """
            index = self.indices[index]

        # hyperparameters
        hyp = self.hyp
        if self.mosaic:
            # Load mosaic
            # load the sample with mosaic augmentation
            img, labels = load_mosaic(self, index)
            shapes = None
        else:
            # Load image
            # load the image, resized by the ratio between the configured input size and the original size
            # (no padding to a square yet)
            img, (h0, w0), (h, w) = load_image(self, index)

            # Letterbox
            # for rectangular training, take the input shape of this image's batch
            shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # final letterboxed shape
            # resize and pad the image to shape; returns the resized+padded image, the scale factor ratio
            # and the padding pad. Without rectangular training this just pads to a square.
            img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
            shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

            # Load labels
            labels = []
            x = self.labels[index]
            if x.size > 0:
                # Normalized xywh to pixel xyxy format
                # adjust the box coordinates for the padding, going from normalized xywh to unnormalized xyxy
                labels = x.copy()
                labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0]  # pad width
                labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1]  # pad height
                labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0]
                labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1]

        if self.augment:
            # Augment imagespace
            if not self.mosaic:
                # random rotation, translation, scaling and shearing
                img, labels = random_affine(img, labels,
                                            degrees=hyp['degrees'],
                                            translate=hyp['translate'],
                                            scale=hyp['scale'],
                                            shear=hyp['shear'])

            # Augment colorspace
            # randomly change the image's hue (H), saturation (S) and value/brightness (V)
            augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])

            # Apply cutouts
            # if random.random() < 0.9:
            #     labels = cutout(img, labels)

        nL = len(labels)  # number of labels
        if nL:
            # convert xyxy to xywh
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])

            # re-normalize coordinates to 0 - 1
            labels[:, [2, 4]] /= img.shape[0]  # height
            labels[:, [1, 3]] /= img.shape[1]  # width

        if self.augment:
            # random left-right flip
            lr_flip = True
            if lr_flip and random.random() < 0.5:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

            # random up-down flip
            ud_flip = False
            if ud_flip and random.random() < 0.5:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

        # the first column holds the image index of each box, used by collate_fn below
        labels_out = torch.zeros((nL, 6))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)

        return torch.from_numpy(img), labels_out, self.img_files[index], shapes

    """
    PyTorch's DataLoader passes each batch through this function when assembling it.
    Overriding it records which labels belong to which image of the batch, e.g.
    [[0, 6, 0.5, 0.5, 0.26, 0.35],
     [0, 6, 0.5, 0.5, 0.26, 0.35],
     [1, 6, 0.5, 0.5, 0.26, 0.35],
     [2, 6, 0.5, 0.5, 0.26, 0.35]]
    the first two label rows belong to the first image, the third row to the second image, and so on.
    """
    @staticmethod
    def collate_fn(batch):
        img, label, path, shapes = zip(*batch)  # transposed
        for i, l in enumerate(label):
            l[:, 0] = i  # add target image index for build_targets()
        return torch.stack(img, 0), torch.cat(label, 0), path, shapes


def load_image(self, index):
    # loads 1 image from dataset, returns img, original hw, resized hw
    img = self.imgs[index]
    if img is None:  # not cached
        path = self.img_files[index]
        img = cv2.imread(path)  # BGR
        assert img is not None, 'Image Not Found ' + path
        h0, w0 = img.shape[:2]  # orig hw
        r = self.img_size / max(h0, w0)  # resize image to img_size
        # choose the interpolation method depending on the ratio
        if r != 1:  # always resize down, only resize up if training with augmentation
            interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR
            img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
        return img, (h0, w0), img.shape[:2]  # img, hw_original, hw_resized
    else:
        return self.imgs[index], self.img_hw0[index], self.img_hw[index]  # img, hw_original, hw_resized
```
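A quick toy run of collate_fn shows the bookkeeping: column 0 of each label row is filled with the index of the image the box belongs to. The tensors and file names here are fabricated for the demo:

```python
import torch

batch = [
    (torch.zeros(3, 640, 640), torch.zeros(2, 6), 'a.jpg', None),  # image 0 with 2 boxes
    (torch.zeros(3, 640, 640), torch.zeros(1, 6), 'b.jpg', None),  # image 1 with 1 box
]
imgs, labels, paths, shapes = LoadImagesAndLabels.collate_fn(batch)
print(imgs.shape)    # torch.Size([2, 3, 640, 640])
print(labels[:, 0])  # tensor([0., 0., 1.]) -> the first two boxes belong to image 0
```

Next come the augmentation helpers: augment_hsv for the color space and load_mosaic for the four-image stitching.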
```python
def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
    # draw three random numbers in [-1, 1] and multiply by the hsv coefficients from hyp
    r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1  # random gains
    # split the channels
    hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
    dtype = img.dtype  # uint8

    x = np.arange(0, 256, dtype=np.int16)
    lut_hue = ((x * r[0]) % 180).astype(dtype)
    lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
    lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

    # recombine the randomly adjusted hsv channels
    img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)
    # convert hsv back to BGR
    cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)  # no return needed

    # Histogram equalization
    # if random.random() < 0.2:
    #     for i in range(3):
    #         img[:, :, i] = cv2.equalizeHist(img[:, :, i])


def load_mosaic(self, index):
    # loads images in a mosaic

    labels4 = []
    s = self.img_size
    # pick a random mosaic center point
    yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border]  # mosaic center x, y
    # pick the indices of three additional random images
    indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)]  # 3 additional image indices
    for i, index in enumerate(indices):
        # Load image
        img, _, (h, w) = load_image(self, index)

        # place img in img4
        if i == 0:  # top left
            # initialize the large canvas
            img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
            # region on the large canvas (top-left tile)
            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
            # matching region on the small image
            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
        elif i == 1:  # top right
            x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
            x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
        elif i == 2:  # bottom left
            x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, max(xc, w), min(y2a - y1a, h)
        elif i == 3:  # bottom right
            x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

        # paste the cropped part of the small image onto the canvas
        img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
        # offset introduced by the paste, used to recompute the augmented label boxes
        padw = x1a - x1b
        padh = y1a - y1b

        # Labels
        x = self.labels[index]
        labels = x.copy()
        # shift the label boxes to their new positions
        if x.size > 0:  # Normalized xywh to pixel xyxy format
            labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2) + padw
            labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2) + padh
            labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2) + padw
            labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2) + padh
        labels4.append(labels)

    # Concat/clip labels
    if len(labels4):
        # clip the boxes so they stay inside the canvas
        labels4 = np.concatenate(labels4, 0)
        # np.clip(labels4[:, 1:] - s / 2, 0, s, out=labels4[:, 1:])  # use with center crop
        np.clip(labels4[:, 1:], 0, 2 * s, out=labels4[:, 1:])  # use with random_affine

    # Replicate
    # img4, labels4 = replicate(img4, labels4)

    # Augment
    # img4 = img4[s // 2: int(s * 1.5), s // 2:int(s * 1.5)]  # center crop (WARNING, requires box pruning)
    # print('mosica:', img4.shape)
    # after stitching, the mosaic image has shape [2*img_size, 2*img_size];
    # random_affine rotates/translates/scales/shears it and resizes it back to the input size img_size
    img4, labels4 = random_affine(img4, labels4,
                                  degrees=self.hyp['degrees'],
                                  translate=self.hyp['translate'],
                                  scale=self.hyp['scale'],
                                  shear=self.hyp['shear'],
                                  border=self.mosaic_border)  # border to remove
    # print('mosica:', img4.shape)

    return img4, labels4
```
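A minimal sanity check of augment_hsv above; the random BGR image is fabricated and the gain values are merely plausible magnitudes, not necessarily the repo's defaults:

```python
import numpy as np

img = np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8)  # fabricated BGR image
augment_hsv(img, hgain=0.015, sgain=0.7, vgain=0.4)  # modifies img in place, no return value
```

Finally, the geometry helpers: letterbox for resize-and-pad and random_affine for the geometric augmentations.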
```python
def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
    # Resize image to a 32-pixel-multiple rectangle  https://github.com/ultralytics/yolov3/issues/232
    shape = img.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    # compute the scale factor
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    # When resizing to the input size img_size, only scale down unless upscaling is enabled:
    # upscaling blurs the image, which is unfriendly to training and hurts performance.
    if not scaleup:  # only scale down, do not scale up (for better test mAP)
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    # minimum-rectangle padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, 64), np.mod(dh, 64)  # wh padding
    # if scaleFill=True, skip padding and resize straight to img_size, stretching/squeezing the image
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    # split the padding between the two sides
    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    # apply the padding
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return img, ratio, (dw, dh)


# import torchvision
# torchvision.transforms.RandomAffine
def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, border=(0, 0)):
    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
    # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
    # targets = [cls, xyxy]

    height = img.shape[0] + border[0] * 2  # shape(h,w,c)
    width = img.shape[1] + border[1] * 2

    # affine matrix for rotation and scaling
    # Rotation and Scale
    R = np.eye(3)
    a = random.uniform(-degrees, degrees)
    # a += random.choice([-180, -90, 0, 90])  # add 90deg rotations to small rotations
    s = random.uniform(1 - scale, 1 + scale)
    # s = 2 ** random.uniform(-scale, scale)
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)

    # affine coefficients for translation
    # Translation
    T = np.eye(3)
    T[0, 2] = random.uniform(-translate, translate) * img.shape[1] + border[1]  # x translation (pixels)
    T[1, 2] = random.uniform(-translate, translate) * img.shape[0] + border[0]  # y translation (pixels)

    # affine matrix coefficients for shearing
    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)

    # Combined rotation matrix
    # combine the affine matrices and apply them to the image
    M = S @ T @ R  # ORDER IS IMPORTANT HERE!!
    if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
        img = cv2.warpAffine(img, M[:2], dsize=(width, height), flags=cv2.INTER_LINEAR, borderValue=(114, 114, 114))

    # Transform label coordinates
    # adjust the label boxes
    n = len(targets)
    if n:
        # warp points
        xy = np.ones((n * 4, 3))
        xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
        xy = (xy @ M.T)[:, :2].reshape(n, 8)

        # create new boxes
        x = xy[:, [0, 2, 4, 6]]
        y = xy[:, [1, 3, 5, 7]]
        xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

        # # apply angle-based reduction of bounding boxes
        # radians = a * math.pi / 180
        # reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
        # x = (xy[:, 2] + xy[:, 0]) / 2
        # y = (xy[:, 3] + xy[:, 1]) / 2
        # w = (xy[:, 2] - xy[:, 0]) * reduction
        # h = (xy[:, 3] - xy[:, 1]) * reduction
        # xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T

        # reject warped points outside of image
        xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
        xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)
        w = xy[:, 2] - xy[:, 0]
        h = xy[:, 3] - xy[:, 1]
        area = w * h
        area0 = (targets[:, 3] - targets[:, 1]) * (targets[:, 4] - targets[:, 2])
        ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))  # aspect ratio
        i = (w > 2) & (h > 2) & (area / (area0 * s + 1e-16) > 0.2) & (ar < 20)

        targets = targets[i]
        targets[:, 1:5] = xy[i]

    return img, targets


def create_folder(path='./new_folder'):
    # Create folder
    if os.path.exists(path):
        shutil.rmtree(path)  # delete output folder
    os.makedirs(path)  # make new output folder
```
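To see random_affine's box filtering in action, here is a toy call on a blank canvas with a single box (all values fabricated for the demo); boxes that become too small, too stretched, or that lose too much of their area after warping are dropped:

```python
import numpy as np

img = np.full((640, 640, 3), 114, dtype=np.uint8)   # blank canvas
targets = np.array([[0, 100., 100., 300., 300.]])   # one box: [cls, x1, y1, x2, y2]
img, targets = random_affine(img, targets, degrees=10, translate=0.1, scale=0.1, shear=10)
print(targets)  # the surviving boxes, warped into the augmented image's coordinates
```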
The above is my reading of ultralytics/yolov5's dataset.py based on my own understanding; if anything is wrong, corrections are welcome. Thank you.
yolov5 is still being improved and some of its code (train.py, for example) may change at any time; I will follow up with analyses and annotations of the other yolov5 code.