0 引言
本文是之前爲了解決如何將文字貼到圖片上而編寫的代碼,默認的場景是發票一類的平面票據,因此並未考慮透視變換等。本文採用的是pygame粘貼方式,之前也嘗試過opencv的seamlessClone粘貼。
值得注意的是,通過修改參數、增加各類干擾操作(羽化、噪音等等),生成的數據集看似豐富,其實內在的數據分佈還是十分單一;也就是說,用該數據集作爲ocr的模型訓練集,得到的模型仍然無法在現實生活場景上使用。因爲在現實世界中,光照角度、拍攝角度、打印機用墨等等都是變量,這些變量使現實世界票據上文字的內在數據分佈十分豐富;而通過簡單的代碼生成的數據分佈並不能覆蓋它,最多隻與其中一部分重疊。故而,通過代碼生成數據集的方式是無法解決ocr現實數據集不夠的問題的。
所需要的操作: 1 - 下載colors_new.cp; 2 - 將下面兩份代碼分別存成get_color.py和pygame_main.py; 3 - 用python運行pygame_main.py即可。
# get_color.py
import cv2
import pickle as cp
import numpy as np


class ColorSample(object):
    """Choose a text (foreground) colour that suits a background image.

    The colour table loaded in ``__init__`` is read as one colour pair per
    row: columns 0:3 and 6:9 are used as the mean of the first and second
    colour, columns 3:6 and 9:12 as the matching standard deviations.
    """

    def __init__(self, color_file='colors_new.cp'):
        """Load the colour table and pre-compute its Lab colour means.

        colors_new.cp comes from
        https://github.com/JarveeLee/SynthText_Chinese_version/tree/master/data/models/colors_new.cp

        Args:
            color_file: path of the pickled colour table. Defaults to
                'colors_new.cp' in the current working directory.
        """
        # NOTE(security): unpickling can execute arbitrary code; only load a
        # colour file that comes from a trusted source.
        with open(color_file, 'rb') as f:
            self.colorsRGB = cp.load(f, encoding='latin-1')
        self.ncol = self.colorsRGB.shape[0]  # 4941
        # convert color-means from RGB to LAB for better nearest neighbour
        # computations; the first ncol rows are the first colour of every
        # pair, the next ncol rows the second colour.
        means_rgb = np.r_[self.colorsRGB[:, 0:3],
                          self.colorsRGB[:, 6:9]].astype('uint8')
        self.colorsLAB = np.squeeze(
            cv2.cvtColor(means_rgb[None, :, :], cv2.COLOR_RGB2Lab))

    def sample_normal(self, col_mean, col_std):
        """Sample from a normal distribution centered around COL_MEAN with
        standard deviation = COL_STD.

        A single random factor is drawn and shared by all channels. The
        result is clipped to [0, 255] and returned as uint8.
        """
        col_sample = col_mean + col_std * np.random.randn()
        return np.clip(col_sample, 0, 255).astype('uint8')

    def sample_from_data(self, bg_mat):
        """Pick the colour pair whose mean is closest to the image's mean.

        Args:
            bg_mat: an n x m x 3 RGB image.

        Returns:
            A tuple (RGB_foreground, RGB_background); each is a 3-vector.
            The second element is sampled from the colour of the pair that
            matched the image's mean colour.
        """
        bg_lab = cv2.cvtColor(bg_mat, cv2.COLOR_RGB2Lab)
        bg_lab = np.reshape(bg_lab, (np.prod(bg_lab.shape[:2]), 3))
        bg_mean = np.mean(bg_lab, axis=0)

        norms = np.linalg.norm(self.colorsLAB - bg_mean[None, :], axis=1)
        # nearest neighbour among the 2 * ncol stacked colour means
        nn = np.argmin(norms)

        data_col = self.colorsRGB[np.mod(nn, self.ncol), :]
        col1 = self.sample_normal(data_col[:3], data_col[3:6])
        col2 = self.sample_normal(data_col[6:9], data_col[9:12])

        if nn < self.ncol:
            # the first colour of the pair matched the background
            return (col2, col1)
        # need to swap to make the second color close to the input
        # background color
        return (col1, col2)


if __name__ == '__main__':
    # Smoke test on a flat light-grey background (needs colors_new.cp in the
    # current directory).
    bgi = np.full((32, 32, 3), 200, dtype='uint8')
    fg_col, bg_col = ColorSample().sample_from_data(bgi)
    print('foreground:', fg_col, 'background:', bg_col)
# -*- coding: utf-8 -*-
# pygame_main.py: paste text lines onto background images with pygame
# (pygame replaces PIL's Image for drawing the text) and write one xml
# annotation file per generated image.
import os
import cv2
import glob
import math
import random
import numpy as np
import os.path as osp
from xml.dom.minidom import Document
import multiprocessing as mp
import logging
from PIL import Image,ImageDraw,ImageFont
import secrets
import pygame
from pygame.locals import *
from pygame import freetype
import get_color

resultImgsDir = '/home/result_imgs'  # where the generated images are written
resultXmlDir = '/home/result_xmls'  # where the generated xml files are written
bgiDir = '/home/background_images'  # directory of background images (*.jpg)
gTtf = '/home/ttfs'  # directory of fonts (*.ttf)
# text to paste, one sentence (or one word) per line
totalFile = '/home/zzc/data/synth_recepit_text/result_200.txt'
FORMAT = '%(asctime)-15s [%(processName)s] %(message)s'
logging.basicConfig(format = FORMAT)
gBlockSize = 20  # number of sentences handed to one task
ttfSize = [28,30,35,40,45,50,55,60,65]


def _addSaltNoise(block, level=10):
    """Add salt-and-pepper noise to `block` in place and return it.

    Every element independently becomes 0 or 255, each with probability
    1 / (level + 1).
    """
    # randint's upper bound is exclusive, so it must be level + 1 for the
    # `ran == level` branch to ever fire.
    ran = np.random.randint(0, level + 1, block.shape)
    block[ran == 0] = 0
    block[ran == level] = 255
    return block


def _addNoise(block, below=4, high=20):
    """Return a uint8 copy of `block` with Gaussian noise added.

    The noise standard deviation is one integer drawn from [below, high).
    """
    sigma = np.random.randint(below, high)
    noisy = block + np.random.randn(*block.shape) * sigma
    return np.clip(noisy, 0, 255).astype('uint8')


def _feather(block, height):
    """Feather (Gaussian-blur) `block`; taller text gets a stronger blur."""
    # determine the gaussian-blur std and kernel size:
    if height <= 30:
        bsz, ksz = 0.25, 1
    elif height < 50:
        bsz, ksz = max(0.30, 0.5 + 0.1 * np.random.randn()), 3
    else:
        bsz, ksz = max(0.5, 1.5 + 0.5 * np.random.randn()), 5
    return cv2.GaussianBlur(block, (ksz, ksz), bsz)


def _seamlessClone(obj, dst, center):
    """Blend `obj` into `dst` around `center` with cv2.seamlessClone
    (MIXED_CLONE). Not called by the pygame pipeline in this script."""
    mask = 255 * np.ones(obj.shape, obj.dtype)
    try:
        return cv2.seamlessClone(obj, dst, mask, center, cv2.MIXED_CLONE)
    except Exception:
        print('exception:', obj.shape, dst.shape, mask.shape, center)
        raise


def _rander(bgiGame, string, rowStart, font, get_color):
    """Render `string` onto the surface and degrade the text region.

    Args:
        bgiGame: pygame surface to draw on.
        string: text to draw.
        rowStart: y coordinate of the top of the text.
        font: pygame.freetype font; its `oblique` and `rotation` attributes
            are randomised here.
        get_color: object providing sample_from_data(rgb_array).

    Returns:
        (surface, rowStart, colStart, txtwidth, txtheight). When the text
        does not fit or cannot be drawn, the incoming surface is returned
        together with colStart = txtwidth = txtheight = 0.
    """
    width, height = bgiGame.get_size()
    failed = (bgiGame, rowStart, 0, 0, 0)

    # sample the color
    fg_col, _bg_col = get_color.sample_from_data(
        pygame.surfarray.array3d(bgiGame))
    # plain ints so that pygame accepts the colour whatever numpy returns
    fg_col = tuple(int(c) for c in np.asarray(fg_col).squeeze())

    # change the property of font
    font.oblique = secrets.choice([False, True])
    font.rotation = secrets.choice(range(-5, 5))
    _surface, textRect = font.render(string)
    txtwidth, txtheight = textRect.size

    if txtwidth > width:
        return failed
    colStart = secrets.randbelow(max(1, width - txtwidth))
    if rowStart + txtheight > height or colStart + txtwidth > width:
        return failed

    # render the text
    try:
        font.render_to(bgiGame, (colStart, rowStart), string, fg_col)
    except (pygame.error, TypeError, ValueError):
        # report the failure instead of annotating a region without text
        logging.exception('render failed for %r, fg_col %s', string, fg_col)
        return failed

    # surface -> numpy (rows first, BGR)
    pixels = pygame.surfarray.array3d(bgiGame)
    pixels = cv2.cvtColor(pixels.transpose([1, 0, 2]), cv2.COLOR_RGB2BGR)

    # add noise and blur to the text region only
    rows = slice(rowStart, rowStart + txtheight)
    cols = slice(colStart, colStart + txtwidth)
    block = _addNoise(pixels[rows, cols, :], 4, 20)
    if secrets.choice(range(4)) == 0:
        block = _addSaltNoise(block, np.random.randint(70, 80))
    block = _feather(block, txtheight)
    block = _addNoise(block, 2, 20)
    if secrets.choice(range(4)) == 0:
        block = _addSaltNoise(block, np.random.randint(70, 80))
    pixels[rows, cols, :] = block

    # numpy -> surface
    pixels = cv2.cvtColor(pixels.transpose([1, 0, 2]), cv2.COLOR_BGR2RGB)
    bgiGame = pygame.surfarray.make_surface(pixels)
    return bgiGame, rowStart, colStart, txtwidth, txtheight


def _paste(bgiGame, ttf, size, rowStart, curText, cols, get_color):
    """Prepare one text line and render it at `rowStart`.

    With probability 1/10 the text is replaced by 10 random digits, and with
    probability 1/8 one character is replaced by a dot. Only strings of
    exactly 10 characters are rendered.

    Returns:
        (surface, string, rowStart, colStart, txtwidth, txtheight).
        `string` is '' when the text was skipped; the surface is then the
        unchanged input and the last three values are 0. `cols` is accepted
        for interface compatibility and not used.
    """
    ttfont = freetype.Font(ttf, size)
    curText = curText.strip()

    # random the digit
    numberLength = 10
    digits = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
    if secrets.randbelow(numberLength) == 0:
        curText = ''.join(secrets.choice(digits) for _ in range(numberLength))

    # random the dot
    if secrets.randbelow(numberLength - 2) == 0:
        dotInd = random.randint(1, numberLength - 2)
        string = curText[:dotInd] + '.' + curText[dotInd + 1:]
    else:
        string = curText

    # skip any text that is not exactly numberLength characters long; keep
    # the 6-tuple shape (and the surface) so the caller can unpack and retry
    if len(string) != numberLength:
        return bgiGame, '', rowStart, 0, 0, 0

    bgiGame, rowStart, colStart, txtwidth, txtheight = _rander(
        bgiGame, string, rowStart, ttfont, get_color)
    return bgiGame, string, rowStart, colStart, txtwidth, txtheight


def _textElement(doc, tag, value):
    """Return a new <tag> element of `doc` holding str(value) as its text."""
    node = doc.createElement(tag)
    node.appendChild(doc.createTextNode(str(value)))
    return node


def _xml(doc, anno, string, xminT, yminT, xmaxT, ymaxT):
    """Append one <object> annotation (text content + bounding box) to
    `anno`. Does nothing when `string` is empty."""
    if not string:
        return
    body = doc.createElement('object')
    anno.appendChild(body)
    body.appendChild(_textElement(doc, 'name', 'text'))
    body.appendChild(_textElement(doc, 'textContent', string))
    bndbox = doc.createElement('bndbox')
    for tag, value in (('xmin', xminT), ('ymin', yminT),
                       ('xmax', xmaxT), ('ymax', ymaxT)):
        bndbox.appendChild(_textElement(doc, tag, value))
    body.appendChild(bndbox)


def paste(imgname, bgi, text, ttf, get_color):
    """Fill one background image with text lines, top to bottom.

    Args:
        imgname: file name recorded in the xml header.
        bgi: path of the background image.
        text: list of candidate text lines.
        ttf: path of the font file.
        get_color: colour sampler passed through to the renderer.

    Returns:
        (image as a rows x cols x 3 BGR array, xml Document,
        number of text lines rendered and annotated).
    """
    pygame.init()
    bgiGame = pygame.image.load(bgi)
    width, height = bgiGame.get_size()
    depth = bgiGame.get_bitsize() // 8

    # gap above the first row
    curRowInter = random.randint(3, 7)
    curRow = curRowInter
    # random font size for the first row
    curTtfSize = random.choice(ttfSize)

    # xml header
    doc = Document()
    anno = doc.createElement('Annotations')
    doc.appendChild(anno)
    anno.appendChild(_textElement(doc, 'imgName', imgname))
    sizeNode = doc.createElement('size')
    sizeNode.appendChild(_textElement(doc, 'width', width))
    sizeNode.appendChild(_textElement(doc, 'height', height))
    sizeNode.appendChild(_textElement(doc, 'depth', depth))
    anno.appendChild(sizeNode)

    # paste the text row by row; curRow is a y coordinate, so it is bounded
    # by the image height (not the width)
    numTextDone = 0
    while text and curRow + curTtfSize <= height:
        curText = secrets.choice(text)
        bgiGame, string, curRow, colStart, txtwidth, txtheight = _paste(
            bgiGame, ttf, curTtfSize, curRow, curText, width, get_color)
        if not string:
            # unusable text: retry this row with another line (_paste turns
            # 1 in 10 attempts into a 10-digit string, so this terminates)
            continue
        if txtwidth > 0 and txtheight > 0:
            numTextDone += 1
            _xml(doc, anno, string,
                 xminT=colStart, yminT=curRow,
                 xmaxT=colStart + txtwidth, ymaxT=curRow + txtheight)
        # a failed render has txtheight == 0 and only advances by the gap
        curRow += txtheight + curRowInter
        curRowInter = random.randint(3, 6)
        curTtfSize = random.choice(ttfSize)

    bgi = pygame.surfarray.array3d(bgiGame).transpose([1, 0, 2])
    bgi = cv2.cvtColor(bgi, cv2.COLOR_RGB2BGR)
    return bgi, doc, numTextDone


def handle(indTexts):
    """Generate one image and its xml file from an (index, text lines) pair.

    Raises:
        FileNotFoundError: if bgiDir holds no *.jpg or gTtf holds no *.ttf.
    """
    ind, texts = indTexts
    pid = os.getpid()
    # Pool workers may start with the parent's NumPy RNG state (e.g. when
    # created by fork); reseed so they do not all draw the same noise.
    np.random.seed()

    getcolor = get_color.ColorSample()
    bgis = glob.glob(osp.join(bgiDir, '*.jpg'))
    ttfs = glob.glob(osp.join(gTtf, '*.ttf'))
    if not bgis:
        raise FileNotFoundError('no *.jpg background image in ' + bgiDir)
    if not ttfs:
        raise FileNotFoundError('no *.ttf font in ' + gTtf)
    # pick one background image and one font at random
    bgipath = random.choice(bgis)
    ttf = random.choice(ttfs)

    imgname = 'bgi{}_ind{}_pid{}_ttf{}.jpg'.format(
        osp.basename(bgipath), ind, pid, osp.basename(ttf))
    try:
        bgiNp, doc, numTextDone = paste(imgname, bgipath, texts, ttf, getcolor)
    finally:
        pygame.quit()
    imgnamep = 'bgi{}_ind{}_{}Of{}_ttf{}.jpg'.format(
        osp.basename(bgipath), ind, numTextDone, len(texts), osp.basename(ttf))
    logging.warning(imgnamep)

    # write the image and the xml to their directories
    cv2.imwrite(osp.join(resultImgsDir, imgname), bgiNp)
    xmlFileName = osp.join(resultXmlDir, '{}.xml'.format(imgname[:-4]))
    # toprettyxml(encoding=...) returns utf-8 bytes; write them unchanged so
    # the file matches its xml declaration whatever the locale is
    with open(xmlFileName, "wb") as fxml:
        fxml.write(doc.toprettyxml(indent = " ", newl = "\n", encoding = "utf-8"))


def main():
    """Read the text lines, split them into tasks and run them in a pool."""
    # 1 - read the text lines (assumed to be utf-8) and split them into
    #     interleaved chunks of about gBlockSize lines
    with open(totalFile, encoding='utf-8') as f:
        total = [line.strip() for line in f]
    numP = 30
    inter = math.ceil(len(total) / gBlockSize)
    totalSP = [total[i::inter] for i in range(inter)]

    # 2 - process the chunks with multiple processes
    print('begin', len(totalSP))
    if not totalSP:
        return
    os.makedirs(resultImgsDir, exist_ok=True)
    os.makedirs(resultXmlDir, exist_ok=True)
    with mp.Pool(min(numP, len(totalSP))) as p:
        p.map(handle, enumerate(totalSP))


if __name__ == '__main__':
    main()
結果如圖:
<center>![](https://img2018.cnblogs.com/blog/441382/201811/441382-20181115110620683-1752088276.png)</center> <center>bgi39.jpg_ind0_pid8387_ttf銳字工房雲字庫小標宋GBK.ttf.jpg</center>
<center>![](https://img2018.cnblogs.com/blog/441382/201811/441382-20181115110517430-2110781087.png)</center> <center>bgi39.jpg_ind0_pid8387_ttf銳字工房雲字庫小標宋GBK.ttf.xml</center>
下面是採用PIL和opencv的seamlessClone的粘貼方式。只是PIL這個包在進行文字粘貼的時候不支持文字旋轉;而且對於簡單的文字粘貼,pygame的結果和seamlessClone的效果好像差不多。
# -*- coding: utf-8 -*-
# PIL + cv2.seamlessClone variant: draw the text with PIL, degrade the text
# patch and blend it back into the background with cv2.seamlessClone.
import os
import cv2
import glob
import math
import random
import numpy as np
import os.path as osp
from xml.dom.minidom import Document
import multiprocessing as mp
import logging
from PIL import Image,ImageDraw,ImageFont
import pygame
from pygame.locals import *
from pygame import freetype
import get_color

resultImgsDir = 'crnn_result_imgs1'
resultXmlDir = 'crnn_result_xmls1'
bgiDir = 'bgi'
gTtf = 'ttfs'
totalFile = 'texts.txt'
FORMAT = '%(asctime)-15s [%(processName)s] %(message)s'
logging.basicConfig(format = FORMAT)
gBlockSize = 20  # num of each process's sentences
ttfSize = [28,30,35,40,45,50,55,60,65]
# give up on a row after this many consecutive unusable texts, so that an
# image no text can fit into cannot stall the loop in paste() forever
_MAX_ROW_RETRIES = 50


def _addSaltNoise(block, level=10):
    """Add salt-and-pepper noise to `block` in place and return it.

    Every element independently becomes 0 or 255, each with probability
    1 / (level + 1).
    """
    # randint's upper bound is exclusive, so it must be level + 1 for the
    # `ran == level` branch to ever fire.
    ran = np.random.randint(0, level + 1, block.shape)
    block[ran == 0] = 0
    block[ran == level] = 255
    return block


def _addNoise(block, below=2, high=20):
    """Return a uint8 copy of `block` with Gaussian noise added.

    The noise standard deviation is one integer drawn from [below, high).
    """
    sigma = np.random.randint(below, high)
    noisy = block + np.random.randn(*block.shape) * sigma
    return np.clip(noisy, 0, 255).astype('uint8')


def _feather(block, height):
    """Gaussian-blur `block`; taller text gets a stronger blur."""
    # determine the gaussian-blur std and kernel size:
    if height <= 30:
        bsz, ksz = 0.25, 1
    elif height < 50:
        bsz, ksz = max(0.30, 0.5 + 0.1 * np.random.randn()), 3
    else:
        bsz, ksz = max(0.5, 1.5 + 0.5 * np.random.randn()), 5
    return cv2.GaussianBlur(block, (ksz, ksz), bsz)


def _seamlessClone(obj, dst, center):
    """Blend `obj` into `dst` around `center` with cv2.seamlessClone
    (MIXED_CLONE); the shapes are printed before a failure is re-raised."""
    mask = 255 * np.ones(obj.shape, obj.dtype)
    try:
        return cv2.seamlessClone(obj, dst, mask, center, cv2.MIXED_CLONE)
    except Exception:
        print('exception:', obj.shape, dst.shape, mask.shape, center)
        raise


def _textSize(font, string):
    """Return the (width, height) that ImageFont.getsize used to report.

    getsize was removed in Pillow 10; the right/bottom edges of getbbox
    give the same extent measured from the drawing origin.
    """
    if hasattr(font, 'getbbox'):
        _left, _top, right, bottom = font.getbbox(string)
        return right, bottom
    return font.getsize(string)


def _rander(rawbgi, string, bgr, point, font, get_color):
    """Draw `string` at `point` and blend a degraded copy of the text patch.

    Args:
        rawbgi: rows x cols x 3 uint8 image; paste() in this file passes the
            BGR array produced by cv2.imread / cv2.seamlessClone.
        string: text to draw.
        bgr: accepted for interface compatibility; not used.
        point: (column, row) of the top-left corner of the text.
        font: PIL ImageFont.
        get_color: object providing sample_from_data(rgb_array).

    Returns:
        The image with the text pasted, as a numpy array.
    """
    curCol, curRow = point

    # the sampler expects RGB, so convert before sampling and flip the
    # sampled colour back to the channel order of the image being drawn on
    fg_rgb, _bg_rgb = get_color.sample_from_data(
        cv2.cvtColor(rawbgi, cv2.COLOR_BGR2RGB))
    jitter = np.random.randint(-3, 3, 3)
    # widen before adding: the sample is uint8 and jitter may leave 0..255
    fg_rgb = np.clip(np.asarray(fg_rgb).squeeze().astype(int) + jitter, 0, 255)
    fg_bgr = tuple(int(c) for c in fg_rgb[::-1])

    canvas = Image.fromarray(rawbgi)
    ImageDraw.Draw(canvas).text((curCol, curRow), string, fg_bgr, font=font)
    width, height = _textSize(font, string)

    bgi = np.array(canvas)
    block = bgi[curRow:curRow + height, curCol:curCol + width, :]
    block = _addNoise(block)
    block = _feather(block, height)
    block = _addNoise(block)
    block = _addSaltNoise(block, 50)

    center = (2 * curCol + width) // 2, (2 * curRow + height) // 2
    return _seamlessClone(block, bgi, center)


def _paste(bgi, ttf, size, curRow, curCol, curText, cols, get_color):
    """Prepare one text line and paste it at (curCol, curRow).

    With probability 1/10 (and room for at least 4 characters) the text is
    replaced by 10 random digits, and with probability 1/8 one character is
    replaced by a dot. Only strings of exactly 10 characters that fit into
    the image are pasted.

    Returns:
        (image, string, width, height); `string` is '' and the size is
        (0, 0) when nothing was pasted.
    """
    ttfont = ImageFont.truetype(ttf, size)
    maxNumText = math.floor((cols - curCol) / size)
    curText = curText.strip()
    shouldMaxNumTxt = 10

    # random the digit
    if random.randint(0, 9) == 9 and maxNumText >= 4:
        curText = ''.join(
            str(random.randint(0, 9)) for _ in range(shouldMaxNumTxt))

    # random the dot
    if random.randint(0, 7) == 7:
        dotInd = random.randint(1, shouldMaxNumTxt - 2)
        string = curText[:dotInd] + '.' + curText[dotInd + 1:]
    else:
        string = curText

    # skip when fewer than 10 characters fit or the text is not 10 long
    skipped = (bgi, '', 0, 0)
    if maxNumText < shouldMaxNumTxt or len(curText) < shouldMaxNumTxt:
        return skipped
    if len(string) != shouldMaxNumTxt:
        return skipped

    # get printed width height; the text must lie inside the image, or the
    # patch is truncated and no longer matches the centre used for blending
    width, height = _textSize(ttfont, string)
    if curRow + height > bgi.shape[0] or curCol + width > bgi.shape[1]:
        return skipped

    bgr = [random.randint(100, 254) for _ in range(3)]
    bgi = _rander(bgi, string, bgr, (curCol, curRow), ttfont, get_color)
    return bgi, string, width, height


def _textElement(doc, tag, value):
    """Return a new <tag> element of `doc` holding str(value) as its text."""
    node = doc.createElement(tag)
    node.appendChild(doc.createTextNode(str(value)))
    return node


def _xml(doc, anno, string, xminT, yminT, xmaxT, ymaxT):
    """Append one <object> annotation (text content + bounding box) to
    `anno`. Does nothing when `string` is empty."""
    if not string:
        return
    body = doc.createElement('object')
    anno.appendChild(body)
    body.appendChild(_textElement(doc, 'name', 'text'))
    body.appendChild(_textElement(doc, 'textContent', string))
    bndbox = doc.createElement('bndbox')
    for tag, value in (('xmin', xminT), ('ymin', yminT),
                       ('xmax', xmaxT), ('ymax', ymaxT)):
        bndbox.appendChild(_textElement(doc, tag, value))
    body.appendChild(bndbox)


def paste(imgname, bgi, text, ttf, ttfRandom, get_color):
    """Fill one background image with text lines, top to bottom.

    Args:
        imgname: file name recorded in the xml header.
        bgi: path of the background image.
        text: list of candidate text lines.
        ttf: path of the font file.
        ttfRandom: font sizes to choose from.
        get_color: colour sampler passed through to the renderer.

    Returns:
        (image as a numpy array, xml Document).

    Raises:
        IOError: if the background image cannot be read.
    """
    path = bgi
    bgi = cv2.imread(path)
    if bgi is None:
        # cv2.imread reports failure by returning None
        raise IOError('cannot read background image: {}'.format(path))
    rows, cols, depth = bgi.shape

    curRowInter = random.randint(3, 7)
    curRow = curRowInter
    curTtfSize = random.randint(0, len(ttfRandom) - 1)

    # create the xml head
    doc = Document()
    anno = doc.createElement('Annotations')
    doc.appendChild(anno)
    anno.appendChild(_textElement(doc, 'imgName', imgname))
    sizeNode = doc.createElement('size')
    sizeNode.appendChild(_textElement(doc, 'width', cols))
    sizeNode.appendChild(_textElement(doc, 'height', rows))
    sizeNode.appendChild(_textElement(doc, 'depth', depth))
    anno.appendChild(sizeNode)

    misses = 0
    while text and curRow + ttfRandom[curTtfSize] <= rows:
        # cur col point
        curCol = random.randint(0, cols - 1)
        # if curCol is beyond 0.9 * cols, leave this row empty
        if curCol < cols * 0.9:
            curText = random.choice(text)
            bgi, string, width, height = _paste(
                bgi, ttf, ttfRandom[curTtfSize], curRow, curCol, curText,
                cols, get_color)
            if string:
                _xml(doc, anno, string,
                     xminT=curCol, yminT=curRow,
                     xmaxT=curCol + width, ymaxT=curRow + height)
            elif misses < _MAX_ROW_RETRIES:
                # retry the same row with another column / text
                misses += 1
                continue
        misses = 0
        curRow += curRowInter + ttfRandom[curTtfSize]
        # cur interval
        curRowInter = random.randint(3, 7)
        # cur ttf size
        curTtfSize = random.randint(0, len(ttfRandom) - 1)
    return np.array(bgi), doc


def handle(text):
    """Generate one image and its xml file from an (index, text lines) pair.

    Raises:
        FileNotFoundError: if bgiDir holds no *.jpg or gTtf holds no *.ttf.
    """
    ind, text = text
    pid = os.getpid()
    # Pool workers may start with the parent's NumPy RNG state (e.g. when
    # created by fork); reseed so they do not all draw the same noise.
    np.random.seed()

    getcolor = get_color.ColorSample()
    bgis = glob.glob(osp.join(bgiDir, '*.jpg'))
    ttfs = glob.glob(osp.join(gTtf, '*.ttf'))
    if not bgis:
        raise FileNotFoundError('no *.jpg background image in ' + bgiDir)
    if not ttfs:
        raise FileNotFoundError('no *.ttf font in ' + gTtf)
    # select one background image and one font
    bgi = random.choice(bgis)
    curTtf = random.randint(0, len(ttfs) - 1)
    ttf = ttfs[curTtf]
    # random subset of the font sizes; the first size is always kept
    ttfRandom = [ttfSize[0]] + [
        size for size in ttfSize[1:] if random.randint(0, 1)]

    imgname = '{}_{}_{}.jpg'.format(ind, pid, curTtf)
    bgi, doc = paste(imgname, bgi, text, ttf, ttfRandom, getcolor)
    cv2.imwrite(osp.join(resultImgsDir, imgname), bgi)
    xmlFileName = osp.join(resultXmlDir, '{}.xml'.format(imgname[:-4]))
    # toprettyxml(encoding=...) returns utf-8 bytes; write them unchanged so
    # the file matches its xml declaration whatever the locale is
    with open(xmlFileName, "wb") as fxml:
        fxml.write(doc.toprettyxml(indent = " ", newl = "\n", encoding = "utf-8"))
    logging.warning('{}'.format(ind))


def main():
    """Read the text lines, split them into tasks and run the first 1000
    tasks in a process pool."""
    # the text file is assumed to be utf-8
    with open(totalFile, encoding='utf-8') as f:
        total = [line.strip() for line in f]
    numP = 30
    inter = math.ceil(len(total) / gBlockSize)
    totalSP = [total[i::inter] for i in range(inter)]
    print('begin')
    tasks = totalSP[:1000]
    if not tasks:
        return
    os.makedirs(resultImgsDir, exist_ok=True)
    os.makedirs(resultXmlDir, exist_ok=True)
    with mp.Pool(min(numP, len(tasks))) as p:
        p.map(handle, enumerate(tasks))


if __name__ == '__main__':
    main()