0 引言
所需要的操作: 1 - 下載 colors_new.cp; 2 - 將下面兩份代碼分別存成對應的 get_color.py 和 pygame_main.py; 3 - 用 python 運行 pygame_main.py 即可。
# get_color.py
import cv2
import pickle as cp
import numpy as np


class ColorSample(object):
    """Pick a text (foreground) colour that suits a given background image.

    The colour model is an array with one row per colour pair.  As used by
    the methods below, columns 0:3 / 3:6 hold the mean / std of the first
    colour and columns 6:9 / 9:12 hold the mean / std of the second colour.
    """

    def __init__(self, model_path='colors_new.cp'):
        """Load the colour model and pre-compute its Lab representation.

        colors_new.cp comes from
        https://github.com/JarveeLee/SynthText_Chinese_version/tree/master/data/models/colors_new.cp

        Args:
            model_path: path of the pickled colour model.
        """
        # NOTE(review): pickle can execute arbitrary code while loading;
        # only load a model file obtained from a trusted source.
        with open(model_path, 'rb') as f:
            self.colorsRGB = cp.load(f, encoding='latin-1')
        self.ncol = self.colorsRGB.shape[0]  # 4941 in the original author's model
        # Convert the colour means from RGB to Lab for better nearest
        # neighbour computations.  Rows [0, ncol) are the first colours,
        # rows [ncol, 2 * ncol) are the second colours.
        means = np.r_[self.colorsRGB[:, 0:3], self.colorsRGB[:, 6:9]].astype('uint8')
        self.colorsLAB = np.squeeze(cv2.cvtColor(means[None, :, :], cv2.COLOR_RGB2Lab))

    def sample_normal(self, col_mean, col_std):
        """Sample a colour from a normal distribution.

        The sample is centred on ``col_mean`` with standard deviation
        ``col_std``; one random draw is shared by all channels.  The result
        is clipped to [0, 255] and returned as uint8.
        """
        col_sample = col_mean + col_std * np.random.randn()
        return np.clip(col_sample, 0, 255).astype('uint8')

    def sample_from_data(self, bg_mat):
        """Return ``(RGB_foreground, RGB_background)`` for a background image.

        Args:
            bg_mat: an n x m x 3 RGB image.

        Returns:
            A tuple of two 3-vectors (uint8): the foreground colour and the
            background colour.
        """
        bg_lab = cv2.cvtColor(bg_mat, cv2.COLOR_RGB2Lab)
        bg_lab = np.reshape(bg_lab, (np.prod(bg_lab.shape[:2]), 3))
        bg_mean = np.mean(bg_lab, axis=0)

        # Nearest neighbour (in Lab space) of the mean background colour.
        norms = np.linalg.norm(self.colorsLAB - bg_mean[None, :], axis=1)
        nn = np.argmin(norms)

        data_col = self.colorsRGB[np.mod(nn, self.ncol), :]
        col1 = self.sample_normal(data_col[:3], data_col[3:6])
        col2 = self.sample_normal(data_col[6:9], data_col[9:12])

        if nn < self.ncol:
            # The first colour matched the background, so the second one
            # becomes the foreground.
            return (col2, col1)
        # Swap so that the second returned colour is the one close to the
        # input background colour.
        return (col1, col2)


if __name__ == '__main__':
    import sys

    if len(sys.argv) != 2:
        sys.exit('usage: python get_color.py <background_image>')
    bgi = cv2.imread(sys.argv[1])
    if bgi is None:
        sys.exit('cannot read image: {}'.format(sys.argv[1]))
    # cv2.imread yields BGR, sample_from_data expects RGB.
    bgi = cv2.cvtColor(bgi, cv2.COLOR_BGR2RGB)
    fg_col, bg_col = ColorSample().sample_from_data(bgi)
    print('foreground:', fg_col, 'background:', bg_col)
# -*- coding: utf-8 -*-
# pygame replace Image
import os
import cv2
import glob
import math
import random
import numpy as np
import os.path as osp
from xml.dom.minidom import Document
import multiprocessing as mp
import logging
from PIL import Image,ImageDraw,ImageFont
import secrets
import pygame
from pygame.locals import *
from pygame import freetype
import get_color

resultImgsDir = '/home/result_imgs'  # where the generated images are stored
resultXmlDir = '/home/result_xmls'  # where the generated xml files are stored
bgiDir = '/home/background_images'  # directory with the background images
gTtf = '/home/ttfs'  # directory with the font files
totalFile = '/home/zzc/data/synth_recepit_text/result_200.txt'  # text to render, one sentence (or word) per line

FORMAT = '%(asctime)-15s [%(processName)s] %(message)s'
logging.basicConfig(format = FORMAT)

gBlockSize = 20  # number of sentences handled by one task
ttfSize = [28,30,35,40,45,50,55,60,65]
#====test
#charset = [line.strip().split('\t')[1] for line in open('text/chars_gb2312').readlines()[:-1]]


def _addSaltNoise(block, level=10):
    """Add salt-and-pepper noise to ``block`` in place and return it.

    Each element independently becomes 0 or 255 with probability
    1 / (level + 1) each.
    """
    # randint excludes the upper bound, so level + 1 is required for the
    # value ``level`` (the 255 mask) to ever be drawn.
    ran = np.random.randint(0, level + 1, block.shape)
    block[ran == 0] = 0
    block[ran == level] = 255
    return block


def _addNoise(block, below=4, high=20):
    """Return ``block`` plus Gaussian noise as a new uint8 array.

    The noise scale is drawn uniformly from the integers [below, high).
    """
    randValue = np.random.randn(*block.shape) * np.random.randint(below, high)
    return np.clip(block + randValue, 0, 255).astype('uint8')


def _feather(block, height):
    """Blur ``block`` with a Gaussian whose strength depends on text height."""
    # determine the gaussian-blur std:
    if height <= 30:
        bsz = 0.25
        ksz = 1
    elif 30 < height < 50:
        bsz = max(0.30, 0.5 + 0.1 * np.random.randn())
        ksz = 3
    else:
        bsz = max(0.5, 1.5 + 0.5 * np.random.randn())
        ksz = 5
    return cv2.GaussianBlur(block, (ksz, ksz), bsz)


def _seamlessClone(obj, dst, center):
    """Blend ``obj`` into ``dst`` around ``center`` with cv2.seamlessClone."""
    mask = 255 * np.ones(obj.shape, obj.dtype)
    try:
        mixed_clone = cv2.seamlessClone(obj, dst, mask, center, cv2.MIXED_CLONE)
    except Exception:
        print('exception:', obj.shape, dst.shape, mask.shape, center)
        raise
    return mixed_clone


def _rander(bgiGame, string, rowStart, font, get_color):
    """Render ``string`` onto the surface at row ``rowStart``.

    Args:
        bgiGame: pygame surface holding the background image.
        string: text to render.
        rowStart: top row of the text.
        font: pygame freetype font; its oblique/rotation are randomised here.
        get_color: object with a ``sample_from_data(rgb_array)`` method.

    Returns:
        ``(surface, rowStart, colStart, txtwidth, txtheight)``.  When the
        text cannot be placed or rendered, the surface is returned untouched
        and colStart, txtwidth and txtheight are all 0.
    """
    failed = (bgiGame, rowStart, 0, 0, 0)
    width, height = bgiGame.get_size()

    # sample the text colour from the background
    bgiNp = pygame.surfarray.array3d(bgiGame)
    fg_col, bg_col = get_color.sample_from_data(bgiNp)
    # plain ints: pygame colour arguments are sequences of integers
    fg_col = tuple(int(c) for c in np.asarray(fg_col).squeeze())

    # change the properties of the font
    font.oblique = secrets.choice([False, True])
    font.rotation = secrets.choice(range(-5, 5))

    txtwidth, txtheight = font.render(string)[1].size
    if txtwidth <= 0 or txtheight <= 0:
        return failed
    if txtwidth > width or rowStart + txtheight > height:
        return failed
    colStart = secrets.randbelow(max(1, width - txtwidth))

    # render the text
    try:
        font.render_to(bgiGame, (colStart, rowStart), string, fg_col)
    except Exception:
        # Nothing was drawn, so do not report a bounding box for it.
        logging.exception('render failed, fg_col=%s', fg_col)
        return failed

    # surface -> numpy (rows first, BGR)
    bgiNp = pygame.surfarray.array3d(bgiGame)
    bgiNp = cv2.cvtColor(bgiNp.transpose([1, 0, 2]), cv2.COLOR_RGB2BGR)

    # add noise and blur to the text region only
    rowEnd = rowStart + txtheight
    colEnd = colStart + txtwidth
    block = bgiNp[rowStart:rowEnd, colStart:colEnd, :]
    block = _addNoise(block, 4, 20)
    if secrets.randbelow(4) == 0:
        block = _addSaltNoise(block, np.random.randint(70, 80))
    block = _feather(block, txtheight)
    block = _addNoise(block, 2, 20)
    if secrets.randbelow(4) == 0:
        block = _addSaltNoise(block, np.random.randint(70, 80))
    bgiNp[rowStart:rowEnd, colStart:colEnd, :] = block

    # numpy -> surface
    bgiNp = cv2.cvtColor(bgiNp.transpose([1, 0, 2]), cv2.COLOR_BGR2RGB)
    bgiGame = pygame.surfarray.make_surface(bgiNp)
    return bgiGame, rowStart, colStart, txtwidth, txtheight


def _paste(bgiGame, ttf, size, rowStart, curText, cols, get_color):
    """Prepare one text line and render it.

    Only strings of exactly 10 characters are rendered.  With probability
    1/10 the text is replaced by 10 random digits, and with probability 1/8
    one character is replaced by a dot.

    Returns:
        ``(surface, string, rowStart, colStart, txtwidth, txtheight)``.
        ``string`` is empty when the text was rejected before rendering; the
        surface and rowStart are then returned unchanged.
    """
    ttfont = freetype.Font(ttf, size)
    curText = curText.strip()

    # random the digit
    numberLength = 10
    digits = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
    if secrets.randbelow(numberLength) == 0:
        curText = ''.join([secrets.choice(digits) for _ in range(numberLength)])

    # random the dot
    if secrets.randbelow(numberLength - 2) == 0:
        dotInd = random.randint(1, numberLength - 2)
        string = curText[:dotInd] + '.' + curText[dotInd + 1:]
    else:
        string = curText

    # skip every text that is not exactly numberLength characters long
    if len(string) != numberLength:
        # Same arity as the success path, and keep surface/row valid so the
        # caller can simply retry with another text.
        return bgiGame, '', rowStart, 0, 0, 0

    bgiGame, rowStart, colStart, txtwidth, txtheight = _rander(
        bgiGame, string, rowStart, ttfont, get_color)
    return bgiGame, string, rowStart, colStart, txtwidth, txtheight


def _xml(doc, anno, string, xminT, yminT, xmaxT, ymaxT):
    """Append an ``object`` node (text content + bounding box) to ``anno``.

    Does nothing when ``string`` is empty.
    """
    if not string:
        return
    body = doc.createElement('object')
    anno.appendChild(body)

    name = doc.createElement('name')
    name.appendChild(doc.createTextNode('text'))
    body.appendChild(name)

    content = doc.createElement('textContent')
    content.appendChild(doc.createTextNode(string))
    body.appendChild(content)

    bndbox = doc.createElement('bndbox')
    corners = (('xmin', xminT), ('ymin', yminT), ('xmax', xmaxT), ('ymax', ymaxT))
    for tag, value in corners:
        node = doc.createElement(tag)
        node.appendChild(doc.createTextNode(str(value)))
        bndbox.appendChild(node)
    body.appendChild(bndbox)


def paste(imgname, bgi, text, ttf, get_color):
    """Fill one background image with text lines, top to bottom.

    Args:
        imgname: file name recorded in the xml.
        bgi: path of the background image.
        text: non-empty list of candidate strings.
        ttf: path of the font file.
        get_color: colour sampler passed on to the renderer.

    Returns:
        ``(image, doc, numTextDone)``: the BGR image as a numpy array, the
        xml document and the number of lines actually rendered.
    """
    pygame.init()
    bgiGame = pygame.image.load(bgi)
    width, height = bgiGame.get_size()
    depth = bgiGame.get_bitsize() // 8

    # spacing before the first row
    curRowInter = random.randint(3, 7)
    curRow = curRowInter
    # random font size
    curTtfSize = random.choice(ttfSize)

    # create the xml head
    doc = Document()
    anno = doc.createElement('Annotations')
    doc.appendChild(anno)
    imgNameNode = doc.createElement('imgName')
    imgNameNode.appendChild(doc.createTextNode(imgname))
    anno.appendChild(imgNameNode)
    sizeNode = doc.createElement('size')
    for tag, value in (('width', width), ('height', height), ('depth', depth)):
        node = doc.createElement(tag)
        node.appendChild(doc.createTextNode(str(value)))
        sizeNode.appendChild(node)
    anno.appendChild(sizeNode)

    # paste the text onto the image row by row; rows run along the image
    # height, so the height (not the width) bounds the loop
    numTextDone = 0
    while curRow + curTtfSize <= height:
        curText = secrets.choice(text)
        bgiGame, string, curRow, colStart, txtwidth, txtheight = _paste(
            bgiGame, ttf, curTtfSize, curRow, curText, width, get_color)
        if not string:
            # rejected before rendering: retry this row with another text
            continue
        if txtwidth > 0 and txtheight > 0:
            numTextDone += 1
            _xml(doc, anno, string,
                 xminT = colStart, yminT = curRow,
                 xmaxT = colStart + txtwidth, ymaxT = curRow + txtheight)
        # Advance even when rendering failed (zero-size box), otherwise a
        # text that does not fit would be retried on the same row forever.
        curRow += txtheight
        curRow += curRowInter
        # next interval
        curRowInter = random.randint(3, 6)
        # next font size
        curTtfSize = random.choice(ttfSize)

    bgi = pygame.surfarray.array3d(bgiGame).transpose([1, 0, 2])
    bgi = cv2.cvtColor(bgi, cv2.COLOR_RGB2BGR)
    return bgi, doc, numTextDone


def handle(indTexts):
    """Worker: build one synthetic image (+ xml) from one chunk of texts.

    Args:
        indTexts: ``(index, texts)`` pair as produced by ``enumerate``.
    """
    ind, texts = indTexts
    # process id, used in the output file name
    pid = os.getpid()
    # colour sampler
    getcolor = get_color.ColorSample()

    bgis = glob.glob(osp.join(bgiDir, '*.jpg'))
    if not bgis:
        raise FileNotFoundError('no *.jpg background image in {}'.format(bgiDir))
    ttfs = glob.glob(osp.join(gTtf, '*.ttf'))
    if not ttfs:
        raise FileNotFoundError('no *.ttf font in {}'.format(gTtf))
    # pick one background image and one font at random
    bgipath = random.choice(bgis)
    ttf = random.choice(ttfs)

    imgname = 'bgi{}_ind{}_pid{}_ttf{}.jpg'.format(osp.basename(bgipath),ind,pid,osp.basename(ttf))
    try:
        bgiNp, doc, numTextDone = paste(imgname, bgipath, texts, ttf, getcolor)
    finally:
        pygame.quit()
    imgnamep = 'bgi{}_ind{}_{}Of{}_ttf{}.jpg'.format(osp.basename(bgipath),ind,numTextDone,len(texts),osp.basename(ttf))
    logging.warning(imgnamep)

    # write the image and the xml to their directories
    os.makedirs(resultImgsDir, exist_ok=True)
    os.makedirs(resultXmlDir, exist_ok=True)
    cv2.imwrite(osp.join(resultImgsDir, imgname), bgiNp)
    xmlFileName = osp.join(resultXmlDir, '{}.xml'.format(imgname[:-4]))
    with open(xmlFileName, "w", encoding='utf-8') as fxml:
        fxml.write(str(doc.toprettyxml(indent = " ", newl = "\n", encoding = "utf-8"),encoding = 'utf-8'))
    return


if __name__ == '__main__':
    # 1 - read the text lines and split them into chunks
    with open(totalFile) as ftotal:
        total = [line.strip() for line in ftotal]
    numP = 30
    totalSP = []
    inter = math.ceil(len(total) / gBlockSize)
    for i in range(inter):
        totalSP.append(total[i::inter])
    # 2 - process the chunks in a pool of worker processes
    print('begin', len(totalSP))
    with mp.Pool(numP) as p:
        p.map(handle, enumerate(totalSP))
# -*- coding: utf-8 -*-
import os
import cv2
import glob
import math
import random
import numpy as np
import os.path as osp
from xml.dom.minidom import Document
import multiprocessing as mp
import logging
from PIL import Image,ImageDraw,ImageFont
import pygame
from pygame.locals import *
from pygame import freetype
import get_color

resultImgsDir = 'crnn_result_imgs1'
resultXmlDir = 'crnn_result_xmls1'
bgiDir = 'bgi'
gTtf = 'ttfs'
totalFile = 'texts.txt'

FORMAT = '%(asctime)-15s [%(processName)s] %(message)s'
logging.basicConfig(format = FORMAT)

gBlockSize = 20  # num of each process's sentences
ttfSize = [28,30,35,40,45,50,55,60,65]


def _addSaltNoise(block, level=10):
    """Add salt-and-pepper noise to ``block`` in place and return it.

    Each element independently becomes 0 or 255 with probability
    1 / (level + 1) each.
    """
    # randint excludes the upper bound, so level + 1 is required for the
    # value ``level`` (the 255 mask) to ever be drawn.
    ran = np.random.randint(0, level + 1, block.shape)
    block[ran == 0] = 0
    block[ran == level] = 255
    return block


def _addNoise(block):
    """Return ``block`` plus Gaussian noise as a new uint8 array.

    The noise scale is drawn uniformly from the integers [2, 20).
    """
    randValue = np.random.randn(*block.shape) * np.random.randint(2, 20)
    return np.clip(block + randValue, 0, 255).astype('uint8')


def _feather(block, height):
    """Blur ``block`` with a Gaussian whose strength depends on text height."""
    # determine the gaussian-blur std:
    if height <= 30:
        bsz = 0.25
        ksz = 1
    elif 30 < height < 50:
        bsz = max(0.30, 0.5 + 0.1 * np.random.randn())
        ksz = 3
    else:
        bsz = max(0.5, 1.5 + 0.5 * np.random.randn())
        ksz = 5
    return cv2.GaussianBlur(block, (ksz, ksz), bsz)


def _seamlessClone(obj, dst, center):
    """Blend ``obj`` into ``dst`` around ``center`` with cv2.seamlessClone."""
    mask = 255 * np.ones(obj.shape, obj.dtype)
    try:
        mixed_clone = cv2.seamlessClone(obj, dst, mask, center, cv2.MIXED_CLONE)
    except Exception:
        print('exception:', obj.shape, dst.shape, mask.shape, center)
        raise
    return mixed_clone


def _textSize(font, string):
    """Return ``(width, height)`` of ``string`` rendered with ``font``.

    ``ImageFont.getsize`` was removed in Pillow 10, so ``getbbox`` is used
    when the font provides it and ``getsize`` is only a fallback for very
    old Pillow versions.
    """
    if hasattr(font, 'getbbox'):
        # right/bottom are measured from the drawing origin
        # NOTE(review): expected to equal what getsize used to return - confirm
        left, top, right, bottom = font.getbbox(string)
        return right, bottom
    return font.getsize(string)


def _rander(rawbgi, string, bgr, point, font, get_color):
    """Draw ``string`` on the image at ``point`` and blend it in.

    Args:
        rawbgi: background image as a numpy array (rows, cols, 3).
        string: text to draw.
        bgr: unused, kept for interface compatibility.
        point: ``(column, row)`` of the top-left corner of the text.
        font: PIL font.
        get_color: object with a ``sample_from_data(image)`` method.

    Returns:
        The image (numpy array) with the text rendered.
    """
    # NOTE(review): the caller passes a cv2.imread (BGR) array while the
    # colour sampler documents RGB input - confirm the channel order.
    bgi = Image.fromarray(rawbgi)
    draw = ImageDraw.Draw(bgi)
    curCol, curRow = point

    fg_col, bg_col = get_color.sample_from_data(rawbgi)
    # Jitter the colour, then clip: uint8 + int64 promotes to int64, so the
    # sum can leave the valid 0..255 range.
    fg_col = np.clip(fg_col + np.random.randint(-3, 3, [1, 3]), 0, 255)
    fill = tuple(int(c) for c in fg_col.squeeze())
    draw.text((curCol, curRow), string, fill, font=font)

    width, height = _textSize(font, string)
    bgi = np.array(bgi)
    block = bgi[curRow:curRow + height, curCol:curCol + width, :]
    block = _addNoise(block)
    block = _feather(block, height)
    block = _addNoise(block)
    block = _addSaltNoise(block, 50)

    center = (curCol + curCol + width) // 2, (curRow + curRow + height) // 2
    bgi = _seamlessClone(block, bgi, center)
    return bgi


def _paste(bgi, ttf, size, curRow, curCol, curText, cols, get_color):
    """Prepare one text line and render it at ``(curCol, curRow)``.

    Only strings of exactly 10 characters are rendered, and only when at
    least 10 characters of ``size`` pixels fit between ``curCol`` and the
    right border.

    Returns:
        ``(image, string, width, height)``; ``string`` is empty when nothing
        was rendered, width/height are the measured size of ``string``.
    """
    ttfont = ImageFont.truetype(ttf, size)
    maxNumText = math.floor((cols - curCol) / size)
    curText = curText.strip()

    # random the digit
    shouldMaxNumTxt = 10
    if random.randint(0, 9) == 9 and maxNumText >= 4:
        curText = ''.join([str(random.randint(0, 9)) for _ in range(shouldMaxNumTxt)])

    # random the dot
    if random.randint(0, 7) == 7:
        dotInd = random.randint(1, shouldMaxNumTxt - 2)
        string = curText[:dotInd] + '.' + curText[dotInd + 1:]
    else:
        # The original also computed a random 10-character window here, but
        # overwrote it with the full text right away; only the full text is
        # kept.
        string = curText

    # skip when fewer than 10 characters fit or the text is too short
    if maxNumText < 10 or len(curText) < 10:
        string = ''

    if len(string) == 10:
        bgr = [random.randint(100, 254) for i in range(3)]
        bgi = _rander(bgi, string, bgr, (curCol, curRow), ttfont, get_color)
    else:
        string = ''

    # get printed width height
    width, height = _textSize(ttfont, string)
    return bgi, string, width, height


def _xml(doc, anno, string, xminT, yminT, xmaxT, ymaxT):
    """Append an ``object`` node (text content + bounding box) to ``anno``.

    Does nothing when ``string`` is empty.
    """
    if not string:
        return
    body = doc.createElement('object')
    anno.appendChild(body)

    name = doc.createElement('name')
    name.appendChild(doc.createTextNode('text'))
    body.appendChild(name)

    content = doc.createElement('textContent')
    content.appendChild(doc.createTextNode(string))
    body.appendChild(content)

    bndbox = doc.createElement('bndbox')
    corners = (('xmin', xminT), ('ymin', yminT), ('xmax', xmaxT), ('ymax', ymaxT))
    for tag, value in corners:
        node = doc.createElement(tag)
        node.appendChild(doc.createTextNode(str(value)))
        bndbox.appendChild(node)
    body.appendChild(bndbox)


def paste(imgname, bgi, text, ttf, ttfRandom, get_color):
    """Fill one background image with text lines, top to bottom.

    Args:
        imgname: file name recorded in the xml.
        bgi: path of the background image.
        text: non-empty list of candidate strings.
        ttf: path of the font file.
        ttfRandom: non-empty list of font sizes to choose from.
        get_color: colour sampler passed on to the renderer.

    Returns:
        ``(image, doc)``: the image as a numpy array and the xml document.

    Raises:
        FileNotFoundError: when the background image cannot be read.
    """
    bgiPath = bgi
    bgi = cv2.imread(bgiPath)
    if bgi is None:
        raise FileNotFoundError('cannot read background image: {}'.format(bgiPath))
    rows, cols, depth = bgi.shape

    curRowInter = random.randint(3, 7)
    curRow = curRowInter
    curTtfSize = random.randint(0, len(ttfRandom) - 1)

    # create the xml head
    doc = Document()
    anno = doc.createElement('Annotations')
    doc.appendChild(anno)
    imgNameNode = doc.createElement('imgName')
    imgNameNode.appendChild(doc.createTextNode(imgname))
    anno.appendChild(imgNameNode)
    sizeNode = doc.createElement('size')
    for tag, value in (('width', cols), ('height', rows), ('depth', depth)):
        node = doc.createElement(tag)
        node.appendChild(doc.createTextNode(str(value)))
        sizeNode.appendChild(node)
    anno.appendChild(sizeNode)

    # Upper bound on consecutive rejected texts for one row; without it the
    # loop never ends when no text can ever fit (e.g. a narrow image).
    maxRetries = 100
    retries = 0
    while curRow + ttfRandom[curTtfSize] <= rows:
        # cur col point
        curCol = random.randint(0, cols - 1)
        # if curCol is beyond 0.9*cols, do not paste this line
        if curCol < cols * 0.9:
            curText = text[random.randint(0, len(text) - 1)]
            bgi, string, txtWidth, txtHeight = _paste(
                bgi, ttf, ttfRandom[curTtfSize], curRow, curCol, curText, cols, get_color)
            if not string and retries < maxRetries:
                retries += 1
                continue
            _xml(doc, anno, string,
                 xminT = curCol, yminT = curRow,
                 xmaxT = curCol + txtWidth, ymaxT = curRow + txtHeight)
        retries = 0
        curRow += curRowInter
        curRow += ttfRandom[curTtfSize]
        # cur interval
        curRowInter = random.randint(3, 7)
        # cur ttf size
        curTtfSize = random.randint(0, len(ttfRandom) - 1)
    return np.array(bgi), doc


def handle(text):
    """Worker: build one synthetic image (+ xml) from one chunk of texts.

    Args:
        text: ``(index, texts)`` pair as produced by ``enumerate``.
    """
    ind, text = text
    # pid, used in the output file name
    pid = os.getpid()
    getcolor = get_color.ColorSample()

    # select one background image
    bgis = glob.glob(osp.join(bgiDir, '*.jpg'))
    if not bgis:
        raise FileNotFoundError('no *.jpg background image in {}'.format(bgiDir))
    bgi = bgis[random.randint(0, len(bgis) - 1)]

    # select one font
    ttfs = glob.glob(osp.join(gTtf, '*.ttf'))
    if not ttfs:
        raise FileNotFoundError('no *.ttf font in {}'.format(gTtf))
    curTtf = random.randint(0, len(ttfs) - 1)
    ttf = ttfs[curTtf]

    # random subset of the font sizes; the first size is always kept
    keep = [1] + [random.randint(0, 1) for i in range(len(ttfSize) - 1)]
    ttfRandom = [size for flag, size in zip(keep, ttfSize) if flag * size != 0]

    imgname = '{}_{}_{}.jpg'.format(ind,pid,curTtf)
    bgi, doc = paste(imgname, bgi, text, ttf, ttfRandom, getcolor)

    os.makedirs(resultImgsDir, exist_ok=True)
    os.makedirs(resultXmlDir, exist_ok=True)
    cv2.imwrite(osp.join(resultImgsDir, imgname), bgi)
    xmlFileName = osp.join(resultXmlDir, '{}.xml'.format(imgname[:-4]))
    with open(xmlFileName, "w", encoding='utf-8') as fxml:
        fxml.write(str(doc.toprettyxml(indent = " ", newl = "\n", encoding = "utf-8"),encoding = 'utf-8'))
    logging.warning('{}'.format(ind))
    return


if __name__ == '__main__':
    with open(totalFile) as ftotal:
        total = [line.strip() for line in ftotal]
    numP = 30
    totalSP = []
    inter = math.ceil(len(total) / gBlockSize)
    for i in range(inter):
        totalSP.append(total[i::inter])
    print('begin')
    with mp.Pool(numP) as p:
        p.map(handle, enumerate(totalSP[:1000]))