一個處理圖片的python腳本:移除全部的非jpg文件

在機器學習中免不了和圖片打交道。有時候收集來的圖片後綴名和實際類型會存在不一致的狀況,這裏可能就需要涉及到圖片文件的批量處理。簡單粗暴上代碼(Python):

import struct 
import os
def gci(filepath):
  """Recursively delete every file under ``filepath`` that is not a JPEG.

  The file type is decided by ``filetype()`` (magic-number sniffing), not by
  the file extension. The path of each file is printed just before it is
  removed.

  Args:
    filepath: Directory to walk. Sub-directories are processed recursively.

  Raises:
    OSError: Propagated from ``os.listdir``, ``open`` or ``os.remove``.
  """
  for name in os.listdir(filepath):
    # Join the directory and the entry name exactly once. Joining the
    # directory onto an already-joined path duplicates the prefix whenever
    # ``filepath`` is relative (e.g. "imgs/imgs/a.png").
    path = os.path.join(filepath, name)
    if os.path.isdir(path):
      gci(path)
    elif filetype(path) != 'JPEG':
      print(path)
      os.remove(path)

def typeList():
  """Return the table of known image signatures.

  Keys are the leading bytes of a file written as uppercase hex; values are
  the corresponding image type names.
  """
  signatures = (
    ("FFD8FF", "JPEG"),
    ("89504E47", "PNG"),
    ("47494638", "GIF"),
  )
  return dict(signatures)
    

def bytes2hex(bytes):
  """Render a sequence of integers as a single uppercase hex string.

  Each value is formatted with ``%x``; a rendering with an odd number of
  characters gets a leading ``0`` so single-digit byte values take two
  characters.
  """
  pieces = []
  for value in bytes:
    digits = u"%x" % value
    if len(digits) % 2 == 1:
      digits = u"0" + digits
    pieces.append(digits)
  return u"".join(pieces).upper()

def filetype(filename):
  """Detect an image type from the leading magic bytes of a file.

  The first bytes of the file are compared, as uppercase hex, against every
  signature returned by ``typeList()``; the first matching entry wins.

  Args:
    filename: Path of the file to inspect.

  Returns:
    The type name associated with the matching signature, or ``'unknown'``
    when no signature matches (including files shorter than a signature).

  Raises:
    OSError: If the file cannot be opened or read.
  """
  tl = typeList()
  if not tl:
    return 'unknown'
  # Floor division: on Python 3 ``/`` yields a float, which cannot be used
  # as a byte count.
  longest = max(len(hcode) for hcode in tl) // 2
  # Read the header once; ``with`` closes the file even if reading fails.
  with open(filename, 'rb') as binfile:
    # bytearray yields integers when indexed, which bytes2hex expects.
    header = bytearray(binfile.read(longest))
  for hcode, name in tl.items():
    numOfBytes = len(hcode) // 2
    # A file shorter than the signature cannot match it; skip rather than
    # fail on the short read.
    if len(header) < numOfBytes:
      continue
    if bytes2hex(header[:numOfBytes]) == hcode:
      return name
  return 'unknown'



# Script entry point: replace the placeholder with the image folder to clean.
# WARNING: gci() permanently deletes every file under that folder that is not
# detected as JPEG.
gci('your_img_folder_path')
複製代碼
相關文章
相關標籤/搜索