def FilePronew(src=r'D:/jie/filepro/1.sql', dst=r'D:/jie/filepro/111.sql'):
    """Copy an Oracle SQL export, trimming over-long TO_TIMESTAMP literals.

    The export writes timestamps such as
    ``TO_TIMESTAMP('2017040110310003100000', 'YYYYMMDDHH24MISSFF6')`` whose
    digit string is longer than the 20 digits the format mask describes
    (two surplus zeros in the example).  Every digit run that directly
    follows ``TO_TIMESTAMP(`` (and any opening quotes) is cut back to its
    first 20 digits; all other text is copied unchanged.

    Args:
        src: Path of the SQL file to read.
        dst: Path of the corrected file to write (overwritten if present).
    """
    # Group 1 keeps the call prefix plus the 20 wanted digits; the trailing
    # \d+ swallows the surplus digits so that the substitution drops them.
    # Unlike the former "strip one digit, at most ten times" loop, this
    # handles any number of surplus digits in a single pass.
    overlong = re.compile(r"(TO_TIMESTAMP\('*\d{20})\d+")
    with open(src) as fin:
        with open(dst, 'w') as fout:
            # Stream line by line instead of loading the whole dump.
            for line in fin:
                fout.write(overlong.sub(r'\1', line))
def SplitFileByDate(src=r'D:/jie/data/gongxiang/TRANS_REFUND.sql', out_dir=''):
    """Split a SQL dump into one file per record date.

    The dump mixes records of several dates in no particular order.  Each
    line is appended to ``TRANS_REFUND_<YYYYMMDD>.sql``, where the date is
    taken from the first ``TO_TIMESTAMP(' YYYY-MM-DD`` on that line.  A line
    without such a timestamp goes to the file of the most recent dated
    line; lines before the first dated line go to ``TRANS_REFUND_.sql``.
    Every date found is also printed.

    Args:
        src: Path of the SQL dump to read.
        out_dir: Directory for the per-date files; the default ``''`` means
            the current working directory.
    """
    import os.path  # local import: only needed to honour ``out_dir``

    # The literal space after the opening quote is part of the dump format.
    date_re = re.compile(r"TO_TIMESTAMP\(' (\d{4})-(\d{2})-(\d{2})")
    date_key = ''    # date of the most recent dated line, as YYYYMMDD
    open_key = None  # date whose output file is currently open
    sink = None
    try:
        with open(src, 'r') as fin:
            for line in fin:
                found = date_re.search(line)
                if found:
                    date_key = ''.join(found.groups())
                    print(date_key)
                # Reopen only when the target changes instead of once per
                # line; append mode keeps earlier content of that file.
                if date_key != open_key:
                    if sink is not None:
                        sink.close()
                    target = os.path.join(
                        out_dir, 'TRANS_REFUND_' + date_key + '.sql')
                    sink = open(target, 'a')
                    open_key = date_key
                sink.write(line)
    finally:
        if sink is not None:
            sink.close()
def RARFilebyname(targetDir='D:/etc0901/etc0901', archive_dir=''):
    """Sort files into sub-folders by a key in their name, then zip them.

    Every regular file directly inside ``targetDir`` is copied into the
    sub-folder ``targetDir/<key>``, where ``<key>`` is characters 11-16 of
    the file name (``name[11:17]``); the folder is created on demand and
    the original file is left in place.  Afterwards every sub-folder of
    ``targetDir`` is packed into ``etc_<folder>.zip`` and the path of every
    directory entry is printed.

    Args:
        targetDir: Directory whose files are classified and archived.
        archive_dir: Directory that receives the zip archives; the default
            ``''`` means the current working directory.
    """
    for name in os.listdir(targetDir):
        source = targetDir + '/' + name
        if not os.path.isfile(source):
            continue
        key = name[11:17]
        if not key:
            # A name shorter than 12 characters has no key.  Without this
            # guard the "sub-folder" would be targetDir itself and the copy
            # below would fail trying to copy the file onto itself.
            continue
        bucket = targetDir + '/' + key
        if not os.path.exists(bucket):
            os.makedirs(bucket)
        shutil.copyfile(source, bucket + '/' + name)

    # Listed again so that the folders created above are included.
    for entry in os.listdir(targetDir):
        path = targetDir + '/' + entry
        print(path)
        if os.path.isdir(path):
            shutil.make_archive(
                os.path.join(archive_dir, 'etc_' + entry), 'zip', path)
from struct import *  # star import kept: other code may rely on these names


# Background reading on big-endian vs. little-endian byte order:
#   http://blog.csdn.net/fan_hai_ping/article/details/8424360
#   http://blog.csdn.net/lis_12/article/details/52777983
def ReadYKTFile(path=u'D:\\jie\\02_測試數據\\YKT\\20170901\\52013170901000000001J0'):
    """Print the leading fields of a YKT binary file in two struct layouts.

    The start of the file is decoded and printed twice: once as packed
    little-endian data (``<``) and once with native byte order and
    alignment (``@``), so the two interpretations can be compared.  Two
    sample values are then printed through ``BCDtoDatetime``.

    Args:
        path: Path of the binary file to inspect.

    Raises:
        struct.error: If the file is shorter than a layout requires.
    """
    packed_fmt = '<bbbhhhiibbqqqqqqqqqq'  # '<': little-endian, no padding
    native_fmt = '@bbbhhhiibbqqqqqq'      # '@': native order and alignment
    with open(path, 'rb') as fh:
        # calcsize() replaces hand-summed byte counts; for the native layout
        # the size depends on the platform's alignment rules.
        print(unpack(packed_fmt, fh.read(calcsize(packed_fmt))))
        fh.seek(0)
        print(unpack(native_fmt, fh.read(calcsize(native_fmt))))
    print(BCDtoDatetime(17372960, 4))  # prints 20170901, a date
    print(BCDtoDatetime(320869749, 4))


def BCDtoDatetime(x, y):
    """Decode a packed-BCD integer into its digit string.

    BCD stores one decimal digit per 4 bits.  ``x`` is treated as ``y``
    bytes of two digits each; the bytes are emitted in reverse order (least
    significant byte first), e.g. ``0x01091720`` with ``y=4`` gives
    ``'20170901'``.

    Args:
        x: Integer holding the BCD data.
        y: Number of bytes to decode.

    Returns:
        A string of ``2 * y`` digits when every 4-bit group of ``x`` is a
        decimal digit and ``x`` fits in ``y`` bytes.
    """
    byte_strings = []
    rest = x
    # Walk the 4-bit groups from most to least significant, two per byte.
    for position in range(y * 2 - 1, -1, -2):
        high, rest = divmod(rest, 16 ** position)
        low, rest = divmod(rest, 16 ** (position - 1))
        byte_strings.append(str(high) + str(low))
    # The least significant byte comes first in the result.
    byte_strings.reverse()
    return ''.join(byte_strings)