用Python實現大文件分割

Python代碼如下:

import os
import sys

kilobytes = 1024
# NOTE: 1024 * 1000 is 1,024,000 bytes (neither 10**6 nor 2**20); the value is
# kept as-is so the default part size stays the same.
megabytes = kilobytes * 1000
chunksize = int(200 * megabytes)  # default size of each part file, in bytes


def split(fromfile, todir, chunksize=chunksize):
    """Split ``fromfile`` into numbered part files inside ``todir``.

    The input is read in binary mode ``chunksize`` bytes at a time and each
    chunk is written to ``todir`` as ``data0001``, ``data0002``, ... in read
    order, so the last part may be shorter than the others.

    If ``todir`` does not exist it is created (including missing parents).
    If it already exists, every entry in it is removed with ``os.remove``
    before any part is written, so stale parts from an earlier run cannot
    get mixed in with the new ones.

    Args:
        fromfile: Path of the file to split.
        todir: Directory that receives the part files.
        chunksize: Maximum size of one part in bytes; must be positive.

    Returns:
        The number of part files written (0 for an empty input file).

    Raises:
        ValueError: If ``chunksize`` is not positive.
        OSError: If the input cannot be opened, or the directory cannot be
            created, emptied or written to.
    """
    if chunksize <= 0:
        # read(0) yields nothing and a negative size reads the whole file at
        # once; neither is a meaningful split, so refuse before touching disk.
        raise ValueError('chunksize must be a positive number of bytes')

    # Open the input before touching todir: a wrong input path must not wipe
    # the parts that are already stored there.
    with open(fromfile, 'rb') as inputfile:
        if not os.path.exists(todir):
            os.makedirs(todir)
        else:
            for fname in os.listdir(todir):
                os.remove(os.path.join(todir, fname))

        partnum = 0
        while True:
            chunk = inputfile.read(chunksize)
            if not chunk:  # empty read means end of file
                break
            partnum += 1
            filename = os.path.join(todir, 'data%04d' % partnum)
            with open(filename, 'wb') as fileobj:
                fileobj.write(chunk)
    return partnum


if __name__ == '__main__':
    fromfile = input('File to be split?')
    todir = input('Directory to store part files?')
    answer = input('Chunksize to be split?')
    if answer.strip():
        # An empty answer keeps the module default. Rebinding the global here
        # does not affect split()'s default, which was bound at def time.
        chunksize = int(answer)
    absfrom, absto = map(os.path.abspath, [fromfile, todir])
    print('Splitting', absfrom, 'to', absto, 'by', chunksize)
    try:
        parts = split(fromfile, todir, chunksize)
    except Exception:
        # Report the failure without a traceback, but let KeyboardInterrupt
        # and SystemExit propagate.
        exc_type, exc_value = sys.exc_info()[:2]
        print('Error during split:')
        print(exc_type, exc_value)
    else:
        print('split finished:', parts, 'parts are in', absto)

以data.txt文件爲例,此文件是由Python隨機生成的數字構成的數據集,大小爲1.1G,現將它分割成多個大小爲128M的子文件,運行結果如下:

相關文章
相關標籤/搜索