常常遇到因爲樣本巨大,需要抽樣(sample)一部分文件進行案例分析的狀況。下面的 Python 程序用於從一個大文件中隨機抽取 N 行。
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Randomly copy N lines from a large text file into an output file.

Run as a script, this draws SAMPLE_SIZE distinct line numbers from the
first TOTAL_LINES lines of SOURCE_PATH and appends the corresponding
lines, in the order they were drawn, to OUTPUT_PATH.
"""
import sys

if sys.version_info[0] == 2:
    # Legacy Python 2 default-encoding switch, kept so the script behaves
    # as before on Python 2. `reload` is not a builtin on Python 3, so the
    # step is skipped there instead of raising NameError.
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('utf-8')

import os
import json
import codecs
import random
import linecache

# Number of lines in the source file (set this to the real line count).
TOTAL_LINES = 815003
# Number of distinct lines to draw.
SAMPLE_SIZE = 100
# File to sample from.
SOURCE_PATH = 'readfile'
# File the sampled lines are appended to.
OUTPUT_PATH = 'aa.txt'


def sample_lines(src_path, dst_path, total_lines, sample_size):
    """Append `sample_size` randomly chosen lines of `src_path` to `dst_path`.

    Args:
        src_path: Path of the text file to sample from.
        dst_path: Path of the output file; it is opened in append mode, so
            existing content is kept.
        total_lines: Number of lines in `src_path`; line numbers are drawn
            from 1..total_lines inclusive.
        sample_size: How many distinct line numbers to draw.

    Returns:
        The list of 1-based line numbers that were drawn, in the order the
        lines were written.

    Raises:
        ValueError: If `sample_size` is negative or larger than
            `total_lines` (raised by `random.sample`).

    Note:
        `linecache.getline` never raises; a line it cannot fetch (missing
        file, line number past the end) is written as an empty string.
    """
    # linecache line numbers are 1-based: line 0 comes back as '' and the
    # last line is number `total_lines`, so sample from 1..total_lines.
    picked = random.sample(range(1, total_lines + 1), sample_size)

    # The context manager closes the output file even if a write fails.
    with open(dst_path, 'a') as out:
        for line_no in picked:
            out.write(linecache.getline(src_path, line_no))
    return picked


if __name__ == '__main__':
    sample_lines(SOURCE_PATH, OUTPUT_PATH, TOTAL_LINES, SAMPLE_SIZE)