"""Collect per-disk metric series from the 'M*' files of one directory.

Each record is a space-separated line: a key made of the first two fields,
followed by 'something:value' samples.  Samples of every record whose key
names a disk (1..11) are gathered into ``disk[diskno][key]`` and the keys
are remembered, in first-seen order, in ``keys[diskno]``.

The code is written so that it runs unchanged on Python 2 and Python 3.
"""
import os

# Directory that holds the files to process.
Path = ('/media/hadoop/My Book/WorkSpace/PreProcess/AY42/2011_07_02/'
        'r12a03014.dg.aliyun.com')

# Only records whose first field starts with one of these are considered.
# The backslashes are literal characters of the key.
_KEY_PREFIXES = (
    '/sys/pangu/ChunkServerRole/',
    '/Machine/DISK/\\/apsarapangu\\/disk',
)

# Fields [2, 242) of a record are collected, i.e. 240 samples per record.
_FIRST_SAMPLE_FIELD = 2
_END_SAMPLE_FIELD = 242

# disk[1]..disk[11] map a record key to its accumulated samples;
# disk[0] stays empty so that the index equals the disk number.
disk = [{} for _ in range(12)]

# keys[1]..keys[11] list the record keys of each disk in first-seen order.
keys = [[] for _ in range(12)]


def _disk_number_at(line_key, start):
    """Decode the disk number whose first digit is at index ``start``.

    Returns 1..9 for a single leading digit, 10 or 11 when the digits are
    '10' or '11', and -1 when there is no character at ``start`` or it is
    not one of '1'..'9'.  Any other second character is ignored, so e.g.
    '12' decodes to 1.
    """
    first = line_key[start:start + 1]
    # An empty slice (marker at the very end of the key) also fails here.
    if not ('1' <= first <= '9'):
        return -1
    second = line_key[start + 1:start + 2]
    if first == '1' and second in ('0', '1'):
        return 10 + int(second)
    return int(first)


def getDiskno(line_key):
    """Return the disk number (1..11) named in ``line_key``, or -1.

    The last occurrence of 'disk' is used; only when the key contains no
    lowercase 'disk' at all is the last occurrence of 'Disk' tried.
    -1 is returned when neither marker is present or when the marker is
    not followed by a digit in '1'..'9'.
    """
    for marker in ('disk', 'Disk'):
        position = line_key.rfind(marker)
        if position != -1:
            # The first marker that is present decides the result, even
            # when that result is -1.
            return _disk_number_at(line_key, position + len(marker))
    return -1


def wipeColonAndSummation(line):
    """Strip the 'xxx:' prefixes from a record and join the bare values.

    ``line`` is the list of fields of one record.  Fields 2..241 are
    reduced to the text after their last colon and joined with single
    spaces; positions the record does not have are filled with '-1', so
    the result always holds 240 values.

    The line terminator is removed from the final field of the record.
    Only '\\r' / '\\n' are stripped, so a record that does not end with a
    newline keeps its last character.
    """
    last_index = len(line) - 1
    values = []
    for index in range(_FIRST_SAMPLE_FIELD, _END_SAMPLE_FIELD):
        if index > last_index:
            values.append('-1')
            continue
        field = line[index]
        value = field[field.rfind(':') + 1:]
        if index == last_index:
            value = value.rstrip('\r\n')
        values.append(value)
    return ' '.join(values)


def _collect_disk_data(path, disk_tables, key_lists):
    """Read every 'M*' file in ``path`` into the per-disk tables.

    ``disk_tables[n]`` maps a record key to its space-joined samples and
    ``key_lists[n]`` records the keys of disk ``n`` in first-seen order.
    Samples of a key that is seen again are appended to the existing ones.
    """
    for eachfile in os.listdir(path):
        if not eachfile.startswith('M'):
            continue
        # The context manager closes the file even if a record fails.
        with open(os.path.join(path, eachfile), 'r') as records:
            for record in records:
                line = record.split(' ')
                if len(line) < 4 or not line[0].startswith(_KEY_PREFIXES):
                    continue
                line_key = line[0] + line[1]
                diskno = getDiskno(line_key)
                # The first sample must contain a colon past index 0.
                if diskno <= 0 or line[2].rfind(':') <= 0:
                    continue
                oneDayData = wipeColonAndSummation(line)
                table = disk_tables[diskno]
                if line_key in table:
                    table[line_key] += ' ' + oneDayData
                else:
                    table[line_key] = oneDayData
                    key_lists[diskno].append(line_key)


def main():
    """Run the collection over ``Path`` and print the disk 11 results."""
    # Single-argument print(...) gives the same output with the Python 2
    # print statement and the Python 3 print function.
    print(getDiskno('SizeOnDisk11'))
    _collect_disk_data(Path, disk, keys)
    # All of the data is now in 'disk'; what follows is output.
    print(disk[11]['/Machine/DISK/\\/apsarapangu\\/disk11/AVGQU_SZ'])
    print(keys[11])
    print(len(keys[11]))


if __name__ == '__main__':
    main()