只記錄大致思路和我認爲其中需要記錄的地方。
正則匹配的模式很難記憶,即便記住了,也很難寫出無錯誤的匹配模式。不過,可以藉助網上一些提供實時比對的網站(如 regexr.com)來編寫並驗證匹配模式。
代碼示意:
import os
import re

# Pattern for "get_imgpath" timing records. It is compiled with re.VERBOSE so
# each field can sit on its own commented line. findall() yields one tuple per
# match, indexed as:
#   [0] whole match, [1] timestamp, [2] text preceding the keyword,
#   [3] elapsed time, [4] image directory, [5] image path.
get_imgpath_regex = re.compile(r'''(
    (\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2},\d+)  # timestamp
    \s                                          # separator
    (.*)?get_imgpath\sused\stime\sis\s          # leading info and keyword
    (\d+\.\d+)                                  # elapsed time (literal dot)
    \sargs\sis\s\(u'                            # separator
    (.*?)',\)\sresult\sis\s                     # image directory
    (.*?\.jpg)                                  # image path (literal .jpg)
    )''', re.VERBOSE)

# Cheap substring test applied before the (more expensive) regex.
get_imgpath_flag = 'get_imgpath'


def parse_get_imgpath_line(line):
    """Extract every get_imgpath record contained in one log line.

    Args:
        line: A single line of log text.

    Returns:
        A list of dicts with the keys 'date', 'cost_time', 'img_dir' and
        'img_path' (all values are strings). The list is empty when the
        keyword is absent or the line does not match the pattern.
    """
    # Check the keyword first so most lines never reach the regex.
    if get_imgpath_flag not in line:
        return []
    return [
        {
            'date': groups[1],
            'cost_time': groups[3],
            'img_dir': groups[4],
            'img_path': groups[5],
        }
        for groups in get_imgpath_regex.findall(line)
    ]


def main():
    """Scan every input log file and collect the get_imgpath records.

    NOTE(review): `infile_list` and `get_imgpath_match` are not defined in
    this excerpt; they are presumably module-level names set up elsewhere
    (an iterable of file paths and a list, respectively) -- confirm.
    """
    for infile in infile_list:
        # Parse the file line by line using the compiled pattern.
        with open(infile, "r") as file:
            for line in file:
                get_imgpath_match.extend(parse_get_imgpath_line(line))
採取的方式爲將參數存儲在一個單獨的文件中,如 config.json。
{ "FLAG" : { "SAVE_SPILT_LOG_FILE_FLAG" : false , "SAVE_MERGE_LOG_FILE_FLAG" : false , "USE_CURRENT_PATH" : false }, "PATH" : { "INPUT_LOG_FILE_PATH" : "E:\\zwk\\Code\\logger_read\\data\\pro_data" , "SAVE_SPILT_MERGE_LOG_PATH" : "E:\\zwk\\Code\\logger_read\\output\\spilt_merge_log" , "OUTPUT_RESULT_PATH" : "E:\\zwk\\Code\\logger_read\\output" }, "PARAMETERS" : { "windows_size" : 2 , "duplicate_times" : 1 } }
再對參數進行解析:
import json
import os


def load_config(config_file):
    """Read the JSON configuration stored at *config_file*.

    Args:
        config_file: Path of the JSON file to load.

    Returns:
        The decoded JSON content.

    Raises:
        SystemExit: If *config_file* does not exist. The error message is
            printed first.
    """
    if not os.path.isfile(config_file):
        print('Error: loss of config file, Exit !!!')
        # Stop here: without this, execution continued and failed in open()
        # with an unrelated traceback despite the message announcing an exit.
        raise SystemExit(1)
    with open(config_file, 'r') as f:
        return json.load(f)


def main():
    """Resolve the working paths from the module-level ``config`` dict.

    When config['FLAG']['USE_CURRENT_PATH'] is truthy, the three entries of
    config['PATH'] are overwritten in place with locations derived from the
    current working directory.
    """
    if config['FLAG']['USE_CURRENT_PATH']:
        pwd = os.getcwd()
        config['PATH']['INPUT_LOG_FILE_PATH'] = os.path.join(pwd, 'data')
        config['PATH']['SAVE_SPILT_MERGE_LOG_PATH'] = os.path.join(pwd, 'output')
        config['PATH']['OUTPUT_RESULT_PATH'] = pwd

    # NOTE(review): these two are not used within this excerpt; presumably
    # the processing steps that consume them were omitted -- confirm.
    output_file_path = config['PATH']
    flag = config['FLAG']


if __name__ == '__main__':
    # Locate config.json next to this script, not via the current working
    # directory, so the lookup does not depend on where the script is run.
    this_folder = os.path.dirname(os.path.abspath(__file__))
    config_file = os.path.join(this_folder, 'config.json')
    config = load_config(config_file)
    # improve it, use as global variable
    parameters = config['PARAMETERS']
    main()
在這裏,示例一種我認爲還不錯的作法,主要優點是不受操作系統影響:
# Locate config.json next to this script; os.path.join picks the path
# separator appropriate for the running platform.
this_folder = os.path.dirname(os.path.abspath(__file__))
config_file = os.path.join(this_folder, 'config.json')
exists_check = os.path.isfile(config_file)
if not exists_check:
    print('Error: loss of config file, Exit !!!')
    # Actually exit: previously execution fell through to open() and failed
    # there with an unrelated traceback.
    raise SystemExit(1)
with open(config_file, 'r') as f:
    config = json.load(f)

# (intermediate steps omitted)

# write output to files
# exist_ok=True makes this a no-op when the directory already exists.
# NOTE(review): `outfile` is not defined in this excerpt; presumably it is
# the config['PATH'] mapping -- confirm.
os.makedirs(outfile['SAVE_SPILT_MERGE_LOG_PATH'], exist_ok=True)
```this