In the previous post, "Monitoring memory usage with memory_profiler", I wrote a script that walks every file under a directory tree; with about 260,000 files it takes roughly 90 seconds.
Now I want to store the file names and paths in a database, with the requirement that newly added files get inserted and removed files get deleted from the database.
root@WJL-SH4031667: # cat setFileToSql.py
from os import walk
from os.path import join, getmtime
import sqlite3

@profile
def scan():
    dbName, tblName = "test.db", "fmanages"
    conn = sqlite3.connect(dbName)
    cursor = conn.cursor()
    cursor.execute('create table {}(id varchar(20) primary key, fname varchar(30), path varchar(80), tm FLOAT, tag varchar(80))'.format(tblName))
    ret = {}
    for path, _, files in walk("./"):
        for f in files:
            if not f.endswith(""):  # endswith("") is always True; placeholder for a real suffix filter
                continue
            filename = join(path, f)
            try:
                # One insert per file; any failure (e.g. a quote in the name) just skips that row.
                cursor.execute("insert into {} (fname, path, tm, tag) values ('{}', '{}', '{}', 'study')".format(
                    tblName, filename.rstrip(), path.rstrip(), getmtime(filename)))
            except:
                continue
    cursor.close()
    conn.commit()
    conn.close()

if __name__ == "__main__":
    scan()
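One side note: @profile only exists because `python -m memory_profiler` injects it into builtins; run the script directly and it raises a NameError. A minimal guard of my own (not part of the original script) keeps it runnable either way:

# Hypothetical guard: turn @profile into a no-op when memory_profiler is not driving the run.
try:
    profile
except NameError:
    def profile(func):
        return func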
root@WJL-SH4031667: # time python -m memory_profiler setFileToSql.py
Line #    Mem usage    Increment   Line Contents
================================================
     5   29.820 MiB   29.820 MiB   @profile
     6                             def scan():
     7   29.824 MiB    0.004 MiB       dbName,tblName = "test.db","fmanages"
     8   29.984 MiB    0.160 MiB       conn = sqlite3.connect(dbName)
     9   29.988 MiB    0.004 MiB       cursor = conn.cursor()
    10   30.473 MiB    0.484 MiB       cursor.execute('create table {}(id varchar(20) primary key, fname varchar(30), path varchar(80), tm FLOAT, tag varchar(80))'.format(tblName))
    12   37.578 MiB  -48094.824 MiB       for path, _, files in walk("./"):
    13   37.578 MiB  -479957.762 MiB          for f in files:
    14   37.578 MiB  -431869.512 MiB              if not f.endswith(""):
    15                                                continue
    20   37.578 MiB  -431869.512 MiB              try:
    21   37.578 MiB  -431869.469 MiB                  cursor.execute("insert into {} (fname, path,tm, tag) values ('{}','{}','{}', 'study')".format(tblName, f.rstrip(), path.rstrip(), getmtime(f)))
    22   37.578 MiB  -431727.805 MiB              except:
    23   37.578 MiB  -431727.809 MiB                  continue
    24
    25   31.953 MiB      -5.625 MiB       cursor.close()
    26   31.953 MiB       0.000 MiB       conn.commit()
    27   31.953 MiB       0.000 MiB       conn.close()

python -m memory_profiler setFileToSql.py  94.52s user 253.09s system 99% cpu 5:49.69 total
As the output shows, the database approach peaks at around 30 MB of memory. Compared with the dict-based storage in the previous post, "Monitoring memory usage with memory_profiler", which used about 100 MB, that is roughly 70 MB less; the dict really is a memory hog. What about speeding up the SQLite writes? Comparing against the previous post, the two runs take almost the same time, so there is little room left to optimize: the roughly 90 seconds of user time is essentially all spent on I/O.
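If SQLite write speed ever did need improving, the usual levers are parameterized statements and batching all rows through one executemany call in a single transaction, rather than formatting a SQL string per file. Below is only a sketch of that variant, assuming the same fmanages schema; bulk_insert and iter_rows are hypothetical names and this is not the original script:

from os import walk
from os.path import join, getmtime
import sqlite3

def bulk_insert(dbName="test.db", tblName="fmanages"):
    # Stream (fname, path, mtime, tag) tuples instead of formatting SQL per file.
    def iter_rows():
        for path, _, files in walk("./"):
            for f in files:
                filename = join(path, f)
                try:
                    yield (filename.rstrip(), path.rstrip(), getmtime(filename), "study")
                except OSError:
                    continue  # file vanished between walk() and getmtime()

    conn = sqlite3.connect(dbName)
    cursor = conn.cursor()
    # ? placeholders quote values safely (quotes in file names no longer raise),
    # and executemany runs inside one transaction with a single commit at the end.
    cursor.executemany(
        "insert into {} (fname, path, tm, tag) values (?, ?, ?, ?)".format(tblName),
        iter_rows())
    conn.commit()
    cursor.close()
    conn.close()

Passing a generator keeps memory flat, which matters more here than raw speed.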
root@WJL-SH4031667:# ls -lh test.db
-rwxrwxrwx 1 root root 42M Nov 1 09:01 test.db
root@WJL-SH4031667: # sqlite3 test.db
sqlite> select count(*) from fmanages;
230072
The core of the incremental update is a diff between the file names already recorded in the table and the ones found by the current scan:

# Export the existing file records
extRet = dict(cursor.execute('select fname,tm from {}'.format(tblName)).fetchall())

# Build the previous and the current file-name sets
prekeys = extRet.keys()
nowkeys = ret.keys()

# Files that were deleted
delFile = prekeys - nowkeys
...

# Files that were added
newFile = nowkeys - prekeys
...
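The diff works because dict.keys() returns a set-like view, so the previous snapshot and the current one can be subtracted directly. A tiny standalone illustration with made-up file names (not from the real run):

prev = {"./a.txt": 1.0, "./b.txt": 2.0}   # what the table already records (extRet)
now  = {"./b.txt": 2.0, "./c.txt": 3.0}   # what this scan found (ret)

print(prev.keys() - now.keys())   # {'./a.txt'}  -> rows to delete
print(now.keys() - prev.keys())   # {'./c.txt'}  -> rows to insert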
root@WJL-SH4031667: # cat setFileToSql.py
from os import walk
from os.path import join, getmtime
import sqlite3

@profile
def scan():
    dbName, tblName = "test.db", "fmanages"
    conn = sqlite3.connect(dbName)
    cursor = conn.cursor()
    ret, extRet = {}, {}

    i = 1
    for path, _, files in walk("./"):
        for f in files:
            if not f.endswith(""):
                continue
            filename = join(path, f)
            ret[filename] = getmtime(filename)
            i += 1

    ########## The code below is new; the important part is the add/delete diff of the two sets ##########
    # Check whether the table already exists
    try:
        cursor.execute('create table {}(id varchar(20) primary key, fname varchar(30), path varchar(80), tm FLOAT, tag varchar(80))'.format(tblName))
    except:
        # Table already exists: load the records from the previous run
        extRet = dict(cursor.execute('select fname,tm from {}'.format(tblName)).fetchall())

    # Build the previous and the current file-name sets
    prekeys = extRet.keys()
    nowkeys = ret.keys()

    # Files that were deleted
    delFile = prekeys - nowkeys
    print("Deleted files: {}".format(delFile))

    # Files that were added
    newFile = nowkeys - prekeys
    print("Added files: {}".format(newFile))

    try:
        for filename in delFile:
            cursor.execute("delete from {} where fname='{}'".format(tblName, filename))
            print("Deleted file {}".format(filename))
    except:
        pass

    try:
        for filename in newFile:
            cursor.execute("insert into {} (fname, path, tm, tag) values ('{}', 'new', '{}', 'study')".format(tblName, filename.rstrip(), getmtime(filename)))
            print("Added file {}".format(filename))
    except:
        pass
    ####################################################

    cursor.close()
    conn.commit()
    conn.close()
    return ret

if __name__ == "__main__":
    scan()
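A caveat on the sync loops above: the bare except hides every failure, and a single quote in a file name breaks the formatted SQL and silently skips the rest of the batch. A hedged rework of just those two loops with ? placeholders and no blanket except (same variables assumed; this is not the code the profiler run below measured):

# Delete rows for files that no longer exist on disk.
for filename in delFile:
    cursor.execute("delete from {} where fname=?".format(tblName), (filename,))
    print("Deleted file {}".format(filename))

# Insert rows for files seen for the first time; ret already holds their mtimes,
# so there is no need to call getmtime() again.
for filename in newFile:
    cursor.execute(
        "insert into {} (fname, path, tm, tag) values (?, 'new', ?, 'study')".format(tblName),
        (filename, ret[filename]))
    print("Added file {}".format(filename))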
(First, delete one file from the directory.)
Filename: setFileToSql.py

Line #    Mem usage    Increment   Line Contents
================================================
     6   29.824 MiB   29.824 MiB   @profile
     7                             def scan():
     8   29.828 MiB    0.004 MiB       dbName,tblName = "test.db","fmanages"
     9   29.988 MiB    0.160 MiB       conn = sqlite3.connect(dbName)
    10   29.992 MiB    0.004 MiB       cursor = conn.cursor()
    11   29.992 MiB    0.000 MiB       ret,extRet = {},{}
    12                                 # Check whether the table already exists
    13   29.992 MiB    0.000 MiB       try:
    14   30.477 MiB    0.484 MiB           cursor.execute('create table {}(id varchar(20) primary key, fname varchar(30), path varchar(80), tm FLOAT, tag varchar(80))'.format(tblName))
    15                                 except:
    16                                     extRet = dict(cursor.execute('select fname,tm from {}'.format(tblName)).fetchall())
    17
    18   30.477 MiB    0.000 MiB       i = 1
    19   97.188 MiB   -5072.457 MiB       for path, _, files in walk("./"):
    20   97.188 MiB  -35328.441 MiB          for f in files:
    21   97.188 MiB  -31115.449 MiB              if not f.endswith(""):
    22                                               continue
    23
    24
    25   97.188 MiB  -31114.238 MiB              filename = join(path, f)
    26   97.188 MiB  -31074.410 MiB              ret[filename] = getmtime(filename)
    27   97.188 MiB  -31111.219 MiB              i += 1
    28
    29                                 # Build the previous and the current file-name sets
    30   97.188 MiB    0.000 MiB       prekeys = extRet.keys()
    31   97.188 MiB    0.000 MiB       nowkeys = ret.keys()
    32
    33                                 # Files that were deleted
    34   97.188 MiB    0.000 MiB       delFile = prekeys - nowkeys
    35   97.195 MiB    0.008 MiB       print("Deleted files: {}".format(delFile))
    36
    37                                 # Files that were added
    38  111.578 MiB   14.383 MiB       newFile = nowkeys - prekeys
    39  114.059 MiB    2.480 MiB       print("Added files: {}".format(newFile))
    40
    41  114.059 MiB    0.000 MiB       try:
    42  114.059 MiB    0.000 MiB           for filename in delFile:
    43                                         cursor.execute("delete from {} where fname='{}'".format(tblName, filename))
    44                                         print("Deleted file {}".format(filename))
    45                                 except:
    46                                     pass
    47
    48
    49  114.059 MiB    0.000 MiB       try:
    50  114.109 MiB    0.000 MiB           for filename in newFile:
    51  114.109 MiB    0.043 MiB               cursor.execute("insert into {} (fname, path,tm, tag) values ('{}','new','{}', 'study')".format(tblName, filename.rstrip(), getmtime(filename)))
    52  114.109 MiB    0.008 MiB               print("Added file {}".format(filename))
    53  114.109 MiB    0.000 MiB       except:
    54  114.109 MiB    0.000 MiB           pass
    55
    56  114.109 MiB    0.000 MiB       cursor.close()
    57  114.109 MiB    0.000 MiB       conn.commit()
    58  114.109 MiB    0.000 MiB       conn.close()
    59  114.109 MiB    0.000 MiB       return ret

python -m memory_profiler setFileToSql.py  95.17s user 289.77s system 97% cpu 6:32.95 total
root@WJL-SH4031667:# sqlite3 test.db
sqlite> select count(*) from fmanages;
230071

The count has dropped from 230072 to 230071, so the file deleted from the directory was also removed from the table.