"""Benchmark membership tests (``in``) against a list and a dict built from file lines."""
import time
from itertools import islice
from random import randint, sample  # randint is kept so existing importers of this name do not break

DEFAULT_FILE_NAME = "fbobject_idnew.txt"


def _read_lines(file_name, total_nums):
    """Return at most ``total_nums`` leading lines of ``file_name`` (newlines kept)."""
    with open(file_name, encoding="utf8", mode="r") as f_open:
        # islice stops reading as soon as enough lines were collected and
        # simply yields fewer lines when the file is shorter than requested.
        return list(islice(f_open, max(total_nums, 0)))


def _pick_targets(unique_candidates, target_nums):
    """Return up to ``target_nums`` distinct random items from ``unique_candidates``."""
    # Clamp the sample size: random.sample raises ValueError when asked for
    # more items than are available (or for a negative count).
    wanted = min(max(target_nums, 0), len(unique_candidates))
    return sample(unique_candidates, wanted)


def load_list_data(total_nums, target_nums, file_name=DEFAULT_FILE_NAME):
    """
    Read data from a file and return it as a list.

    :param total_nums: maximum number of lines to read from the file
    :param target_nums: number of distinct lines to pick as lookup targets
    :param file_name: path of the UTF-8 text file to read
    :return: ``(all_data, target_data)`` where ``all_data`` is the list of
        lines read (trailing newlines kept) and ``target_data`` is a list of
        distinct lines randomly chosen from ``all_data``. ``target_data`` has
        ``target_nums`` items unless fewer distinct lines are available.
    """
    all_data = _read_lines(file_name, total_nums)
    # De-duplicate (order-preserving) so the chosen targets are distinct values
    # even when the file contains repeated lines.
    target_data = _pick_targets(list(dict.fromkeys(all_data)), target_nums)
    return all_data, target_data


def load_dict_data(total_nums, target_nums, file_name=DEFAULT_FILE_NAME):
    """
    Read data from a file and return it as a dict.

    :param total_nums: maximum number of lines to read from the file
    :param target_nums: number of distinct lines to pick as lookup targets
    :param file_name: path of the UTF-8 text file to read
    :return: ``(all_data, target_data)`` where ``all_data`` maps every line
        read (trailing newline kept) to ``0`` and ``target_data`` is a list of
        distinct keys randomly chosen from ``all_data``. ``target_data`` has
        ``target_nums`` items unless fewer keys are available.
    """
    all_data = dict.fromkeys(_read_lines(file_name, total_nums), 0)
    # Sample from the actual keys: duplicates in the file collapse into one
    # key, so the dict may hold fewer than total_nums entries.
    target_data = _pick_targets(list(all_data), target_nums)
    return all_data, target_data


def find_test(all_data, target_data, test_times=100):
    """
    Measure how long it takes to look up every target in a container.

    :param all_data: container supporting ``in`` (e.g. list, dict, set)
    :param target_data: iterable of values to look up in ``all_data``
    :param test_times: number of timed repetitions to average over
    :return: average wall-clock seconds for one full pass over ``target_data``
    :raises ValueError: if ``test_times`` is smaller than 1
    """
    if test_times < 1:
        raise ValueError("test_times must be at least 1")

    total_times = 0.0
    for _ in range(test_times):
        find = 0
        # perf_counter is monotonic and high-resolution, unlike time.time().
        start_time = time.perf_counter()
        for data in target_data:
            if data in all_data:
                find += 1
        total_times += time.perf_counter() - start_time
    return total_times / test_times


if __name__ == "__main__":
    # Other configurations that were benchmarked:
    # all_data, target_data = load_list_data(10000, 1000)
    # all_data, target_data = load_list_data(100000, 1000)
    # all_data, target_data = load_list_data(1000000, 1000)
    # all_data, target_data = load_dict_data(10000, 1000)
    # all_data, target_data = load_dict_data(100000, 1000)
    # all_data, target_data = load_dict_data(1000000, 1000)
    all_data, target_data = load_dict_data(2000000, 1000)
    last_time = find_test(all_data, target_data)
    # dict lookups are far faster than list lookups.
    # With a list, lookup time grows as the list grows.
    # With a dict, lookup time does not grow as the dict grows.
    print(last_time)
不可變對象都是可hash的,例如 str、frozenset、tuple(前提是 tuple 內的元素也都可hash),以及自己實現了 __hash__ 的類。dict的內存花銷大,可是查詢速度快;自定義的對象或者python內部的對象都是用dict包裝的。
查找效率:set>dict>list
單次查詢中:list 的查找是 O(n) 的。set 作了去重,原先猜想其本質是一顆紅黑樹(STL 的 std::set 就是紅黑樹),複雜度 O(logn);dict 則猜想是先對 key 進行 hash,而後再對 hash 值生成一個紅黑樹進行查找,複雜度也是 O(logn)。不過這個猜想並不符合 CPython 的實際實現:CPython 的 set 和 dict 都是基於哈希表(開放尋址法)實現的,平均查找複雜度是 O(1),只有在哈希衝突非常嚴重的最壞情況下才會退化到 O(n)。實測中 dict 比 set 略慢,不過差異不大。