記錄瞬間
=====================其一=====================
# coding:UTF-8
import os
import threading
from time import ctime


def loop(loops, list):
    """Print the line count of every file assigned to one worker thread.

    Args:
        loops: Zero-based index of the worker thread; it is shown one-based
            in the output.
        list: File names this worker has to process. The name shadows the
            builtin, but it is kept so existing callers keep working.
    """
    print('線程 %d 處理的文件列表 %s \n' % (loops + 1, list))
    for file_name in list:
        # Iterate the file instead of calling readlines(): the count is the
        # same, but the whole file is never held in memory. The `with` block
        # closes the handle even when decoding fails part-way through.
        with open(file_name, mode="r", encoding="UTF-8") as f:
            rows = sum(1 for _ in f)
        print('文件 %s __ %d 行\n' % (file_name, rows))


def main():
    """Count the lines of the log files in the working directory using threads.

    Every regular file in the current working directory whose name contains
    ``log`` is selected. The files are split into contiguous chunks, one per
    thread; the last thread also takes whatever is left over after the even
    split.
    """
    print('all start at:', ctime())
    cwd = os.getcwd()
    dir_list = os.listdir(cwd)
    # Show the folder itself here; the original formatted the whole directory
    # listing into this message.
    # NOTE(review): the message says "txt" but the filter below matches names
    # containing "log" - confirm which one is intended.
    print('當前文件夾 {} 下的全部txt文件:'.format(cwd))
    file_list = []
    for name in dir_list:
        if 'log' in name and os.path.isfile(name):
            print(' ', name)
            file_list.append(name)

    # Change this value to compare multi-threaded and single-threaded speed.
    threads_num = 4
    print('共有線程數:%d個' % threads_num)
    per_thread = len(file_list) / threads_num  # files per thread (may be fractional)
    print(per_thread)
    chunk = int(per_thread)

    threads = []
    for i in range(threads_num):
        start = i * chunk
        if i == threads_num - 1:
            # The last thread picks up everything that remains.
            share = file_list[start:]
        else:
            share = file_list[start:start + chunk]
        threads.append(threading.Thread(target=loop, args=(i, share)))

    for worker in threads:
        worker.start()
    # Wait for every worker before reporting the end time.
    for worker in threads:
        worker.join()
    print('all end at:', ctime())


if __name__ == '__main__':
    main()
上述代碼主要展示了多線程讀取文件時的一些技巧。同樣的思路也可以應用在寫文件上:讓多個線程各自寫一個文件,最後再將這些文件合併。這樣可以很好地解決執行速度的問題。
=====================其二=====================
混合使用多進程和多線程的例子。
#!/usr/bin/python
import re
try:
    # The `commands` module exists only on Python 2. The import is kept so any
    # code that still refers to the name keeps working there.
    import commands
except ImportError:
    commands = None
import subprocess
import time
import multiprocessing
import threading


def _run_wget(*args):
    """Run ``wget`` with *args* (no shell involved) and return its output.

    stdout and stderr are captured together, so nothing is written to the
    terminal, the same as the ``commands.getoutput`` call this replaces.
    Raises OSError when the ``wget`` executable cannot be started.
    """
    proc = subprocess.Popen(('wget',) + args,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)
    return proc.communicate()[0]


def download_image(url):
    """Download *url* into the current directory with wget.

    The URL is passed to wget as a single argument instead of being pasted
    into a shell command line, so shell metacharacters in a scraped link
    cannot run other commands.
    """
    print('*****the %s rpm begin to download *******' % url)
    _run_wget(url)


def get_rpm_url_list(url):
    """Fetch the index page at *url* and return the URLs of its links.

    The page is saved as ``index.html`` in the current directory. Every
    ``<a href="...">`` target found in it is appended to *url*.

    Returns:
        A list of strings, one per link, in page order.
    """
    # -O forces the output name. Without it, a second run would make wget
    # write index.html.1 and the stale index.html would be parsed instead.
    _run_wget('-O', 'index.html', url)
    with open('index.html') as index_file:
        rpm_info_str = index_file.read()
    regu_mate = r'(?<=<a href=")(.*?)(?=">)'
    rpm_list = re.findall(regu_mate, rpm_info_str)
    rpm_url_list = [url + rpm_name for rpm_name in rpm_list]
    print('the count of rpm list is:  %d' % len(rpm_url_list))
    return rpm_url_list
基礎方法定義 =↑=
def multi_thread(rpm_url_list, thread_num=5):
    """Download every URL in *rpm_url_list*, a batch of threads at a time.

    All URLs are printed first. They are then consumed from the end of the
    list in batches of at most *thread_num*; each batch is started and joined
    before the next one begins, so no more than *thread_num* downloads run at
    once.

    Args:
        rpm_url_list: URLs to pass to ``download_image``. Not modified.
        thread_num: Maximum number of threads per batch (default 5).

    Raises:
        ValueError: If *thread_num* is smaller than 1, which would otherwise
            loop forever on a non-empty list.
    """
    if thread_num < 1:
        raise ValueError('thread_num must be at least 1')

    for rpm_url in rpm_url_list:
        print('rpm_url is: %s' % (rpm_url,))

    pending = list(rpm_url_list)  # work on a copy; the caller's list is untouched
    while True:
        count = min(thread_num, len(pending))
        print('**********count********* %d' % count)
        batch = [
            threading.Thread(target=download_image, args=(pending.pop(),))
            for _ in range(count)
        ]
        for worker in batch:
            worker.start()
        # Wait for the whole batch before starting the next one.
        for worker in batch:
            worker.join()
        if not pending:
            break
多線程的定義 =↑=
def multi_process(rpm_url_list, process_num=4):
    """Split *rpm_url_list* across processes that each run ``multi_thread``.

    URLs are dealt out round-robin: the item at index ``i`` goes to group
    ``i % process_num``. One process is started per group, including empty
    groups, and the call returns after all of them have been joined.

    Args:
        rpm_url_list: Sequence of URLs to download.
        process_num: Number of processes to run at the same time (default 4).

    Raises:
        ValueError: If *process_num* is smaller than 1.
    """
    if process_num < 1:
        raise ValueError('process_num must be at least 1')

    # A stride slice selects exactly the items whose index % process_num
    # equals the offset, keeping their original order.
    rpm_url_groups = [rpm_url_list[offset::process_num]
                      for offset in range(process_num)]
    process = [
        multiprocessing.Process(target=multi_thread, args=(each_rpm_group,))
        for each_rpm_group in rpm_url_groups
    ]
    for one_process in process:
        one_process.start()
    for one_process in process:
        one_process.join()
多進程調用多線程的定義 =↑=
def main():
    """Download every package linked from the index page and time the run.

    The link list comes from ``get_rpm_url_list`` and the downloads are done
    by ``multi_process``. The elapsed wall-clock time is printed at the end.
    """
    # Other package indexes that can be substituted for url_paas:
    #   https://mirrors.ustc.edu.cn/centos/7/os/x86_64/Packages/
    #   http://mirrors.ustc.edu.cn/fedora/development/26/Server/x86_64/os/Packages/u/
    url_paas = 'http://mirrors.ustc.edu.cn/centos/7.3.1611/paas/x86_64/openshift-origin/'
    start_time = time.time()
    rpm_list = get_rpm_url_list(url_paas)
    # multi_process returns nothing, so its result is not printed
    # (the original printed a stray "None" here).
    multi_process(rpm_list)
    end_time = time.time()
    print('the download time is: %s' % (end_time - start_time))


# Run only when executed as a script. Child processes that re-import this
# module (the multiprocessing "spawn" start method) must not start the
# downloads again.
if __name__ == '__main__':
    main()
主方法 =↑=
代碼來源:https://blog.csdn.net/nfzhlk/article/details/76946281
其中,獲取 CPU 核數可以使用以下方法:
from multiprocessing import cpu_count

# Print the number of CPUs in the system, a common starting point when
# choosing how many worker processes to run.
print(cpu_count())
一般來說,使用多線程是為了加快處理速度,因此盡量不加鎖;一旦加了鎖,線程之間就需要互相等待,執行效率很容易退化到與單線程相當。
這樣做並不划算。當然,是否需要採用多線程加鎖的方式,還是要看具體的需求。