1、線程回顧
import time
import threading

# ---------------------------------------------------------------------------
# Stage 1: a single main thread -- sing() finishes before dance() begins.
# (Earlier iteration of this demo, kept for reference.)
# ---------------------------------------------------------------------------
# def sing():
#     for i in range(1,6):
#         print('come baby 跟我一塊兒 嗨 嗨 嗨 !!!')
#         time.sleep(1)
#
# def dance():
#     for i in range(1,6):
#         print('偏偏 肚皮 鋼管舞 哈哈哈哈哈 ...... ')
#         time.sleep(1)
#
# def main():
#     sing()
#     dance()
#
# if __name__ == '__main__':
#     main()

# ---------------------------------------------------------------------------
# Stage 2: procedural threads -- one main thread plus two worker threads
# built with threading.Thread(target=...).
# (Earlier iteration of this demo, kept for reference.)
# ---------------------------------------------------------------------------
# def sing(a):
#     for i in range(1,6):
#         print('當前線程:%s ...come %s 跟我一塊兒 嗨 嗨 嗨 !!!' %(threading.current_thread().name,a))
#         time.sleep(1)
#
# def dance(a):
#     for i in range(1,6):
#         print('當前線程:%s ... 偏偏 肚皮 鋼管舞 %s你要哪種 ' %(threading.current_thread().name,a))
#         time.sleep(1)
#
# def main():
#     print('...聯歡晚會如今開始...')
#     # create the singing thread
#     a = '悟空'
#     t_sing = threading.Thread(target=sing,name='唱歌',args=(a,))
#
#     # create the dancing thread
#     t_dance = threading.Thread(target=dance, name='跳舞',args=(a,))
#
#     # start both threads
#     t_sing.start()
#     t_dance.start()
#
#     # make the main thread wait for the workers to finish
#     t_sing.join()
#     t_dance.join()
#
#     print('晚會結束,各回各家')
#
# if __name__ == '__main__':
#     main()

# ---------------------------------------------------------------------------
# Stage 3: object-oriented threads -- subclass threading.Thread and
# override run().
# ---------------------------------------------------------------------------


class _PerformerThread(threading.Thread):
    """Thread that announces itself and then prints ``line`` repeatedly.

    SingThread and DanceThread were line-for-line copies of each other
    apart from the printed text, so the shared logic lives here and each
    subclass only supplies ``line``.

    Args:
        name: Thread name (reported by ``self.name``).
        a: Arbitrary argument echoed in the announcement line.
        repeat: How many times ``line`` is printed (default 5, the value
            that used to be hard-coded as ``range(1, 6)``).
        interval: Seconds to sleep after each print (default 1).
    """

    # Text printed on every iteration; overridden by subclasses.
    line = ''

    def __init__(self, name, a, repeat=5, interval=1):
        # Hand the name to Thread directly instead of assigning it afterwards.
        super().__init__(name=name)
        self.a = a
        self.repeat = repeat
        self.interval = interval

    def run(self):
        """Print the thread name and argument, then ``line`` ``repeat`` times."""
        print("線程名:%s 參數:%s" % (self.name, self.a))
        for _ in range(self.repeat):
            print(self.line)
            time.sleep(self.interval)


class SingThread(_PerformerThread):
    """Worker thread that "sings"."""

    line = '愛江山更愛漂亮人...'


class DanceThread(_PerformerThread):
    """Worker thread that "dances"."""

    line = '蹦擦擦,蹦擦擦...'


def main():
    """Run one singing and one dancing thread concurrently and wait for both."""
    # Create the threads.
    t_sing = SingThread('唱', '八戒')
    t_dance = DanceThread('跳', '悟能')
    workers = (t_sing, t_dance)

    # Start the threads.
    for worker in workers:
        worker.start()

    # Make the main thread wait until every worker has finished.
    for worker in workers:
        worker.join()


if __name__ == '__main__':
    main()
2、隊列多線程
from queue import Queue

# Create a bounded FIFO queue with room for five items.
q = Queue(5)
print(q.empty())  # True: nothing has been stored yet

# Store data until every slot is taken.
for player in ('濃眉哥', '勒布朗', '丹尼*格林', '庫茲馬', '麥基'):
    q.put(player)

print(q.full())   # True: all five slots are occupied
print(q.qsize())  # 5: current number of queued items

# Putting into a full queue (signature: put(item, block=True, timeout=None)):
#   q.put('波普', False)    -> raises queue.Full immediately
#   q.put('波普', True, 3)  -> waits up to 3 seconds for a free slot,
#                              then raises queue.Full

# Retrieve data: first in, first out.
while not q.empty():
    print(q.get())

# Getting from an empty queue (signature: get(block=True, timeout=None);
# get() takes no item argument):
#   q.get(False)    -> raises queue.Empty immediately
#   q.get(True, 3)  -> waits up to 3 seconds for an item,
#                      then raises queue.Empty
3、多線程爬蟲
import json
import threading
import time
from queue import Empty, Queue

import requests
from lxml import etree

# Holds the crawl (download) threads created by create_crawl_thread().
crawl_thread_list = []
# Holds the parse threads created by create_parse_thread().
parse_thread_list = []


def create_queue():
    """Build the two work queues used by the crawler.

    Returns:
        A ``(page_queue, data_queue)`` tuple. ``page_queue`` is pre-filled
        with the page numbers 1..5; ``data_queue`` starts empty and carries
        downloaded page text from the crawl threads to the parse threads.
    """
    # Page-number queue.
    page_queue = Queue()
    for page in range(1, 6):
        page_queue.put(page)
    # Content queue.
    data_queue = Queue()
    return page_queue, data_queue


class CrawlThread(threading.Thread):
    """Downloads pages whose numbers come from ``page_queue``.

    Args:
        name: Thread name, used in the start/finish messages.
        page_queue: Queue of page numbers still to be fetched.
        data_queue: Queue that receives the text of each fetched page.
    """

    def __init__(self, name, page_queue, data_queue):
        super().__init__(name=name)
        self.page_queue = page_queue
        self.data_queue = data_queue
        self.url = 'http://www.fanjian.net/jiantu-{}'
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) '
                          'AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/73.0.3683.86 Safari/537.36'
        }

    def run(self):
        """Fetch pages until ``page_queue`` is exhausted."""
        print('%s啓動......' % self.name)
        while True:
            # get_nowait() instead of empty()+get(): with several crawl
            # threads, another thread may take the last page between the
            # two calls, leaving this one blocked forever.
            try:
                page = self.page_queue.get_nowait()
            except Empty:
                break
            # Build the page URL.
            url = self.url.format(page)
            # Send the request; a timeout keeps a stalled server from
            # hanging the thread, and a failed page must not kill it.
            try:
                r = requests.get(url=url, headers=self.headers, timeout=10)
            except requests.RequestException as exc:
                print('%s: request for %s failed: %s' % (self.name, url, exc))
                continue
            # Hand the response body to the parse threads. (No `break`
            # here: each thread keeps going until no pages are left.)
            self.data_queue.put(r.text)
        print('%s結束......' % self.name)


class ParseThread(threading.Thread):
    """Parses downloaded pages from ``data_queue`` and writes the results.

    The thread stops when it takes ``None`` from ``data_queue``; producers
    must put one ``None`` per parse thread once no more data will arrive.

    Args:
        name: Thread name, used in the start/finish messages.
        data_queue: Queue of page text (``str``) or ``None`` stop markers.
        fp: Writable text file object shared by all parse threads.
        lock: Lock guarding writes to ``fp``.
    """

    def __init__(self, name, data_queue, fp, lock):
        super().__init__(name=name)
        self.data_queue = data_queue
        self.fp = fp
        self.lock = lock

    def parse_content(self, data):
        """Extract title/image-URL pairs from one page and append them to ``fp``.

        Each record is written as one JSON object per line.

        Args:
            data: HTML text of a single page.
        """
        tree = etree.HTML(data)
        if tree is None:  # nothing parseable in this response
            return
        # First find every <li>, then look up title and src *inside* it.
        li_list = tree.xpath('//ul[@class="cont-list"]/li')
        items = []
        for li in li_list:
            # The leading "." makes the path relative to this <li>; a bare
            # "//" would search the whole document and always return the
            # first entry on the page.
            titles = li.xpath('.//h2/a/text()')
            img_urls = li.xpath(
                './/div[@class="cont-list-main"]/p/img/@data-src')
            if not titles or not img_urls:
                # Entry without a title or image: skip it rather than crash.
                continue
            items.append({'標題': titles[0], '連接': img_urls[0]})
        # Write to the shared file; `with` releases the lock even if a
        # write raises.
        with self.lock:
            for item in items:
                self.fp.write(json.dumps(item, ensure_ascii=False) + '\n')

    def run(self):
        """Parse queued pages until a ``None`` stop marker is received."""
        print('%s啓動......' % self.name)
        while True:
            # Take one page of data from data_queue (blocks while empty).
            data = self.data_queue.get()
            if data is None:
                break
            # Parse the content.
            self.parse_content(data)
        print('%s結束......' % self.name)


def create_crawl_thread(page_queue, data_queue):
    """Create three CrawlThread objects and append them to ``crawl_thread_list``."""
    crawl_name = ['採集1號', '採集2號', '採集3號']
    for name in crawl_name:
        crawl_thread_list.append(CrawlThread(name, page_queue, data_queue))


def create_parse_thread(data_queue, fp, lock):
    """Create three ParseThread objects and append them to ``parse_thread_list``."""
    parse_name = ['解析1號', '解析2號', '解析3號']
    for name in parse_name:
        parse_thread_list.append(ParseThread(name, data_queue, fp, lock))


def main():
    """Download pages 1..5 concurrently, parse them, and append to jiantu.txt."""
    # Create the queues.
    page_queue, data_queue = create_queue()
    # Create the lock that serialises file writes.
    lock = threading.Lock()
    # `with` guarantees the file is closed even if a step below raises.
    with open('jiantu.txt', 'a', encoding='utf8') as fp:
        create_crawl_thread(page_queue, data_queue)
        create_parse_thread(data_queue, fp, lock)

        # Start the crawl threads, then the parse threads.
        for t in crawl_thread_list:
            t.start()
        for t in parse_thread_list:
            t.start()

        # Wait for every download to finish ...
        for t in crawl_thread_list:
            t.join()
        # ... then tell each parse thread that no more data is coming;
        # without these markers the parsers would block on get() forever.
        for _ in parse_thread_list:
            data_queue.put(None)
        for t in parse_thread_list:
            t.join()
    print('主線程執行完畢!')


if __name__ == '__main__':
    main()