s15day34 進程

內容回顧:
    1. GIL鎖
    2. 進程和線程的區別?
        第一:
            進程是cpu資源分配的最小單元。
            線程是cpu計算的最小單元。
        第二:
            一個進程中可以有多個線程。
        第三:
            對於Python來說,它的進程和線程與其他語言有差別:Python有GIL鎖。
            GIL鎖保證一個進程中同一時刻只有一個線程被cpu調度。
        注意:IO密集型操作可以使用多線程;計算密集型可以使用多進程;
    3. Lock和RLock
    4. 線程池
    5. threading.local
    6. 常用方法
    7. 面向對象補充:
        class Foo(object):

            def __init__(self):
                object.__setattr__(self, 'info', {})  # 在對象中設置值的本質

            def __setattr__(self, key, value):
                self.info[key] = value

            def __getattr__(self, item):
                print(item)
                return self.info[item]

        obj = Foo()
        obj.name = 'alex'
        print(obj.name)

今日內容:
    1. 進程
    2. 數據共享
    3. 鎖
    4. 進程池
    5. 模塊(爬蟲)
        - requests
        - bs4(beautifulsoup)
    6. 協程

內容詳細:
    1. 進程
        - 進程間數據不共享
            data_list = []

            def task(arg):
                data_list.append(arg)
                print(data_list)

            def run():
                for i in range(10):
                    p = multiprocessing.Process(target=task,args=(i,))
                    # p = threading.Thread(target=task,args=(i,))
                    p.start()

            if __name__ == '__main__':
                run()

        - 常用功能:
            - join
            - daemon
            - name
            - multiprocessing.current_process()
            - multiprocessing.current_process().ident/pid

        - 類繼承方式建立進程
            class MyProcess(multiprocessing.Process):

                def run(self):
                    print('當前進程',multiprocessing.current_process())

            def run():
                p1 = MyProcess()
                p1.start()

                p2 = MyProcess()
                p2.start()

            if __name__ == '__main__':
                run()

    2. 
進程間數據共享
        Queue:
            linux:
                q = multiprocessing.Queue()

                def task(arg,q):
                    q.put(arg)

                def run():
                    for i in range(10):
                        p = multiprocessing.Process(target=task, args=(i, q,))
                        p.start()

                    while True:
                        v = q.get()
                        print(v)

                run()

            windows:
                def task(arg,q):
                    q.put(arg)

                if __name__ == '__main__':
                    q = multiprocessing.Queue()
                    for i in range(10):
                        p = multiprocessing.Process(target=task,args=(i,q,))
                        p.start()
                    while True:
                        v = q.get()
                        print(v)

        Manager:(*)
            Linux:
                m = multiprocessing.Manager()
                dic = m.dict()

                def task(arg):
                    dic[arg] = 100

                def run():
                    for i in range(10):
                        p = multiprocessing.Process(target=task, args=(i,))
                        p.start()

                    input('>>>')
                    print(dic.values())

                if __name__ == '__main__':
                    run()

            windows:
                def task(arg,dic):
                    time.sleep(2)
                    dic[arg] = 100

                if __name__ == '__main__':
                    m = multiprocessing.Manager()
                    dic = m.dict()

                    process_list = []
                    for i in range(10):
                        p = multiprocessing.Process(target=task, args=(i,dic,))
                        p.start()

                        process_list.append(p)

                    while True:
                        count = 0
                        for p in process_list:
                            if not p.is_alive():
                                count += 1
                        if count == len(process_list):
                            break
                    print(dic)

    3. 進程鎖
        import time
        import threading
        import multiprocessing

        lock = multiprocessing.RLock()

        def task(arg):
            print('鬼子來了')
            lock.acquire()
            time.sleep(2)
            print(arg)
            lock.release()

        if __name__ == '__main__':
            p1 = multiprocessing.Process(target=task,args=(1,))
            p1.start()

            p2 = multiprocessing.Process(target=task, args=(2,))
            p2.start()

        為什麼要加鎖?

    4. 進程池
        import time
        from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor

        def task(arg):
            time.sleep(2)
            print(arg)

        if __name__ == '__main__':
            pool = ProcessPoolExecutor(5)
            for i in range(10):
                pool.submit(task,i)

    5. 初識爬蟲:
        安裝:
            pip3 install requests
            pip3 install beautifulsoup4

        問題:
            找不到內部指令?
                方式一(使用pip3的完整路徑):
                    C:\Users\Administrator\AppData\Local\Programs\Python\Python36\Scripts\pip3 install requests
                方式二(先將以下目錄加入環境變量 PATH,再執行命令):
                    C:\Users\Administrator\AppData\Local\Programs\Python\Python36\Scripts
                    pip3 install requests

        示例:
            import requests
            from bs4 import BeautifulSoup
            from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor

            # 模擬瀏覽器發送請求
            # 內部建立 sk = socket.socket()
            # 和抽屜進行socket連接 sk.connect(...)
            # sk.sendall('...')
            # sk.recv(...)

            def task(url):
                print(url)
                r1 = requests.get(
                    url=url,
                    headers={
                        'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36'
                    }
                )

                # 查看下載下來的文本信息
                soup = BeautifulSoup(r1.text,'html.parser')
                print(soup.text)
                # content_list = soup.find('div',attrs={'id':'content-list'})
                # for item in content_list.find_all('div',attrs={'class':'item'}):
                #     title = item.find('a').text.strip()
                #     target_url = item.find('a').get('href')
                #     print(title,target_url)

            def run():
                pool = ThreadPoolExecutor(5)
                for i in range(1,50):
                    pool.submit(task,'https://dig.chouti.com/all/hot/recent/%s' %i)

            if __name__ == '__main__':
                run()

        相關:
            a. 以上示例進程和線程哪個好?
                - 線程好
            b. requests模塊模擬瀏覽器發送請求
                - 本質 requests.get(...):
                    - 建立socket客戶端
                    - 連接 【阻塞】
                    - 發送請求
                    - 接收響應【阻塞】
                    - 斷開連接
            c. 線程池和進程池

重點總結:
    1. 進程 *****
        - windows
        - linux
    2. 進程數據共享 *****
        - Queue
        - Manager
    3. 進程鎖 ***
    4. 進程池 *****
    5. 爬蟲(進程池/線程池的應用)
# by luffycity.com
""" Object-oriented programming supplement """

# Disabled example: item access (obj[key]) stored in an internal dict.
"""
class Foo(object):

    def __init__(self):
        self.info = {}

    def __setitem__(self, key, value):
        self.info[key] = value

    def __getitem__(self, item):
        return self.info.get(item)

obj = Foo()
obj['x'] = 123
print(obj['x'])
"""
# NOTE(review): nothing in this file uses this name, and it shadows the
# built-in globals(). Kept as-is; confirm it is unneeded before removing.
from flask import globals

# Disabled example: attribute access routed through an internal dict.
# class Foo(object):
#
#     def __init__(self):
#         object.__setattr__(self, 'info', {})  # what setting a value on an object really does
#
#     def __setattr__(self, key, value):
#         self.info[key] = value
#
#     def __getattr__(self, item):
#         print(item)
#         return self.info[item]
#
# obj = Foo()
# obj.name = 'alex'
# print(obj.name)

# Build the list 0..9999 directly from the range instead of appending one
# element at a time, then print it once.
v = list(range(10000))
print(v)
# by luffycity.com
import multiprocessing
import threading

# ##################### Data is not shared between processes #####################
"""
data_list = []


def task(arg):
    data_list.append(arg)
    print(data_list)


def run():
    for i in range(10):
        p = multiprocessing.Process(target=task,args=(i,))
        # p = threading.Thread(target=task,args=(i,))
        p.start()

if __name__ == '__main__':
    run()
"""

# ##################### Commonly used process features #####################
"""
import time
def task(arg):
    time.sleep(2)
    print(arg)


def run():
    print('111111111')
    p1 = multiprocessing.Process(target=task,args=(1,))
    p1.name = 'pp1'
    p1.start()
    print('222222222')

    p2 = multiprocessing.Process(target=task, args=(2,))
    p2.name = 'pp2'
    p2.start()
    print('333333333')

if __name__ == '__main__':
    run()
"""

# ##################### Creating processes by subclassing #####################


class MyProcess(multiprocessing.Process):
    """Process subclass whose work is defined by overriding run()."""

    def run(self):
        """Print a label followed by the current process object."""
        current = multiprocessing.current_process()
        print('當前進程', current)


def run():
    """Create and start two MyProcess instances, one after the other."""
    for _ in range(2):
        child = MyProcess()
        child.start()


if __name__ == '__main__':
    run()
# by luffycity.com
import multiprocessing
import threading
import queue
import time

# This file contains only disabled examples (bare string statements); nothing
# below runs when the module is executed or imported.

# ##################### Sharing data between processes: multiprocessing.Queue #####################
"""
q = multiprocessing.Queue()

def task(arg,q):
    q.put(arg)


def run():
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i, q,))
        p.start()

    while True:
        v = q.get()
        print(v)

run()
"""

# ##################### Sharing data between processes: Manager #####################
# Fix: the example used `dic` without ever creating it; the shared dict must
# come from the manager (dic = m.dict()).
"""
def task(arg,dic):
    time.sleep(2)
    dic[arg] = 100

if __name__ == '__main__':
    m = multiprocessing.Manager()
    dic = m.dict()

    process_list = []
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i,dic,))
        p.start()

        process_list.append(p)

    while True:
        count = 0
        for p in process_list:
            if not p.is_alive():
                count += 1
        if count == len(process_list):
            break
    print(dic)
    # ...
"""

# ##################### Sharing data with other machines #####################
# Fix: task() is started with a single argument (the url), so it must accept
# exactly one parameter.
"""
def task(arg):
    pass

if __name__ == '__main__':
    while True:
        # 鏈接上指定的服務器
        # 去機器上獲取url
        url = 'adfasdf'
        p = multiprocessing.Process(target=task, args=(url,))
        p.start()
"""
# by luffycity.com
import time
import threading
import multiprocessing

# Module-level lock. It is handed to each child explicitly (see the main
# block): under the "spawn" start method every child re-imports this module
# and would otherwise build its own, unrelated lock.
lock = multiprocessing.RLock()


def task(arg, lock=lock, hold_seconds=2):
    """Announce arrival, then print ``arg`` while holding ``lock``.

    Args:
        arg: Value printed inside the critical section.
        lock: Lock guarding the critical section; defaults to the
            module-level lock.
        hold_seconds: Seconds to sleep while holding the lock before
            printing ``arg``.
    """
    print('鬼子來了')
    # ``with`` releases the lock even if the body raises.
    with lock:
        time.sleep(hold_seconds)
        print(arg)


if __name__ == '__main__':
    # Pass the same lock object to both children so they contend on one lock.
    p1 = multiprocessing.Process(target=task, args=(1, lock))
    p1.start()

    p2 = multiprocessing.Process(target=task, args=(2, lock))
    p2.start()
# by luffycity.com
import time
from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor


def task(arg, delay=2):
    """Sleep for ``delay`` seconds, then print ``arg``.

    Args:
        arg: Value to print.
        delay: Seconds to sleep before printing.
    """
    time.sleep(delay)
    print(arg)


if __name__ == '__main__':
    # The context manager shuts the pool down once all submitted tasks finish.
    with ProcessPoolExecutor(5) as pool:
        futures = [pool.submit(task, i) for i in range(10)]
    # result() re-raises any exception a worker hit, so failures are not
    # silently discarded along with the future.
    for future in futures:
        future.result()
# by luffycity.com
import requests
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor

# Simulate a browser sending a request.
# Internally a socket is created:          sk = socket.socket()
# and connected to the Chouti site:        sk.connect(...)
#                                          sk.sendall('...')
#                                          sk.recv(...)

# Browser-like User-Agent header sent with every request.
USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36'

# Seconds to wait for the server; without a timeout requests.get() can block
# a worker thread indefinitely.
REQUEST_TIMEOUT = 10


def task(url):
    """Download ``url`` and print the text content of the returned HTML.

    Args:
        url: Address of the page to fetch.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    print(url)
    r1 = requests.get(
        url=url,
        headers={'User-Agent': USER_AGENT},
        timeout=REQUEST_TIMEOUT,
    )

    # Inspect the downloaded text.
    soup = BeautifulSoup(r1.text,'html.parser')
    print(soup.text)
    # content_list = soup.find('div',attrs={'id':'content-list'})
    # for item in content_list.find_all('div',attrs={'class':'item'}):
    #     title = item.find('a').text.strip()
    #     target_url = item.find('a').get('href')
    #     print(title,target_url)


def run():
    """Fetch pages 1..49 of the hot list using a pool of five threads."""
    urls = ['https://dig.chouti.com/all/hot/recent/%s' % i for i in range(1, 50)]
    # The context manager waits for all submitted downloads and shuts the pool down.
    with ThreadPoolExecutor(5) as pool:
        submitted = [(url, pool.submit(task, url)) for url in urls]
    # Report failures instead of letting them disappear with the futures.
    for url, future in submitted:
        error = future.exception()
        if error is not None:
            print('failed: %s (%r)' % (url, error))


if __name__ == '__main__':
    run()
# by luffycity.com
import multiprocessing
import time


def task(arg, dic, delay=2):
    """Wait ``delay`` seconds, then store 100 under key ``arg`` in ``dic``.

    Args:
        arg: Key to write.
        dic: Mapping to update; pass a Manager dict for the write to be
            visible to other processes.
        delay: Seconds to sleep before writing.
    """
    time.sleep(delay)
    dic[arg] = 100


if __name__ == '__main__':
    m = multiprocessing.Manager()
    # Use the manager's shared dict: a plain {} is copied into each child, so
    # the parent would never see the children's writes.
    dic = m.dict()

    process_list = []
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i, dic,))
        p.start()
        process_list.append(p)

    # Wait for every child while the manager is still running; otherwise the
    # manager may be gone before the children write to the shared dict.
    for p in process_list:
        p.join()

    # Show the shared result, matching the Manager example in the course notes.
    print(dic)
    print('end')