Python Day 35 併發編程(守護線程,鎖,信號量,事件,條件,定時器,線程隊列,Python標準模塊--concurrent.futures，協程，Greenlet模塊，Gevent模塊)

時間 2019-11-09

標籤 python day 併發編程守護線程信號事件條件定時器隊列標準模塊 concurrent.futures concurrent futures greenlet gevent 欄目 Python 简体版

原文原文鏈接

守護線程

不管是進程仍是線程,都遵循:守護xx會等待主xx運行完畢後銷燬.須要強調的是:運行完畢並不是終止運行
守護進程 會等待主進程的代碼執行結束而結束

    p=Process(target=sayhi,args=('egon',))
    p.daemon=True #must be set before p.start()

守護線程 會等待主線程執行完畢才結束,主線程會等待全部子線程結束而結束

    t=Thread(target=sayhi,args=('egon',))
    t.daemon=True #must be set before t.start(); replaces the deprecated t.setDaemon(True)

1 主進程在其代碼結束後就已經算運行完畢了（守護進程在此時就被回收）,而後主進程會一直等非守護的子進程都運行完畢後回收子進程的資源(不然會產生殭屍進程)，纔會結束， 
2 主線程在其餘非守護線程運行完畢後纔算運行完畢（守護線程在此時就被回收）。由於主線程的結束意味着進程的結束，進程總體的資源都將被回收，而進程必須保證非守護線程都運行完畢後才能結束。

import time
from threading import Thread
from multiprocessing import Process

def func1():
    time.sleep(3)
    print('in func1')

def func2():
    while True:
        time.sleep(0.5)
        print('in func2')

def func3():
        time.sleep(4)
        print('in func3')

if __name__ == '__main__':
    # func1 and func3 run in ordinary (non-daemon) threads, so the
    # interpreter waits for both of them before it exits.
    Thread(target=func1).start()
    Thread(target=func3).start()
    # func2 loops forever; as a daemon thread it is discarded once the main
    # thread and every non-daemon thread have finished.
    t = Thread(target=func2)
    # The flag must be set before start(). Thread.setDaemon() is deprecated
    # since Python 3.10 in favour of the daemon attribute.
    t.daemon = True
    t.start()
    print('主線程')
    # Process-based variant of the same experiment:
    # time.sleep(15)
    # print('====================================')
    # Process(target=func1).start()
    # p = Process(target=func2)
    # p.daemon=True
    # p.start()
    # print('主進程')

守護進程和守護線程

鎖

進程和線程都是數據不安全的,當多個線程對於global的變量進行賦值操作時,會產生問題

from threading import Thread
import os,time
def work():
    # Deliberately unsafe read-modify-write of the module-level counter n,
    # used to demonstrate a data race between threads.
    global n
    temp=n  # read the shared value
    time.sleep(0.1)  # pause between read and write so other threads can read the same value
    n=temp-1  # write back a result computed from a possibly stale read
if __name__ == '__main__':
    n=100
    l=[]
    for i in range(100):
        p=Thread(target=work)
        l.append(p)
        p.start()
    for p in l:
        p.join()

    print(n) #結果可能爲99

多個線程搶佔資源的狀況

import threading
R=threading.Lock()
R.acquire()
'''
對公共數據的操做
'''
R.release()

同步鎖

from threading import Thread,Lock
import os,time
def work():
    """Decrement the shared counter n by one while holding the global lock.

    Relies on two module-level names created by the caller: the counter n
    and the threading.Lock instance lock.
    """
    global n
    # `with lock` acquires the lock and always releases it on exit; a bare
    # acquire()/release() pair would leave the lock held if the body raised,
    # blocking every other thread forever.
    with lock:
        temp=n
        time.sleep(0.1)  # simulate slow work inside the critical section
        n=temp-1
if __name__ == '__main__':
    lock=Lock()
    n=100
    l=[]
    for i in range(100):
        p=Thread(target=work)
        l.append(p)
        p.start()
    for p in l:
        p.join()

    print(n) #結果確定爲0，由原來的併發執行變成串行，犧牲了執行效率保證了數據安全

同步鎖的引用

#不加鎖:併發執行,速度快,數據不安全
from threading import current_thread,Thread,Lock
import os,time
def task():
    """Decrement the shared counter n without locking (unsafe on purpose).

    Relies on the module-level counter n created by the caller.
    """
    global n
    # Thread.getName() is deprecated since Python 3.10; the name attribute
    # returns the same string.
    print('%s is running' %current_thread().name)
    temp=n
    time.sleep(0.5)  # pause between read and write so other threads can read the same value
    n=temp-1


if __name__ == '__main__':
    n=100
    lock=Lock()
    threads=[]
    start_time=time.time()
    for i in range(100):
        t=Thread(target=task)
        threads.append(t)
        t.start()
    for t in threads:
        t.join()

    stop_time=time.time()
    print('主:%s n:%s' %(stop_time-start_time,n))

'''
Thread-1 is running
Thread-2 is running
......
Thread-100 is running
主:0.5216062068939209 n:99
'''


#加鎖:未加鎖部分併發執行,加鎖部分串行執行,速度慢,數據安全
from threading import current_thread,Thread,Lock
import os,time
def task():
    """Do three seconds of concurrent work, then decrement n under the lock.

    Relies on two module-level names created by the caller: the counter n
    and the threading.Lock instance lock.
    """
    # Code outside the lock runs concurrently in every thread.
    time.sleep(3)
    # Thread.getName() is deprecated since Python 3.10; use .name instead.
    print('%s start to run' %current_thread().name)
    global n
    # Code inside the lock runs one thread at a time. The with-statement
    # releases the lock even if the body raises.
    with lock:
        temp=n
        time.sleep(0.5)
        n=temp-1

if __name__ == '__main__':
    n=100
    lock=Lock()
    threads=[]
    start_time=time.time()
    for i in range(100):
        t=Thread(target=task)
        threads.append(t)
        t.start()
    for t in threads:
        t.join()
    stop_time=time.time()
    print('主:%s n:%s' %(stop_time-start_time,n))

'''
Thread-1 start to run
Thread-2 start to run
......
Thread-100 start to run
主:53.294203758239746 n:0
'''

#有的同窗可能有疑問:既然加鎖會讓運行變成串行,那麼我在start以後當即使用join,就不用加鎖了啊,也是串行的效果啊
#沒錯:在start以後馬上使用join,確定會將100個任務的執行變成串行,毫無疑問,最終n的結果也確定是0,是安全的,但問題是
#start後當即join:任務內的全部代碼都是串行執行的,而加鎖,只是加鎖的部分即修改共享數據的部分是串行的
#單從保證數據安全方面,兩者均可以實現,但很明顯是加鎖的效率更高.
from threading import current_thread,Thread,Lock
import os,time
def task():
    """Sleep three seconds, then decrement the shared counter n without a lock.

    Relies on the module-level counter n created by the caller. No lock is
    taken here; this version is meant to be run with start() followed
    immediately by join(), which serialises the whole function.
    """
    time.sleep(3)
    # Thread.getName() is deprecated since Python 3.10; use .name instead.
    print('%s start to run' %current_thread().name)
    global n
    temp=n
    time.sleep(0.5)
    n=temp-1


if __name__ == '__main__':
    n=100
    lock=Lock()
    start_time=time.time()
    for i in range(100):
        t=Thread(target=task)
        t.start()
        t.join()
    stop_time=time.time()
    print('主:%s n:%s' %(stop_time-start_time,n))

'''
Thread-1 start to run
Thread-2 start to run
......
Thread-100 start to run
主:350.6937336921692 n:0 #耗時是多麼的恐怖
'''

）

互斥鎖與join的區別

互斥鎖與join的區別

信號量

同進程的同樣

Semaphore管理一個內置的計數器，
每當調用acquire()時內置計數器-1；
調用release() 時內置計數器+1；
計數器不能小於0；當計數器爲0時，acquire()將阻塞線程直到其餘線程調用release()。

與進程池是徹底不一樣的概念，進程池Pool(4)，最大隻能產生4個進程，並且從頭至尾都只是這四個進程，不會產生新的，而信號量是產生一堆線程/進程

實例：(同時只有5個線程能夠得到semaphore,便可以限制最大鏈接數爲5)：

from threading import Thread,Semaphore
import threading
import time
# def func():
#     if sm.acquire():
#         print (threading.currentThread().getName() + ' get semaphore')
#         time.sleep(2)
#         sm.release()
def func():
    """Hold one slot of the module-level semaphore sm for three seconds."""
    # The with-statement acquires sm on entry and always releases it on
    # exit, so an exception inside the block cannot leak a slot.
    with sm:
        # Thread.getName() is deprecated since Python 3.10; use .name.
        print('%s get sm' %threading.current_thread().name)
        time.sleep(3)
if __name__ == '__main__':
    sm=Semaphore(5)
    for i in range(23):
        t=Thread(target=func)
        t.start()

實例

實例

事件

同進程的同樣

線程的一個關鍵特性是每一個線程都是獨立運行且狀態不可預測。
若是程序中的其 他線程須要經過判斷某個線程的狀態來肯定本身下一步的操做,這時線程同步問題就會變得很是棘手。
爲了解決這些問題,咱們須要使用threading庫中的Event對象。 
對象包含一個可由線程設置的信號標誌,它容許線程等待某些事件的發生。
在 初始狀況下,Event對象中的信號標誌被設置爲假。
若是有線程等待一個Event對象, 而這個Event對象的標誌爲假,那麼這個線程將會被一直阻塞直至該標誌爲真。
一個線程若是將一個Event對象的信號標誌設置爲真,它將喚醒全部等待這個Event對象的線程。
若是一個線程等待一個已經被設置爲真的Event對象,那麼它將忽略這個事件, 繼續執行

event.isSet()：返回event的狀態值；
event.wait()：若是 event.isSet()==False將阻塞線程；
event.set()： 設置event的狀態值爲True，全部阻塞池的線程激活進入就緒狀態， 等待操做系統調度；
event.clear()：恢復event的狀態值爲False。

import threading
import time,random
from threading import Thread,Event

def conn_mysql():
    count=1
    while not event.is_set():
        if count > 3:
            raise TimeoutError('連接超時')
        print('<%s>第%s次嘗試連接' % (threading.current_thread().getName(), count))
        event.wait(0.5)
        count+=1
    print('<%s>連接成功' %threading.current_thread().getName())


def check_mysql():
    print('\033[45m[%s]正在檢查mysql\033[0m' % threading.current_thread().getName())
    time.sleep(random.randint(2,4))
    event.set()
if __name__ == '__main__':
    event=Event()
    conn1=Thread(target=conn_mysql)
    conn2=Thread(target=conn_mysql)
    check=Thread(target=check_mysql)

    conn1.start()
    conn2.start()
    check.start()

實例

實例

條件

使得線程等待，只有滿足某條件時，才釋放n個線程

Python提供的Condition對象提供了對複雜線程同步問題的支持。
Condition被稱爲條件變量，除了提供與Lock相似的acquire和release方法外，還提供了wait和notify方法。
線程首先acquire一個條件變量，而後判斷一些條件。若是條件不知足則wait；
若是條件知足，進行一些處理改變條件後，經過notify方法通知其餘線程，其餘處於wait狀態的線程接到通知後會從新判斷條件。
不斷的重複這一過程，從而解決複雜的同步問題。

import threading

def run(n):
    """Block on the module-level condition con until notified, then report n."""
    # Condition.wait() must be called with the underlying lock held; the
    # with-statement acquires it and guarantees it is released afterwards,
    # even if the body raises.
    with con:
        con.wait()
        print("run the thread: %s" % n)

if __name__ == '__main__':

    con = threading.Condition()
    for i in range(10):
        # Daemon threads: workers still waiting on the condition when the
        # user quits must not keep the interpreter alive forever.
        t = threading.Thread(target=run, args=(i,), daemon=True)
        t.start()

    # Each number typed by the user wakes up that many waiting threads;
    # 'q' ends the program.
    while True:
        inp = input('>>>')
        if inp == 'q':
            break
        try:
            count = int(inp)
        except ValueError:
            # Anything that is not an integer used to crash the loop.
            print('please enter a number, or q to quit')
            continue
        with con:
            con.notify(count)
        print('****')

實例

定時器

定時器，指定n秒後執行某個操作

from threading import Timer
 
def hello():
    print("hello, world")
 
t = Timer(1, hello)
t.start()  # after 1 seconds, "hello, world" will be printed

實例

線程隊列

queue隊列：使用import queue，用法與進程Queue一樣

queue is especially useful in threaded programming when information must be exchanged safely between multiple threads.

import queue

q=queue.Queue()
q.put('first')
q.put('second')
q.put('third')

print(q.get())
print(q.get())
print(q.get())
'''
結果(先進先出):
first
second
third
'''

先進先出

class queue.Queue(maxsize=0) #先進先出

import queue

q=queue.LifoQueue()
q.put('first')
q.put('second')
q.put('third')

print(q.get())
print(q.get())
print(q.get())
'''
結果(後進先出):
third
second
first
'''

後進先出

class queue.LifoQueue(maxsize=0) #last in first out

import queue

q=queue.PriorityQueue()
#put進入一個元組,元組的第一個元素是優先級(一般是數字,也能夠是非數字之間的比較),數字越小優先級越高
q.put((20,'a'))
q.put((10,'b'))
q.put((30,'c'))

print(q.get())
print(q.get())
print(q.get())
'''
結果(數字越小優先級越高,優先級高的優先出隊):
(10, 'b')
(20, 'a')
(30, 'c')
'''

優先級隊列

class queue.PriorityQueue(maxsize=0) #存儲數據時可設置優先級的隊列

Constructor for a priority queue. maxsize is an integer that sets the upperbound limit on the number of items that can be placed in the queue. Insertion will block once this size has been reached, until queue items are consumed. If maxsize is less than or equal to zero, the queue size is infinite.

The lowest valued entries are retrieved first (the lowest valued entry is the one returned by sorted(list(entries))[0]). A typical pattern for entries is a tuple in the form: (priority_number, data).

exception queue.Empty
Exception raised when non-blocking get() (or get_nowait()) is called on a Queue object which is empty.

exception queue.Full
Exception raised when non-blocking put() (or put_nowait()) is called on a Queue object which is full.

Queue.qsize()
Queue.empty() #return True if empty  
Queue.full() # return True if full 
Queue.put(item, block=True, timeout=None)
Put item into the queue. If optional args block is true and timeout is None (the default), block if necessary until a free slot is available. If timeout is a positive number, it blocks at most timeout seconds and raises the Full exception if no free slot was available within that time. Otherwise (block is false), put an item on the queue if a free slot is immediately available, else raise the Full exception (timeout is ignored in that case).

Queue.put_nowait(item)
Equivalent to put(item, False).

Queue.get(block=True, timeout=None)
Remove and return an item from the queue. If optional args block is true and timeout is None (the default), block if necessary until an item is available. If timeout is a positive number, it blocks at most timeout seconds and raises the Empty exception if no item was available within that time. Otherwise (block is false), return an item if one is immediately available, else raise the Empty exception (timeout is ignored in that case).

Queue.get_nowait()
Equivalent to get(False).

Two methods are offered to support tracking whether enqueued tasks have been fully processed by daemon consumer threads.

Queue.task_done()
Indicate that a formerly enqueued task is complete. Used by queue consumer threads. For each get() used to fetch a task, a subsequent call to task_done() tells the queue that the processing on the task is complete.

If a join() is currently blocking, it will resume when all items have been processed (meaning that a task_done() call was received for every item that had been put() into the queue).

Raises a ValueError if called more times than there were items placed in the queue.

Queue.join() block直到queue被消費完畢

更多方法說明

更多方法說明

Python標準模塊--concurrent.futures

1 介紹
concurrent.futures模塊提供了高度封裝的異步調用接口
ThreadPoolExecutor：線程池，提供異步調用
ProcessPoolExecutor: 進程池，提供異步調用
Both implement the same interface, which is defined by the abstract Executor class.

2 基本方法
submit(fn, *args, **kwargs)
異步提交任務

map(func, *iterables, timeout=None, chunksize=1) 
取代for循環submit的操做,和進程的map有區別,須要手動shutdown()

shutdown(wait=True) 
至關於進程池的pool.close()+pool.join()操做
wait=True，等待池內全部任務執行完畢回收完資源後才繼續
wait=False，當即返回，並不會等待池內的任務執行完畢
但無論wait參數爲什麼值，整個程序都會等到全部任務執行完畢
submit和map必須在shutdown以前

result(timeout=None)
取得結果

add_done_callback(fn)
回調函數

#介紹
The ProcessPoolExecutor class is an Executor subclass that uses a pool of processes to execute calls asynchronously. ProcessPoolExecutor uses the multiprocessing module, which allows it to side-step the Global Interpreter Lock but also means that only picklable objects can be executed and returned.

class concurrent.futures.ProcessPoolExecutor(max_workers=None, mp_context=None)
An Executor subclass that executes calls asynchronously using a pool of at most max_workers processes. If max_workers is None or not given, it will default to the number of processors on the machine. If max_workers is lower or equal to 0, then a ValueError will be raised.


#用法
from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor

import os,time,random
def task(n):
    print('%s is runing' %os.getpid())
    time.sleep(random.randint(1,3))
    return n**2

if __name__ == '__main__':

    # Leaving the with-block calls executor.shutdown(wait=True), so the
    # worker processes are reclaimed even if a submit() call raises.
    with ProcessPoolExecutor(max_workers=3) as executor:
        futures=[executor.submit(task,i) for i in range(11)]
    # Every task has finished by now, so result() returns immediately.
    print('+++>')
    for future in futures:
        print(future.result())

ProcessPoolExecutor

#介紹
ThreadPoolExecutor is an Executor subclass that uses a pool of threads to execute calls asynchronously.
class concurrent.futures.ThreadPoolExecutor(max_workers=None, thread_name_prefix='')
An Executor subclass that uses a pool of at most max_workers threads to execute calls asynchronously.

Changed in version 3.5: If max_workers is None or not given, it will default to the number of processors on the machine, multiplied by 5, assuming that ThreadPoolExecutor is often used to overlap I/O instead of CPU work and the number of workers should be higher than the number of workers for ProcessPoolExecutor.

New in version 3.6: The thread_name_prefix argument was added to allow users to control the threading.Thread names for worker threads created by the pool for easier debugging.

#用法
與ProcessPoolExecutor相同

ThreadPoolExecutor

from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor

import os,time,random
def task(n):
    print('%s is runing' %os.getpid())
    time.sleep(random.randint(1,3))
    return n**2

if __name__ == '__main__':

    executor=ThreadPoolExecutor(max_workers=3)

    # for i in range(11):
    #     future=executor.submit(task,i)

    executor.map(task,range(1,12)) #map取代了for+submit

map的用法

from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor
from multiprocessing import Pool
import requests
import json
import os

def get_page(url):
    """Download url; return {'url': ..., 'text': ...} on HTTP 200, else None."""
    pid=os.getpid()
    print('<進程%s> get %s' %(pid,url))
    response=requests.get(url)
    if response.status_code != 200:
        return None
    return dict(url=url,text=response.text)

def parse_page(res):
    """Done-callback: append the size of a downloaded page to db.txt.

    res is the finished Future whose result is either a dict with 'url'
    and 'text' keys or None (no page was downloaded).
    """
    page=res.result()
    if page is None:
        # Indexing None would raise TypeError inside the callback; skip
        # pages that were not downloaded instead.
        return
    print('<進程%s> parse %s' %(os.getpid(),page['url']))
    parse_res='url:<%s> size:[%s]\n' %(page['url'],len(page['text']))
    with open('db.txt','a') as f:
        f.write(parse_res)


if __name__ == '__main__':
    urls=[
        'https://www.baidu.com',
        'https://www.python.org',
        'https://www.openstack.org',
        'https://help.github.com/',
        'http://www.sina.com.cn/'
    ]

    # Equivalent version with multiprocessing.Pool:
    # p=Pool(3)
    # for url in urls:
    #     p.apply_async(get_page,args=(url,),callback=parse_page)
    # p.close()
    # p.join()

    # The with-block shuts the pool down once every download has finished.
    with ProcessPoolExecutor(3) as p:
        for url in urls:
            # parse_page receives the finished Future object, not the page
            # itself, and has to call .result() on it.
            p.submit(get_page,url).add_done_callback(parse_page)

回調函數

協程

以前咱們學習了線程、進程的概念，瞭解了在操做系統中進程是資源分配的最小單位,線程是CPU調度的最小單位。按道理來講咱們已經算是把cpu的利用率提升不少了。可是咱們知道不管是建立多進程仍是建立多線程來解決問題，都要消耗必定的時間來建立進程、建立線程、以及管理他們之間的切換。

　　隨着咱們對於效率的追求不斷提升，基於單線程來實現併發又成爲一個新的課題，即只用一個主線程（很明顯可利用的cpu只有一個）狀況下實現併發。這樣就能夠節省建立線進程所消耗的時間。

　　爲此咱們須要先回顧下併發的本質：切換+保存狀態

 　　cpu正在運行一個任務，會在兩種狀況下切走去執行其餘的任務（切換由操做系統強制控制），一種狀況是該任務發生了阻塞，另一種狀況是該任務計算的時間過長

　  　

　　ps：在介紹進程理論時，說起進程的三種執行狀態，而線程纔是執行單位，因此也能夠將上圖理解爲線程的三種狀態 

　　 一：其中第二種狀況並不能提高效率，只是爲了讓cpu可以雨露均沾，實現看起來全部任務都被「同時」執行的效果，若是多個任務都是純計算的，這種切換反而會下降效率。

　　爲此咱們能夠基於yield來驗證。yield自己就是一種在單線程下能夠保存任務運行狀態的方法，咱們來簡單複習一下：

#1 yield能夠保存狀態，yield的狀態保存與操作系統保存線程狀態很像，可是yield是代碼級別控制的，更輕量級
#2 send能夠把一個函數的結果傳給另一個函數，以此實現單線程內程序之間的切換

引子

#串行執行
import time
def consumer(res):
    '''任務1:接收數據,處理數據'''
    pass

def producer(count=10000000):
    '''Task 2: produce data.

    Returns a list holding the integers 0 .. count-1. count defaults to
    ten million, the size used by the timing comparison in this script.
    '''
    res=[]
    # Kept as an explicit per-item loop so the timing stays comparable with
    # the generator-based version, which also does one step per item.
    for i in range(count):
        res.append(i)
    return res

start=time.time()
#串行執行
res=producer()
consumer(res) #寫成consumer(producer())會下降執行效率
stop=time.time()
print(stop-start) #1.5536692142486572



#基於yield併發執行
import time
def consumer():
    '''Task 1: receive data and process it (here the value is just discarded).'''
    while True:
        x=yield

def producer(count=10000000):
    '''Task 2: produce count items and hand each one to a consumer generator.

    count defaults to ten million, the size used by the timing comparison
    in this script.
    '''
    g=consumer()
    next(g)  # advance the generator to its first yield so send() is accepted
    for i in range(count):
        g.send(i)  # control switches to consumer and back for every item

start=time.time()
#基於yield保存狀態,實現兩個任務直接來回切換,即併發的效果
#PS:若是每一個任務中都加上打印,那麼明顯地看到兩個任務的打印是你一次我一次,即併發執行的.
producer()

stop=time.time()
print(stop-start) #2.0272178649902344

單純地切換反而會下降運行效率

二：第一種狀況的切換。在任務一遇到io狀況下，切到任務二去執行，這樣就能夠利用任務一阻塞的時間完成任務二的計算，效率的提高就在於此。編程

import time
def consumer():
    '''任務1:接收數據,處理數據'''
    while True:
        x=yield

def producer():
    '''任務2:生產數據'''
    g=consumer()
    next(g)
    for i in range(10000000):
        g.send(i)
        time.sleep(2)

start=time.time()
producer() #併發執行,可是任務producer遇到io就會阻塞住,並不會切到該線程內的其餘任務去執行

stop=time.time()
print(stop-start)

yield沒法作到遇到io阻塞

　對於單線程下，咱們不可避免程序中出現io操做，但若是咱們能在本身的程序中（即用戶程序級別，而非操做系統級別）控制單線程下的多個任務能在一個任務遇到io阻塞時就切換到另一個任務去計算，這樣就保證了該線程可以最大限度地處於就緒態，即隨時均可以被cpu執行的狀態，至關於咱們在用戶程序級別將本身的io操做最大限度地隱藏起來，從而能夠迷惑操做系統，讓其看到：該線程好像是一直在計算，io比較少，從而更多的將cpu的執行權限分配給咱們的線程。json

協程的本質就是在單線程下，由用戶本身控制一個任務遇到io阻塞了就切換另一個任務去執行，以此來提高效率。爲了實現它，咱們須要找尋一種能夠同時知足如下條件的解決方案：數組

#1. 能夠控制多個任務之間的切換，切換以前將任務的狀態保存下來，以便從新運行時，能夠基於暫停的位置繼續執行。
#2. 做爲1的補充：能夠檢測io操做，在遇到io操做的狀況下才發生切換

協程：是單線程下的併發，又稱微線程，纖程。英文名Coroutine。一句話說明什麼是協程：協程是一種用戶態的輕量級線程，即協程是由用戶程序自己控制調度的。

須要強調的是：多線程

#1. python的線程屬於內核級別的，即由操做系統控制調度（如單線程遇到io或執行時間過長就會被迫交出cpu執行權限，切換其餘線程運行）
#2. 單線程內開啓協程，一旦遇到io，就會從應用程序級別（而非操做系統）控制切換，以此來提高效率（！！！非io操做的切換與效率無關）

對比操做系統控制線程的切換，用戶在單線程內控制協程的切換

優勢以下：

#1. 協程的切換開銷更小，屬於程序級別的切換，操做系統徹底感知不到，於是更加輕量級
#2. 單線程內就能夠實現併發的效果，最大限度地利用cpu

缺點以下：

#1. 協程的本質是單線程下，沒法利用多核，能夠是一個程序開啓多個進程，每一個進程內開啓多個線程，每一個線程內開啓協程
#2. 協程指的是單個線程，於是一旦協程出現阻塞，將會阻塞整個線程

總結協程特色：

必須在只有一個單線程裏實現併發
修改共享數據不需加鎖
用戶程序裏本身保存多個控制流的上下文棧
附加：一個協程遇到IO操做自動切換到其它協程（如何實現檢測IO，yield、greenlet都沒法實現，就用到了gevent模塊（select機制））

Greenlet模塊

安裝：pip3 install greenlet

from greenlet import greenlet

def eat(name):
    print('%s eat 1' %name)
    g2.switch('egon')
    print('%s eat 2' %name)
    g2.switch()
def play(name):
    print('%s play 1' %name)
    g1.switch()
    print('%s play 2' %name)

g1=greenlet(eat)
g2=greenlet(play)

g1.switch('egon')#能夠在第一次switch時傳入參數，之後都不須要

greenlet實現狀態切換

單純的切換（在沒有io的狀況下或者沒有重複開闢內存空間的操做），反而會下降程序的執行速度

#順序執行
import time
def f1():
    res=1
    for i in range(100000000):
        res+=i

def f2():
    res=1
    for i in range(100000000):
        res*=i

start=time.time()
f1()
f2()
stop=time.time()
print('run time is %s' %(stop-start)) #10.985628366470337

#切換
from greenlet import greenlet
import time
def f1():
    res=1
    for i in range(100000000):
        res+=i
        g2.switch()

def f2():
    res=1
    for i in range(100000000):
        res*=i
        g1.switch()

start=time.time()
g1=greenlet(f1)
g2=greenlet(f2)
g1.switch()
stop=time.time()
print('run time is %s' %(stop-start)) # 52.763017892837524

效率對比

greenlet只是提供了一種比generator更加便捷的切換方式，當切到一個任務執行時若是遇到io，那就原地阻塞，仍然是沒有解決遇到IO自動切換來提高效率的問題。

單線程裏的這20個任務的代碼一般會既有計算操做又有阻塞操做，咱們徹底能夠在執行任務1時遇到阻塞，就利用阻塞的時間去執行任務2。。。。如此，才能提升效率，這就用到了Gevent模塊。

Gevent模塊

安裝：pip3 install gevent

Gevent 是一個第三方庫，能夠輕鬆經過gevent實現併發同步或異步編程，在gevent中用到的主要模式是Greenlet, 它是以C擴展模塊形式接入Python的輕量級協程。 Greenlet所有運行在主程序操做系統進程的內部，但它們被協做式地調度。

g1=gevent.spawn(func,1,2,3,x=4,y=5)建立一個協程對象g1，spawn括號內第一個參數是函數名，如eat，後面能夠有多個參數，能夠是位置實參或關鍵字實參，都是傳給函數eat的

g2=gevent.spawn(func2)

g1.join() #等待g1結束

g2.join() #等待g2結束

#或者上述兩步合做一步：gevent.joinall([g1,g2])

g1.value#拿到func1的返回值

用法介紹

import gevent
def eat(name):
    print('%s eat 1' %name)
    gevent.sleep(2)
    print('%s eat 2' %name)

def play(name):
    print('%s play 1' %name)
    gevent.sleep(1)
    print('%s play 2' %name)


g1=gevent.spawn(eat,'egon')
g2=gevent.spawn(play,name='egon')
g1.join()
g2.join()
#或者gevent.joinall([g1,g2])
print('主')

例：遇到io主動切換

上例gevent.sleep(2)模擬的是gevent能夠識別的io阻塞,而time.sleep(2)或其餘的阻塞,gevent是不能直接識別的須要用下面一行代碼,打補丁,就能夠識別了

from gevent import monkey;monkey.patch_all()必須放到被打補丁者的前面，如time，socket模塊以前

或者咱們乾脆記憶成：要用gevent，須要將from gevent import monkey;monkey.patch_all()放到文件的開頭

from gevent import monkey;monkey.patch_all()

import gevent
import time
def eat():
    print('eat food 1')
    time.sleep(2)
    print('eat food 2')

def play():
    print('play 1')
    time.sleep(1)
    print('play 2')

g1=gevent.spawn(eat)
g2=gevent.spawn(play)
gevent.joinall([g1,g2])
print('主')

例

咱們能夠用threading.current_thread().getName()來查看每一個g1和g2，查看的結果爲DummyThread-n，即假線程

from gevent import monkey;monkey.patch_all()
import threading
import gevent
import time
def eat():
    print(threading.current_thread().getName())
    print('eat food 1')
    time.sleep(2)
    print('eat food 2')

def play():
    print(threading.current_thread().getName())
    print('play 1')
    time.sleep(1)
    print('play 2')

g1=gevent.spawn(eat)
g2=gevent.spawn(play)
gevent.joinall([g1,g2])
print('主')

查看threading.current_thread().getName()

查看threading.current_thread().getName()

Gevent之同步與異步

from gevent import spawn,joinall,monkey;monkey.patch_all()

import time
def task(pid):
    """
    Some non-deterministic task
    """
    time.sleep(0.5)
    print('Task %s done' % pid)


def synchronous():  # 同步
    for i in range(10):
        task(i)

def asynchronous(): # 異步
    g_l=[spawn(task,i) for i in range(10)]
    joinall(g_l)
    print('DONE')
    
if __name__ == '__main__':
    print('Synchronous:')
    synchronous()
    print('Asynchronous:')
    asynchronous()
#  上面程序的重要部分是將task函數封裝到Greenlet內部線程的gevent.spawn。
#  初始化的greenlet列表存放在數組threads中，此數組被傳給gevent.joinall 函數，
#  後者阻塞當前流程，並執行全部給定的greenlet任務。執行流程只會在 全部greenlet執行完後纔會繼續向下走。

View Code

from gevent import monkey;monkey.patch_all()
import gevent
import requests
import time

def get_page(url):
    print('GET: %s' %url)
    response=requests.get(url)
    if response.status_code == 200:
        print('%d bytes received from %s' %(len(response.text),url))


start_time=time.time()
gevent.joinall([
    gevent.spawn(get_page,'https://www.python.org/'),
    gevent.spawn(get_page,'https://www.yahoo.com/'),
    gevent.spawn(get_page,'https://github.com/'),
])
stop_time=time.time()
print('run time is %s' %(stop_time-start_time))

協程應用：爬蟲

經過gevent實現單線程下的socket併發

注意：from gevent import monkey;monkey.patch_all()必定要放到導入socket模塊以前，不然gevent沒法識別socket的阻塞

from gevent import monkey;monkey.patch_all()
from socket import *
import gevent

#若是不想用monkey.patch_all()打補丁,能夠用gevent自帶的socket
# from gevent import socket
# s=socket.socket()

def server(server_ip,port):
    """Accept TCP clients on (server_ip, port) and serve each in its own greenlet."""
    # The with-statement closes the listening socket if bind() or accept()
    # raises, instead of leaking it.
    with socket(AF_INET,SOCK_STREAM) as s:
        s.setsockopt(SOL_SOCKET,SO_REUSEADDR,1)
        s.bind((server_ip,port))
        s.listen(5)
        while True:
            conn,addr=s.accept()
            # Hand the connection to talk() in a new greenlet and go
            # straight back to accepting.
            gevent.spawn(talk,conn,addr)

def talk(conn,addr):
    """Echo upper-cased data back to one client until it disconnects.

    conn is the connected socket, addr its (host, port) pair. The socket
    is always closed before returning.
    """
    try:
        while True:
            res=conn.recv(1024)
            if not res:
                # recv() returns b'' once the peer has closed the
                # connection; without this check the loop would spin
                # forever on a dead socket.
                break
            print('client %s:%s msg: %s' %(addr[0],addr[1],res))
            # sendall() keeps sending until the whole reply is written;
            # send() may transmit only part of it.
            conn.sendall(res.upper())
    except Exception as e:
        print(e)
    finally:
        conn.close()

if __name__ == '__main__':
    server('127.0.0.1',8080)

server

from socket import *

client=socket(AF_INET,SOCK_STREAM)
client.connect(('127.0.0.1',8080))


while True:
    msg=input('>>: ').strip()
    if not msg:continue

    client.send(msg.encode('utf-8'))
    msg=client.recv(1024)
    print(msg.decode('utf-8'))

client

from threading import Thread
from socket import *
import threading

def client(server_ip,port):
    """Connect to the echo server and exchange numbered greetings until it hangs up."""
    # The socket must be created inside the function: a module-level socket
    # would be shared by every thread, so all of them would use the same
    # connection and the same client port. The with-statement closes it
    # when the loop ends or an error occurs.
    with socket(AF_INET,SOCK_STREAM) as c:
        c.connect((server_ip,port))

        # Thread.getName() is deprecated since Python 3.10; use .name.
        name=threading.current_thread().name
        count=0
        while True:
            c.sendall(('%s say hello %s' %(name,count)).encode('utf-8'))
            msg=c.recv(1024)
            if not msg:
                # b'' means the server closed the connection.
                break
            print(msg.decode('utf-8'))
            count+=1
if __name__ == '__main__':
    for i in range(500):
        t=Thread(target=client,args=('127.0.0.1',8080))
        t.start()

多線程併發多個客戶端