1. asyncio 模塊
# Demo: run three coroutines concurrently with asyncio.
# NOTE: asyncio has no built-in HTTP client, but HTTP is layered on TCP,
# so raw HTTP requests can be sent over asyncio's TCP support (next demo).
import asyncio


async def task():
    """Simulate non-blocking I/O by sleeping asynchronously for 5 seconds."""
    print('start...')
    # FIX: @asyncio.coroutine / `yield from` were deprecated in 3.8 and
    # removed in Python 3.11; native `async def` / `await` is the
    # supported spelling.
    await asyncio.sleep(5)
    print('end')


async def main():
    # All three tasks sleep concurrently, so the whole run takes ~5s, not 15s.
    await asyncio.gather(task(), task(), task())


# FIX: asyncio.run() replaces the deprecated
# get_event_loop()/run_until_complete()/close() boilerplate.
asyncio.run(main())
import asyncio


async def task(host, url='/'):
    """Send a raw HTTP/1.0 GET over a plain TCP connection and print the reply.

    host: a bare hostname (no scheme) -- asyncio.open_connection resolves it
    directly, so 'www.baidu.com', NOT 'http://www.baidu.com'.
    """
    reader, writer = await asyncio.open_connection(host, 80)
    request_header_content = "GET %s HTTP/1.0\r\nHost: %s\r\n\r\n" % (url, host)
    writer.write(request_header_content.encode('utf-8'))
    await writer.drain()
    text = await reader.read()  # HTTP/1.0: server closes, read() drains all
    print(host, url, text)
    writer.close()  # FIX: the original leaked the socket


async def main():
    # FIX: the original passed full URLs ('http://www.baidu.com');
    # open_connection needs a hostname, so DNS resolution would have failed.
    await asyncio.gather(task('www.baidu.com'), task('www.cnblogs.com'))


asyncio.run(main())
import asyncio

import requests


async def task(fun, *args):
    """Run a blocking callable in the default thread-pool executor and await it.

    This is how a synchronous library (requests) is used from asyncio
    without blocking the event loop.
    """
    print(fun, args)
    # FIX: get_running_loop() is the supported way to obtain the loop from
    # inside a coroutine (get_event_loop() here is deprecated).
    loop = asyncio.get_running_loop()
    future = loop.run_in_executor(None, fun, *args)
    response = await future
    print(response.url, response.content)


async def main():
    await asyncio.gather(
        task(requests.get, 'http://bing.com'),
        task(requests.get, 'http://cnblogs.com'),
    )


asyncio.run(main())
# pip install aiohttp  -- a native asyncio HTTP client
# aiohttp + asyncio
import asyncio

import aiohttp


async def task(url):
    """Fetch *url* with aiohttp and print the response object."""
    # FIX: module-level aiohttp.request() without a session is deprecated;
    # ClientSession manages the connection pool and is closed cleanly by
    # the `async with` blocks.
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            print(response)


async def main():
    await asyncio.gather(task('http://bing.com'), task('http://cnblogs.com'))


asyncio.run(main())
2. tornado 模塊
# Tornado's async HTTP client.
# FIX: the `callback` argument to AsyncHTTPClient.fetch() was removed in
# Tornado 6; fetch() now returns a Future to be awaited.  Awaiting every
# fetch also removes the original COUNT/count global-counter hack that was
# only needed to know when to stop the IOLoop.
from tornado import ioloop
from tornado.httpclient import AsyncHTTPClient, HTTPRequest


def handle_response(response):
    """Print the body of a completed fetch, or a marker on failure."""
    if response.error:
        print('error')
    else:
        print(response.body)


async def fun():
    url_list = ['http://www.baidu.com', 'http://www.cnblogs.com']
    http_client = AsyncHTTPClient()
    # Start every fetch first (concurrent), then await each result.
    # raise_error=False keeps failures as response.error instead of raising.
    futures = [http_client.fetch(HTTPRequest(url), raise_error=False)
               for url in url_list]
    for future in futures:
        handle_response(await future)


# run_sync starts the loop, runs the coroutine, and stops the loop itself.
ioloop.IOLoop.current().run_sync(fun)
3. Twisted 模塊
#爬蟲異步,提升併發 from twisted.web.client import getPage from twisted.internet import reactor,defer def one_done(args): print(args) print(type(args)) def all_done(args): print(args) print(type(args)) reactor.stop() @defer.inlineCallbacks def tasks(url): res=getPage(bytes(url,'utf-8')) res.addCallback(one_done) yield res url_list=['http://www.baidu.com','http://www.cnblogs.com'] def_list=[] for i in url_list: v=tasks(i) def_list.append(v) d=defer.DeferredList(def_list) d.addBoth(all_done) reactor.run() #死循環
4. gevent 模塊
# pip install greenlet   (coroutine primitive + async I/O)
# pip install gevent     (builds on the greenlet module)
# FIX: monkey.patch_all() must run BEFORE importing modules that touch
# sockets/ssl (requests) -- patching after the fact is unreliable and is
# the classic gevent pitfall; the original called it last.
from gevent import monkey
monkey.patch_all()  # make blocking stdlib I/O cooperative

import gevent
import requests
from gevent.pool import Pool  # greenlet pool to cap concurrency

pool = Pool(3)  # at most 3 requests in flight at once


def task(method, url, req_kwargs):
    """Issue one HTTP request and print the URL and body of the response."""
    print(method, url, req_kwargs)
    response = requests.request(method, url, **req_kwargs)
    print(response.url)
    print(response.content)


# Unbounded version (one greenlet per request):
# gevent.joinall([
#     gevent.spawn(task, method="GET", url="http://cnblogs.com", req_kwargs={}),
#     gevent.spawn(task, method="GET", url='http://bing.com', req_kwargs={}),
# ])

# Pool-limited version:
gevent.joinall([
    pool.spawn(task, method="GET", url="http://cnblogs.com", req_kwargs={}),
    pool.spawn(task, method="GET", url='http://bing.com', req_kwargs={}),
])
# gevent + requests bundled into one package: grequests
import grequests

# Build the (not-yet-sent) request objects, then send them all
# concurrently with map() and print the list of responses.
pending = [
    grequests.get('http://cnblogs.com'),
    grequests.get('http://bing.com'),
    grequests.get('http://che.com'),
]
print(grequests.map(pending))
總結:
自定義異步爬蟲 IO 時,推薦順序:
#gevent -> Twisted -> Tornado -> asyncio