import time

def crawl_page(url):
    print('crawling {}'.format(url))
    sleep_time = int(url.split('_')[-1])
    time.sleep(sleep_time)
    print('OK {}'.format(url))

def main(urls):
    for url in urls:
        crawl_page(url)

%time main(['url_1', 'url_2', 'url_3', 'url_4'])

########## output ##########
crawling url_1
OK url_1
crawling url_2
OK url_2
crawling url_3
OK url_3
crawling url_4
OK url_4
Wall time: 10 s
import asyncio
import nest_asyncio
nest_asyncio.apply()

async def crawl_page(url):
    print('crawling {}'.format(url))
    sleep_time = int(url.split('_')[-1])
    await asyncio.sleep(sleep_time)
    print('OK {}'.format(url))

async def main(urls):
    for url in urls:
        await crawl_page(url)

%time asyncio.run(main(['url_1', 'url_2', 'url_3', 'url_4']))

########## output ##########
crawling url_1
OK url_1
crawling url_2
OK url_2
crawling url_3
OK url_3
crawling url_4
OK url_4
Wall time: 10 s
Start with import asyncio: this library contains most of the magic we need to implement coroutines. The async keyword declares an asynchronous function, so crawl_page and main here both become asynchronous functions, and an asynchronous function is called with await. Execution with await behaves just like a normal Python call: the program blocks at that point, enters the called coroutine, and continues only after it returns, which is also what "await" literally means. In the code, await asyncio.sleep(sleep_time) pauses for the given number of seconds, while await crawl_page(url) runs the crawl_page() coroutine.
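As a quick aside, a minimal sketch (not taken from the column): calling an async function by itself does not run its body; it only creates a coroutine object, and nothing happens until that object is awaited or handed to the event loop.

import asyncio

async def greet():
    print('hello')

coro = greet()       # no output yet: this only creates a coroutine object
print(type(coro))    # <class 'coroutine'>
asyncio.run(coro)    # the body runs only when the event loop drives the coroutine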
The code above still runs synchronously, so it again takes 10 seconds.
To run the crawls concurrently, we can use the concept of a Task.
import asyncio
import nest_asyncio
nest_asyncio.apply()

async def crawl_page(url):
    print('crawling {}'.format(url))
    sleep_time = int(url.split('_')[-1])
    await asyncio.sleep(sleep_time)
    print('OK {}'.format(url))

async def main(urls):
    tasks = [asyncio.create_task(crawl_page(url)) for url in urls]
    for task in tasks:
        await task

%time asyncio.run(main(['url_1', 'url_2', 'url_3', 'url_4']))

########## output ##########
crawling url_1
crawling url_2
crawling url_3
crawling url_4
OK url_1
OK url_2
OK url_3
OK url_4
Wall time: 4.01 s
After a task is created with asyncio.create_task, the event loop schedules it for execution right away. If we never await the task, the code does not block on it. Since we want to wait for all tasks to finish before moving on, we use for task in tasks: await task. The total time now drops to about 4 seconds. The for loop can also be replaced with await asyncio.gather(*tasks), as the next example shows.
import asyncio
import nest_asyncio
nest_asyncio.apply()

async def crawl_page(url):
    print('crawling {}'.format(url))
    sleep_time = int(url.split('_')[-1])
    await asyncio.sleep(sleep_time)
    print('OK {}'.format(url))

async def main(urls):
    tasks = [asyncio.create_task(crawl_page(url)) for url in urls]
    await asyncio.gather(*tasks)

%time asyncio.run(main(['url_1', 'url_2', 'url_3', 'url_4']))
Look at the following code:
import asyncio
import nest_asyncio
nest_asyncio.apply()

async def worker_1():
    await asyncio.sleep(1)
    return 1

async def worker_2():
    await asyncio.sleep(2)
    return 2 / 0

async def worker_3():
    await asyncio.sleep(3)
    print('over worker_3')
    return 3

async def main():
    task_1 = asyncio.create_task(worker_1())
    task_2 = asyncio.create_task(worker_2())
    task_3 = asyncio.create_task(worker_3())

    await asyncio.sleep(2)
    task_3.cancel()

    res = await asyncio.gather(task_1, task_2, task_3, return_exceptions=True)
    print(res)

%time asyncio.run(main())

########## output ##########
# [1, ZeroDivisionError('division by zero'), CancelledError()]
# Wall time: 2 s
Note the return_exceptions parameter. In the code above, worker_2 raises a division-by-zero error; since worker_2 has no try/except to catch it, the exception would normally propagate and abort the program. Because return_exceptions=True is set, the exception is returned as one of the results instead, so the other tasks are unaffected. The CancelledError() in the output shows that task_3 was cancelled by cancel().
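For contrast, here is a minimal sketch (my own illustration, not part of the original column) of what happens when return_exceptions is left at its default of False: the first exception propagates out of asyncio.gather and has to be caught around the await itself.

import asyncio

async def ok():
    await asyncio.sleep(1)
    return 1

async def boom():
    await asyncio.sleep(2)
    return 2 / 0

async def main():
    try:
        # with the default return_exceptions=False, the ZeroDivisionError
        # raised inside boom() propagates out of gather()
        print(await asyncio.gather(ok(), boom()))
    except ZeroDivisionError as e:
        print('caught:', e)

asyncio.run(main())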
Show the code:
import asyncio
import nest_asyncio
nest_asyncio.apply()
import random

async def consumer(queue, id):
    while True:
        val = await queue.get()
        print('{} get a val: {}'.format(id, val))
        await asyncio.sleep(1)

async def producer(queue, id):
    for i in range(5):
        val = random.randint(1, 10)
        await queue.put(val)
        print('{} put a val: {}'.format(id, val))
        await asyncio.sleep(2)

async def main():
    queue = asyncio.Queue()

    consumer_1 = asyncio.create_task(consumer(queue, 'consumer_1'))
    consumer_2 = asyncio.create_task(consumer(queue, 'consumer_2'))

    producer_1 = asyncio.create_task(producer(queue, 'producer_1'))
    producer_2 = asyncio.create_task(producer(queue, 'producer_2'))

    await asyncio.sleep(10)
    consumer_1.cancel()
    consumer_2.cancel()

    await asyncio.gather(consumer_1, consumer_2, producer_1, producer_2, return_exceptions=True)

%time asyncio.run(main())

########## output ##########
# producer_1 put a val: 5
# producer_2 put a val: 3
# consumer_1 get a val: 5
# consumer_2 get a val: 3
# producer_1 put a val: 1
# producer_2 put a val: 3
# consumer_2 get a val: 1
# consumer_1 get a val: 3
# producer_1 put a val: 6
# producer_2 put a val: 10
# consumer_1 get a val: 6
# consumer_2 get a val: 10
# producer_1 put a val: 4
# producer_2 put a val: 5
# consumer_2 get a val: 4
# consumer_1 get a val: 5
# producer_1 put a val: 2
# producer_2 put a val: 8
# consumer_1 get a val: 2
# consumer_2 get a val: 8
# Wall time: 10 s
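The example above simply sleeps for 10 seconds and then cancels the consumers. As an alternative sketch (my own variation, not from the column), asyncio.Queue also offers task_done() and join(), which let us wait until every produced item has actually been consumed before shutting the consumers down:

import asyncio
import random

async def consumer(queue, id):
    while True:
        val = await queue.get()
        print('{} get a val: {}'.format(id, val))
        await asyncio.sleep(1)
        queue.task_done()    # tell the queue this item is fully processed

async def producer(queue, id):
    for i in range(5):
        val = random.randint(1, 10)
        await queue.put(val)
        print('{} put a val: {}'.format(id, val))
        await asyncio.sleep(2)

async def main():
    queue = asyncio.Queue()
    consumers = [asyncio.create_task(consumer(queue, 'consumer_{}'.format(i))) for i in (1, 2)]

    # let both producers run to completion
    await asyncio.gather(producer(queue, 'producer_1'), producer(queue, 'producer_2'))
    # wait until every item that was put() has been marked task_done()
    await queue.join()
    # the consumers loop forever, so cancel them explicitly once the work is drained
    for c in consumers:
        c.cancel()

asyncio.run(main())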
Finally, a practical comparison. First, the synchronous crawler for Douban's coming-soon movie page:

import requests
from bs4 import BeautifulSoup

def main():
    url = "https://movie.douban.com/cinema/later/beijing/"
    init_page = requests.get(url).content
    init_soup = BeautifulSoup(init_page, 'lxml')

    all_movies = init_soup.find('div', id="showing-soon")
    for each_movie in all_movies.find_all('div', class_="item"):
        all_a_tag = each_movie.find_all('a')
        all_li_tag = each_movie.find_all('li')

        movie_name = all_a_tag[1].text
        url_to_fetch = all_a_tag[1]['href']
        movie_date = all_li_tag[0].text

        response_item = requests.get(url_to_fetch).content
        soup_item = BeautifulSoup(response_item, 'lxml')
        img_tag = soup_item.find('img')

        print('{} {} {}'.format(movie_name, movie_date, img_tag['src']))

%time main()

########## output ##########
九龍不敗 07月02日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2560169035.jpg
善良的天使 07月02日 https://img1.doubanio.com/view/photo/s_ratio_poster/public/p2558266159.jpg
別歲 07月02日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2558138041.jpg
上海的女兒 07月02日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2555602094.jpg
愛寵大機密2 07月05日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2555923582.jpg
掃毒2天地對決 07月05日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2560684734.jpg
豬豬俠·難以想象的世界 07月05日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2560664101.jpg
他她他她 07月05日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2559292102.jpg
獅子王 07月12日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2559658750.jpg
命運之夜——天之杯II :迷失之蝶 07月12日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2560749451.jpg
寶萊塢機器人2.0:重生歸來 07月12日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2558657891.jpg
素人特工 07月12日 https://img1.doubanio.com/view/photo/s_ratio_poster/public/p2560447448.jpg
機動戰士高達NT 07月12日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2558661806.jpg
舞動吧!少年 07月12日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2555119986.jpg
嘿,蠢賊 07月16日 https://img1.doubanio.com/view/photo/s_ratio_poster/public/p2560832388.jpg
銀河補習班 07月18日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2560954373.jpg
小小的願望 07月18日 https://img1.doubanio.com/view/photo/s_ratio_poster/public/p2560659129.jpg
匠心 07月18日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2553935771.jpg
豬八戒·傳說 07月19日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2559590242.jpg
刀背藏身 07月19日 https://img1.doubanio.com/view/photo/s_ratio_poster/public/p2557644589.jpg
爲家而戰 07月19日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2559337905.jpg
Wall time: 22.1 s
And the asynchronous version using aiohttp:

import asyncio
import nest_asyncio
nest_asyncio.apply()
import aiohttp
from bs4 import BeautifulSoup

header = {'User-Agent': 'Mozilla/5.0'}   # assumed User-Agent string for the request headers

async def fetch_content(url):
    async with aiohttp.ClientSession(
        headers=header, connector=aiohttp.TCPConnector(ssl=False)
    ) as session:
        async with session.get(url) as response:
            return await response.text()

async def main():
    url = "https://movie.douban.com/cinema/later/beijing/"
    init_page = await fetch_content(url)
    init_soup = BeautifulSoup(init_page, 'lxml')

    movie_names, urls_to_fetch, movie_dates = [], [], []

    all_movies = init_soup.find('div', id="showing-soon")
    for each_movie in all_movies.find_all('div', class_="item"):
        all_a_tag = each_movie.find_all('a')
        all_li_tag = each_movie.find_all('li')

        movie_names.append(all_a_tag[1].text)
        urls_to_fetch.append(all_a_tag[1]['href'])
        movie_dates.append(all_li_tag[0].text)

    tasks = [fetch_content(url) for url in urls_to_fetch]
    pages = await asyncio.gather(*tasks)

    for movie_name, movie_date, page in zip(movie_names, movie_dates, pages):
        soup_item = BeautifulSoup(page, 'lxml')
        img_tag = soup_item.find('img')

        print('{} {} {}'.format(movie_name, movie_date, img_tag['src']))

%time asyncio.run(main())

########## output ##########
九龍不敗 07月02日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2560169035.jpg
善良的天使 07月02日 https://img1.doubanio.com/view/photo/s_ratio_poster/public/p2558266159.jpg
別歲 07月02日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2558138041.jpg
上海的女兒 07月02日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2555602094.jpg
愛寵大機密2 07月05日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2555923582.jpg
掃毒2天地對決 07月05日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2560684734.jpg
豬豬俠·難以想象的世界 07月05日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2560664101.jpg
他她他她 07月05日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2559292102.jpg
獅子王 07月12日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2559658750.jpg
命運之夜——天之杯II :迷失之蝶 07月12日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2560749451.jpg
寶萊塢機器人2.0:重生歸來 07月12日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2558657891.jpg
素人特工 07月12日 https://img1.doubanio.com/view/photo/s_ratio_poster/public/p2560447448.jpg
機動戰士高達NT 07月12日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2558661806.jpg
舞動吧!少年 07月12日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2555119986.jpg
嘿,蠢賊 07月16日 https://img1.doubanio.com/view/photo/s_ratio_poster/public/p2560832388.jpg
銀河補習班 07月18日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2560954373.jpg
小小的願望 07月18日 https://img1.doubanio.com/view/photo/s_ratio_poster/public/p2560659129.jpg
匠心 07月18日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2553935771.jpg
豬八戒·傳說 07月19日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2559590242.jpg
刀背藏身 07月19日 https://img1.doubanio.com/view/photo/s_ratio_poster/public/p2557644589.jpg
爲家而戰 07月19日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2559337905.jpg
Wall time: 5.82 s
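One practical refinement, as a hypothetical sketch (the limit of 5 and the fetch_all/fetch_limited names are my own, not part of the original code): when the list of detail pages grows large, asyncio.Semaphore can cap how many requests are in flight at once instead of firing them all simultaneously.

import asyncio
import aiohttp

async def fetch_all(urls, limit=5):
    # at most `limit` requests are in flight at any moment
    sem = asyncio.Semaphore(limit)
    async with aiohttp.ClientSession() as session:
        async def fetch_limited(url):
            async with sem:   # hold a semaphore slot for the duration of each request
                async with session.get(url) as response:
                    return await response.text()
        return await asyncio.gather(*(fetch_limited(url) for url in urls))

# usage sketch: pages = asyncio.run(fetch_all(urls_to_fetch))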
Source: the Geek Time column 極客時間《Python核心技術與實戰》 (Python Core Technologies and Practice).