Defining a special function and the coroutine object
import asyncio
import time

# Define a "special" function
# Special: calling it returns a coroutine object, and the statements inside the
# function body are not executed immediately

# Create a coroutine object
async def test(num):
    print(num)

c = test(10)
print(c)
#<coroutine object test at 0x0000025058808A40>
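The snippets in this article drive coroutines through the classic loop API (get_event_loop / ensure_future / run_until_complete). On Python 3.7+ the same coroutine can also be run with asyncio.run(), which creates and closes the loop for you; a minimal sketch:

import asyncio

async def test(num):
    print(num)

# asyncio.run() builds an event loop, runs the coroutine to completion, then closes the loop
asyncio.run(test(10))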
Wrapping the coroutine in a task object
import asyncio

# Wrap a task object
async def test(num):
    print(num)

c = test(10)
# Wrap the coroutine object in a task object
task = asyncio.ensure_future(c)
print(task)
#<Task pending coro=<test() running at H:/autoclient/test/test.py:6>>
Creating an event loop and running the task
import asyncio
import time

# Event loop object
async def request(url):
    print('requesting:', url)
    time.sleep(2)
    print('request done!', url)

c1 = request('www.1.com')
task_A = asyncio.ensure_future(c1)

# Create an event loop object
loop = asyncio.get_event_loop()
# Register the task object on the loop and start the event loop
loop.run_until_complete(task_A)
Binding a callback to a task object
import asyncio
import time

async def request(url):
    print('requesting:', url)
    time.sleep(2)
    print('request done!', url)
    return url

# Define a callback for the task object
# The task parameter is the task object this callback is bound to
def task_callback(task):
    print('i am task_callback()')
    # task.result() returns the value returned inside the special function behind the task
    print(task.result())

c = request('www.xxx.com')
task = asyncio.ensure_future(c)
task.add_done_callback(task_callback)
loop = asyncio.get_event_loop()
loop.run_until_complete(task)
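A done-callback always receives the task as its only argument. If the callback needs extra data, functools.partial is the usual way to pre-bind it; a minimal sketch (the 'done:' tag is just an illustrative value):

import asyncio
import functools

async def request(url):
    await asyncio.sleep(1)
    return url

# arguments bound by partial come first, the task object is passed last
def task_callback(tag, task):
    print(tag, task.result())

c = request('www.xxx.com')
task = asyncio.ensure_future(c)
task.add_done_callback(functools.partial(task_callback, 'done:'))
loop = asyncio.get_event_loop()
loop.run_until_complete(task)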
Multi-task asynchronous coroutines:
import asyncio
import time

start = time.time()

# Code from modules that do not support async must not appear inside the special function
async def request(url):
    print('requesting:', url)
    # time.sleep(2)  # the time module does not support async
    await asyncio.sleep(2)  # blocking operations must be suspended with the await keyword
    print('request done!', url)
    return url

urls = [
    'www.1.com',
    'www.2.com',
    'www.3.com'
]

def task_callback(task):
    print(task.result())

tasks = []  # task list: holds the task objects
for url in urls:
    c = request(url)
    task = asyncio.ensure_future(c)
    task.add_done_callback(task_callback)
    tasks.append(task)  # load every task object into one task list

loop = asyncio.get_event_loop()
# Register multiple tasks at once
# wait suspends the task objects in the task list
loop.run_until_complete(asyncio.wait(tasks))
print(time.time() - start)
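asyncio.gather is a common alternative to asyncio.wait when you want the return values collected in order instead of pulling them out of callbacks; a minimal sketch of the same three-URL example:

import asyncio
import time

async def request(url):
    await asyncio.sleep(2)
    return url

async def main():
    urls = ['www.1.com', 'www.2.com', 'www.3.com']
    # gather schedules the coroutines concurrently and returns their results in order
    results = await asyncio.gather(*[request(url) for url in urls])
    print(results)

start = time.time()
loop = asyncio.get_event_loop()
loop.run_until_complete(main())
print(time.time() - start)  # roughly 2 seconds, not 6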
Multi-task asynchronous crawler test: usually around 500 coroutines are opened
Flask server code:
from flask import Flask
import time

app = Flask(__name__)

@app.route('/bobo')
def index_bobo():
    time.sleep(2)
    return 'Hello bobo'

@app.route('/jay')
def index_jay():
    time.sleep(2)
    return 'Hello jay'

@app.route('/tom')
def index_tom():
    time.sleep(2)
    return 'Hello tom'

if __name__ == '__main__':
    app.run(threaded=True)
Crawler test code:
import asyncio
import time
import requests

start = time.time()

# Code from modules that do not support async must not appear inside the special function
# requests is synchronous, so these three requests still run one after another (about 6 s)
async def request(url):
    print('requesting:', url)
    response = requests.get(url)
    return response.text

urls = [
    'http://127.0.0.1:5000/bobo',
    'http://127.0.0.1:5000/tom',
    'http://127.0.0.1:5000/jay'
]

def parse(task):
    page_text = task.result()
    print(page_text + ', data fetched!!!')

tasks = []
for url in urls:
    c = request(url)
    task = asyncio.ensure_future(c)
    task.add_done_callback(parse)
    tasks.append(task)

loop = asyncio.get_event_loop()
loop.run_until_complete(asyncio.wait(tasks))
print(time.time() - start)
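If you have to stay with requests, one workaround (not used in this article, which switches to aiohttp below) is to push the blocking call onto a thread pool with loop.run_in_executor; a sketch, assuming the same local Flask server:

import asyncio
import time
import requests

start = time.time()

urls = [
    'http://127.0.0.1:5000/bobo',
    'http://127.0.0.1:5000/tom',
    'http://127.0.0.1:5000/jay'
]

async def request(url):
    loop = asyncio.get_event_loop()
    # run the blocking requests.get call in the default thread pool
    response = await loop.run_in_executor(None, requests.get, url)
    return response.text

async def main():
    results = await asyncio.gather(*[request(url) for url in urls])
    for page_text in results:
        print(page_text + ', data fetched!!!')

loop = asyncio.get_event_loop()
loop.run_until_complete(main())
print(time.time() - start)  # about 2 seconds again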
Using aiohttp:
import asyncio
import time
import aiohttp

start = time.time()

# Code from modules that do not support async must not appear inside the special function

# The basic skeleton:
# async def request(url):
#     with aiohttp.ClientSession() as s:
#         # s.get/post are used almost exactly like requests.get/post: url, headers, data/params
#         # to use a proxy with s.get: proxy="http://ip:port"
#         with s.get(url) as response:
#             # response.text() returns the response body as a string
#             # response.read() returns it as bytes
#             page_text = response.text()
#             return page_text

# Flesh out the skeleton with two details:
# Detail 1: add the async keyword in front of every with
# Detail 2: add the await keyword in front of the get call and in front of response.text()
#           to suspend them manually
async def request(url):
    async with aiohttp.ClientSession() as s:
        # s.get/post are used almost exactly like requests.get/post: url, headers, data/params
        # to use a proxy with s.get: proxy="http://ip:port"
        async with await s.get(url) as response:
            # response.text() returns the response body as a string
            # response.read() returns it as bytes
            page_text = await response.text()
            return page_text

urls = []
for i in range(500):
    urls.append('http://127.0.0.1:5000/bobo')

def parse(task):
    page_text = task.result()
    print(page_text + ', data fetched!!!')

tasks = []
for url in urls:
    c = request(url)
    task = asyncio.ensure_future(c)
    task.add_done_callback(parse)
    tasks.append(task)

loop = asyncio.get_event_loop()
loop.run_until_complete(asyncio.wait(tasks))
print(time.time() - start)
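When the heading above says "usually around 500 coroutines", it helps to cap concurrency so the target server is not overwhelmed. A sketch that shares one ClientSession and limits in-flight requests with asyncio.Semaphore (the limit of 100 is an arbitrary illustrative value):

import asyncio
import time
import aiohttp

start = time.time()
urls = ['http://127.0.0.1:5000/bobo'] * 500

async def request(url, sem, session):
    # the semaphore caps how many requests are in flight at the same time
    async with sem:
        async with session.get(url) as response:
            return await response.text()

async def main():
    sem = asyncio.Semaphore(100)
    # one shared ClientSession is cheaper than opening a new one per request
    async with aiohttp.ClientSession() as session:
        results = await asyncio.gather(*[request(url, sem, session) for url in urls])
        print(len(results), 'responses')

loop = asyncio.get_event_loop()
loop.run_until_complete(main())
print(time.time() - start)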
Case study:
import aiohttp
import asyncio
from lxml import etree

all_titles = []

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'
}

async def request(url):
    async with aiohttp.ClientSession() as s:
        async with await s.get(url, headers=headers) as response:
            page_text = await response.text()
            return page_text

urls = []
url = 'http://wz.sun0769.com/index.php/question/questionType?type=4&page=%d'
for page in range(100):
    u_page = page * 30
    new_url = format(url % u_page)
    urls.append(new_url)

tasks = []

def parse(task):
    page_text = task.result()
    page_text = page_text.encode('gb2312').decode('gbk')
    tree = etree.HTML(page_text)
    tr_list = tree.xpath('//*[@id="morelist"]/div/table[2]//tr/td/table//tr')
    for tr in tr_list:
        title = tr.xpath('./td[2]/a[2]/text()')[0]
        print(title)
        all_titles.append(title)

for url in urls:
    c = request(url)
    task = asyncio.ensure_future(c)
    task.add_done_callback(parse)
    tasks.append(task)

loop = asyncio.get_event_loop()
loop.run_until_complete(asyncio.wait(tasks))
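The encode('gb2312').decode('gbk') round trip in parse works around the page being decoded with the wrong charset. aiohttp can also be told the codec directly via the encoding parameter of response.text(); a sketch for a single page (whether 'gbk' is the right codec is the same assumption the trick above makes):

import asyncio
import aiohttp

async def fetch_gbk(url):
    async with aiohttp.ClientSession() as s:
        async with s.get(url) as response:
            # decode the body as gbk right away instead of re-encoding it afterwards
            return await response.text(encoding='gbk')

loop = asyncio.get_event_loop()
page_text = loop.run_until_complete(
    fetch_gbk('http://wz.sun0769.com/index.php/question/questionType?type=4&page=0'))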
Selenium demo:
from selenium import webdriver
from time import sleep

# The argument is the path to your browser driver; the r'' prefix prevents character escaping
driver = webdriver.Chrome(r'chromedriver.exe')
# Open the Baidu page with get
driver.get("http://www.baidu.com")
# Find the "設置" (settings) option on the page and click it
driver.find_elements_by_link_text('設置')[0].click()
sleep(2)
# In the settings panel, open "搜索設置" (search settings) to show 50 results per page
driver.find_elements_by_link_text('搜索設置')[0].click()
sleep(2)
# Select 50 results per page
m = driver.find_element_by_id('nr')
sleep(2)
m.find_element_by_xpath('.//option[3]').click()
sleep(2)
# Click save settings
driver.find_elements_by_class_name("prefpanelgo")[0].click()
sleep(2)
# Handle the alert that pops up: accept() to confirm, dismiss() to cancel
driver.switch_to_alert().accept()
sleep(2)
# Find Baidu's search box and type 美女
driver.find_element_by_id('kw').send_keys('美女')
sleep(2)
# Click the search button
driver.find_element_by_id('su').click()
sleep(2)
# In the results page, find the "美女_百度圖片" link and open it
driver.find_elements_by_link_text('美女_百度圖片')[0].click()
sleep(3)
# Close the browser
driver.quit()
Basic usage of Selenium:
from selenium import webdriver
from time import sleep

# chromedriver.exe is the browser driver file
bro = webdriver.Chrome(executable_path='chromedriver.exe')
# Send a request to the given url
bro.get('https://www.jd.com/')

# Search for a product via the search box
# The find family of methods can be used to locate tags
search_input = bro.find_element_by_xpath('//*[@id="key"]')
# Type the product name into the search box
search_input.send_keys('iphonex')
sleep(2)

btn = bro.find_element_by_xpath('//*[@id="search"]/div/div[2]/button')
btn.click()
sleep(2)

# Run js to scroll the page down by one screen
bro.execute_script('window.scrollTo(0,document.body.scrollHeight)')
sleep(2)
# bro.execute_script('window.scrollTo(0,-document.body.scrollHeight)')

page_text = bro.page_source
with open('./jingdong.html', 'w', encoding='utf-8') as fp:
    fp.write(page_text)
print(page_text)
sleep(4)
# Close the browser
bro.quit()
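The find_element_by_* and executable_path APIs above are the Selenium 3 style and were removed in Selenium 4. A sketch of the same search step in Selenium 4 style, assuming Selenium 4.6+ (Selenium Manager locates chromedriver automatically) and optional headless mode:

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

options = Options()
options.add_argument('--headless')  # run Chrome without opening a window
bro = webdriver.Chrome(options=options)

bro.get('https://www.jd.com/')
# find_element(By.XPATH, ...) replaces find_element_by_xpath
search_input = bro.find_element(By.XPATH, '//*[@id="key"]')
search_input.send_keys('iphonex')
bro.quit()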
Selenium action chains
from selenium import webdriver
from selenium.webdriver import ActionChains  # action chains
from time import sleep

bro = webdriver.Chrome(executable_path='chromedriver.exe')
bro.get('https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')

# Locate the tag to drag
# The tag lives inside an iframe sub-page; locating it directly with find will fail
# target_ele = bro.find_element_by_id('draggable')

# To locate a tag inside the iframe sub-page, switch into the frame first
bro.switch_to.frame('iframeResult')
target_ele = bro.find_element_by_id('draggable')

# Implement the drag with an action chain
action = ActionChains(bro)
# Click and hold
action.click_and_hold(target_ele)
for i in range(5):
    # perform() executes the actions queued on the action chain immediately
    action.move_by_offset(17, 0).perform()
    sleep(0.5)
action.release()
sleep(4)
bro.quit()
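ActionChains also offers drag_and_drop_by_offset, which queues click-and-hold, move, and release in a single call; a sketch reusing the setup above (the 85-pixel offset is just an illustrative distance, and the old-style Selenium 3 API of this example is kept):

from selenium import webdriver
from selenium.webdriver import ActionChains
from time import sleep

bro = webdriver.Chrome(executable_path='chromedriver.exe')
bro.get('https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')
bro.switch_to.frame('iframeResult')
target_ele = bro.find_element_by_id('draggable')

# one call queues click-and-hold, move by offset, and release
ActionChains(bro).drag_and_drop_by_offset(target_ele, 85, 0).perform()
sleep(2)
bro.quit()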