相似於 selenium,pyppeteer 也能渲染網頁,可是它是異步的。
pip install pyppeteer
python
# Requires Python 3.7+ (asyncio.run); originally written against Python 3.7.5.
import asyncio

from pyppeteer import launch
from pyquery import PyQuery as pq


async def main():
    """Load the JS-rendered quotes page and print how many quotes it contains.

    Launches a browser through pyppeteer, opens a single page, navigates to
    the demo site, parses the rendered HTML with pyquery and prints the
    number of elements matching the ``.quote`` selector.
    """
    browser = await launch()
    try:
        page = await browser.newPage()
        await page.goto("http://quotes.toscrape.com/js/")
        doc = pq(await page.content())
        print("Quotes:", doc(".quote").length)
    finally:
        # Close the browser even when navigation or parsing fails, so the
        # launched browser process is not left behind.
        await browser.close()


asyncio.run(main())
import asyncio

from pyppeteer import launch
from pyquery import PyQuery as pq

# Resource types whose requests are aborted instead of being fetched.
_BLOCKED_RESOURCE_TYPES = frozenset(
    {"image", "media", "eventsource", "websocket", "stylesheet", "font"}
)


class Global:
    """Holder for state shared by all concurrent ``fetch`` tasks."""

    # The single browser instance; assigned in ``main`` before any fetch runs.
    browser = None


async def intercept_request(req):
    """Abort requests for blocked resource types and let all others through.

    Args:
        req: The intercepted pyppeteer request; its ``resourceType`` decides
            whether it is aborted or continued.
    """
    if req.resourceType in _BLOCKED_RESOURCE_TYPES:
        await req.abort()
    else:
        await req.continue_()


async def fetch():
    """Open one page in the shared browser, load the timeline, print its link count.

    The page gets a custom user agent and viewport, has request interception
    enabled (see ``intercept_request``), and is always closed afterwards.
    """
    page = await Global.browser.newPage()
    try:
        # Custom user agent.
        await page.setUserAgent(
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299"
        )
        await page.setViewport({"width": 1080, "height": 960})
        await page.setRequestInterception(True)
        page.on("request", intercept_request)
        await page.goto("https://juejin.im/timeline")
        # Fixed pause before reading the DOM -- presumably to give the page's
        # scripts time to render; confirm whether 3 seconds is sufficient.
        await asyncio.sleep(3)
        doc = pq(await page.content())
        print("Quotes:", doc("a").length)
    finally:
        # Release the tab even if navigation or parsing raised.
        await page.close()


async def main():
    """Launch one browser and run ten ``fetch`` tasks concurrently in it."""
    Global.browser = await launch()
    try:
        # Concurrency: all ten pages are driven at the same time.
        await asyncio.gather(*[fetch() for _ in range(10)])
    finally:
        # Shut the browser down even when one of the fetches failed.
        await Global.browser.close()


# asyncio.run creates and closes its own event loop; it replaces the
# deprecated get_event_loop().run_until_complete(...) entry-point pattern.
asyncio.run(main())