首先確定是照搬它的項目簡介了哈哈哈哈
本文主要是透過一些小例子,把它的介紹基本都跑上一遍。嘻嘻
yarn add puppeteer # or "npm i puppeteer"
安裝 puppeteer 時,它會下載最新版本的 Chromium,以保證與 API 協同工作;如果要跳過下載的話,可以參考環境變量的設置。
// PNG: open a page and save a screenshot of it.
const puppeteer = require('puppeteer'); // load puppeteer

(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto(`https://example.com`);
    await page.screenshot({ path: 'example.png' });
  } finally {
    // Always shut Chromium down, even when navigation or the screenshot
    // fails; otherwise the child process is left running.
    await browser.close();
  }
})().catch((err) => {
  // Surface the failure instead of leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});
// PDF: open a page and print it to an A4 PDF file.
const puppeteer = require('puppeteer');

(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    // 'networkidle2' is passed as the waitUntil condition for the navigation.
    await page.goto('https://news.ycombinator.com', { waitUntil: 'networkidle2' });
    // page.pdf() accepts many more options than the two used here.
    await page.pdf({ path: 'hn.pdf', format: 'A4' });
  } finally {
    // Close the browser on every path so a failed run does not leak Chromium.
    await browser.close();
  }
})().catch((err) => {
  // Surface the failure instead of leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});
服務端的 demo
const puppeteer = require('puppeteer');
const Koa = require('koa');

const app = new Koa();

/**
 * Koa middleware that server-renders pages of a locally running SPA.
 *
 * When the request URL contains ".html", the same path is loaded from
 * http://localhost:1234 in headless Chromium and the resulting HTML is
 * used as the response body. Any other URL gets an empty object body.
 *
 * @param {object} ctx - Koa context; reads `ctx.request.url`, writes `ctx.body`.
 * @param {Function} next - Invokes the downstream middleware.
 * @returns {Promise<void>}
 */
async function getSpaContent(ctx, next) {
  if (/\.html/.test(ctx.request.url)) {
    const browser = await puppeteer.launch();
    try {
      const page = await browser.newPage();
      await page.goto(`http://localhost:1234${ctx.request.url}`);
      console.log(`visiting http://localhost:1234${ctx.request.url}`);
      // page.content() returns a promise for the serialized HTML.
      ctx.body = await page.content();
    } finally {
      // One browser is launched per request, so it must be closed on every
      // path; a failed navigation would otherwise leak a Chromium process.
      await browser.close();
    }
  } else {
    ctx.body = {};
  }
  await next();
}

app.use(getSpaContent);

/**
 * Reads the value following a `--port` flag from an argv-style array.
 *
 * A trailing `--port` with no value after it is ignored. When the flag is
 * given more than once, the last occurrence that has a value wins.
 *
 * @param {string[]} argv - Command-line arguments.
 * @param {string} fallback - Port to use when no flag is present.
 * @returns {string} The selected port.
 */
function readPort(argv, fallback) {
  let selected = fallback;
  for (let i = 0; i < argv.length - 1; i++) {
    if (argv[i] === '--port') {
      selected = argv[i + 1];
    }
  }
  return selected;
}

const port = readPort(process.argv, `3000`);

app.listen(port, () => {
  console.log(`listening in port with ${port}`);
});
const puppeteer = require('puppeteer');

const nextLink = `a[rel*="next"]`;
const ARTICLE_ITEM = `.repo-list-item`;
const TITLE_SELECTOR = `div > h3 > a`;
const STAR_SELECTOR = `div > .muted-link`;
const CONTENT_SELECTOR = `div > div > p`;
const mainLink = `https://github.com/search?p=1&q=javascript&type=Repositories&utf8=%E2%9C%93`;

// One entry per visited result page; each entry is an array of
// { title, star, content } records.
const data = [];

process.setMaxListeners(0);

/**
 * Crawls the search results starting at `mainLink`, following the "next"
 * link page by page and collecting title / star / description text for
 * every result item into `data`.
 */
(async () => {
  const browser = await puppeteer.launch({
    headless: true, // set to false to watch the run
  });
  try {
    const page = await browser.newPage();
    await page.setViewport({
      width: 980,
      height: 980,
    });
    await page.goto(mainLink);

    for (;;) {
      // Runs inside the page: the selectors are passed in as arguments
      // because the callback cannot see Node-side variables.
      const items = await page.evaluate((itemSel, titleSel, starSel, contentSel) => {
        // Reads `attr` from an element, or returns `defaultValue` when the
        // element is missing.
        function getDataWithNull(element, attr, defaultValue) {
          if (element !== null && element instanceof HTMLElement) {
            return element[attr];
          }
          return defaultValue;
        }

        function searchElement(parent = null) {
          const root = parent === null ? document : parent;
          return {
            title: getDataWithNull(root.querySelector(titleSel), 'innerText', ''),
            star: getDataWithNull(root.querySelector(starSel), 'innerText', ''),
            content: getDataWithNull(root.querySelector(contentSel), 'innerText', ''),
          };
        }

        return Array.from(document.querySelectorAll(itemSel)).map((item) => searchElement(item));
      }, ARTICLE_ITEM, TITLE_SELECTOR, STAR_SELECTOR, CONTENT_SELECTOR);
      data.push(items);

      // Return a plain boolean: a DOM node cannot be serialized back to Node.
      // Checking here, after scraping, ends the loop cleanly on the last page
      // instead of relying on page.click() throwing.
      const hasNext = await page.evaluate(
        (selector) => document.querySelector(selector) !== null,
        nextLink,
      );
      if (!hasNext) {
        break;
      }

      // Start waiting for the navigation before clicking so a fast
      // navigation cannot be missed.
      await Promise.all([
        page.waitForNavigation(),
        page.click(nextLink),
      ]);

      await page.screenshot({
        path: 'demo3.png', // snapshot of the page reached after each step
        fullPage: true,
      });
    }
  } catch (e) {
    // This is only a demo, so a failure simply ends the crawl, but report
    // it rather than hiding it.
    console.error(e);
  } finally {
    // Count the records actually collected instead of assuming 10 per page.
    const total = data.reduce((sum, pageItems) => sum + pageItems.length, 0);
    console.log(`共爬取 ${total}`);
    await browser.close();
  }
})();
// Tests — run with mocha.
const assert = require('assert'); // Node's built-in assertion library
const puppeteer = require('puppeteer');

const WEBSITE_TITLE = 'kangkangblog – Mr.kangblog';
const MY_GITHUB_LINK = 'https://github.com/ZWkang';
const FIRST_ITEM_TEXT = '首頁';

let browser;
let page;

// A regular function (not an arrow) is required so `this.timeout` is
// available: launching Chromium and loading the site can easily exceed
// mocha's default 2 s hook timeout.
before(async function () {
  this.timeout(30000);
  browser = await puppeteer.launch({
    headless: true,
  });
  page = await browser.newPage();
  await page.goto('https://ls-l.cn');
});

describe('check my website', () => {
  it('i need a title man!!', async () => {
    const titleValue = await page.title();
    assert.strictEqual(titleValue, WEBSITE_TITLE);
  }).timeout(10000);

  it('menu frist item', async () => {
    await page.waitForSelector('#site-navigation');
    const titleItem = await page.evaluate(() => {
      return document.querySelectorAll('#site-navigation ul > li')[0].innerText;
    });
    assert.strictEqual(titleItem, FIRST_ITEM_TEXT);
  }).timeout(10000);

  it(`the website will have my github link`, async () => {
    const myGithubLink = await page.evaluate(() => {
      return document.querySelector('.call-to-action-button').href;
    });
    assert.strictEqual(myGithubLink, MY_GITHUB_LINK);
  }).timeout(10000);
});

after(async () => {
  // `browser` stays undefined when the launch in `before` failed; guard so
  // the teardown does not mask the original error with a TypeError.
  if (browser) {
    await browser.close();
  }
});
// Form submission driven by keyboard input.
const puppeteer = require('puppeteer');

(async () => {
  const browser = await puppeteer.launch({
    headless: false, // visible browser so the typing can be watched
  });
  try {
    const page = await browser.newPage();
    await page.setViewport({ // set the viewport size
      width: 1280,
      height: 980,
    });
    await page.goto('https://segmentfault.com/');
    await page.waitForSelector('#searchBox');
    await page.click('#searchBox');
    // page.type takes the target selector first, then the text to type.
    await page.type('#searchBox', 'javascript', { delay: 100 });
    await page.click('.btn-link');
    await page.waitForSelector('.search-result');
    // Fixed 8 s pause before the screenshot, done with a plain timer so it
    // does not depend on puppeteer's deprecated page.waitFor helper.
    await new Promise((resolve) => setTimeout(resolve, 8000));
    await page.screenshot({
      path: 'keyboardTest.png', // take a picture of the result page
      fullPage: true,
    });
  } finally {
    // Close the browser on every path so a failed step does not leak it.
    await browser.close();
  }
})().catch((err) => {
  // Surface the failure instead of leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});
// Record a Chromium trace of a page load into trace.json.
const puppeteer = require('puppeteer');

(async () => {
  const browser = await puppeteer.launch({
    headless: true,
  });
  try {
    const page = await browser.newPage();
    await page.tracing.start({ path: 'trace.json' });
    try {
      await page.goto('https://ls-l.cn');
    } finally {
      // Stop tracing even when the navigation fails, so a started trace
      // is always ended.
      await page.tracing.stop();
    }
    await page.close();
  } finally {
    // Close the browser on every path so a failed run does not leak Chromium.
    await browser.close();
  }
})().catch((err) => {
  // Surface the failure instead of leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});
測試可以讓我們更瞭解自己的代碼,在 update 之後更快地得到反饋。
puppeteer給前端帶來了新的意義~一部分攜帶着統一。
在將來可以看到會有基於 puppeteer 封裝的二次工具的出現~保持學習,迎接將來的挑戰吧~
歡迎有不同觀點的合理討論。嘻嘻