puppeteer官網
引用puppeteer官網解釋: Most things that you can do manually in the browser can be done using Puppeteer!
page.setViewport()
設置頁面視口(viewport)的大小,默認視口大小爲800px * 600px
page.pdf(路徑,大小)
將頁面保存爲pdf格式的文件
- 舉例:
// Example: export the page as a PDF written to 'hn.pdf', using the A4 paper format.
page.pdf({path: 'hn.pdf', format: 'A4'});
page.evaluate(fn)
在頁面(瀏覽器)環境中執行傳入的函數,可以調用chrome的api
舉例:
// Example: the callback runs inside the page, so it can read `document` and
// `window`; the object it returns is handed back to the Node.js side.
await page.evaluate(() => {
  return {
    width: document.documentElement.clientWidth,
    height: document.documentElement.clientHeight,
    // Fixed typo: the browser property is `devicePixelRatio`.
    deviceScaleFactor: window.devicePixelRatio
  };
});

puppeteer.launch({headless: false});
啓動瀏覽器。headless的默認值是true(無界面模式);設爲false時會打開可見的瀏覽器窗口。更多API請參考puppeteer官方文檔。
const puppeteer = require('puppeteer');
// Directory, exported by config/default.js, where captured screenshots are saved.
const { screenshot } = require('./config/default.js');

/**
 * Opens the Baidu home page in a browser launched by Puppeteer and saves a
 * PNG screenshot of it, named after the current timestamp, into the
 * configured screenshot directory.
 */
(async () => {
  // Launch a browser instance.
  const browser = await puppeteer.launch();
  try {
    // Open a new tab in that browser.
    const page = await browser.newPage();
    // Navigate to the Baidu home page.
    await page.goto('https://www.baidu.com');
    // Capture the page; the timestamp keeps successive runs from overwriting
    // each other's files.
    await page.screenshot({
      path: `${screenshot}/${Date.now()}.png`
    });
  } finally {
    // Close the browser even when navigation or the capture fails, so the
    // browser process is not left running.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure instead of leaving an unhandled promise rejection.
  console.error(err);
  process.exitCode = 1;
});
node src/screenshot.js
.
|-mn
|-src
| |-config
| | |-default.js
| |-helper
| | |-srcToImg.js
| |-mn.js
|-package.json
const puppeteer = require('puppeteer');
// Directory, exported by config/default.js, that receives the downloaded images.
const { mn } = require('./config/default');
const srcToImg = require('./helper/srcToImg');

/**
 * Searches Baidu Images for the keyword '狗', collects the `src` of every
 * `img.main_img` element on the result page and saves each image into the
 * `mn` directory through the srcToImg helper.
 */
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto('https://image.baidu.com');
    console.log('go to https://image.baidu.com');

    await page.setViewport({
      width: 1920,
      height: 1080
    });
    console.log('reset viewport');

    // Type the keyword into the search box.
    await page.focus('#kw');
    await page.keyboard.sendCharacter('狗');

    // Start waiting for the navigation BEFORE clicking. Attaching a 'load'
    // listener after the click can miss the event if the page loads first.
    await Promise.all([
      page.waitForNavigation({ waitUntil: 'load' }),
      page.click('.s_search')
    ]);
    console.log('go to search list');
    console.log('page loading done, start fetch ...');

    // Runs in the page context: gather the src of every result thumbnail.
    const srcs = await page.evaluate(() => {
      const images = document.querySelectorAll('img.main_img');
      return Array.prototype.map.call(images, img => img.src);
    });
    console.log(`get ${srcs.length} image, start download`);

    // Download in parallel and wait for all of them. `forEach(async ...)`
    // would not wait and would leave rejections unhandled.
    await Promise.all(srcs.map(async (src) => {
      try {
        await srcToImg(src, mn);
      } catch (err) {
        // One failed image must not abort the others. The src is truncated
        // because it may be a very long base64 data URI.
        console.error(`failed to save ${src.slice(0, 80)}: ${err.message}`);
      }
    }));
  } finally {
    // Close the browser on success and on failure alike.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
const path = require('path');

/**
 * Resolves a folder located two levels above this config directory.
 *
 * @param {string} folder - Folder name relative to that location.
 * @returns {string} Absolute path of the folder.
 */
const outputDir = (folder) => path.resolve(__dirname, '../..', folder);

// Absolute paths of the output directories shared by the scripts.
const screenshot = outputDir('screenshot');
const mn = outputDir('mn');

module.exports = { screenshot, mn };
const http = require('http'); const https = require('https'); const fs = require('fs'); const path = require('path'); const { promisify } = require('util'); const writeFile = promisify(fs.writeFile); module.exports = async(src, dir) => { if(/\.(jpg|png|gif)$/.test(src)) { await urlToImg(src, dir); }else { await base64ToImg(src, dir); } } // 識別src爲http或者https的圖片 const urlToImg = promisify((url, dir, callback) => { const mod = /^https:/.test(url) ? https : http; const ext = path.extname(url); const file = path.join(dir, `${Date.now()}${ext}`); mod.get(url, res => { res.pipe(fs.createWriteStream(file)) .on('finish', () => { callback(); console.log(file); }) }) }) // 識別src爲base64地址的圖片 const base64ToImg = async (base64Str, dir) => { // data: image/jpeg;base64,/raegreagearg const matchs = base64Str.match(/^data:(.+?);base64,(.+)$/); try { const ext = matches[1].split('/')[1] .replace('jpeg', 'jpg'); const file = path.join(dir, `${Date.now()}.${ext}`); await writeFile(file, match[2], 'base64'); console.log(file); } catch (ex) { console.log('非法 base64 字符串'); } }
go to https://image.baidu.com reset viewport go to search list page loading done, start fetch ... get 46 image, start download 非法 base64 字符串 非法 base64 字符串 非法 base64 字符串 非法 base64 字符串 非法 base64 字符串 非法 base64 字符串 /Users/lius/Desktop/web spider/headless-crawler/headless_crawler/mn/1530800351397.jpg /Users/lius/Desktop/web spider/headless-crawler/headless_crawler/mn/1530800351396.jpg /Users/lius/Desktop/web spider/headless-crawler/headless_crawler/mn/1530800351398.jpg /Users/lius/Desktop/web spider/headless-crawler/headless_crawler/mn/1530800351400.jpg /Users/lius/Desktop/web spider/headless-crawler/headless_crawler/mn/1530800351405.jpg /Users/lius/Desktop/web spider/headless-crawler/headless_crawler/mn/1530800351386.jpg /Users/lius/Desktop/web spider/headless-crawler/headless_crawler/mn/1530800351399.jpg /Users/lius/Desktop/web spider/headless-crawler/headless_crawler/mn/1530800351405.jpg /Users/lius/Desktop/web spider/headless-crawler/headless_crawler/mn/1530800351405.jpg /Users/lius/Desktop/web spider/headless-crawler/headless_crawler/mn/1530800351402.jpg /Users/lius/Desktop/web spider/headless-crawler/headless_crawler/mn/1530800351412.jpg /Users/lius/Desktop/web spider/headless-crawler/headless_crawler/mn/1530800351413.jpg /Users/lius/Desktop/web spider/headless-crawler/headless_crawler/mn/1530800351403.jpg /Users/lius/Desktop/web spider/headless-crawler/headless_crawler/mn/1530800351398.jpg /Users/lius/Desktop/web spider/headless-crawler/headless_crawler/mn/1530800351399.jpg /Users/lius/Desktop/web spider/headless-crawler/headless_crawler/mn/1530800351403.jpg /Users/lius/Desktop/web spider/headless-crawler/headless_crawler/mn/1530800351406.jpg /Users/lius/Desktop/web spider/headless-crawler/headless_crawler/mn/1530800351401.jpg /Users/lius/Desktop/web spider/headless-crawler/headless_crawler/mn/1530800351408.jpg /Users/lius/Desktop/web spider/headless-crawler/headless_crawler/mn/1530800351404.jpg /Users/lius/Desktop/web 
spider/headless-crawler/headless_crawler/mn/1530800351414.jpg /Users/lius/Desktop/web spider/headless-crawler/headless_crawler/mn/1530800351400.jpg /Users/lius/Desktop/web spider/headless-crawler/headless_crawler/mn/1530800351402.jpg /Users/lius/Desktop/web spider/headless-crawler/headless_crawler/mn/1530800351413.jpg /Users/lius/Desktop/web spider/headless-crawler/headless_crawler/mn/1530800351408.jpg /Users/lius/Desktop/web spider/headless-crawler/headless_crawler/mn/1530800351414.jpg /Users/lius/Desktop/web spider/headless-crawler/headless_crawler/mn/1530800351413.jpg ......