// 一個簡單的 Puppeteer 爬蟲 (a simple Puppeteer crawler)

const puppeteer = require("puppeteer");
const path = require('path');
// NOTE: despite the name, this is the path of the Chromium executable bundled
// next to this script (it is passed as `executablePath` below), not the path
// of a browser extension.
const pathToExtension = path.join(
    __dirname,
    './chrome-mac/Chromium.app/Contents/MacOS/Chromium'
);

// Size of the page viewport, in pixels.
const defaultViewport = { width: 1300, height: 900 };

// Options passed to puppeteer.launch(): run with a visible window
// (headless: false) using the Chromium binary resolved above.
const conf = {
    headless: false,
    executablePath: pathToExtension,
    defaultViewport,
};

/**
 * Script entry point.
 *
 * Launches the browser with `conf`, opens https://www.baidu.com/, injects
 * jQuery into the page, collects the text and href of every link inside the
 * `#u1` element, and prints the resulting array of `{title, url}` records.
 * The window is kept open for three seconds before the browser is closed.
 *
 * The browser is closed even when a step fails, and any failure is reported
 * on stderr with a non-zero exit code instead of an unhandled rejection.
 */
(async () => {
    const browser = await puppeteer.launch(conf);
    try {
        const page = await browser.newPage();

        await page.goto('https://www.baidu.com/', { waitUntil: 'networkidle2' });
        // addScriptTag must run after goto (navigation replaces the document);
        // once it has loaded, jQuery syntax can be used inside evaluate().
        await page.addScriptTag({
            url: 'https://code.jquery.com/jquery-3.2.1.min.js',
        });
        // waitForSelector replaces the deprecated page.waitFor(selector).
        await page.waitForSelector('#u1');

        // Forward console output produced inside evaluate() to the Node console.
        page.on('console', (msg) => {
            // args() returns an array of handles; index it rather than passing
            // the index as an (ignored) argument.
            msg.args().forEach((arg, i) => {
                console.log(`${i}: ${arg}`);
            });
        });

        const result = await page.evaluate(() => {
            // `$` is the jQuery instance available in the page context.
            // Select the anchors inside #u1 (not the container itself), since
            // `href` only exists on link elements.
            return $('#u1 a')
                .toArray()
                .map((link) => ({ title: link.innerText, url: link.href }));
        });
        console.log(result);

        // Keep the window open briefly so the page can be inspected by eye.
        await new Promise((resolve) => setTimeout(resolve, 3000));
    } finally {
        // Always release the browser process, even if a step above threw.
        await browser.close();
    }
})().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});
// 相關文章 (related articles)
// 相關標籤/搜索 (related tags / search)