node50行代碼實現壁紙爬取

剛入門node的cheerio模塊,寫了個特簡單的壁紙爬蟲玩玩。

話不多說,先上成品圖。

       

這裏用到了 http://www.netbian.com/fengjing/

也可以爬妹紙圖片哦。

這裏可以看到,圖片區域放置於一個class爲list的區域內,圖片的img在a標籤中,我們的目標就是獲取圖片的url和alt。

首先引入所需模塊:

const cheerio = require("cheerio");
const fs = require("fs");
const request = require("request");
const iconv = require("iconv-lite");

這幾個模塊我就不多說了,至於iconv的作用,在我另一篇文章裏有介紹。

我們先來實現獲取圖片信息,這裏用到了request模塊:

// Fetch the listing page and collect the attributes of every wallpaper <img>
// into `urls`. `encoding: null` makes request hand back a raw Buffer, which is
// then decoded as gb2312 by iconv before parsing.
request.get({
        encoding: null,
        url: imgUrl
    },
        function (err, res, body) {
            if (err || res.statusCode !== 200) {
                // `err` is null when the server merely answered with a non-200
                // status, so report the status code instead of logging `null`.
                console.log(err || `請求失敗,狀態碼:${res.statusCode}`);
                return;
            }
            const html = iconv.decode(body, "gb2312");
            // Declared locally: a bare `$ = ...` would create an implicit global
            // (and throws in strict mode / ES modules).
            const $ = cheerio.load(html);
            const lis = $(".list ul li a img");
            for (let i = 0; i < lis.length; i++) {
                // NOTE(review): the element at index 2 is deliberately skipped,
                // presumably because it is not a regular wallpaper entry — confirm
                // against the page markup.
                if (lis[i].attribs && i !== 2) {
                    urls.push(lis[i].attribs);
                }
            }
        });

 

這裏先用request獲取網頁的res.body信息,用cheerio.load轉換爲一個cheerio可操作的對象,後面的操作和jQuery是一樣的。

這樣,我們獲取的圖片信息就存儲在了urls數組中。

下面加上下載圖片的代碼

// Fetch the listing page, collect the wallpaper <img> attributes into `urls`,
// then download every collected image into ../壁紙/ named after its alt text.
request.get({
        encoding: null,
        url: imgUrl
    },
        function (err, res, body) {
            if (err || res.statusCode !== 200) {
                // `err` is null on a non-200 response; report the status instead.
                console.log(err || `請求失敗,狀態碼:${res.statusCode}`);
                return;
            }
            const html = iconv.decode(body, "gb2312");
            // Local binding instead of an implicit global `$`.
            const $ = cheerio.load(html);
            const lis = $(".list ul li a img");
            for (let i = 0; i < lis.length; i++) {
                // NOTE(review): index 2 is skipped on purpose, presumably a
                // non-wallpaper entry — confirm against the page markup.
                if (lis[i].attribs && i !== 2) {
                    urls.push(lis[i].attribs);
                }
            }
            console.log(`開始下載...`);
            const startTime = new Date();
            const total = urls.length;
            let pending = total;
            // The downloads are asynchronous, so the summary may only be printed
            // once the last one has settled; printing it right after the loop
            // would always report a few milliseconds.
            const reportDone = function () {
                const endTime = new Date();
                console.log(`下載完成,共${total}張圖片,耗時${endTime-startTime}ms`);
            };
            if (pending === 0) {
                reportDone();
                return;
            }
            for (let i = 0; i < total; i++) {
                let settled = false;
                // Each download settles exactly once, on success or first error.
                const settle = function (error) {
                    if (settled) {
                        return;
                    }
                    settled = true;
                    if (error) {
                        console.log(error);
                    }
                    pending -= 1;
                    if (pending === 0) {
                        reportDone();
                    }
                };
                const file = fs.createWriteStream(`../壁紙/${urls[i].alt}.jpg`);
                file.on("finish", function () {
                    settle();
                });
                // Without a listener, a write error (e.g. missing directory)
                // would be an unhandled 'error' event and crash the process.
                file.on("error", settle);
                request(urls[i].src)
                    .on("error", function (error) {
                        // pipe() does not close the destination when the source
                        // fails, so end the file explicitly.
                        file.end();
                        settle(error);
                    })
                    .pipe(file);
            }
        });

用函數封裝起來,就是完整版啦

const cheerio = require("cheerio");
const fs = require("fs");
const request = require("request");
const iconv = require("iconv-lite");

// Base URL of the listing section; getImg appends the page file name to it.
let imgUrl = "http://www.netbian.com/fengjing/";
// Accumulates the attribute objects of the <img> elements found by getImg.
const urls = [];
// Collect each image's address and name into urls

/**
 * Fetch one listing page and download every wallpaper image found on it.
 *
 * Page 1 is served as `index.htm`; any later page N as `index_N.htm`, both
 * relative to the module-level `imgUrl` base. The attributes of each image
 * found are appended to the module-level `urls` array, and the images found
 * by THIS call are written to `../壁紙/<alt>.jpg`.
 *
 * @param {number} page - 1-based page number; must be a positive integer.
 * @returns {undefined} Nothing; progress and errors are reported on the console.
 */
function getImg(page) {
    // Validate before doing anything else. Number.isInteger also rejects NaN,
    // fractions and non-numbers, which would otherwise yield URLs such as
    // `index_NaN.htm` or `index_1.5.htm`.
    if (!Number.isInteger(page) || page < 1) {
        console.log("請輸入正確的頁數");
        return;
    }
    console.log(`正在獲取第${page}頁的圖片`);
    const path = page === 1 ? "index.htm" : `index_${page}.htm`;
    // Build the URL locally. Appending to the shared `imgUrl` would corrupt the
    // base for every subsequent call (".../index_4.htmindex_5.htm").
    const pageUrl = imgUrl + path;
    request.get({
        encoding: null,
        url: pageUrl
    },
        function (err, res, body) {
            if (err || res.statusCode !== 200) {
                // `err` is null on a non-200 response; report the status instead.
                console.log(err || `請求失敗,狀態碼:${res.statusCode}`);
                return;
            }
            // The body arrives as a raw Buffer (encoding: null) and is decoded
            // as gb2312 before being parsed.
            const html = iconv.decode(body, "gb2312");
            // Local binding instead of an implicit global `$`.
            const $ = cheerio.load(html);
            const lis = $(".list ul li a img");
            // Images found by this call only, so that earlier pages already in
            // `urls` are not downloaded a second time.
            const pageImgs = [];
            for (let i = 0; i < lis.length; i++) {
                // NOTE(review): index 2 is skipped on purpose, presumably a
                // non-wallpaper entry — confirm against the page markup.
                if (lis[i].attribs && i !== 2) {
                    pageImgs.push(lis[i].attribs);
                    urls.push(lis[i].attribs);
                }
            }
            console.log(`開始下載...`);
            const startTime = new Date();
            const total = pageImgs.length;
            let pending = total;
            // Downloads are asynchronous: print the summary only after the last
            // one has settled, otherwise the elapsed time is meaningless.
            const reportDone = function () {
                const endTime = new Date();
                console.log(`下載完成,共${total}張圖片,耗時${endTime-startTime}ms`);
            };
            if (pending === 0) {
                reportDone();
                return;
            }
            pageImgs.forEach(function (img) {
                let settled = false;
                // Each download settles exactly once, on success or first error.
                const settle = function (error) {
                    if (settled) {
                        return;
                    }
                    settled = true;
                    if (error) {
                        console.log(error);
                    }
                    pending -= 1;
                    if (pending === 0) {
                        reportDone();
                    }
                };
                const file = fs.createWriteStream(`../壁紙/${img.alt}.jpg`);
                file.on("finish", function () {
                    settle();
                });
                // Without a listener, a write error (e.g. missing directory)
                // would be an unhandled 'error' event and crash the process.
                file.on("error", settle);
                request(img.src)
                    .on("error", function (error) {
                        // pipe() does not close the destination when the source
                        // fails, so end the file explicitly.
                        file.end();
                        settle(error);
                    })
                    .pipe(file);
            });
        });
}

// Start the crawl with listing page 4.
getImg(4);

  這個入門級的爬蟲功能還比較簡單,我還會慢慢完善。最近在嘗試一些複雜的爬蟲,希望能和大家多多交流,共同進步。

相關文章
相關標籤/搜索
本站公眾號
   歡迎關注本站公眾號,獲取更多信息