所用到的 Node 模塊及依賴包
項目建立
引入模塊
const express = require('express');
const superagent = require('superagent');
const charset = require('superagent-charset');
const cheerio = require('cheerio');
const fs = require('fs');
複製代碼
假如咱們要爬取 www.nanrentu.cc/sgtp/ 這個鏈接下的圖片,首先分析此頁面的結構:
// Selector for the picture-list <li> items on the listing page; the scraper
// below reads the <img> inside each matched item.
$('.wrap .h-sgtp-list .h-sgtp-box-m .h-piclist li')
複製代碼
下面直接看源碼:
const express = require('express');
const superagent = require('superagent');
const charset = require('superagent-charset');
const cheerio = require('cheerio');
const fs = require('fs');
// Apply the superagent-charset plugin to superagent; the request chain in the
// route handler calls `.charset(...)` on the request.
charset(superagent);
// Page to scrape: https://www.nanrentu.cc/sgtp/
const baseUrl = 'https://www.nanrentu.cc/sgtp/';
// Express application that exposes the scraper over HTTP.
const app = express();
/**
 * GET / — scrape the listing page at `baseUrl`, collect the `src` of the
 * <img> inside every picture-list item, start saving each image to
 * `./image/<index>.png`, and reply with the collected URLs as JSON.
 *
 * Response body:
 *   - success: `{ code: 200, data: [{ imgSrc }] }`
 *   - listing page request failed: `{ code: 400, msg: err }`
 *
 * The image downloads are fire-and-forget: the JSON response is sent without
 * waiting for them, and a failed download is only logged.
 */
app.get('/', (req, res) => {
  // CORS headers. Setting the same header twice replaces the earlier value,
  // so every allowed request header has to go into a single value.
  res.header('Access-Control-Allow-Origin', '*');
  res.header('Access-Control-Allow-Methods', 'PUT, GET, POST, DELETE, OPTIONS');
  res.header('Access-Control-Allow-Headers', 'X-Requested-With, Content-Type');

  superagent.get(baseUrl)
    .charset('utf-8')
    .end((err, data) => {
      if (err) {
        console.log(`請求錯誤:${err}`);
        res.json({ code: 400, msg: err });
        return;
      }

      // createWriteStream does not create missing directories; without this
      // the stream emits an unhandled 'error' event and the process exits.
      fs.mkdirSync('./image', { recursive: true });

      const $ = cheerio.load(data.text);
      const item = [];
      $('.wrap .h-sgtp-list .h-sgtp-box-m .h-piclist li').each((index, element) => {
        const imgSrc = $(element).find('img').attr('src');
        // Skip list items that have no image URL instead of requesting `undefined`.
        if (!imgSrc) {
          return;
        }
        item.push({ imgSrc });

        // Resolve relative and protocol-relative URLs against the scraped page.
        let downloadUrl;
        try {
          downloadUrl = new URL(imgSrc, baseUrl).href;
        } catch (urlErr) {
          console.log(`Skipping invalid image URL "${imgSrc}": ${urlErr}`);
          return;
        }

        const target = `./image/${index}.png`;
        const file = fs.createWriteStream(target);
        file.on('error', (writeErr) => {
          console.log(`Failed to write ${target}: ${writeErr}`);
        });
        superagent.get(downloadUrl)
          .on('error', (downloadErr) => {
            console.log(`Failed to download ${downloadUrl}: ${downloadErr}`);
          })
          .pipe(file);
      });
      res.json({ code: 200, data: item });
    });
});
// Start the HTTP server on port 3333 and log a message once it is listening.
function onListening() {
  console.log('啓動成功:localhost:3333');
}

app.listen(3333, onListening);
複製代碼
這樣咱們就把不少帥哥的圖片爬取到了當前項目目錄的 image 文件夾下面。