需求場景:
當你想分析微信生態內什麼產品最受歡迎的時候
當你想參考他們的點子與設計的時候...
你需要數據,你需要爬蟲
獲取 JSON 格式數據
// Fetch the listing page; the response body is the page's HTML.
let res = await axios.get(url)
let html = res.data
// Parse the HTML so the app entries can be picked out with CSS selectors.
let $ = cheerio.load(html)
// Build one record for every element matched by config.dom.
let items = $(config.dom)
  .toArray()
  .map(element => {
    const $link = $(element)
    return {
      // Every "/" in the title is replaced with "-".
      title: $link.attr('title').replace(/\//g, '-'),
      href: $link.attr('href'),
      desc: $link.find('.desc').text(),
      count: $link.find('.opened i').text()
    }
  })
console.log({
  total: items.length,
  items
})
輸出結果:
{
total: 70,
items: [
{
title: '微報名',
href: 'https://weixiao.qq.com/store/details/10007',
desc: '在線收集報名信息,讓校園活動更便捷',
count: '43968'
},
{
title: '微上牆',
href: 'https://weixiao.qq.com/store/details/10008',
desc: '線下活動中,經過大屏幕同步展現現場觀衆發送的微信消息',
count: '34967'
},
...
]
}
注意圖片地址有多種形式:
// /img/home/apps/preview/apply_01.jpg
// https://weixiao.qq.com/img/home/apps/preview/apply_01.jpg
// http://p.qpic.cn/weixiao/0/1481391605/640
複製代碼
/**
 * Download a single image and save it to disk.
 *
 * @param {string} imageSrc - Image address. When it does not start with
 *   "http://" or "https://", "https:" is prepended (this suits
 *   protocol-relative "//host/path" addresses).
 * @param {string} fileName - Path of the file to write.
 * @returns {Promise<void>} Resolves once the file has been written completely;
 *   rejects when the request, the response stream or the file write fails.
 */
const downloadImage = async (imageSrc, fileName) => {
  const url = /^http(s?):\/\//.test(imageSrc) ? imageSrc : `https:${imageSrc}`
  const res = await axios({
    url,
    method: 'get',
    responseType: 'stream'
  })
  // pipe() returns immediately, so wait for the writer's "finish" event;
  // otherwise callers would continue (and report success) while the file is
  // still being written, and stream errors would go unnoticed.
  await new Promise((resolve, reject) => {
    const writer = fs.createWriteStream(fileName)
    writer.on('finish', resolve)
    writer.on('error', reject)
    res.data.on('error', error => {
      // pipe() does not close the destination when the source fails.
      writer.destroy()
      reject(error)
    })
    res.data.pipe(writer)
  })
}
複製代碼
/**
 * Download every preview image of one album (an app detail page).
 *
 * @param {{href: string, title: string}} item - Album record: `href` is the
 *   detail page to fetch, `title` names the sub-directory under `baseDir`.
 * @returns {Promise<void>} Resolves after all images were requested in order.
 */
const downloadPhoto = async item => {
  const { href, title } = item
  const res = await axios.get(href)
  const html = res.data
  // Parse the detail page and collect the image addresses.
  const $ = cheerio.load(html)
  const images = []
  $('.preview .preview-img').each((idx, element) => {
    let src = $(element).attr('src')
    // Addresses without an http(s) scheme are resolved against the site origin.
    if (!/^http(s?):\/\//.test(src)) {
      src = `https://weixiao.qq.com${src}`
    }
    images.push({ src, type: 'png' })
  })
  console.log({
    total: images.length,
    items: images
  })
  // Create the album's sub-directory.
  const folderPath = path.resolve(__dirname, `${baseDir}/${title}`)
  mkdirSync(folderPath)
  // Images are downloaded one after another and numbered from 1.
  for (let i = 0; i < images.length; i++) {
    const image = images[i]
    await downloadImage(image.src, `${folderPath}/${i + 1}.${image.type}`)
    // Log the album title: the image records only carry `src` and `type`,
    // so reading the title from them printed "undefined".
    console.log(`已下載:${title}-${i + 1}`)
  }
}
逐個資源下載,可以優化爲 Promise.all 統一下載
const axios = require('axios')
const cheerio = require('cheerio')
const fs = require('fs')
const path = require('path')
// Scraper settings: which page to fetch and which elements to read from it.
const config = {
  // Origin of the target site
  url: 'https://weixiao.qq.com',
  // Path and query of the listing page, appended to `url`
  route: 'store/labels?tag=0&order=2',
  // CSS selector of the elements to process
  dom: '.app-list a.app-item'
}
// Download root, resolved relative to this script's directory
const baseDir = './qq-app-download'
const savePath = path.resolve(__dirname, baseDir)
/**
 * Create a directory unless it already exists, and log which case applied.
 *
 * @param {string} dirPathStr - Path of the directory to create.
 */
const mkdirSync = dirPathStr => {
  // Nothing to create: just report that the directory is already there.
  if (fs.existsSync(dirPathStr)) {
    console.log(`文件夾已存在:${dirPathStr}`)
    return
  }
  fs.mkdirSync(dirPathStr)
  console.log(`文件夾已生成:${dirPathStr}`)
}
// Create the download root up front; the per-album folders are created inside it later.
mkdirSync(savePath)
/**
 * Download one image and write it to `fileName`.
 *
 * @param {string} imageSrc - Image address. "https:" is prepended when it does
 *   not already start with "http://" or "https://".
 * @param {string} fileName - Destination file path.
 * @returns {Promise<void>} Resolves when the file is fully written; rejects if
 *   the request, the response stream or the write stream fails.
 */
const downloadImage = async (imageSrc, fileName) => {
  const url = /^http(s?):\/\//.test(imageSrc) ? imageSrc : `https:${imageSrc}`
  const res = await axios({
    method: 'get',
    url,
    responseType: 'stream'
  })
  // Piping alone does not wait for the data: settle only when the writer
  // reports "finish", so "download complete" logs and timings are truthful
  // and stream failures reach the caller.
  await new Promise((resolve, reject) => {
    const writer = fs.createWriteStream(fileName)
    writer.on('finish', resolve)
    writer.on('error', reject)
    res.data.on('error', error => {
      // A failing source leaves the destination open; close it explicitly.
      writer.destroy()
      reject(error)
    })
    res.data.pipe(writer)
  })
}
/**
 * Download all preview images of one album, one image at a time.
 *
 * @param {{href: string, title: string}} item - Album record: `href` is the
 *   detail page to fetch, `title` names the album's sub-directory.
 * @returns {Promise<void>}
 */
const downloadPhoto = async item => {
  const { href, title } = item
  const { data: html } = await axios.get(href)
  // Parse the detail page and turn each preview element into a download record.
  const $ = cheerio.load(html)
  const images = $('.preview .preview-img')
    .toArray()
    .map(element => {
      const rawSrc = $(element).attr('src')
      // Addresses without an http(s) scheme are resolved against the site origin.
      const src = /^http(s?):\/\//.test(rawSrc)
        ? rawSrc
        : `https://weixiao.qq.com${rawSrc}`
      return { src, type: 'png' }
    })
  // Sub-directory that receives this album's images
  const folderPath = path.resolve(__dirname, `${baseDir}/${title}`)
  mkdirSync(folderPath)
  for (const [index, image] of images.entries()) {
    // Files are numbered from 1 in page order.
    const seq = index + 1
    await downloadImage(image.src, `${folderPath}/${seq}.${image.type}`)
    console.log(`[${title}] - ${seq} 下載完成`)
  }
}
/**
 * Download the albums of the listing page one after another.
 *
 * @param {Array<{title: string}>} items - Album records; each one is passed to
 *   `downloadPhoto`.
 * @param {number} [limit=3] - Maximum number of albums to download. The
 *   default keeps the previous behaviour of fetching only the first three;
 *   pass `Infinity` to download every album.
 * @returns {Promise<void>}
 */
const downloadImgList = async (items, limit = 3) => {
  // Never run past the end of the list: with fewer than `limit` albums the
  // old fixed bound handed `undefined` to downloadPhoto and crashed.
  const count = Math.min(items.length, limit)
  for (let index = 0; index < count; index++) {
    const item = items[index]
    await downloadPhoto(item)
    console.log(`相冊 [${item.title}] 下載完成`)
  }
}
/**
 * Entry point: fetch the listing page, extract the app records, download
 * their albums and log the elapsed time in seconds.
 *
 * @returns {Promise<void>}
 */
const main = async () => {
  const startedAt = Date.now()
  const { data: html } = await axios.get(`${config.url}/${config.route}`)
  // Parse the listing page and build one record per matched element.
  const $ = cheerio.load(html)
  const items = $(config.dom)
    .toArray()
    .map(element => {
      const $link = $(element)
      return {
        // "/" is replaced because the title later becomes a directory name.
        title: $link.attr('title').replace(/\//g, '-'),
        href: $link.attr('href'),
        desc: $link.find('.desc').text(),
        count: $link.find('.opened i').text()
      }
    })
  console.log({
    total: items.length,
    items
  })
  await downloadImgList(items)
  const finishedAt = Date.now()
  console.log(`總耗時=>${(finishedAt - startedAt) / 1000}s`)
}
// Start the scraper. A failed run is reported and flagged through the exit
// code instead of leaving the promise rejection unhandled.
main().catch(error => {
  console.error(error)
  process.exitCode = 1
})
複製代碼
const axios = require('axios')
const cheerio = require('cheerio')
const fs = require('fs')
const path = require('path')
// Scraper settings: the page to fetch and the elements to read from it.
const config = {
  // Origin of the target site
  url: 'https://weixiao.qq.com',
  // Path and query of the listing page, appended to `url`
  route: 'store/labels?tag=0&order=2',
  // CSS selector of the elements to process
  dom: '.app-list a.app-item'
}
// Download root, resolved relative to this script's directory
const baseDir = './qq-app-download'
const savePath = path.resolve(__dirname, baseDir)
/**
 * Create a directory unless it already exists, and log which case applied.
 *
 * @param {string} dirPathStr - Path of the directory to create.
 */
const mkdirSync = dirPathStr => {
  // Already present: report it and leave the directory untouched.
  if (fs.existsSync(dirPathStr)) {
    console.log(`文件夾已 存在 :${dirPathStr}`)
    return
  }
  fs.mkdirSync(dirPathStr)
  console.log(`文件夾已 生成 :${dirPathStr}`)
}
// Create the download root up front; the per-album folders are created inside it later.
mkdirSync(savePath)
/**
 * Wait for a batch of tasks to complete.
 *
 * Thin wrapper around `Promise.all`: there is no need to construct a second
 * promise around it, and the fulfilled values are passed through instead of
 * being dropped, so callers that want the results can use them.
 *
 * @param {Iterable<Promise<*>|*>} arr - Tasks (promises or plain values).
 * @returns {Promise<Array<*>>} Resolves with the tasks' values in input order;
 *   rejects with the first rejection reason.
 */
const taskPromiseAll = async arr => Promise.all(arr)
/**
 * Download one image and write it to `fileName`.
 *
 * @param {string} imageSrc - Image address. "https:" is prepended when it does
 *   not already start with "http://" or "https://".
 * @param {string} fileName - Destination file path.
 * @returns {Promise<void>} Resolves when the file is fully written; rejects if
 *   the request, the response stream or the write stream fails.
 */
const downloadImage = async (imageSrc, fileName) => {
  const url = /^http(s?):\/\//.test(imageSrc) ? imageSrc : `https:${imageSrc}`
  const res = await axios({
    url,
    method: 'get',
    responseType: 'stream'
  })
  // Callers combine these promises with Promise.all and measure the total
  // time, so the promise must stay pending until the writer emits "finish";
  // resolving right after pipe() would report completion too early.
  await new Promise((resolve, reject) => {
    const writer = fs.createWriteStream(fileName)
    writer.on('finish', resolve)
    writer.on('error', reject)
    res.data.on('error', error => {
      // A failing source leaves the destination open; close it explicitly.
      writer.destroy()
      reject(error)
    })
    res.data.pipe(writer)
  })
}
/**
 * Download all preview images of one album, starting every download at once.
 *
 * @param {{href: string, title: string}} item - Album record: `href` is the
 *   detail page to fetch, `title` names the album's sub-directory.
 * @returns {Promise<void>}
 */
const downloadPhoto = async item => {
  const { data: html } = await axios.get(item.href)
  // Parse the detail page and turn each preview element into a download record.
  const $ = cheerio.load(html)
  const images = $('.preview .preview-img')
    .toArray()
    .map(element => {
      const rawSrc = $(element).attr('src')
      // Addresses without an http(s) scheme are resolved against the site origin.
      const src = /^http(s?):\/\//.test(rawSrc)
        ? rawSrc
        : `https://weixiao.qq.com${rawSrc}`
      return { src, type: 'png' }
    })
  // Sub-directory that receives this album's images
  const folderPath = path.resolve(__dirname, `${baseDir}/${item.title}`)
  mkdirSync(folderPath)
  // Start all downloads, then wait for the whole batch; files are numbered from 1.
  const pending = images.map(({ src, type }, position) =>
    downloadImage(src, `${folderPath}/${position + 1}.${type}`)
  )
  await taskPromiseAll(pending)
}
/**
 * Download every album of the listing page, starting all of them at once.
 *
 * @param {Array<Object>} items - Album records; each one is passed to
 *   `downloadPhoto`.
 * @returns {Promise<void>}
 */
const downloadImgList = async items => {
  // Kick off one album download per record and wait for the whole batch.
  await taskPromiseAll(items.map(item => downloadPhoto(item)))
}
/**
 * Entry point: fetch the listing page, extract the app records, download
 * their albums and log the elapsed time in seconds.
 *
 * @returns {Promise<void>}
 */
const main = async () => {
  const startedAt = Date.now()
  const { data: html } = await axios.get(`${config.url}/${config.route}`)
  // Parse the listing page and build one record per matched element.
  const $ = cheerio.load(html)
  const items = $(config.dom)
    .toArray()
    .map(element => {
      const $link = $(element)
      return {
        // "/" is replaced because the title later becomes a directory name.
        title: $link.attr('title').replace(/\//g, '-'),
        href: $link.attr('href'),
        desc: $link.find('.desc').text(),
        count: $link.find('.opened i').text()
      }
    })
  console.log({
    total: items.length,
    items
  })
  await downloadImgList(items)
  const elapsedSeconds = (Date.now() - startedAt) / 1000
  console.log(`耗時=>${elapsedSeconds}`)
}
// Start the scraper. A failed run is reported and flagged through the exit
// code instead of leaving the promise rejection unhandled.
main().catch(error => {
  console.error(error)
  process.exitCode = 1
})
資源統一下載
| 按文件下載 | 按相冊下載 | 統一下載 |
|---|---|---|
| ![]() | ![]() | ![]() |
| 11.5s | 8.5s | 3.8s |