今天下班抽了點時間看了下印象筆記,整理了一個禮拜 node 的 API 筆記……然後去慕課網看了 Scott 老師講的 node 系列視頻教程,於是自己寫了一個小小的爬蟲,爬的是自己寫的博客章節,裏面的一些 ES6 語法和 API 我就不一一細說,大家可以去看文檔:http://nodeapi.ucdok.com/#/api/ 。好了話不多說,直接上代碼:
javascript
'use strict';
{
  const http = require(`http`);
  const cheerio = require(`cheerio`);
  const fs = require(`fs`);

  const url = `http://www.cnblogs.com/tween`;

  /**
   * Serialize the parsed blog data into plain text and write it to blog.txt.
   * @param {Array<{time: string, blog: Array<Object>}>} content - one entry
   *   per ".day" section, as produced by getContent below.
   */
  const createTxt = (content) => {
    let txt = ``;
    for (const day of content) {
      txt += day.time;
      for (const post of day.blog) {
        // Each post is a plain object; concatenate its field strings in order.
        for (const name in post) {
          txt += post[name];
        }
        txt += `\n`;
      }
    }
    fs.writeFile(`blog.txt`, txt, 'utf-8', (err) => {
      err ? console.log(err) : console.log(`寫入成功`);
    });
  };

  /**
   * Parse the fetched HTML and extract, per ".day" section, the day title
   * plus each post's title and its read/comment counters.
   * @param {string} content - raw HTML of the blog index page.
   * @returns {Array<{time: string, blog: Array<Object>}>}
   */
  const getContent = (content) => {
    const $ = cheerio.load(content);
    const arr = [];
    $(`.day`).each((dayIndex, dayNode) => {
      const day = $(dayNode);
      const time = day.find(`.dayTitle`).text();
      const indexBlog = [];
      day.find(`.postTitle`).each((postIndex, postNode) => {
        const title = $(postNode).text().trim();
        const list = day.find(`.postDesc`).eq(postIndex).text();
        // FIX: String.prototype.match returns null when nothing matches,
        // and may return fewer than two groups — the original indexed
        // [0]/[1] unconditionally and crashed on such pages.
        const counters = list.match(/\(\d+\)/g) || [];
        const read = (counters[0] || `(0)`).trim();
        const comment = (counters[1] || `(0)`).trim();
        indexBlog[postIndex] = {
          title: `\t${title}\n`,
          read: `\t閱讀:${read} 評論:${comment}\n`,
        };
      });
      arr[dayIndex] = {
        time: `${dayIndex + 1} 、${time.trim()}\n`,
        blog: indexBlog,
      };
    });
    return arr;
  };

  http.get(url, (res) => {
    // FIX: without setEncoding, `content += data` stringifies raw Buffers
    // chunk by chunk, which can corrupt multibyte UTF-8 (Chinese) characters
    // that happen to be split across chunk boundaries.
    res.setEncoding('utf8');
    let content = ``;
    res.on(`data`, (data) => {
      content += data;
    }).on(`end`, () => {
      createTxt(getContent(content));
    });
  }).on(`error`, () => console.log(`獲取數據失敗`));
}
運行後會在同目錄下建立一個 blog.txt,裏面的內容就是爬到的數據。