SegmentFault(sf)提供了筆記的編輯時(原始)內容,訪問路徑:筆記 url + /raw
1.使用爬蟲獲取所有筆記列表,獲取 title 和 href。
2.通過 https://segmentfault.com + href + /raw
得到所有筆記的編輯時內容。
需要提供個人當前登錄的 cookie:修改 js 文件中的 cook 變量,例如:
const cook =`PHPSESSID=web2~a……
自行安裝依賴:express、superagent、cheerio
js:
const express = require('express');
const superagent = require('superagent');
const cheerio = require('cheerio');

const app = express();

// Value sent as the Cookie header on every request to the site.
// Replace the placeholder with the cookie of the logged-in account.
const cook = `xxx`;

const SITE_ORIGIN = 'https://segmentfault.com';
const NOTE_LIST_URL = `${SITE_ORIGIN}/user/note?page=`;

/**
 * Creates the bookkeeping for one crawl.
 *
 * The state is created per incoming request so that overlapping requests
 * cannot overwrite each other's page counter or result arrays.
 *
 * @returns {{page: number, nots: Array, nindex: number, notsArr: Array}}
 *   page    - next list page to fetch (1-based)
 *   nots    - collected {title, href} entries from the list pages
 *   nindex  - index into nots of the next note whose content is fetched
 *   notsArr - collected {title, content} entries returned to the client
 */
function createCrawlState() {
  return { page: 1, nots: [], nindex: 0, notsArr: [] };
}

/**
 * Extracts the note links from one list page.
 *
 * @param {{text: string}} res - response whose `text` is the list page HTML.
 * @returns {{title: string, href: string}[]} one entry per
 *   `.drafts-stream .title a` anchor that has an href; the href has its
 *   query string removed and `/raw` appended. Empty when the page lists
 *   no notes.
 */
const getHotNews = (res) => {
  const $ = cheerio.load(res.text);
  const found = [];

  $('.drafts-stream .title a').each((idx, ele) => {
    const href = $(ele).attr('href');
    if (!href) {
      // An anchor without a target cannot be turned into a /raw URL.
      return;
    }
    const path = href.split('?')[0];
    found.push({
      title: $(ele).text(),
      href: `${SITE_ORIGIN}${path}/raw`,
    });
  });

  return found;
};

/**
 * Fetches the raw content of every collected note, one request at a time,
 * then sends `{data: [{title, content}, ...]}` on `endData`.
 *
 * If a note request fails, the entries collected so far are sent instead.
 *
 * @param {object} endData - Express response used to send the final result.
 * @param {object} state - crawl state from createCrawlState().
 */
function getNoteContent(endData, state) {
  if (state.nindex >= state.nots.length) {
    endData.send({ data: state.notsArr });
    return;
  }

  const note = state.nots[state.nindex];
  console.log(state.nindex, note.href);

  superagent.get(note.href).set('Cookie', cook).end((err, res) => {
    if (err) {
      console.log(`抓取失敗 - ${err}`);
      endData.send({ data: state.notsArr });
      return;
    }
    state.notsArr.push({ title: note.title, content: res.text });
    state.nindex++;
    getNoteContent(endData, state);
  });
}

/**
 * Walks the note list page by page until a page yields no notes, then
 * hands over to getNoteContent() to fetch each note and answer the client.
 *
 * @param {object} req - Express request (unused, kept for the call shape).
 * @param {object} res - Express response that receives the final result.
 * @param {object} [state] - crawl state; a fresh one is created when omitted.
 */
function getNotes(req, res, state = createCrawlState()) {
  superagent.get(NOTE_LIST_URL + state.page).set('Cookie', cook).end((err1, res1) => {
    if (err1) {
      console.log(`抓取失敗 - ${err1}`);
      // Always answer the client; otherwise the request would hang forever.
      res.status(502).send({ data: [], error: `抓取失敗 - ${err1}` });
      return;
    }

    const pageNotes = getHotNews(res1);
    if (pageNotes.length === 0) {
      // No notes on this page: the list is exhausted, fetch the contents.
      getNoteContent(res, state);
      return;
    }

    state.nots.push(...pageNotes);
    state.page++;
    getNotes(req, res, state);
  });
}

app.get('/', (req, res) => {
  res.header('Access-Control-Allow-Origin', '*');
  // Allowed request header types.
  res.header('Access-Control-Allow-Headers', 'content-type');
  // Request methods allowed for cross-origin calls.
  res.header('Access-Control-Allow-Methods', 'DELETE,PUT,POST,GET,OPTIONS');

  // Each request crawls from page 1 with its own fresh state.
  getNotes(req, res);
});

const server = app.listen(5100, () => {
  const host = server.address().address;
  const port = server.address().port;
  console.log('Your App is running at http://%s:%s', host, port);
});
html:
<!DOCTYPE html>
<html>
<head>
  <meta charset="utf-8">
  <title></title>
  <script src="https://cdn.bootcss.com/axios/0.19.0/axios.js"></script>
</head>
<body>
  <div id="app">
    <ul id="ul"></ul>
  </div>
  <script type="text/javascript">
    // Named entities that escape2Html turns back into characters.
    const arrEntities = { lt: '<', gt: '>', nbsp: ' ', amp: '&', quot: '"' };

    /**
     * Replaces &lt; &gt; &nbsp; &amp; and &quot; in str with the characters
     * they stand for (in a single pass, so "&amp;lt;" becomes "&lt;").
     * Matching is case-insensitive; the lookup key is lower-cased so that an
     * upper-case entity such as "&LT;" does not turn into "undefined".
     * A null/undefined input yields an empty string.
     */
    function escape2Html(str) {
      if (str == null) {
        return '';
      }
      return str.replace(/&(lt|gt|nbsp|amp|quot);/ig, (all, t) => arrEntities[t.toLowerCase()]);
    }

    /**
     * Builds one list entry: a numbered title followed by the note content
     * in a <pre>. Text is assigned through textContent so that markup inside
     * a note is displayed literally and never parsed as HTML.
     */
    function buildNoteItem(item, i) {
      const li = document.createElement('li');

      const heading = document.createElement('h4');
      heading.textContent = (i + 1) + '. ' + item.title;

      const wrapper = document.createElement('div');
      const pre = document.createElement('pre');
      pre.textContent = escape2Html(item.content);
      wrapper.appendChild(pre);

      li.appendChild(heading);
      li.appendChild(wrapper);
      return li;
    }

    const list = document.getElementById('ul');

    axios('http://127.0.0.1:5100').then((res) => {
      const fragment = document.createDocumentFragment();
      res.data.data.forEach((item, i) => {
        fragment.appendChild(buildNoteItem(item, i));
      });
      // Replace whatever the list held before, as a single DOM update.
      list.textContent = '';
      list.appendChild(fragment);
    }).catch((err) => {
      // Surface the failure instead of leaving a silently empty page.
      console.error(err);
      const li = document.createElement('li');
      li.textContent = 'Failed to load notes: ' + err.message;
      list.textContent = '';
      list.appendChild(li);
    });
  </script>
</body>
</html>
html 頁面暫時只展現所有筆記內容,也可根據該接口把筆記備份到別的網站發佈文章。