如今博客園首頁的文章質量良莠不齊,比如我這篇就是來水的。因此我寫了個小爬蟲,定時去爬取首頁的文章,閱讀量超過 1000 的就自動發送郵件通知。
https://github.com/kklldog/cnblogs_noticenode
博客園的首頁列表實際上是有 ajax 接口的,閱讀量就在這個接口返回的內容裏面,使用 cheerio 就能夠抽取出來。
https://www.cnblogs.com/mvc/AggSite/PostList.aspx
var request = require('request');

/**
 * Performs a request through the `request` library and retries on failure.
 *
 * `trytimes` is the number of retries still allowed after the current
 * attempt; when it is omitted (undefined) it starts at 5, so a permanently
 * failing request is attempted up to 6 times in total.
 *
 * @param {object} option - Options object handed to `request` unchanged.
 * @param {function} callback - Called with the response object on success.
 * @param {function} [errCallback] - Called with the last error once no
 *   retries are left; when absent the error is only logged.
 * @param {number} [trytimes] - Remaining retries (defaults to 5).
 */
function req(option, callback, errCallback, trytimes) {
    var retriesLeft = trytimes;
    if (retriesLeft === undefined) {
        retriesLeft = 5;
    }

    request(option, function (err, res) {
        // Success path first: hand the response over and stop.
        if (!err) {
            callback(res);
            return;
        }

        console.error('request ' + option.url + ' error .');
        console.error(err);

        if (retriesLeft > 0) {
            req(option, callback, errCallback, retriesLeft - 1);
            return;
        }

        // Retries exhausted: report to the caller if it asked to be told.
        if (errCallback) {
            errCallback(err);
        }
    });
}

/**
 * Issues a request for `url` with a timeout of 30000 and no explicit method.
 *
 * @param {string} url - Address to fetch.
 * @param {function} callback - Called with the response object on success.
 * @param {function} [errCallback] - Called with the error after all retries fail.
 * @param {number} [trytimes] - Remaining retries (see `req`).
 */
function get(url, callback, errCallback, trytimes) {
    var option = { url: url, timeout: 30000 };
    req(option, callback, errCallback, trytimes);
}

/**
 * Issues a POST request for `url` with the given body and a timeout of 30000.
 *
 * @param {string} url - Address to post to.
 * @param {*} body - Request body, passed to `request` as `body`.
 * @param {boolean} isJson - Passed to `request` as its `json` option.
 * @param {function} callback - Called with the response object on success.
 * @param {function} [errCallback] - Called with the error after all retries fail.
 * @param {number} [trytimes] - Remaining retries (see `req`).
 */
function post(url, body, isJson, callback, errCallback, trytimes) {
    var option = {
        url: url,
        timeout: 30000,
        body: body,
        method: 'POST',
        json: isJson
    };
    req(option, callback, errCallback, trytimes);
}

exports.get = get;
exports.post = post;
exports.req = req;
// Parse the post-list HTML in `body` and, for every post whose view count is
// above 1000, try to send a notification mail.
var $ = cheerio.load(body);
$('div.post_item_body').each((index, postBody) => {
    var name = $(postBody).find('a.titlelnk').text();

    $(postBody).find('span.article_view a').each((i, e) => {
        var link = $(e).attr('href');
        var text = $(e).text();

        // The count is whatever sits between the first '(' and the first ')'
        // of the link text; anything malformed yields an empty string.
        var sIndex = text.indexOf('(');
        var eIndex = text.indexOf(')');
        var viewCount = eIndex > sIndex ? text.slice(sIndex + 1, eIndex) : '';

        // Always parse as decimal so a stray "0x" prefix is never read as hex.
        var intViewCount = parseInt(viewCount, 10);

        console.log(link + ' ' + viewCount + ' ' + name);

        // NaN (no parsable count) compares false here, so such posts are skipped.
        if (intViewCount > 1000) {
            trySendMail(link, name, mailAddress);
        }
    });
});
var Db = require('mongodb').Db;
var Server = require('mongodb').Server;
var MongoClient = require('mongodb').MongoClient;

// Shared database handle. It stays undefined until the connect callback in
// init() has run successfully, so calling any helper below before that point
// throws when it dereferences `db`.
var db;

/**
 * Connects to mongodb://localhost:27017/notice and stores the handle in `db`.
 * On failure the error is logged and `db` is left unset.
 *
 * NOTE(review): the connect callback's second argument is used directly as a
 * database handle (`db.collection(...)`); confirm this matches the installed
 * driver version.
 */
var init = function () {
    MongoClient.connect("mongodb://localhost:27017/notice", (err, database) => {
        if (err) {
            console.error(err);
            return;
        }
        console.log('connect to db success');
        db = database;
    });
};

/**
 * Inserts `data` into the collection `collName`.
 *
 * @param {string} collName - Collection name.
 * @param {*} data - Value passed to the collection's insert().
 * @param {function} [callback] - Called with the driver result on success.
 *   Errors are only logged.
 */
var insert = function (collName, data, callback) {
    var coll = db.collection(collName);
    coll.insert(data, (err, r) => {
        if (err) {
            console.error(err);
            return;
        }
        console.log('save to ' + collName + ' success !');
        if (callback) {
            callback(r);
        }
    });
};

/**
 * Fetches one page of documents matching `filter`, sorted by `videoId`
 * ascending.
 *
 * NOTE(review): the `videoId` sort key looks carried over from another
 * project; confirm the collections queried here actually have that field.
 *
 * @param {string} collName - Collection name.
 * @param {object} filter - Query filter.
 * @param {number} skip - Number of documents to skip.
 * @param {number} limit - Maximum number of documents to return.
 * @param {function} callback - Called with the result array, or with an
 *   empty array when the query fails (the error is logged).
 */
var queryPage = function (collName, filter, skip, limit, callback) {
    var coll = db.collection(collName);
    coll.find(filter).sort({ videoId: 1 }).skip(skip).limit(limit).toArray((err, r) => {
        if (err) {
            console.error(err);
            callback([]);
            return;
        }
        callback(r);
    });
};

/**
 * Removes the documents matching `filter` from `collName`.
 *
 * @param {string} collName - Collection name.
 * @param {object} filter - Query filter.
 * @param {function} [callback] - Called with the driver result on success.
 *   Errors are only logged.
 */
var remove = function (collName, filter, callback) {
    var coll = db.collection(collName);
    coll.remove(filter, (err, r) => {
        if (err) {
            console.error(err);
            return;
        }
        console.log('remove to ' + collName + ' success !');
        if (callback) {
            callback(r);
        }
    });
};

/**
 * Fetches every document matching `filter` from `collName`.
 *
 * @param {string} collName - Collection name.
 * @param {object} filter - Query filter.
 * @param {function} callback - Called with the result array, or with an
 *   empty array when the query fails (the error is logged).
 */
var find = function (collName, filter, callback) {
    var coll = db.collection(collName);
    coll.find(filter).toArray((err, r) => {
        if (err) {
            console.error(err);
            callback([]);
            return;
        }
        callback(r);
    });
};

/**
 * Applies `{ $set: updateObj }` to the collection `collName` for `filter`.
 *
 * @param {string} collName - Collection name.
 * @param {object} filter - Query filter.
 * @param {object} updateObj - Fields to set.
 * @param {function} [callback] - Called with the driver result on success.
 * @param {function} [errCallback] - Called with the error on failure.
 */
var update = function (collName, filter, updateObj, callback, errCallback) {
    var coll = db.collection(collName);
    coll.update(filter, { $set: updateObj }, (err, r) => {
        if (err) {
            console.error(err);
            // errCallback is optional like the other callbacks in this module;
            // calling it unguarded turned a database error into a TypeError.
            if (errCallback) {
                errCallback(err);
            }
            return;
        }
        console.log('update to ' + collName + ' success !');
        if (callback) {
            callback(r);
        }
    });
};

exports.insert = insert;
exports.queryPage = queryPage;
exports.remove = remove;
exports.find = find;
exports.update = update;
exports.init = init;
var schedule = require('node-schedule');
var cnblogs = require('./cnblogs');

/**
 * Job body: delegates to cnblogs.filter(1, 10).
 * NOTE(review): the meaning of the arguments (1, 10) is defined in ./cnblogs;
 * presumably a page range, but confirm there.
 */
var filter = function () {
    cnblogs.filter(1, 10);
};

/**
 * Registers `filter` with node-schedule using the rule
 * { hour: 10, minute: 1 } and logs that scheduling is set up.
 */
var initSchedule = function () {
    // The minute was written as `01`, a legacy octal literal that is a
    // SyntaxError in strict mode and ES modules; plain `1` is the same value.
    schedule.scheduleJob({ hour: 10, minute: 1 }, filter);
    console.log('schedule inited .');
};
回覆郵件地址就能夠自動訂閱推送 :)
我的博客即將搬運同步至騰訊雲+社區,邀請大家一同入駐:https://cloud.tencent.com/developer/support-plan