# 能夠放在服務器上當成計劃任務來運行,監測用戶某一個時間段內的微博動態,並使用短信平臺發送友情提示信息。
"""Watch a list of Weibo accounts and text a reminder when there is new activity.

The script is meant to be run repeatedly by a task scheduler.  The original
author schedules it from 23:00 until 01:00 the next day, so runs shortly after
midnight must still count as belonging to the previous calendar day when the
per-day marker files are named (see ``_task_date``).

Author: i@unoiou.com
Environment: python3, Windows 7 or later, server or personal version.
"""
from datetime import datetime, timedelta
from functools import reduce
from urllib.parse import urlencode
from uuid import uuid3, NAMESPACE_DNS
import json
import math
import os
import random
import re
import sqlite3
import time

import requests
from bs4 import BeautifulSoup

# selenium is only needed by check_like() and is imported there, so the rest
# of the script runs without it being installed.

# Working directory: the log file, the database and the per-day marker files
# are all created relative to it.
cwd = r'E:\pytest'
os.chdir(cwd)
cwd = os.getcwd()
log_file = 'weibo_notification.log'
# [[uid, phone], [uid, phone], [uid, phone]]
weibo_id = [['your_weibo_id', 'your_relative_user_phone_number'],
            ['your_weibo_id', 'your_relative_user_phone_number']]
api = 'http://service.weibo.com/widget/widget_blog.php?uid='
db_path = os.path.join(cwd, 'weibo.db')
# ex: 123ac724f41234ccfa1234aec123432b (fake)
API_KEY = 'your_yunpian_sms_key'
# ex: 2124387 (fake)
TEMPLATE_ID = 'your_yunpian_int_template_id'
SMS_API = 'https://sms.yunpian.com/v2/sms/tpl_single_send.json'
# A post reported as fewer than this many units "ago" triggers an SMS.
THRESHOLD = 5
# Runs whose hour is in [0, ROLLOVER_HOUR] are attributed to the previous day.
ROLLOVER_HOUR = 3
# Seconds to wait for any HTTP request before giving up.
REQUEST_TIMEOUT = 30
EMOJI = [
    '(′ ▽〃)', '( ′o)', '(′ェ)', '(′ε )', '(=′ー)', '( ′θ)', '(′○)', '( ′-)',
    '(′∀=)', '(′▽)', '(′ノ0)', '( ′ロ )', '(′ ︿ )', '(′∇)', '(′m) ', '(′0ノ*)',
    '(@。ε。@)', '(=′∇=) ', '(●′ω●)', '(′~●)', '(′へ、 )', '(〃′o)', '( ;′⌒`)',
    '(๑→ܫ←)', '(。・`ω´・)', '(σ`・д・)', '(o゚ω゚o)', '( ・ˍ・)', '(・ˍ・)',
    '(〃・o・〃)', '(^・・)', '(。・ˍ・。)', '(・0・)', '(・ε・●)', '(。・ω・)',
    '(。・д・。)', '(・∀・)', '(・□・、)', '(・-・。)', '(・▽・。)', '(・・。)',
    '(・O・。)', '(ノ△・。)', '(@・ˍ・)', '(・・)', '(・ε・;)', '(・ o ・)',
    '(・ェ・o)', '(′・∀・『)', '(ΘΘ)', '(Θ~Θ〃)', '(ΘoΘ)', '(ΘェΘ)', '(Θ∀Θ#)',
    '(ΘдΘ;)', '(Θ皿Θメ)', '(ΘーΘ*)', '(Θ0Θ●)', '(Θ▽Θ)', '(ΘεΘ◎)', '(Θ◇Θ。)',
    '(ΘへΘ)', '(ΘˍΘ=)', '(Θ、Θ)', '(Θ△Θ@)', '(Θ3Θ)', '(°ー°〃)', '(#°Д°)',
    '(。□。)', '(。∀。)', '(。∇^)', '( ^ー。)', '(@。ー。 @)', '(。∇^☆)',
    '(o。◇。)', '( 。 ▽ 。)', '(#。ε。#)', '(。∇^d)', '(。・。;)', '(。皿。メ)',
    '(* 。 3 ^)', '(〃。 o 。〃)', '( °◇ °)', '( 。⊿。)',
    '(°□°;)', '( ロ)', '(。Д。 ;)', '(*。ノO。)', '(; 。。)', '(><)', '(;><)',
    '(><)', '(>.<)', '(>o<)', '(>O<)', '(o>▽<)', '(>◇< )', '(>▽<)', '(;>△<)',
    '( >з<)', '(o>ェ<)', '(>д<)', '(>皿<)', '(><、)', '(/<。)', '(>。;)',
    '(>。ヘ)', '(ノ<)', '(>。☆)', '(>y<;)', '(-ω- )', '(+ω+)', '(ω )', '( ^ω^)',
    '(;ω;)', '(。ω。)', '(『ω′)', '(・ω・=)', '( ̄ω ̄)', '(〃ω〃)', '(≧ω≦)',
    '(。・ω・)', '(=・ω・=)', '( >ω<)', '(′・ω・`)', '(/ω\\)', '(/ω・\\)',
    '(/・ω・\\)', '(ΘωΘ)', '(ΦωΦ)',
]

# DDL for the storage table; ``wid`` is UNIQUE so re-inserting a known post
# fails with an IntegrityError, which deposit_weibo() treats as "already stored".
_SCHEMA = (
    'CREATE TABLE IF NOT EXISTS weibo ('
    ' id INTEGER PRIMARY KEY AUTOINCREMENT,'
    ' nickname TEXT NOT NULL,'
    ' content TEXT,'
    ' repost TEXT,'
    ' time DATETIME NOT NULL,'
    ' date DATE NOT NULL,'
    ' type INT NOT NULL,'
    ' imgs TEXT,'
    ' uid INT NOT NULL,'
    ' wid TEXT NOT NULL UNIQUE'
    ')'
)


def _task_date(now=None):
    """Return the calendar date a run should be attributed to.

    Runs between 00:00 and ``ROLLOVER_HOUR``:59 count as the previous day.
    ``timedelta`` is used so that the 1st of a month rolls back correctly
    instead of producing day 0.

    :param now: datetime to evaluate; defaults to the current local time
    :return: ``datetime.date``
    """
    now = datetime.now() if now is None else now
    if 0 <= now.hour <= ROLLOVER_HOUR:
        now = now - timedelta(days=1)
    return now.date()


def log(s, t):
    """Append one line to the run log and echo it to stdout.

    :param s: message text
    :param t: severity index: 0 = no tag, 1 = INFO, 2 = ERROR
    :return: None
    """
    types = [' ', '[INFO]\t', '[ERROR]\t']
    date_ = datetime.today().ctime()
    info = ' - '.join((types[int(t)], date_, s, '\n'))
    # Explicit UTF-8: logged page content may not fit the platform default
    # code page, which would make the write itself raise.
    with open(os.path.join(cwd, log_file), 'at', encoding='utf-8') as f:
        f.write(info)
    print(info)


def _parse_post_time(t_span, now):
    """Turn the widget's timestamp text into ``(date, time, units_ago)``.

    Three shapes are recognised, in this order: text starting with '今天',
    text ending with '前' (a relative "N ... ago"), and otherwise
    '<date> <time>'.  ``units_ago`` is the parsed number for the relative
    form and ``None`` for the others.  Dates are always zero-padded
    ``YYYY-MM-DD`` so the same post yields the same ``wid`` on later runs.

    NOTE(review): the relative number is treated as minutes, as the original
    code did; whether the widget can also report other units is not visible
    from this file.

    :raises ValueError: when the text matches none of the expected shapes
    """
    if t_span.startswith('今天'):
        parts = t_span.split(' ')
        if len(parts) < 2:
            raise ValueError('no time in ' + repr(t_span))
        return now.strftime('%Y-%m-%d'), parts[1], None

    if t_span.endswith('前'):
        numbers = re.findall(r'(\d+)', t_span)
        if not numbers:
            raise ValueError('no number in ' + repr(t_span))
        ago = int(numbers[0])
        posted = now - timedelta(minutes=ago)
        return posted.strftime('%Y-%m-%d'), posted.strftime('%H:%M'), ago

    date_part, time_ = t_span.split(' ')
    numbers = re.findall(r'\d+', date_part)
    if len(numbers) >= 3:
        year, month, day = numbers[:3]
    elif len(numbers) == 2:
        # No year in the text: assume the current year, as the original did.
        year = now.year
        month, day = numbers
    else:
        raise ValueError('unrecognised date ' + repr(t_span))
    date_ = '{}-{:02d}-{:02d}'.format(int(year), int(month), int(day))
    return date_, time_, None


def parse_weibo(uid, mobile):
    """Fetch a user's Weibo widget page and yield one tuple per post.

    Each yielded tuple is
    ``(content, repost, time, date, type, img, uid, wid, nick_name)``,
    matching the column order used by ``deposit_weibo``.

    ``type`` is 0 when the post text starts with '轉發了' and 1 otherwise.
    NOTE(review): the original docstring described the opposite meaning
    (0 = original, 1 = repost); the stored values are left unchanged here.

    NOTE(review): the literals '轉發了', '今天' and '前' must match the page
    text exactly; confirm they use the same character variant as the live page.

    Side effect: a post reported as fewer than ``THRESHOLD`` units ago
    triggers ``send_notification(mobile)``.

    :param uid: Weibo user id (string)
    :param mobile: phone number to notify
    """
    url = api + uid
    log(url, 1)
    ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36'
    header = {'User-Agent': ua}
    try:
        # A timeout keeps an unresponsive server from hanging the scheduled run.
        res = requests.get(url, headers=header, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as e:
        log('Failed to fetch ' + url + ': ' + str(e), 2)
        return
    res.encoding = 'utf-8'
    soup = BeautifulSoup(res.text, 'html5lib')
    names = soup(class_='userNm')
    if not names:
        log('No user name found on widget page for ' + str(uid), 2)
        return
    nick_name = names[0].text

    for weibo in soup(class_='wgtCell'):
        plain_txt = weibo(class_='wgtCell_txt', limit=1)
        t = weibo(class_='wgtCell_tm', limit=1)
        links = t[0]('a') if t else []
        contents = plain_txt[0].contents if plain_txt else []
        if not contents or not links:
            log('Skipping a cell with unexpected structure for ' + str(uid), 2)
            continue

        # limit=1 means there is exactly one text cell; split its HTML on <br/>.
        plains = str(plain_txt[0]).split('<br/>')
        imgs = weibo('img')
        img = imgs[0]['src'] if len(imgs) > 0 else ''
        type_ = 0 if str(contents[0]).startswith('轉發了') else 1
        if type_:
            repost = ''
            # str() guards the single-node case, where reduce returns the node
            # itself rather than a concatenated string.
            content = str(reduce(lambda x, y: str(x).strip() + str(y).strip(), contents))
        else:
            repost = plains[-1]
            content = ''.join(plains[:-1])

        t_span = str(links[0].text)
        try:
            date_, time_, ago = _parse_post_time(t_span, datetime.now())
        except ValueError as e:
            log('Cannot parse post time: ' + str(e), 2)
            continue
        if ago is not None:
            if ago < THRESHOLD:
                send_notification(mobile)
            else:
                log('Older than threshold.' + str(THRESHOLD) + '\t' + str(uid), 1)

        wid = str(gid(date_ + time_ + content + str(uid)))
        yield (content, repost, time_, date_, type_, img, uid, wid, nick_name)


def check_like(uid, mobile):
    """Open the user's "like" page in Firefox and notify when the count grows.

    The like count is compared with the value stored in a per-day file named
    ``YYYYMMDD_<uid>_like.txt`` in the working directory.

    Requires selenium and a matching webdriver executable.
    NOTE(review): ``find_element_by_class_name`` belongs to the older selenium
    API; confirm the installed selenium version still provides it.

    :param uid: Weibo user id (string)
    :param mobile: phone number to notify
    :return: 0 when the page or the like count could not be read, else None
    """
    # Imported here so the module does not depend on selenium unless this
    # function is actually used.
    from selenium import webdriver
    from selenium.webdriver.common.keys import Keys

    log('ID: ' + uid + '=====>', 1)
    browser = webdriver.Firefox()
    try:
        url = 'https://weibo.com/' + uid + '/like'
        browser.get(url)
        browser.implicitly_wait(40)

        source = None
        for _ in range(3):
            try:
                browser.find_element_by_class_name('WB_tab_a').send_keys(Keys.DOWN)
                source = browser.page_source
                log('Successfully get HTML', 1)
                break
            except Exception as e:
                # Best effort: any driver error just costs one retry.
                log(str(e), 2)
                time.sleep(2)
        if source is None:
            log('獲取HTML失敗。', 2)
            return 0

        base_file = _task_date().strftime('%Y%m%d') + '_' + uid + '_like.txt'
        log(base_file, 1)

        # Current number of liked posts; the last match on the page is used.
        finds = re.findall(r'共(\d+)條', source)
        if not finds:
            log('獲取HTML失敗。', 2)
            log(source, 2)
            return 0
        like_num = int(finds[-1].strip())
        log('Success: HTML', 1)
        log('Liked Number: ' + str(like_num), 1)

        # The per-day file holds the like count seen by earlier runs of the day.
        last_like_num = None
        if os.path.exists(base_file):
            with open(base_file, 'rt') as f:
                last_like_num = f.readline().strip()
            log('Liked number: ' + str(like_num), 1)
        else:
            log('No exist like num', 1)
        log('Last like num: ' + str(last_like_num), 1)

        flag = False
        if last_like_num:
            try:
                previous = int(last_like_num)
            except ValueError:
                log('Ignoring unreadable like count: ' + last_like_num, 2)
                previous = None
            if previous is not None:
                # NOTE(review): the original nesting is ambiguous; this keeps the
                # stored value when the count has not grown and stores the new
                # count when it has.
                if like_num > previous:
                    flag = True
                else:
                    like_num = previous

        # Written only after the old value was validated, so a bad file is not
        # truncated before it has been read.
        with open(base_file, 'wt') as f:
            f.write(str(like_num))
        log(str(like_num), 1)

        if flag:
            send_notification(mobile)
            log('Sent Msg to ' + mobile, 1)
        else:
            log('Not Send Msg.', 1)
    finally:
        # Always end the browser session, including on the early returns.
        browser.quit()


def deposit_weibo(item):
    """Insert one parsed post into the ``weibo`` table.

    The table is created on first use.  A post whose ``wid`` is already
    present violates the UNIQUE constraint and is reported as not stored;
    any other database error is logged instead of being silently dropped.

    :param item: tuple ``(content, repost, time, date, type, imgs, uid, wid,
        nickname)`` as yielded by ``parse_weibo``
    :return: True when a new row was stored, False otherwise
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.execute(_SCHEMA)
        conn.execute(
            'INSERT INTO weibo (content, repost, time, date, type, imgs, uid, wid, nickname) VALUES (?,?,?,?,?,?,?,?,?)',
            item)
        conn.commit()
        return True
    except sqlite3.IntegrityError:
        # Same wid: the post has been stored before.
        return False
    except sqlite3.Error as e:
        log('Failed to store weibo: ' + str(e), 2)
        return False
    finally:
        conn.close()


def gid(s):
    """Derive a deterministic UUID (version 3, DNS namespace) from ``s``.

    The same input text always maps to the same UUID, which is what lets the
    ``wid UNIQUE`` column reject posts that were stored before.
    """
    return uuid3(NAMESPACE_DNS, s)


def send_notification(mobile):
    """Send at most one templated SMS per day to ``mobile`` via Yunpian.

    A marker file ``YYYYMMDD_<mobile>.txt`` in the working directory records
    that the day's message was sent.  It is written only after the API
    answered, so a failed request is retried on the next run.

    NOTE(review): the marker is written for any parsed API response, whatever
    result it reports, as in the original; confirm whether error responses
    should also suppress retries.

    :param mobile: destination phone number (string)
    :return: None
    """
    fname = _task_date().strftime('%Y%m%d') + '_' + mobile + '.txt'
    if os.path.exists(fname):
        log('Already sent sms, do nothing.', 1)
        return

    # The variable name must match the template configured on the SMS platform.
    # Name and value are URL-encoded because some emoji contain '='.
    tpl_value = urlencode({'#emoji#': random.choice(EMOJI) + '麼麼噠~😜'})
    params = {'apikey': API_KEY, 'tpl_id': TEMPLATE_ID,
              'tpl_value': tpl_value, 'mobile': mobile}
    try:
        res = requests.post(SMS_API, data=params, timeout=REQUEST_TIMEOUT)
        payload = res.json()
    except (requests.RequestException, ValueError) as e:
        log('Failed to send sms to ' + mobile + ': ' + str(e), 2)
        return

    log(json.dumps(payload), 1)
    print(payload)
    log('Sent msg to ' + mobile, 1)
    with open(fname, 'at') as f:
        json.dump(payload, f)


def main():
    """Process every configured account: parse its posts and store new ones."""
    log('==========================================', 0)
    for u in weibo_id:
        uid = u[0]
        mobile = u[1]
        log('Mobile: ' + mobile + ' UID: ' + uid, 1)
        # Like monitoring is currently switched off:
        # check_like(uid, mobile)
        items = 0
        for weibo in parse_weibo(uid, mobile):
            if deposit_weibo(weibo):
                items += 1
        log('Found new items: ' + str(items), 1)


if __name__ == '__main__':
    main()