先放圖:
新浪微博的登陸不是簡單的 post 就能解決的,它的登陸有加密,因此我們要使用對應的加密算法才能正常登陸微博,得到我們想要的數據。
先不要慌,第一步當然是 import 我們需要的庫:
from urllib import request,parse import http.cookiejar import base64 import json import rsa import binascii from PIL import Image from bs4 import BeautifulSoup import pymysql
要登陸,那肯定要先拿到驗證碼:
def GetCode(self):
    """Download the login captcha, save it as ``vv.png`` and display it.

    The image is fetched through ``self.opener`` and written to ``vv.png``
    in the current working directory, then opened with PIL's viewer so the
    user can read the code. Nothing is returned.
    """
    # Captcha endpoint; the query values are the fixed ones used by the author.
    url="https://login.sina.com.cn/cgi/pin.php?r=694905&s=0&p=gz-52086a8a846fadcdacf4fb058324aa387858"
    img=self.opener.open(url)
    # ``with`` guarantees the file handle is closed even if the write fails.
    with open('vv.png','wb') as f:
        f.write(img.read())
    im=Image.open('vv.png')
    im.show()
拿到了驗證碼還不夠,登陸之前還有個預登陸,拿到我們後面加密需要的參數:
def prelogin(self):
    """Call the SSO pre-login endpoint and return its payload as a dict.

    The endpoint answers with JSONP (a JSON object wrapped in a callback
    call). The wrapper is stripped and the object inside is parsed.

    Returns:
        dict: the decoded payload (see the sample response below).

    Raises:
        ValueError: if the response is not wrapped in parentheses or the
            wrapped text is not valid JSON (``json.JSONDecodeError`` is a
            ``ValueError`` subclass).
    """
    # Sample response captured by the author:
    # sinaSSOController.preloginCallBack({"retcode":0,"servertime":1540617565,
    #   "pcid":"gz-65c55b3534f8a1df1330b4708fb6d1b57273","nonce":"ONED4A",
    #   "pubkey":"EB2A38568661887FA180BDDB5CABD5F21C7BFD59C090CB2D245A87AC253062882729293E5506350508E7F9AA3BB77F4333231490F915F6D63C55FE2F08A49B353F444AD3993CACC02DB784ABBB8E42A9B1BBFFFB38BE18D78E87A0E41B9B8F73A928EE0CCEE1F6739884B9777E4FE9E88A1BBE495927AC4A799B3181D6442443",
    #   "rsakv":"1330428213","is_openlock":0,"showpin":0,"exectime":10})
    url="https://login.sina.com.cn/sso/prelogin.php?entry=weibo&callback=sinaSSOController.preloginCallBack&su=NDc5MTkyNzQyJTQwcXEuY29t&rsakt=mod&checkpin=1&client=ssologin.js(v1.4.19)&_=1529471491358"
    text=self.opener.open(url).read().decode('utf-8')
    # Cut between the first "(" and the LAST ")" so that a ")" inside a
    # JSON string value cannot truncate the payload.
    start=text.find("(")
    end=text.rfind(")")
    if start==-1 or end<=start:
        raise ValueError("unexpected prelogin response: "+text[:200])
    return json.loads(text[start+1:end])
然後用這些參數對登陸參數進行加密,說實在話具體的加密細節我也不記得了,當時做的時候拿了個記事本把所有東西記下來然後分析,也查了不少博客的資料才做好。
def GetMixUser(self,username,password):
    """Encode the user name and RSA-encrypt the password for the login form.

    Reads ``pubkey``, ``servertime`` and ``nonce`` from ``self.pre``.

    Returns:
        dict: ``{'uname': <base64 of the URL-quoted user name>,
        'upass': <hex string of the RSA ciphertext>}``.
    """
    # Public key: hex modulus taken from self.pre, exponent fixed to 65537.
    modulus=int(self.pre['pubkey'],16)
    public_key=rsa.PublicKey(modulus,65537)

    # Plaintext layout: "<servertime>\t<nonce>\n<password>".
    plaintext="".join((str(self.pre['servertime']),'\t',self.pre['nonce'],'\n',password))
    ciphertext=rsa.encrypt(plaintext.encode('utf-8'),public_key)
    hex_password=binascii.b2a_hex(ciphertext)

    # urlencode yields "username=<quoted name>". The 9-byte "username="
    # prefix is exactly 12 base64 characters, so dropping the first 12
    # characters leaves the base64 of the quoted name alone.
    query=parse.urlencode({'username':username})
    encoded_query=base64.b64encode(query.encode('utf-8')).decode('utf-8')
    encoded_name=encoded_query[12:]

    return {'uname':encoded_name,'upass':hex_password.decode('utf-8')}
拿到加密後的登陸參數,可以提交了:
def login(self,username,password,code):
    """Submit the login form and follow the two script redirects.

    Args:
        username: account name, passed to ``self.GetMixUser``.
        password: plain password, passed to ``self.GetMixUser``.
        code: captcha text typed by the user (sent as ``door``).

    Returns:
        bool: ``True`` when both redirect pages were followed; ``False``
        when the second page contains "身份" or when a redirect URL cannot
        be found in a response.
    """
    def redirect_target(page,marker):
        """Return the quoted URL of the first call after *marker*, or None."""
        start=page.find(marker)
        if start==-1:
            return None
        paren=page.find("(",start)
        if paren==-1:
            return None
        quote=page[paren+1:paren+2]
        if quote not in ("'",'"'):
            return None
        # Stop at the matching quote rather than at the first ")" of the
        # page, so parentheses elsewhere cannot corrupt the URL.
        end=page.find(quote,paren+2)
        if end==-1:
            return None
        return page[paren+2:end]

    mix=self.GetMixUser(username,password)
    uname=mix['uname']
    upass=mix['upass']
    url="https://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.19)"
    print("登陸中……")
    postData={
        "door":code,
        "encoding":"utf-8",
        "entry":"weibo",
        "from":"null",
        "gateway":1,
        "nonce":self.pre['nonce'],
        "prelt":72,
        "pwencode":"rsa2",
        "qrcode_flag":False,
        "returntype":"META",
        "savestate":7,
        "servertime":self.pre['servertime'],
        "service":"miniblog",
        "rsakv":self.pre['rsakv'],
        "su":uname,
        "sp":upass,
        "url":"https://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack",
        "useticket":1,
        "vsnf":1,
    }
    body=parse.urlencode(postData).encode('utf-8')

    # Step 1: the login response carries a script redirect "replace('<url>')".
    page=self.opener.open(url,body).read().decode('gbk',errors='replace')
    url1=redirect_target(page,"replace")
    if url1 is None:
        return False

    # Step 2: the next page either reports a problem (it then contains
    # "身份") or carries a second "location.replace('<url>')".
    page=self.opener.open(url1).read().decode('gbk',errors='replace')
    if "身份" in page:
        return False
    url2=redirect_target(page,"location")
    if url2 is None:
        return False

    # Step 3: visit the final URL; its body is not used.
    self.opener.open(url2).read()
    return True
先得到用戶 follow 的列表:
def GetUserList(self,uid,pageNum):
    """Fetch one page of the accounts followed by *uid*.

    Args:
        uid: id of the account whose follow list is read.
        pageNum: page number of the follow list.

    Returns:
        dict | None: ``{"name": [...], "uid": [...]}`` as two parallel
        lists, or ``None`` when the page could not be fetched or parsed.
    """
    url="https://weibo.com/"+str(uid)+"/follow?page="+str(pageNum)
    try:
        result=self.opener.open(url).read().decode('utf-8')
        # The markup arrives escaped inside a script; strip the escapes.
        html = result.replace('\\n', '').replace('\\t', '').replace('\\r', '').replace('\\', '')
        # Narrow the document to the follow-list region when the marker
        # comments are present; otherwise parse what is available instead
        # of silently producing an empty slice.
        # NOTE(review): these marker literals may have been altered by a
        # script conversion of the post — confirm against a live page.
        start = html.find("<!--關注/粉絲列表-->")
        end = html.find("<!--關歐盟隱私協議彈窗-->")
        if start == -1:
            start = 0
        if end == -1 or end < start:
            end = len(html)
        soup = BeautifulSoup(html[start:end], "html.parser")
        list_a = soup.find_all(name='div', attrs={"class": "info_name W_fb W_f14"})
        names = []
        uids = []
        for a in list_a:
            try:
                card = a.find(name="a")['usercard']
            except (TypeError, KeyError):
                # No <a> child, or the link has no usercard attribute.
                print("No Data")
                continue
            # Characters 3..12 of the usercard value are taken as the id.
            # NOTE(review): presumably the value looks like "id=<10 digits>&..."; confirm.
            uids.append(card[3:13])
            names.append(a.text)
            print("加入用戶:" + a.text)
        return {"name": names, "uid": uids}
    except Exception as exc:
        # Best effort: report the failure and return None as before.
        print("GetUserList failed:", exc)
        return None
再拿到這些用戶的主頁微博言論,我們得到他們發的所有博文:
def GetTalks(self,uid,max_pages=None):
    """Collect the text of the posts shown on a user's home page.

    Pages are requested as ``?page=0, 1, 2, ...`` until a page yields no
    post blocks, a request fails, or *max_pages* pages have been read.

    Args:
        uid: id of the account to read.
        max_pages: optional upper bound on the number of pages requested;
            ``None`` (default) means no bound.

    Returns:
        list[str]: post texts with spaces removed, in page order.
    """
    rlist = []
    page = 0
    while max_pages is None or page < max_pages:
        try:
            result=self.opener.open("https://weibo.com/u/"+str(uid)+"?page="+str(page)).read().decode("utf-8")
        except Exception as exc:
            # Stop on failure. Re-parsing the previous page's HTML would
            # append duplicates and could loop forever.
            print("GetTalks: request failed:", exc)
            break
        html = result.replace("\\t", "").replace("\\n", "").replace("\\r", "").replace("\\", "")
        html = html[html.find("<div class=\"WB_feed WB_feed_v3 WB_feed_v4\""):]
        soup=BeautifulSoup(html,"html.parser")
        list_a = soup.find_all(name="div", attrs={"class": "WB_text W_f14"})
        page = page + 1
        if not list_a:
            break
        print("第" + str(page) + "頁")
        for a in list_a:
            at=a.text.replace(" ","")
            if at:
                rlist.append(at)
                print("內容存入:"+at)
    return rlist
我暫時只做了這些,然後存儲到數據庫吧。mysql 的基礎語句很簡單,我也就會這麼幾句,不會的百度吧。
def sqllogin(self):
    """Open and return a pymysql connection to the local ``weibouser`` database."""
    db=pymysql.connect(host='localhost',user='root',db='weibouser',passwd='root',charset='utf8mb4')
    return db


def sqlProcess(self,db):
    """Crawl every user whose TAG is 1 until none is left.

    TAG values: 1 = not processed, 2 = being processed, 3 = done.
    For each user the posts are fetched with ``GetTalks`` and follow-list
    pages 1..3 with ``GetUserList``. Unless the user's CLASS is 3, each
    followed account is inserted into USERS (when ``IfExist`` reports it
    missing) and into FOLLOWS. The posts are written to USERTALKS and the
    user is marked done.

    Row columns are read by position: ``[1]`` name, ``[2]`` user id,
    ``[4]`` class.

    Args:
        db: open pymysql connection.
    """
    cursor=db.cursor()
    try:
        while True:
            cursor.execute("SELECT * FROM USERS WHERE TAG =1")
            result=cursor.fetchone()
            if not result:
                break
            user_name=result[1]
            user_id=result[2]
            # Values are passed as query parameters so quotes in names or
            # post text cannot break or alter the statement.
            cursor.execute("UPDATE USERS SET TAG=2 WHERE USERID=%s",(user_id,))
            db.commit()
            talks=self.GetTalks(uid=user_id)
            for page in range(1,4):
                userlist=self.GetUserList(uid=user_id,pageNum=page)
                try:
                    uids=userlist['uid']
                    names=userlist['name']
                except (TypeError,KeyError):
                    # GetUserList returned None or a malformed result.
                    break
                if int(result[4])==3:
                    # Deepest level: followed accounts are not recorded.
                    continue
                for fname,fuid in zip(names,uids):
                    try:
                        if self.IfExist(db,"users","name",fname)==False:
                            # New account: queue it one level deeper.
                            cursor.execute(
                                "INSERT INTO USERS (NAME,USERID,TAG,CLASS) VALUES (%s,%s,%s,%s)",
                                (fname,fuid,1,int(result[4])+1))
                        cursor.execute(
                            "INSERT INTO FOLLOWS (USERID,FUID,FUNAME) VALUES (%s,%s,%s)",
                            (user_id,fuid,fname))
                    except Exception as exc:
                        print("Error",exc)
            for talk in talks:
                try:
                    cursor.execute(
                        "INSERT INTO USERTALKS (USERID,NAME,TALK)VALUES (%s,%s,%s)",
                        (user_id,user_name,talk))
                except Exception as exc:
                    print("Error",exc)
            cursor.execute("UPDATE USERS SET TAG=3 WHERE USERID=%s",(user_id,))
            # pymysql does not autocommit by default; persist this user's rows.
            db.commit()
    finally:
        cursor.close()


def AnotherProcess(self,db):
    """Fill USERDETAILS with description, keywords and sex for every user.

    For each USERS row the home page is fetched. Sex is "女" when the page
    contains "female", otherwise "男". The ``keywords`` and ``description``
    meta tags supply the other columns. Users whose page cannot be fetched
    or lacks either meta tag are skipped.

    Args:
        db: open pymysql connection.
    """
    cursor=db.cursor()
    try:
        cursor.execute("SELECT * FROM USERS WHERE 1")
        results=cursor.fetchall()
        for result in results:
            try:
                r=self.opener.open("https://weibo.com/u/"+str(result[2])).read().decode("utf-8")
            except Exception as exc:
                # Skip this user rather than reuse the previous user's page.
                print("Error",exc)
                continue
            html=r.replace("\\t", "").replace("\\n", "").replace("\\r", "").replace("\\", "")
            sex="女" if "female" in html else "男"
            soup=BeautifulSoup(html,"html.parser")
            keywords_tag=soup.find(attrs={"name":"keywords"})
            description_tag=soup.find(attrs={"name":"description"})
            if keywords_tag is None or description_tag is None:
                continue
            cursor.execute(
                "INSERT INTO USERDETAILS (NAME,DESCRIPTION,KEYWORDS,SEX)VALUES(%s,%s,%s,%s)",
                (result[1],description_tag.get('content',''),keywords_tag.get('content',''),sex))
        db.commit()
    finally:
        cursor.close()
import pymysql
from pyecharts import Graph


class DrawCharts:
    """Read users and follow relations from MySQL and render them as a graph.

    Instantiating the class does all the work: it loads nodes and links
    from the ``weibouser`` database and then renders the chart.
    """

    # Class-level defaults kept for backward compatibility; every instance
    # gets its own lists in __init__ so repeated runs do not accumulate.
    raw_nodes = []
    links = []

    def __init__(self):
        self.raw_nodes = []
        self.links = []
        self.CreatRelation()
        self.DrawCharts()

    def CreatRelation(self):
        """Fill ``self.links`` and ``self.raw_nodes`` from the database.

        Links: one ``{"source", "target"}`` entry per FOLLOWS row of each
        user. Nodes: one entry per user of CLASS 1..3, with a smaller
        symbol for deeper classes (30, 17, 4).
        """
        db=pymysql.connect(host='localhost',user='root',db='weibouser',passwd='root',charset='utf8mb4')
        try:
            cursor=db.cursor()
            cursor.execute("SELECT * FROM USERS WHERE 1")
            users=cursor.fetchall()
            # Original note here: "Shanghai University" — presumably the seed account.
            for user in users:
                cursor.execute("SELECT * FROM FOLLOWS WHERE USERID=%s",(user[2],))
                for result in cursor.fetchall():
                    # NOTE(review): column 4 is assumed to hold the followed
                    # user's name; confirm against the FOLLOWS schema.
                    self.links.append({"source":user[1],"target":result[4]})
                    print(user[1]+"->"+result[4])
            for i in range(3):
                cursor.execute("SELECT * FROM USERS WHERE CLASS=%s",(i+1,))
                for result in cursor.fetchall():
                    self.raw_nodes.append({"name": result[1], "symbolSize": 30-i*13,"category":i})
        finally:
            # Close the connection even when a query fails.
            db.close()

    def DrawCharts(self):
        """Render the collected nodes and links with pyecharts ``Graph``."""
        graph = Graph("微博關注關係圖", width=1200, height=600)
        # One legend category per crawl depth; only name and fill colour differ.
        categories=[
            {
                "name": name,
                "itemStyle": {
                    "normal": {
                        "color": color,
                        "borderColor": "#5182ab",
                        "borderWidth": 1.8,
                        "show": "True"
                    }
                }
            }
            for name, color in (
                ("一級深度", '#CC0033'),
                ("二級深度", '#f44242'),
                ("三級深度", '#663366'),
            )
        ]
        graph.add("",self.raw_nodes,self.links,label_pos="right",graph_repulsion=10, is_legend_show=False,categories=categories,
                  label_text_color=None,is_label_show=True)
        graph.render()
效果就不發了,運行起來比較卡,由於節點太多了,當時老師說怎麼怎麼加速,也不說細節,我當時一臉懵逼,也就沒管了,大不了減小節點數量嘛。
前端兩段關鍵代碼。。
<style scoped>
.layout-con{
    height: 100%;
    width: 100%;
}
.menu-item span{
    display: inline-block;
    overflow: hidden;
    width: 69px;
    text-overflow: ellipsis;
    white-space: nowrap;
    vertical-align: bottom;
    transition: width .2s ease .2s;
}
.menu-item i{
    transform: translateX(0px);
    transition: font-size .2s ease, transform .2s ease;
    vertical-align: middle;
    font-size: 16px;
}
.collapsed-menu span{
    width: 0px;
    transition: width .2s ease;
}
.collapsed-menu i{
    transform: translateX(5px);
    transition: font-size .2s ease .2s, transform .2s ease .2s;
    vertical-align: middle;
    font-size: 22px;
}
</style>

<template>
    <div class="layout">
        <Layout :style="{minHeight: '100vh'}">
            <Sider collapsible :collapsed-width="78" v-model="isCollapsed">
                <Menu active-name="1-1" theme="dark" width="auto" :class="menuitemClasses">
                    <MenuItem name="1-1" @click.native="tab_search_onclick()">
                        <Icon type="search"></Icon>
                        <span>搜索</span>
                    </MenuItem>
                    <MenuItem name="1-2" @click.native="tab_follow_onclick()">
                        <Icon type="person"></Icon>
                        <span>follow關係</span>
                    </MenuItem>
                    <MenuItem name="1-3" @click.native="tab_words_onclick()">
                        <Icon type="cloud"></Icon>
                        <span >熱詞雲圖</span>
                    </MenuItem>
                </Menu>
            </Sider>
            <Layout>
                <Header :style="{background: '#fff', boxShadow: '0 2px 3px 2px rgba(0,0,0,.1)',height:'100px'}">
                    <img src="../assets/logo.png" style="height: 100%"/>
                </Header>
                <Content :style="{padding: '0 16px 16px'}">
                    <Card style="margin-top: 20px">
                        <p slot="title">{{Content_Title}}</p>
                        <div style="height: 650px">
                            <!-- Search tab -->
                            <div v-show="v_search">
                                <div style="margin-bottom: 30px;margin-top: 30px">
                                    <Input v-model="input_word" icon="ios-clock-outline" placeholder="請輸入微博用戶名稱!"
                                           style="width: 250px;margin-right: 10px"></Input>
                                    <Button type="primary" shape="circle" icon="ios-search" @click="fs_search">查詢</Button>
                                </div>
                                <div style="width: 50%;margin: auto">
                                    <Card :bordered="true" style="height: 350px;text-align: left">
                                        <p slot="title">{{Search_Card_Title}}</p>
                                        <p v-for="(item, index) in items" :key="index" style="height: 70px">{{item.message}}</p>
                                    </Card>
                                </div>
                            </div>
                            <!-- Follow-graph tab -->
                            <div v-show="v_follows">
                                <div style="width: 80%;margin: auto">
                                    <Card :bordered="true" style="height: 580px;width: 100%;margin-top: 50px">
                                        <p slot="title">{{Follow_Card_Title}}</p>
                                        <div id="Follow_Chart" style="height: 500px;width:800px;margin: auto;"></div>
                                    </Card>
                                </div>
                            </div>
                            <!-- Word-cloud tab -->
                            <div v-show="v_words">
                                <div style="width: 50%;margin: auto">
                                    <Card :bordered="true" style="height: 330px;margin-top:50px ">
                                        <p slot="title">{{Words_Card_Title}}</p>
                                        <div id="Words_Chart" style="height: 250px;width:500px;margin: auto"></div>
                                    </Card>
                                </div>
                            </div>
                        </div>
                    </Card>
                </Content>
            </Layout>
        </Layout>
    </div>
</template>

<script>
let echarts = require('echarts');
require('echarts-wordcloud');
require("echarts/lib/chart/graph/forceLayout");

// Base URL of the back end that serves /details, /words, /nodes and /links.
const API_BASE = "http://localhost:9090";

export default {
    data () {
        return {
            isCollapsed: false,
            items:[],
            Search_Card_Title:'搜索結果',
            Content_Title:"資料搜索",
            Follow_Card_Title:"關係圖繪製結果",
            Words_Card_Title:"詞雲繪製結果",
            input_word:'',
            v_search:true,
            v_follows:false,
            v_words:false
        };
    },
    computed: {
        menuitemClasses: function () {
            return [
                'menu-item',
                this.isCollapsed ? 'collapsed-menu' : ''
            ]
        }
    },
    methods:{
        // Query the four endpoints for the typed name, then fill the detail
        // card, the follow graph and the word cloud.
        fs_search(){
            let axios=this.$axios;
            let word=this.input_word;
            // The name goes through `params` so axios URL-encodes it; names
            // containing "&", "#" or spaces no longer corrupt the query string.
            const fetch = path => axios.get(API_BASE + path, {params: {name: word}});
            axios.all([fetch("/details"), fetch("/words"), fetch("/nodes"), fetch("/links")])
                .then(axios.spread((details,users,nodes,links)=>{
                    this.draw_relations(nodes.data,links.data);
                    this.set_details(details.data);
                    this.draw_words(users.data);
                    this.$Notice.success({
                        title: '繪製成功',
                        desc: '您搜索的用戶'+word+'關係圖和熱詞已經成功繪製!'
                    });
                }))
                .catch(()=>{
                    this.$Notice.error({
                        title: '繪製失敗',
                        desc: '您搜索的用戶'+word+'關係圖或熱詞繪製失敗,請檢查網絡或輸入信息!'
                    });
                })
        },
        // Return the chart bound to the element with this id, creating it on
        // first use, so repeated searches do not re-initialise the same node.
        get_chart(element_id){
            const dom = document.getElementById(element_id);
            return echarts.getInstanceByDom(dom) || echarts.init(dom);
        },
        tab_search_onclick:function(){
            this.v_search=true;
            this.v_follows=false;
            this.v_words=false;
            this.Content_Title="資料搜索"
        },
        tab_follow_onclick:function(){
            this.v_follows=true;
            this.v_words=false;
            this.v_search=false;
            this.Content_Title="follow關係";
        },
        tab_words_onclick:function(){
            this.Content_Title="用戶詞雲";
            this.v_words=true;
            this.v_follows=false;
            this.v_search=false;
        },
        // Render the word cloud from [{name, value}, ...].
        draw_words:function(words_data){
            let mychart=this.get_chart('Words_Chart');
            mychart.setOption({
                // The key was misspelled "tootip", so the option was ignored.
                tooltip: {
                    show: true
                },
                series: {
                    name: '詞雲圖',
                    type: 'wordCloud',
                    textStyle: {
                        normal: {
                            // Random dark colour for every word.
                            color: function () {
                                return 'rgb(' + [
                                    Math.round(Math.random() * 160),
                                    Math.round(Math.random() * 160),
                                    Math.round(Math.random() * 160)
                                ].join(',') + ')'
                            }
                        }
                    },
                    textRotation: [0, 90, -45, 45],
                    autoSize: true,
                    data: words_data
                }
            })
        },
        // Show the four detail lines of the searched user. The list is
        // replaced, not appended to, so earlier searches do not pile up.
        set_details(details){
            this.items=[
                {"message":"微博名:"+details["name"]},
                {"message":"微博描述:"+details["description"]},
                {"message":"關鍵詞:"+details["keywords"]},
                {"message":"用戶性別:"+details["sex"]}
            ];
        },
        // Render the follow relations as a force-directed graph.
        draw_relations:function(nodes,links){
            let mychart=this.get_chart("Follow_Chart");
            let option={
                title: {                        // chart title
                    text: "關注關係圖",           // title text
                    left : '3%',                // distance from the left edge
                    top : '3%',                 // distance from the top edge
                    textStyle : {               // title style
                        color : '#000',         // title colour
                        fontSize : '30',        // title font size
                    }
                },
                series: [{
                    type: "graph",              // series type: relation graph
                    top: '10%',                 // distance from the container top
                    roam: true,                 // enable both zoom and pan ('scale' or 'move' enables only one)
                    focusNodeAdjacency: true,   // highlight a node with its edges and neighbours on hover [default: false]
                    force: {
                        // Force-directed layout: a repulsion between every two
                        // nodes and an attraction along every edge, iterated
                        // until the nodes settle.
                        repulsion: 1000,        // repulsion factor between nodes [default: 50]; larger pushes nodes apart
                        // Edge length [default: 30]; an array gives a range that
                        // edge values are mapped onto linearly: the largest value
                        // tends towards 10, the smallest towards 50.
                        edgeLength: [10, 50]
                    },
                    layout: "force",            // 'none' uses node x/y, 'circular' a ring, 'force' the force layout
                    // Node symbol
                    //symbol: "path://M19.300,3.300 L253.300,3.300 C262.136,3.300 269.300,10.463 269.300,19.300 L269.300,21.300 C269.300,30.137 262.136,37.300 253.300,37.300 L19.300,37.300 C10.463,37.300 3.300,30.137 3.300,21.300 L3.300,19.300 C3.300,10.463 10.463,3.300 19.300,3.300 Z",
                    symbol: 'circle',
                    lineStyle: {                // shared edge style; color also accepts 'source' or 'target'
                        normal: {
                            color: '#000',      // line colour [default: '#aaa']
                            width: 1,           // line width [default: 1]
                            type: 'solid',      // 'solid' | 'dashed' | 'dotted'
                            opacity: 0.5,       // 0..1, 0 hides the line [default: 0.5]
                            curveness: 0        // 0..1, larger is more curved [default: 0]
                        }
                    },
                    label: {                    // node labels
                        normal: {
                            show: true,         // show the label
                            position: "inside", // e.g. 'top', 'left', 'right', 'bottom', 'inside', 'insideLeft', ...
                            textStyle: {        // label text style
                                fontSize: 16
                            }
                        }
                    },
                    data: nodes,
                    links: links
                }],
                animationEasingUpdate: "cubicOut"  // easing of data-update animation [default: cubicOut], e.g. "quinticInOut"
            };
            mychart.setOption(option);
        },
    },
    name:"HelloWorld"
}
</script>
後端關鍵代碼
//用戶的具體信息domain
package com.example.demo;

import org.hibernate.engine.loading.internal.CollectionLoadContext;

import javax.persistence.*;

/**
 * JPA entity mapped to the {@code userdetails} table: one row of profile
 * details (description, keywords, sex, name) per user.
 */
@Entity
@Table(name = "userdetails")
public class UserDetails {

    /** Generated primary key. */
    @Id
    @GeneratedValue(strategy = GenerationType.AUTO)
    private int num;

    @Column(name = "description")
    public String description;

    @Column(name = "keywords")
    public String keywords;

    @Column(name = "sex")
    public String sex;

    @Column(name = "name")
    public String name;

    // Package-private accessors, grouped per property.

    String getName() {
        return name;
    }

    void setName(String value) {
        name = value;
    }

    String getDescription() {
        return description;
    }

    void setDescription(String value) {
        description = value;
    }

    String getKeywords() {
        return keywords;
    }

    void setKeywords(String value) {
        keywords = value;
    }

    String getSex() {
        return sex;
    }

    void setSex(String value) {
        sex = value;
    }
}
//最重要的controller
package com.example.demo;

import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.CrossOrigin;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.beans.factory.annotation.Autowired;

import java.util.ArrayList;
import java.util.List;

/**
 * REST-style endpoints used by the front end: user list, follow links and
 * nodes for the relation graph, word-cloud entries and user details.
 * Every handler returns an object serialised as the response body.
 */
@Controller
@CrossOrigin
public class IndexController {

    /** Upper bound on the number of word-cloud entries returned by {@code words}. */
    private static final int MAX_WORDS = 50;

    @Autowired
    private UserRepositroy userRepositroy;

    @Autowired
    private FollowsRepositroy followsRepositroy;

    @Autowired
    private DetailsRepositroy detailsRepositroy;

    /** Returns all users. */
    @RequestMapping(value = "users")
    @ResponseBody
    public Object GetUser() {
        List<User> userlist = userRepositroy.findAll();
        return userlist;
    }

    /** Returns one source/target link per follow relation of {@code name}. */
    @RequestMapping(value = "links")
    @ResponseBody
    public Object GetLinks(String name) {
        List<Follows> followsList = followsRepositroy.findByName(name);
        List<RetnLinks> links = new ArrayList<RetnLinks>();
        for (Follows follow : followsList) {
            RetnLinks l = new RetnLinks();
            l.setSource(follow.getName());
            l.setTarget(follow.getFuname());
            links.add(l);
        }
        return links;
    }

    /**
     * Returns the graph nodes for {@code name}: the user itself (category 1,
     * size 30) followed by one node per followed account (category 2, size 20).
     */
    @RequestMapping(value = "nodes")
    @ResponseBody
    public Object GetNodes(String name) {
        List<Follows> followsList = followsRepositroy.findByName(name);
        List<RetnNodes> nodes = new ArrayList<RetnNodes>();
        RetnNodes r = new RetnNodes();
        r.setName(name);
        r.setCategory(1);
        r.setSymbolSize(30);
        nodes.add(r);
        for (Follows follow : followsList) {
            RetnNodes rs = new RetnNodes();
            rs.setName(follow.getFuname());
            rs.setCategory(2);
            rs.setSymbolSize(20);
            nodes.add(rs);
        }
        return nodes;
    }

    /**
     * Returns up to {@value #MAX_WORDS} word-cloud entries for {@code name},
     * built from the user's comma-separated keywords and their values.
     * Returns an empty list when the user is unknown or has no keyword data;
     * entries whose value is not a number are skipped.
     */
    @RequestMapping(value = "words")
    @ResponseBody
    public Object GetWords(String name) {
        List<RetnWords> words = new ArrayList<RetnWords>();
        List<User> userlist = userRepositroy.findByName(name);
        if (userlist == null || userlist.isEmpty()) {
            return words;
        }
        String word = userlist.get(0).getKeywords();
        String value = userlist.get(0).getKeyvalue();
        if (word == null || value == null) {
            return words;
        }
        String[] array_word = word.split(",");
        String[] array_value = value.split(",");
        // Never read past the shorter array; users may have fewer than
        // MAX_WORDS keywords.
        int count = Math.min(MAX_WORDS, Math.min(array_word.length, array_value.length));
        for (int i = 0; i < count; i++) {
            double weight;
            try {
                weight = Double.parseDouble(array_value[i]);
            } catch (NumberFormatException e) {
                continue;
            }
            RetnWords w = new RetnWords();
            w.setName(array_word[i]);
            w.setValue(weight);
            words.add(w);
        }
        return words;
    }

    /** Returns the follow rows of {@code name}. */
    @RequestMapping(value = "follows")
    @ResponseBody
    public Object GetFollows(String name) {
        List<Follows> followslist = followsRepositroy.findByName(name);
        return followslist;
    }

    /** Returns the detail row of {@code name}, as delivered by the repository. */
    @RequestMapping(value = "details")
    @ResponseBody
    public Object GetDetails(String name) {
        UserDetails userDetails = detailsRepositroy.findOneByName(name);
        return userDetails;
    }
}
其餘的都是相似的。無限重複更名。