由於平時會收藏一些小黃圖,可是即刻的登錄機制對我來說比較費勁。好不容易在網上找到一篇文章,可是看起來也比較費勁;大概知道怎麼回事了之後,自己擼了一套。
即刻的機制是這樣的:用戶掃碼進入網站,然後會獲得一個access_token,幾分鐘後會獲得一個refresh_token;之後再登錄的時候會調用一個接口,把refresh_token發給後臺,獲得刷新後的新access_token。而第一個refresh_token需要去瀏覽器裏複製出來。代碼如下:
def refresh_token(refresh_token):
    """Send a refresh token to the Jike token-refresh endpoint.

    Issues a GET request to ``app_auth_tokens.refresh`` carrying the given
    token (converted with ``str``) in the ``x-jike-refresh-token`` header,
    alongside web-client ``Origin``/``Referer`` headers and a User-Agent
    obtained from ``getUserAgent()``.

    Returns:
        The response body as text, unparsed.
    """
    request_headers = {
        "Origin": "https://web.okjike.com",
        "Referer": "https://web.okjike.com/collection",
        "User-Agent": getUserAgent(),
        "x-jike-refresh-token": str(refresh_token),
    }
    return requests.get(
        "https://app.jike.ruguoapp.com/app_auth_tokens.refresh",
        headers=request_headers,
    ).text
複製代碼
構造user_agent(st是我自己的工具類):
from tools import Tools as tl
from tools import Settings as st
def getUserAgent():
    """Return one entry of ``st.user_agent_list`` picked uniformly at random.

    Raises:
        ValueError: if the list is empty (there is no valid index to draw).
    """
    candidates = st.user_agent_list
    # randrange(n) yields 0..n-1, i.e. a valid index into the list.
    return candidates[random.randrange(len(candidates))]
複製代碼
拿到最新的token之後就可以想幹嘛幹嘛了:
if __name__ == '__main__':
    # Script entry point: send the hard-coded refresh token to the refresh
    # endpoint, parse the JSON reply, and start crawling the collections
    # list with the access token found under 'x-jike-access-token'.
    token_payload = json.loads(
        refresh_token('eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJkYXRhIjoibjV0dVlqcVMrV0VVSDJKYTMwY0JYOTNcL1p4RTlqRExGTW1PZGRXcU9iaWZqOEZ3M3RrNjNNXC81enJsTUQ5ajNVMFVJRHZSNjlzYmhOWTBDejlQTXdXalwvSzBUcHRpRXJFMFZnXC9NSFVOYjFHaDVGajFzSEVKWm42TzR5aUk3XC9IaklrUENNeHNsSXRmNm1nVWdTUGZBbG1jZkNkdUdsblwvTGRvVGQ1UFJjQ3FNPSIsInYiOjMsIml2IjoiTWFQdTlpRUJqbUVcLzlIZURGdVVhZUE9PSIsImlhdCI6MTU1MzQwNzgwMS43NDl9.jWG-7-dUjZqSrgMJVnj1pIf52tqoSMHav_mop0_aABI')
    )
    startSpider(
        'https://app.jike.ruguoapp.com/1.0/users/collections/list',
        token_payload['x-jike-access-token'],
    )
複製代碼
爬蟲跑起來!
# Pagination cursor: sent with every page request and replaced by the
# 'loadMoreKey' value found in the response.
loadMoreKey = None


def startSpider(url, access_token):
    """Download every picture from the paginated collection feed at ``url``.

    Repeatedly POSTs ``{'limit': 20, 'loadMoreKey': <cursor>}`` to ``url``,
    passes each ``picUrl`` found under the entries' ``pictures`` lists to
    ``tl.downLoadFile``, and stores the cursor returned by the server in the
    module-level ``loadMoreKey``. The crawl stops once the server returns no
    further cursor, which leaves ``loadMoreKey`` reset so a later call starts
    again from the first page.

    Args:
        url: Endpoint of the collection list API.
        access_token: Value sent in the ``x-jike-access-token`` header.

    Raises:
        KeyError: if a response has no ``data`` field (for example an error
            payload) or a picture entry has no ``picUrl``.
    """
    # Must come before the first use of the name: reading loadMoreKey and only
    # then declaring it global is a SyntaxError on Python 3.
    global loadMoreKey

    # A loop instead of an unconditional self-call: the recursive form never
    # terminated and grew the call stack by one frame per page.
    while True:
        headers = {
            "Accept": "application/json",
            "App-Version": "5.3.0",
            "Content-Type": "application/json",
            "Origin": "https://web.okjike.com",
            "platform": "web",
            "Referer": "https://web.okjike.com/collection",
            "User-Agent": getUserAgent(),  # a fresh User-Agent for each page
            "x-jike-access-token": access_token,
        }
        # Keep the headers around so downloads can reuse the same ones
        # (optional, per the original author's note).
        tl.UsingHeaders = headers

        payload = {'limit': 20, 'loadMoreKey': loadMoreKey}
        response = requests.post(url, headers=headers, data=json.dumps(payload))
        print(response.status_code)

        # Decode the raw bytes explicitly; the old `response.enconding`
        # assignment was a typo with no effect and has been dropped.
        page = json.loads(response.content.decode("utf-8"))

        # NOTE(review): assumes the last page reports a null or missing
        # cursor - confirm against the API.
        loadMoreKey = page.get('loadMoreKey')

        for entry in page['data']:
            # Entries without a 'pictures' list are skipped rather than
            # aborting the whole crawl.
            for picture in entry.get('pictures') or []:
                tl.downLoadFile(picture['picUrl'])
        print('------結束20記錄------')

        if not loadMoreKey:
            break
複製代碼