JS破解無疑是頭禿的問題,額,好在有度娘搜索,實在不行,那就谷妹搜索?
確實頭禿無比!!
目前想到的三種處理方法,當然都是借鑑啦!!
方法一:直接破解JS,python直接模擬js算法得出視頻地址
方法二:扒取js,借用python第三方庫execjs執行js獲取視頻地址
方法三:借用網絡工具,其實是用別人的破解接口獲取視頻地址,比如:https://meipai.iiilab.com
採集效果
方法一:直接破解JS,python直接模擬js算法得出視頻地址
存儲路徑設置
# Storage path: create the output directory up front (no error if it exists).
os.makedirs('meipai/', exist_ok=True)
視頻地址解密算法
# Decrypt the real address of a Meipai video.
# Source: https://blog.csdn.net/weixin_42590877/article/details/99898650
# (CSDN blog - iplaypy)
def decode(encoded_string):
    """Strip the two junk runs from an obfuscated string and base64-decode it.

    The first four characters, reversed, are parsed as a hexadecimal number.
    The decimal digits of that number are the key:

    * digits 1-2 ("pre"):  ``[offset, length]`` of a junk run, with the
      offset counted from the start of the payload;
    * digits 3-4 ("tail"): ``[offset, length]`` of a second junk run, where
      the run starts at ``len(payload) - offset - length``.

    Both runs are removed (the first one before the second position is
    computed) and what is left is base64-decoded.

    :param encoded_string: the obfuscated string (4-character key + payload)
    :return: the decoded bytes
    :raises ValueError: if the first four characters are not hexadecimal, or
        the cleaned payload is not valid base64 (``binascii.Error``)
    :raises IndexError: if the key's decimal form has fewer than four digits
    """

    def get_hex(text):
        # Payload is everything after the key; the key itself is stored reversed.
        return {'str': text[4:], 'hex': text[:4][::-1]}

    def get_dec(hex_text):
        digits = str(int(hex_text, 16))
        return {'pre': list(digits[:2]), 'tail': list(digits[2:])}

    def cut(text, span):
        start = int(span[0])
        junk = text[start:start + int(span[1])]
        # count=1 is essential: only the run sitting at `start` is junk.
        # Without it every later occurrence of the same characters would be
        # deleted too, corrupting the base64 payload. (The JS this was ported
        # from uses String.replace with a string pattern, which replaces the
        # first match only.)
        return text[:start] + text[start:].replace(junk, "", 1)

    def get_pos(text, span):
        # Convert the tail offset (counted from the end) into a start index.
        span[0] = len(text) - int(span[0]) - int(span[1])
        return span

    parts = get_hex(encoded_string)
    key = get_dec(parts['hex'])
    without_head_junk = cut(parts['str'], key['pre'])
    cleaned = cut(without_head_junk, get_pos(without_head_junk, key['tail']))
    return base64.b64decode(cleaned)
協議頭生成,超級重要,頭禿了許久!注意,cookie的添加,否則無法翻頁!!
# Build request headers with a random User-Agent.
def ua():
    """Return the headers dict used for requests.

    The dict holds a fixed, hard-coded session cookie and a User-Agent string
    freshly drawn from ``fake_useragent.UserAgent`` on every call.
    """
    random_agent = UserAgent().random
    session_cookie = (
        'MUSID=0su478f5e30e4u8jlf9gqquhn5; '
        'MP_WEB_GID=748151043219051; '
        'sid=0su478f5e30e4u8jlf9gqquhn5; '
        'UM_distinctid=16ea084234822-0be7691d606889-43450521-1fa400-16ea0842349133; '
        'virtual_device_id=8818a5d35ed03e6b6b4fd638a6f765ae; '
        'pvid=TVqin0MOgIjpLnJZKxhiL%2FwcrYA2K7Ke; '
        'CNZZDATA1256786412=937407365-1574650874-https%253A%252F%252Fwww.baidu.com%252F%7C1574831840'
    )
    return {
        'Cookie': session_cookie,
        'User-Agent': random_agent,
    }
關於協議頭的處理,只能一步步嘗試分析!這裏花費了很多時間測試!思路不對!!!
requests訪問網頁
# Fetch a page.
def get_req(url, timeout=10):
    """GET ``url`` with the headers from ``ua()`` and return the body as text.

    :param url: address to request
    :param timeout: seconds to wait for the server before giving up; without
        it a stalled connection would block the caller indefinitely
    :return: the response body decoded as UTF-8 when the status code is 200,
        otherwise ``None`` (also ``None`` when the request itself fails)
    """
    try:
        response = requests.get(url, headers=ua(), timeout=timeout)
    except requests.RequestException as e:
        # Same "None means failure" contract as a non-200 status.
        print(f'訪問網頁出錯,錯誤代碼:{e}')
        return None
    if response.status_code != 200:
        return None
    text = response.content.decode('utf-8')
    time.sleep(2)  # pause after each successful fetch
    return text
獲取視頻
def get_video(response):
    """Download every video listed in a JSON listing response.

    ``response`` is JSON text whose ``medias`` entry is a list of items. For
    each item the file name comes from ``caption`` (falling back to
    ``weibo_share_caption`` when the caption is empty), with spaces removed
    and the characters ``| / < > : * ? \\ "`` replaced by ``_``. The item's
    ``video`` value is decoded with ``decode`` and handed to ``down``.

    Decoding and download failures are printed and appended to
    ``meipai/spider.txt``; processing then moves on to the next item.
    """

    def record_failure(line):
        # Append one line to the error log.
        with open(r'meipai/spider.txt', 'a+', encoding='utf-8') as f:
            f.write(line)

    for req in json.loads(response)['medias']:
        video_name = req['caption'] or req['weibo_share_caption']
        # Strip characters that are not legal in file names.
        video_name = re.sub(r'[\|\/\<\>\:\*\?\\\"]', "_", video_name.replace(' ', ''))
        print(video_name)
        video_url = req['video']
        print(video_url)
        try:
            videourl = decode(video_url).decode('utf8')  # decrypt the video address
            print(videourl)
            try:
                # Alternative with a progress display: server(video_name, videourl)
                down(video_name, videourl)
            except Exception as e:
                print(f'視頻下載出錯,錯誤代碼:{e}')
                record_failure(f'視頻下載出錯,錯誤代碼:{e}---採集{videourl}|{video_name}內容失敗\n')
        except Exception as e:
            print(f'視頻地址解密出錯,錯誤代碼:{e}')
            record_failure(f'視頻解密出錯,錯誤代碼:{e}---採集{video_url}|{video_name}內容失敗\n')
下載視頻
def down(h1, url):
    """Download ``url`` in one go and save it as ``meipai/<h1>.mp4``.

    :param h1: file name (without extension) for the saved video
    :param url: direct address of the video
    :raises requests.RequestException: on connection problems, timeouts, or
        a 4xx/5xx answer from the server
    """
    print("準備下載!")
    file_path = f"meipai/{h1}.mp4"
    # The timeout bounds each wait on the server, not the whole download.
    r = requests.get(url, timeout=30)
    # Fail loudly on an error status instead of saving the error page as .mp4.
    r.raise_for_status()
    print("開始下載!")
    with open(file_path, "wb") as file:
        file.write(r.content)
    print("下載完成!")
    time.sleep(2)
    # Original author's open question: network problems, errors here?


# Save a video, showing download progress.
def server(h1, videourl):
    """Stream ``videourl`` to ``meipai/<h1>.mp4`` while printing progress.

    :param h1: file name (without extension) for the saved video
    :param videourl: direct address of the video
    :raises requests.RequestException: on connection problems, timeouts, or
        a 4xx/5xx answer from the server
    """
    print("準備下載!")
    file_path = f'meipai/{h1}.mp4'
    headers = ua()
    # A Host header copied from the listing site would be wrong for a video
    # served from a different host; let requests derive it from the URL.
    headers.pop('Host', None)
    with closing(requests.get(videourl, headers=headers, stream=True, timeout=30)) as response:
        response.raise_for_status()
        chunk_size = 1024  # maximum size of a single read
        # Total body size; the header can be missing, so 0 means "unknown".
        content_size = int(response.headers.get('content-length', 0))
        data_count = 0
        with open(file_path, "wb") as file:
            for data in response.iter_content(chunk_size=chunk_size):
                file.write(data)
                data_count = data_count + len(data)
                if content_size:
                    now_jd = (data_count / content_size) * 100
                    print("\r 文件下載進度:%d%%(%d/%d) - %s" % (now_jd, data_count, content_size, file_path), end=" ")
                else:
                    # No total available: show the running byte count only.
                    print("\r 文件下載進度:%d - %s" % (data_count, file_path), end=" ")
    print("\n>>> 獲取視頻成功了!")
    time.sleep(2)
附完整代碼:
# -*- coding: UTF-8 -*-
# Meipai video scraper, version 2
# by WeChat: huguo00289
import base64
import json
import os
import re
import time
from contextlib import closing

import requests
from fake_useragent import UserAgent

# Storage path: create the output directory up front (no error if it exists).
os.makedirs('meipai/', exist_ok=True)


# Decrypt the real address of a Meipai video.
def decode(encoded_string):
    """Strip the two junk runs from an obfuscated string and base64-decode it.

    The first four characters, reversed, are parsed as a hexadecimal number.
    The decimal digits of that number are the key:

    * digits 1-2 ("pre"):  ``[offset, length]`` of a junk run, with the
      offset counted from the start of the payload;
    * digits 3-4 ("tail"): ``[offset, length]`` of a second junk run, where
      the run starts at ``len(payload) - offset - length``.

    Both runs are removed (the first one before the second position is
    computed) and what is left is base64-decoded.

    :param encoded_string: the obfuscated string (4-character key + payload)
    :return: the decoded bytes
    :raises ValueError: if the first four characters are not hexadecimal, or
        the cleaned payload is not valid base64 (``binascii.Error``)
    :raises IndexError: if the key's decimal form has fewer than four digits
    """

    def get_hex(text):
        # Payload is everything after the key; the key itself is stored reversed.
        return {'str': text[4:], 'hex': text[:4][::-1]}

    def get_dec(hex_text):
        digits = str(int(hex_text, 16))
        return {'pre': list(digits[:2]), 'tail': list(digits[2:])}

    def cut(text, span):
        start = int(span[0])
        junk = text[start:start + int(span[1])]
        # count=1 is essential: only the run sitting at `start` is junk.
        # Without it every later occurrence of the same characters would be
        # deleted too, corrupting the base64 payload.
        return text[:start] + text[start:].replace(junk, "", 1)

    def get_pos(text, span):
        # Convert the tail offset (counted from the end) into a start index.
        span[0] = len(text) - int(span[0]) - int(span[1])
        return span

    parts = get_hex(encoded_string)
    key = get_dec(parts['hex'])
    without_head_junk = cut(parts['str'], key['pre'])
    cleaned = cut(without_head_junk, get_pos(without_head_junk, key['tail']))
    return base64.b64decode(cleaned)


# Build request headers with a random User-Agent.
def ua():
    """Return the headers dict used for requests to www.meipai.com.

    The dict holds a fixed, hard-coded session cookie, the Host and Referer of
    the listing site, and a User-Agent string freshly drawn from
    ``fake_useragent.UserAgent`` on every call.
    """
    agent = UserAgent()
    headers = {
        'Cookie': 'MUSID=0su478f5e30e4u8jlf9gqquhn5; MP_WEB_GID=748151043219051; sid=0su478f5e30e4u8jlf9gqquhn5; UM_distinctid=16ea084234822-0be7691d606889-43450521-1fa400-16ea0842349133; virtual_device_id=8818a5d35ed03e6b6b4fd638a6f765ae; pvid=TVqin0MOgIjpLnJZKxhiL%2FwcrYA2K7Ke; CNZZDATA1256786412=937407365-1574650874-https%253A%252F%252Fwww.baidu.com%252F%7C1574831840',
        'Host': 'www.meipai.com',
        'Referer': 'https://www.meipai.com/square/13',
        'User-Agent': agent.random,
        # Fixed alternative to the random User-Agent:
        # 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
    }
    return headers


# Fetch a page.
def get_req(url, timeout=10):
    """GET ``url`` with the headers from ``ua()`` and return the body as text.

    :param url: address to request
    :param timeout: seconds to wait for the server before giving up; without
        it a stalled connection would block the caller indefinitely
    :return: the response body decoded as UTF-8 when the status code is 200,
        otherwise ``None`` (also ``None`` when the request itself fails)
    """
    try:
        response = requests.get(url, headers=ua(), timeout=timeout)
    except requests.RequestException as e:
        # Same "None means failure" contract as a non-200 status.
        print(f'訪問網頁出錯,錯誤代碼:{e}')
        return None
    if response.status_code != 200:
        return None
    text = response.content.decode('utf-8')
    time.sleep(2)  # pause after each successful fetch
    return text


def get_video(response):
    """Download every video listed in a JSON listing response.

    ``response`` is JSON text whose ``medias`` entry is a list of items. For
    each item the file name comes from ``caption`` (falling back to
    ``weibo_share_caption`` when the caption is empty), with spaces removed
    and the characters ``| / < > : * ? \\ "`` replaced by ``_``. The item's
    ``video`` value is decoded with ``decode`` and handed to ``down``.

    Decoding and download failures are printed and appended to
    ``meipai/spider.txt``; processing then moves on to the next item.
    """

    def record_failure(line):
        # Append one line to the error log.
        with open(r'meipai/spider.txt', 'a+', encoding='utf-8') as f:
            f.write(line)

    for req in json.loads(response)['medias']:
        video_name = req['caption'] or req['weibo_share_caption']
        # Strip characters that are not legal in file names.
        video_name = re.sub(r'[\|\/\<\>\:\*\?\\\"]', "_", video_name.replace(' ', ''))
        print(video_name)
        video_url = req['video']
        print(video_url)
        try:
            videourl = decode(video_url).decode('utf8')  # decrypt the video address
            print(videourl)
            try:
                # Alternative with a progress display: server(video_name, videourl)
                down(video_name, videourl)
            except Exception as e:
                print(f'視頻下載出錯,錯誤代碼:{e}')
                record_failure(f'視頻下載出錯,錯誤代碼:{e}---採集{videourl}|{video_name}內容失敗\n')
        except Exception as e:
            print(f'視頻地址解密出錯,錯誤代碼:{e}')
            record_failure(f'視頻解密出錯,錯誤代碼:{e}---採集{video_url}|{video_name}內容失敗\n')


# Download a video.
def down(h1, url):
    """Download ``url`` in one go and save it as ``meipai/<h1>.mp4``.

    :param h1: file name (without extension) for the saved video
    :param url: direct address of the video
    :raises requests.RequestException: on connection problems, timeouts, or
        a 4xx/5xx answer from the server
    """
    print("準備下載!")
    file_path = f"meipai/{h1}.mp4"
    # The timeout bounds each wait on the server, not the whole download.
    r = requests.get(url, timeout=30)
    # Fail loudly on an error status instead of saving the error page as .mp4.
    r.raise_for_status()
    print("開始下載!")
    with open(file_path, "wb") as file:
        file.write(r.content)
    print("下載完成!")
    time.sleep(2)


# Save a video, showing download progress.
def server(h1, videourl):
    """Stream ``videourl`` to ``meipai/<h1>.mp4`` while printing progress.

    :param h1: file name (without extension) for the saved video
    :param videourl: direct address of the video
    :raises requests.RequestException: on connection problems, timeouts, or
        a 4xx/5xx answer from the server
    """
    print("準備下載!")
    file_path = f'meipai/{h1}.mp4'
    headers = ua()
    # ua() pins Host to www.meipai.com, which is wrong whenever the decoded
    # video URL points at another host; let requests derive it from the URL.
    headers.pop('Host', None)
    with closing(requests.get(videourl, headers=headers, stream=True, timeout=30)) as response:
        response.raise_for_status()
        chunk_size = 1024  # maximum size of a single read
        # Total body size; the header can be missing, so 0 means "unknown".
        content_size = int(response.headers.get('content-length', 0))
        data_count = 0
        with open(file_path, "wb") as file:
            for data in response.iter_content(chunk_size=chunk_size):
                file.write(data)
                data_count = data_count + len(data)
                if content_size:
                    now_jd = (data_count / content_size) * 100
                    print("\r 文件下載進度:%d%%(%d/%d) - %s" % (now_jd, data_count, content_size, file_path), end=" ")
                else:
                    # No total available: show the running byte count only.
                    print("\r 文件下載進度:%d - %s" % (data_count, file_path), end=" ")
    print("\n>>> 獲取視頻成功了!")
    time.sleep(2)


# Main entry point.
def main():
    """Walk listing pages 1-99 and download the videos found on each.

    A page that cannot be fetched is skipped; an error while processing a
    page is printed and the loop continues with the next page.
    """
    for i in range(1, 100):
        url = f"https://www.meipai.com/squares/new_timeline?page={i}&count=24&tid=13"
        print(url)
        response = get_req(url)
        if not response:
            continue
        try:
            get_video(response)
        except Exception as e:
            print(f'獲取視頻出錯了,錯誤代碼:{e}')


if __name__ == '__main__':
    main()
方法二:execjs庫運行js破解視頻地址
test.js文件
/**
 * Split the encoded string into its 4-character key and the payload.
 * The key is stored reversed, so it is flipped back here.
 * @param {string} a encoded string
 * @returns {{str: string, hex: string}} payload and hexadecimal key
 */
function getHex(a) {
    return {
        str: a.substring(4),
        hex: a.substring(0, 4).split("").reverse().join("")
    };
}

/**
 * Turn the hexadecimal key into its decimal digits.
 * @param {string} a hexadecimal key
 * @returns {{pre: string[], tail: string[]}} first two digits, remaining digits
 */
function getDec(a) {
    var b = parseInt(a, 16).toString();
    return {
        pre: b.substring(0, 2).split(""),
        tail: b.substring(2).split("")
    };
}

/**
 * Remove the junk run of length b[1] that starts at index b[0].
 * replace() with a string pattern removes the first match only, which is
 * the run sitting at b[0].
 * @param {string} a text to clean
 * @param {Array} b [start, length]
 * @returns {string} text without the junk run
 */
function substr(a, b) {
    var c = a.substring(0, b[0]),
        d = a.substr(b[0], b[1]);
    return c + a.substring(b[0]).replace(d, "");
}

/**
 * Convert an offset counted from the end into a start index (in place).
 * @param {string} a text the position refers to
 * @param {Array} b [offset from end, length]; b[0] is overwritten
 * @returns {Array} the same array b
 */
function getPos(a, b) {
    b[0] = a.length - b[0] - b[1];
    return b;
}

/**
 * Base64 decoder (the script supplies its own atob).
 * @param {string} a base64 text
 * @returns {string} decoded string, one character per byte
 * @throws {Error} if the length without padding is 1 modulo 4
 */
function atob(a) {
    var e = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
    if (a = a.replace(/=+$/, ""), a.length % 4 == 1)
        throw new Error("atob: the string to be decoded is not correctly encoded");
    // c becomes the alphabet index of each character; ~c is 0 for -1, so
    // characters outside the alphabet are skipped.
    for (var b, c, d = 0, g = 0, h = ""; c = a.charAt(g++); ~c && (b = d % 4 ? 64 * b + c : c, d++ % 4) ? h += String.fromCharCode(255 & b >> (-2 * d & 6)) : 0)
        c = e.indexOf(c);
    return h;
}

/**
 * Entry point called from Python: decode an obfuscated video address.
 * @param {string} a encoded string
 * @returns {string} decoded address
 */
function main(a) {
    var b = getHex(a),
        c = getDec(b.hex),
        d = substr(b.str, c.pre),
        zz = substr(d, getPos(d, c.tail));
    return atob(zz);
}

// Example:
// a = '55e0aHRVhJYAS0cDovL212dmlkZW8xMC5tZWl0dWRhdGEuY29tLzVkMzNmMGRmM2RkYzRwMm5sd3N0cWg3OTA2X0gyNjRfMV85YzNiM2QwOTNhMWRhNS5tcDQ/az1kODlkOTVmZmRlNWZjNzQ0YzUwOTY0NDZiZjNhNTE0OCZ0PTVkNDEIQbtCanOZiOWY4'
// console.log(main(a))
# Run the JS file to decrypt the video URL.
import execjs


def decode2(target):
    """Decode an obfuscated video address by calling ``main`` in ./test.js.

    :param target: the encoded string passed to the JS ``main`` function
    :return: the decoded result returned by the JS ``main`` function

    Source: 木下瞳 / 木下學Python
    """
    # Read the script inside a context manager so the file handle is closed.
    with open(r'./test.js', encoding='utf-8') as js_file:
        js = execjs.compile(js_file.read())
    return js.call("main", target)


def main1():
    """Fetch the first listing page and pass it to ``get_video2``.

    NOTE(review): ``get_video2`` is not shown in this article; presumably it
    is ``get_video`` with ``decode2`` in place of ``decode`` - confirm.
    """
    url = "https://www.meipai.com/squares/new_timeline?page=1&count=24&tid=13"
    response = get_req(url)
    # get_req returns None when the page could not be fetched.
    if response:
        get_video2(response)
方法三:藉助網頁破解工具,因爲post包兩個參數沒搞清楚頭緒,這裏直接採用無頭瀏覽器,selenium!
運行效果:
from selenium import webdriver
import time


# Open the browser with selenium.
def s_html():
    """Start Chrome, open https://meipai.iiilab.com and return the driver.

    :return: the started ``webdriver.Chrome`` instance; the caller is
        responsible for closing it (see ``close_s``)
    """
    # Example of a video page address: https://www.meipai.com/media/1155453414
    url = "https://meipai.iiilab.com"
    chromedriver_path = r"替換成你本身的chromedriver.exe路徑"  # full path; replace with your own chromedriver.exe
    options = webdriver.ChromeOptions()  # Chrome start-up options
    # Per the original author this step matters: it keeps sites from
    # detecting that Selenium is driving the browser.
    options.add_experimental_option("excludeSwitches", ['enable-automation'])
    browser = webdriver.Chrome(executable_path=chromedriver_path, options=options)
    browser.get(url)
    time.sleep(2)
    return browser


# Get the video link.
def get_s_video(browser, videourl):
    """Submit ``videourl`` to the parsing page and return the resolved link.

    :param browser: driver returned by ``s_html``
    :param videourl: address of the Meipai video page
    :return: the ``href`` of the result button on the page
    """
    link_input = browser.find_element_by_xpath('//*[@class="form-control link-input"]')
    link_input.send_keys(videourl)
    time.sleep(2)
    button = browser.find_element_by_xpath('//*[@class="btn btn-default"]')
    button.click()
    time.sleep(8)  # fixed wait for the page to produce its result
    href = browser.find_element_by_xpath('//*/div[@class="caption"]/p/a[@class="btn btn-success"]').get_attribute("href")
    print(href)
    # Clear the form so the next address can be entered.
    clear_button = browser.find_element_by_xpath('//*[@class="btn btn-danger"]')
    clear_button.click()
    # Alternative: browser.refresh() to reload the page
    time.sleep(2)
    return href


# Close the browser.
def close_s(browser):
    """Quit the browser started by ``s_html``."""
    browser.quit()


# Fetch Meipai videos.
def get_video3(response, browser):
    """Download every video listed in a JSON listing response via the browser.

    ``response`` is JSON text whose ``medias`` entry is a list of items. For
    each item the file name comes from ``caption`` (falling back to
    ``weibo_share_caption``), sanitised for use as a file name; the video
    page address is built from the item's ``id``, resolved with
    ``get_s_video`` and downloaded with ``down``.

    :param response: JSON text of a listing page
    :param browser: driver returned by ``s_html``
    """
    reqs = json.loads(response)['medias']
    print(reqs)
    for req in reqs:
        video_name = req['caption'] or req['weibo_share_caption']
        video_name = video_name.replace(' ', '')
        video_name = re.sub(r'[\|\/\<\>\:\*\?\\\"]', "_", video_name)  # strip characters illegal in file names
        print(video_name)
        video_url = f"https://www.meipai.com/media/{req['id']}"
        print(video_url)
        videourl = get_s_video(browser, video_url)
        down(video_name, videourl)  # download the video


def main2():
    """Resolve and download the videos of the first listing page."""
    browser = s_html()
    try:
        url = "https://www.meipai.com/squares/new_timeline?page=1&count=24&tid=13"
        response = get_req(url)
        # get_req returns None when the page could not be fetched.
        if response:
            get_video3(response, browser)
    finally:
        # Always shut the browser down, even if a step above raised.
        close_s(browser)


if __name__ == '__main__':
    main2()
部分函數方法沿用方法一!