JS破解初探，折騰到頭禿的美拍視頻Python採集下載

時間 2021-08-14

標籤 javascript html java python web 算法 chrome json 瀏覽器微信欄目 JavaScript 简体版

原文鏈接

JS破解無疑是頭禿的問題，好在有度娘搜索，實在不行，那就谷妹搜索？

確實頭禿無比！！

目前想到的三種處理方法，當然都是借鑑啦！！
方法一：直接破解JS，python直接模擬js算法得出視頻地址
方法二：扒取js，借用python第三方庫execjs執行js獲取視頻地址
方法三：借用網絡工具，實際上是利用別人的破解接口獲取視頻地址，比如：https://meipai.iiilab.com

採集效果

方法一：直接破解JS，python直接模擬js算法得出視頻地址

存儲路徑設置

# Storage path: create the output directory up front; exist_ok=True makes this
# a no-op when the directory already exists.
os.makedirs(f'meipai/',exist_ok=True)

視頻地址解密算法

# 解密美拍視頻真實地址
def decode(encoded_string):
    """Recover the real video URL bytes from Meipai's obfuscated ``video`` field.

    Layout handled here: the first four characters, reversed, are read as a
    hexadecimal number. The decimal digits of that number locate two junk runs
    spliced into the base64 payload that follows the prefix:

    * digits 1-2: start offset and length of the first junk run;
    * digits 3-4: the second run has length ``digit 4`` and starts at
      ``len - digit 3 - digit 4`` of the string left after the first removal.

    Both runs are removed and the remainder is base64-decoded.

    :param encoded_string: obfuscated string (4-char prefix + payload).
    :return: the decoded bytes; callers decode them as UTF-8 to get the URL.
    :raises ValueError: the reversed prefix is not valid hexadecimal.
    :raises IndexError: the prefix converts to fewer than four decimal digits.
    :raises binascii.Error: the cleaned payload is not valid base64.
    """
    def getHex(param1):
        # Split off the 4-char prefix and reverse it to get the hex number.
        return {
            'str': param1[4:],
            'hex': param1[:4][::-1],
        }

    def getDec(param1):
        loc2 = str(int(param1, 16))
        return {
            'pre': list(loc2[:2]),
            'tail': list(loc2[2:]),
        }

    def substr(param1, param2):
        start = int(param2[0])
        length = int(param2[1])
        junk = param1[start: start + length]
        # count=1 mirrors the JavaScript reference, where String.replace with a
        # string pattern removes only the FIRST occurrence. Without it, every
        # later occurrence of the junk text was stripped from the payload too.
        return param1[:start] + param1[start:].replace(junk, "", 1)

    def getPos(param1, param2):
        # Convert the "distance from the end" digit into a start offset.
        param2[0] = len(param1) - int(param2[0]) - int(param2[1])
        return param2

    dict2 = getHex(encoded_string)
    dict3 = getDec(dict2['hex'])
    str4 = substr(dict2['str'], dict3['pre'])
    payload = substr(str4, getPos(str4, dict3['tail']))
    # The JS atob used by the site strips '=' and tolerates missing padding;
    # base64.b64decode does not, so restore padding to a multiple of four.
    payload += "=" * (-len(payload) % 4)
    return base64.b64decode(payload)

#來源：https://blog.csdn.net/weixin_42590877/article/details/99898650
#CSDN博客-iplaypy(蟒蛇師)

協議頭生成，超級重要，頭禿了許久！注意，必須添加cookie，否則沒法翻頁！！

#隨機生成協議頭
def ua():
    """Build the request headers: a fixed session cookie plus a random User-Agent.

    :return: dict with the ``Cookie`` and ``User-Agent`` headers.
    """
    agent_source = UserAgent()
    return {
        'Cookie': 'MUSID=0su478f5e30e4u8jlf9gqquhn5; MP_WEB_GID=748151043219051; sid=0su478f5e30e4u8jlf9gqquhn5; UM_distinctid=16ea084234822-0be7691d606889-43450521-1fa400-16ea0842349133; virtual_device_id=8818a5d35ed03e6b6b4fd638a6f765ae; pvid=TVqin0MOgIjpLnJZKxhiL%2FwcrYA2K7Ke; CNZZDATA1256786412=937407365-1574650874-https%253A%252F%252Fwww.baidu.com%252F%7C1574831840',
        # A fresh random browser identity is drawn on every call.
        'User-Agent': agent_source.random,
    }

關於協議頭的處理，只能一步步嘗試分析！這裏花費了很多時間測試！思路不對！！！

requests訪問網頁

#訪問網頁
def get_req(url):
    """Fetch *url* with the headers from ``ua()`` and return the body as text.

    :param url: address to request.
    :return: the response body decoded as UTF-8 when the status code is 200;
        ``None`` for any other status or when the request itself fails.
    """
    try:
        # A timeout keeps one stalled connection from hanging the whole crawl.
        response = requests.get(url, headers=ua(), timeout=30)
    except requests.RequestException as e:
        # Keep the "None on failure" contract for network errors as well, so a
        # single bad request does not abort the caller's page loop.
        print(f'訪問網頁出錯，錯誤代碼：{e}')
        return None
    if response.status_code != 200:
        return None
    text = response.content.decode('utf-8')
    time.sleep(2)  # pause between successful requests
    return text

獲取視頻

def get_video(response):
    """Decrypt and download every video listed in one timeline JSON page.

    For each entry under ``medias`` a file name is derived from the caption,
    the obfuscated ``video`` field is decrypted with ``decode`` and the result
    is saved with ``down``. Decryption and download failures are printed and
    appended to ``meipai/spider.txt``; processing continues with the next entry.

    :param response: JSON text of one timeline page.
    """
    medias = json.loads(response)['medias']
    for media in medias:
        # Prefer the caption, then the Weibo share caption.
        video_name = media.get('caption') or media.get('weibo_share_caption') or ''
        # Drop all whitespace (spaces, but also newlines/tabs, which are not
        # usable in file names on every platform).
        video_name = ''.join(video_name.split())
        video_name = re.sub(r'[\|\/\<\>\:\*\?\\\"]', "_", video_name)  # replace illegal file-name characters
        if not video_name:
            # Without a fallback every caption-less video would be written to
            # the same "meipai/.mp4" and overwrite the previous one.
            video_name = str(media.get('id', 'unnamed'))
        print(video_name)
        video_url = media['video']
        print(video_url)

        try:
            videourl = decode(video_url).decode('utf8')  # decrypt the video address
            print(videourl)
        except Exception as e:
            print(f'視頻地址解密出錯，錯誤代碼：{e}')
            with open(r'meipai/spider.txt', 'a+', encoding='utf-8') as f:
                f.write(f'視頻解密出錯，錯誤代碼：{e}---採集{video_url}|{video_name}內容失敗\n')
            continue

        try:
            # server(video_name, videourl) is the alternative with a progress display.
            down(video_name, videourl)
        except Exception as e:
            print(f'視頻下載出錯，錯誤代碼：{e}')
            with open(r'meipai/spider.txt', 'a+', encoding='utf-8') as f:
                f.write(f'視頻下載出錯，錯誤代碼：{e}---採集{videourl}|{video_name}內容失敗\n')

下載視頻

def down(h1,url):
    """Download *url* and save it as ``meipai/<h1>.mp4``.

    :param h1: file name without extension.
    :param url: direct video address.
    :raises requests.HTTPError: the server answered with an error status.
        Raised before the file is opened, so an error page is never saved
        as a video.
    :raises requests.RequestException: the request failed or timed out.
    """
    print("準備下載！")
    file_path = f"meipai/{h1}.mp4"
    # stream=True + iter_content writes the video chunk by chunk instead of
    # holding the whole file in memory; the timeout avoids hanging forever.
    with requests.get(url, stream=True, timeout=30) as r:
        r.raise_for_status()
        print("開始下載！")
        with open(file_path, "wb") as file:
            for chunk in r.iter_content(chunk_size=64 * 1024):
                file.write(chunk)
            print("下載完成！")
    time.sleep(2)  # pause between downloads

#網絡問題，出錯？
# 存儲視頻，附下載進度顯示
def server(h1,videourl):
    """Stream *videourl* into ``meipai/<h1>.mp4`` while printing progress.

    :param h1: file name without extension.
    :param videourl: direct video address.
    :raises requests.HTTPError: the server answered with an error status.
    :raises requests.RequestException: the request failed or timed out.
    """
    print("準備下載！")
    file_path=f'meipai/{h1}.mp4'
    headers = ua()
    # Drop any Host header ua() may carry: it would name the listing site,
    # while the video is presumably served from a different host.
    headers.pop('Host', None)
    with closing(requests.get(videourl,headers=headers,stream=True,timeout=30)) as response:
        response.raise_for_status()
        chunk_size = 1024  # bytes read per iteration
        # content-length can be absent (e.g. chunked transfer); 0 means unknown.
        content_size = int(response.headers.get('content-length', 0))
        data_count = 0
        with open(file_path, "wb") as file:
            for data in response.iter_content(chunk_size=chunk_size):
                file.write(data)
                data_count = data_count + len(data)
                if content_size:
                    # Progress is only meaningful when the total size is known;
                    # this also avoids dividing by zero.
                    now_jd = (data_count / content_size) * 100
                    print("\r 文件下載進度：%d%%(%d/%d) - %s" % (now_jd, data_count, content_size, file_path), end=" ")
            print("\n>>> 獲取視頻成功了！")
    time.sleep(2)  # pause between downloads

附完整代碼：

#美拍視頻採集2
#by 微信：huguo00289
# -*- coding: UTF-8 -*-
import requests
import re,time,os,json
from fake_useragent import UserAgent
import base64
from contextlib import closing


# Storage path: create the output directory up front; exist_ok=True makes this
# a no-op when the directory already exists.
os.makedirs(f'meipai/',exist_ok=True)


# 解密美拍視頻真實地址
def decode(encoded_string):
    """Recover the real video URL bytes from Meipai's obfuscated ``video`` field.

    Layout handled here: the first four characters, reversed, are read as a
    hexadecimal number. The decimal digits of that number locate two junk runs
    spliced into the base64 payload that follows the prefix:

    * digits 1-2: start offset and length of the first junk run;
    * digits 3-4: the second run has length ``digit 4`` and starts at
      ``len - digit 3 - digit 4`` of the string left after the first removal.

    Both runs are removed and the remainder is base64-decoded.

    :param encoded_string: obfuscated string (4-char prefix + payload).
    :return: the decoded bytes; callers decode them as UTF-8 to get the URL.
    :raises ValueError: the reversed prefix is not valid hexadecimal.
    :raises IndexError: the prefix converts to fewer than four decimal digits.
    :raises binascii.Error: the cleaned payload is not valid base64.
    """
    def getHex(param1):
        # Split off the 4-char prefix and reverse it to get the hex number.
        return {
            'str': param1[4:],
            'hex': param1[:4][::-1],
        }

    def getDec(param1):
        loc2 = str(int(param1, 16))
        return {
            'pre': list(loc2[:2]),
            'tail': list(loc2[2:]),
        }

    def substr(param1, param2):
        start = int(param2[0])
        length = int(param2[1])
        junk = param1[start: start + length]
        # count=1 mirrors the JavaScript reference, where String.replace with a
        # string pattern removes only the FIRST occurrence. Without it, every
        # later occurrence of the junk text was stripped from the payload too.
        return param1[:start] + param1[start:].replace(junk, "", 1)

    def getPos(param1, param2):
        # Convert the "distance from the end" digit into a start offset.
        param2[0] = len(param1) - int(param2[0]) - int(param2[1])
        return param2

    dict2 = getHex(encoded_string)
    dict3 = getDec(dict2['hex'])
    str4 = substr(dict2['str'], dict3['pre'])
    payload = substr(str4, getPos(str4, dict3['tail']))
    # The JS atob used by the site strips '=' and tolerates missing padding;
    # base64.b64decode does not, so restore padding to a multiple of four.
    payload += "=" * (-len(payload) % 4)
    return base64.b64decode(payload)


#隨機生成協議頭
def ua():
    """Build the request headers for www.meipai.com.

    The cookie, host and referer are fixed; the User-Agent is drawn at random
    on every call.

    :return: dict with ``Cookie``, ``Host``, ``Referer`` and ``User-Agent``.
    """
    agent_source = UserAgent()
    return {
        'Cookie': 'MUSID=0su478f5e30e4u8jlf9gqquhn5; MP_WEB_GID=748151043219051; sid=0su478f5e30e4u8jlf9gqquhn5; UM_distinctid=16ea084234822-0be7691d606889-43450521-1fa400-16ea0842349133; virtual_device_id=8818a5d35ed03e6b6b4fd638a6f765ae; pvid=TVqin0MOgIjpLnJZKxhiL%2FwcrYA2K7Ke; CNZZDATA1256786412=937407365-1574650874-https%253A%252F%252Fwww.baidu.com%252F%7C1574831840',
        'Host': 'www.meipai.com',
        'Referer': 'https://www.meipai.com/square/13',
        # A fixed desktop Chrome User-Agent string can be substituted here
        # instead of the random one.
        'User-Agent': agent_source.random,
    }

#訪問網頁
def get_req(url):
    """Fetch *url* with the headers from ``ua()`` and return the body as text.

    :param url: address to request.
    :return: the response body decoded as UTF-8 when the status code is 200;
        ``None`` for any other status or when the request itself fails.
    """
    try:
        # A timeout keeps one stalled connection from hanging the whole crawl.
        response = requests.get(url, headers=ua(), timeout=30)
    except requests.RequestException as e:
        # Keep the "None on failure" contract for network errors as well, so a
        # single bad request does not abort the caller's page loop.
        print(f'訪問網頁出錯，錯誤代碼：{e}')
        return None
    if response.status_code != 200:
        return None
    text = response.content.decode('utf-8')
    time.sleep(2)  # pause between successful requests
    return text


def get_video(response):
    """Decrypt and download every video listed in one timeline JSON page.

    For each entry under ``medias`` a file name is derived from the caption,
    the obfuscated ``video`` field is decrypted with ``decode`` and the result
    is saved with ``down``. Decryption and download failures are printed and
    appended to ``meipai/spider.txt``; processing continues with the next entry.

    :param response: JSON text of one timeline page.
    """
    medias = json.loads(response)['medias']
    for media in medias:
        # Prefer the caption, then the Weibo share caption.
        video_name = media.get('caption') or media.get('weibo_share_caption') or ''
        # Drop all whitespace (spaces, but also newlines/tabs, which are not
        # usable in file names on every platform).
        video_name = ''.join(video_name.split())
        video_name = re.sub(r'[\|\/\<\>\:\*\?\\\"]', "_", video_name)  # replace illegal file-name characters
        if not video_name:
            # Without a fallback every caption-less video would be written to
            # the same "meipai/.mp4" and overwrite the previous one.
            video_name = str(media.get('id', 'unnamed'))
        print(video_name)
        video_url = media['video']
        print(video_url)

        try:
            videourl = decode(video_url).decode('utf8')  # decrypt the video address
            print(videourl)
        except Exception as e:
            print(f'視頻地址解密出錯，錯誤代碼：{e}')
            with open(r'meipai/spider.txt', 'a+', encoding='utf-8') as f:
                f.write(f'視頻解密出錯，錯誤代碼：{e}---採集{video_url}|{video_name}內容失敗\n')
            continue

        try:
            # server(video_name, videourl) is the alternative with a progress display.
            down(video_name, videourl)
        except Exception as e:
            print(f'視頻下載出錯，錯誤代碼：{e}')
            with open(r'meipai/spider.txt', 'a+', encoding='utf-8') as f:
                f.write(f'視頻下載出錯，錯誤代碼：{e}---採集{videourl}|{video_name}內容失敗\n')

#下載視頻
def down(h1,url):
    """Download *url* and save it as ``meipai/<h1>.mp4``.

    :param h1: file name without extension.
    :param url: direct video address.
    :raises requests.HTTPError: the server answered with an error status.
        Raised before the file is opened, so an error page is never saved
        as a video.
    :raises requests.RequestException: the request failed or timed out.
    """
    print("準備下載！")
    file_path = f"meipai/{h1}.mp4"
    # stream=True + iter_content writes the video chunk by chunk instead of
    # holding the whole file in memory; the timeout avoids hanging forever.
    with requests.get(url, stream=True, timeout=30) as r:
        r.raise_for_status()
        print("開始下載！")
        with open(file_path, "wb") as file:
            for chunk in r.iter_content(chunk_size=64 * 1024):
                file.write(chunk)
            print("下載完成！")
    time.sleep(2)  # pause between downloads


# 存儲視頻，附下載進度顯示
def server(h1,videourl):
    """Stream *videourl* into ``meipai/<h1>.mp4`` while printing progress.

    :param h1: file name without extension.
    :param videourl: direct video address.
    :raises requests.HTTPError: the server answered with an error status.
    :raises requests.RequestException: the request failed or timed out.
    """
    print("準備下載！")
    file_path=f'meipai/{h1}.mp4'
    headers = ua()
    # ua() pins Host to www.meipai.com for the listing API; the video is
    # presumably served from a different host, where that header would be
    # wrong, so it is removed for the download request.
    headers.pop('Host', None)
    with closing(requests.get(videourl,headers=headers,stream=True,timeout=30)) as response:
        response.raise_for_status()
        chunk_size = 1024  # bytes read per iteration
        # content-length can be absent (e.g. chunked transfer); 0 means unknown.
        content_size = int(response.headers.get('content-length', 0))
        data_count = 0
        with open(file_path, "wb") as file:
            for data in response.iter_content(chunk_size=chunk_size):
                file.write(data)
                data_count = data_count + len(data)
                if content_size:
                    # Progress is only meaningful when the total size is known;
                    # this also avoids dividing by zero.
                    now_jd = (data_count / content_size) * 100
                    print("\r 文件下載進度：%d%%(%d/%d) - %s" % (now_jd, data_count, content_size, file_path), end=" ")
            print("\n>>> 獲取視頻成功了！")
    time.sleep(2)  # pause between downloads


#運行主函數
def main(start_page=1, end_page=100, tid=13, count=24):
    """Crawl timeline pages and download the videos they list.

    With the defaults this requests pages 1 through 99 of topic 13, 24 entries
    per page, exactly as the original hard-coded loop did.

    :param start_page: first page number to request.
    :param end_page: stop before this page number (exclusive, as in ``range``).
    :param tid: value of the ``tid`` query parameter.
    :param count: value of the ``count`` query parameter.
    """
    for i in range(start_page, end_page):
        url = f"https://www.meipai.com/squares/new_timeline?page={i}&count={count}&tid={tid}"
        print(url)
        response=get_req(url)
        if not response:
            # Page could not be fetched; move on to the next one.
            continue
        try:
            get_video(response)
        except Exception as e:
            # Best effort: report the failing page and keep crawling.
            print(f'獲取視頻出錯了，錯誤代碼：{e}')




# Start the crawl only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

方法二：execjs庫運行js破解視頻地址

test.js文件

// Method-name aliases for bracket-notation calls: a[h](4) is a.substring(4),
// a[i]("") is a.split(""), and so on. They are assigned without `var`, so
// they become implicit globals.
h = "substring"
    , i = "split"
    , j = "replace"
    , k = "substr";


/**
 * Split the obfuscated string into its payload and its hex key.
 * @param {string} a obfuscated string
 * @returns {{str: string, hex: string}} `str` is everything after the first
 *   four characters; `hex` is those four characters in reverse order.
 */
function getHex(a) {
    var prefix = a.substring(0, 4);
    var reversedPrefix = prefix.split("").reverse().join("");
    return {
        str: a.substring(4),
        hex: reversedPrefix
    };
}


/**
 * Convert the hex key to decimal and split its digits into two groups.
 * @param {string} a hexadecimal string
 * @returns {{pre: string[], tail: string[]}} `pre` holds the first two decimal
 *   digits, `tail` the remaining ones, each as one-character strings.
 */
function getDec(a) {
    var digits = parseInt(a, 16).toString();
    var pre = digits.substring(0, 2).split("");
    var tail = digits.substring(2).split("");
    return {
        pre: pre,
        tail: tail
    };
}


/**
 * Remove one junk run from `a`.
 * @param {string} a text containing the junk run
 * @param {Array} b b[0] is the start offset of the run, b[1] its length
 * @returns {string} `a` with the first occurrence of the run (searched from
 *   offset b[0] onward) removed.
 */
function substr(a, b) {
    var head = a.substring(0, b[0]);
    var junk = a.substr(b[0], b[1]);
    // replace() with a string pattern removes only the first match.
    var rest = a.substring(b[0]).replace(junk, "");
    return head + rest;
}


/**
 * Turn b[0] from a distance-from-the-end value into a start offset.
 * Mutates `b` in place and returns it.
 * @param {string} a text the offset refers to
 * @param {Array} b two-element [distance, length] pair
 * @returns {Array} the same array, with b[0] = a.length - b[0] - b[1]
 */
function getPos(a, b) {
    b[0] = a.length - b[0] - b[1];
    return b;
}


/**
 * Base64-decode `a` into a string of single-byte characters.
 * Trailing "=" padding is stripped first, so unpadded input is accepted.
 * @param {string} a base64 text
 * @returns {string} decoded text
 * @throws {Error} when the unpadded length is 1 more than a multiple of 4,
 *   which cannot be valid base64. The original threw an undefined identifier
 *   `f` here, which surfaced as an unrelated ReferenceError.
 */
function atob(a) {
    var e = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
    if (a = a.replace(/=+$/, ""),
    a.length % 4 == 1)
        throw new Error("'atob' failed: The string to be decoded is not correctly encoded.");
    // b accumulates 6-bit groups, d counts accepted characters, g is the read
    // index and h the output. The local `var h` shadows the global alias h
    // inside this function only. Characters not found in `e` (indexOf -> -1)
    // are skipped by the `~c` test.
    for (var b, c, d = 0, g = 0, h = ""; c = a.charAt(g++); ~c && (b = d % 4 ? 64 * b + c : c,
    d++ % 4) ? h += String.fromCharCode(255 & b >> (-2 * d & 6)) : 0)
        c = e.indexOf(c);
    return h
}

/**
 * Decode an obfuscated Meipai video string into the real address.
 * Splits off the hex key, removes the two junk runs it describes, then
 * base64-decodes what is left.
 * @param {string} a obfuscated string
 * @returns {string} decoded address
 */
function main(a) {
    var b = getHex(a)
        , c = getDec(b.hex)
        , d = substr(b.str, c.pre)
        // Declared with var: the original assigned `zz` as an implicit global.
        , zz = substr(d, getPos(d, c.tail));
    return atob(zz)
}


//a = '55e0aHRVhJYAS0cDovL212dmlkZW8xMC5tZWl0dWRhdGEuY29tLzVkMzNmMGRmM2RkYzRwMm5sd3N0cWg3OTA2X0gyNjRfMV85YzNiM2QwOTNhMWRhNS5tcDQ/az1kODlkOTVmZmRlNWZjNzQ0YzUwOTY0NDZiZjNhNTE0OCZ0PTVkNDEIQbtCanOZiOWY4'
//console.log(main(a))

#運行js文件解密視頻網址
import execjs

def decode2(target):
    """Decode an obfuscated video string by running ``main`` from ./test.js.

    Source: 木下瞳/木下學Python

    :param target: obfuscated video string.
    :return: the value returned by the JavaScript ``main`` function.
    """
    # Read through a context manager so the file handle is closed right away
    # (the original left it open), with an explicit encoding so the result
    # does not depend on the platform default.
    with open(r'./test.js', encoding='utf-8') as js_file:
        js = execjs.compile(js_file.read())
    return js.call("main", target)


def main1():
    """Fetch page 1 of the timeline and hand the JSON text to ``get_video2``."""
    # NOTE(review): get_video2 is not defined in the visible source; presumably
    # it is get_video with decode2 in place of decode. Confirm.
    page_url = "https://www.meipai.com/squares/new_timeline?page=1&count=24&tid=13"
    get_video2(get_req(page_url))

方法三：藉助網頁破解工具，因爲post包兩個參數沒搞清楚頭緒，這裏直接採用無頭瀏覽器,selenium!

運行效果：

from selenium import webdriver
import time

#selenium打開瀏覽器
def s_html():
    """Start Chrome through Selenium and open the third-party resolver page.

    :return: the ``webdriver.Chrome`` instance with https://meipai.iiilab.com
        loaded; the caller is responsible for quitting it.
    """
    url="https://meipai.iiilab.com"
    # Fixed: this line carried one extra leading space, which is an
    # IndentationError and kept the whole function from compiling.
    chromedriver_path = r"替換成你本身的chromedriver.exe路徑"  # full path; replace with your own chromedriver location
    options=webdriver.ChromeOptions()  # Chrome start-up options
    # Excludes Chrome's enable-automation switch; per the original author this
    # keeps sites from recognising the Selenium-driven browser.
    options.add_experimental_option("excludeSwitches",['enable-automation'])
    # NOTE(review): executable_path is not accepted by newer Selenium 4
    # releases. Confirm the installed Selenium version.
    browser=webdriver.Chrome(executable_path=chromedriver_path,options=options)
    browser.get(url)
    time.sleep(2)  # give the page time to load
    return browser

#獲取視頻鏈接
def get_s_video(browser,videourl):
    """Resolve one Meipai page URL to a download link via the resolver page.

    Types the URL into the link box, clicks the default button, waits, reads
    the ``href`` of the success link, then clicks the danger button
    (presumably "clear", so the form is empty for the next URL).

    :param browser: Selenium driver already showing the resolver page.
    :param videourl: Meipai media page URL.
    :return: the ``href`` of the resulting download link.
    """
    link_box = browser.find_element_by_xpath('//*[@class="form-control link-input"]')
    link_box.send_keys(videourl)
    time.sleep(2)

    browser.find_element_by_xpath('//*[@class="btn btn-default"]').click()
    time.sleep(8)  # wait for the result to appear on the page

    download_link = browser.find_element_by_xpath('//*/div[@class="caption"]/p/a[@class="btn btn-success"]')
    href = download_link.get_attribute("href")
    print(href)

    browser.find_element_by_xpath('//*[@class="btn btn-danger"]').click()
    time.sleep(2)
    return href

#關閉瀏覽器
def close_s(browser):
    """Quit the Selenium browser passed in as *browser*."""
    browser.quit()  # close the browser


#獲取美拍視頻
def get_video3(response,browser):
    """Download every video of one timeline page via the resolver site.

    For each entry under ``medias`` the media page URL is built from its
    ``id``, resolved to a direct link with ``get_s_video`` and saved with
    ``down``.

    :param response: JSON text of one timeline page.
    :param browser: Selenium driver already showing the resolver page.
    """
    medias = json.loads(response)['medias']
    print(medias)
    for media in medias:
        # Prefer the caption, then the Weibo share caption.
        video_name = media.get('caption') or media.get('weibo_share_caption') or ''
        # Drop all whitespace (spaces, but also newlines/tabs, which are not
        # usable in file names on every platform).
        video_name = ''.join(video_name.split())
        video_name = re.sub(r'[\|\/\<\>\:\*\?\\\"]', "_", video_name)  # replace illegal file-name characters
        if not video_name:
            # Without a fallback every caption-less video would be written to
            # the same "meipai/.mp4" and overwrite the previous one.
            video_name = str(media['id'])
        print(video_name)
        video_url = f"https://www.meipai.com/media/{media['id']}"
        print(video_url)
        videourl = get_s_video(browser, video_url)
        down(video_name, videourl)  # download the video


def main2():
    """Fetch page 1 of the timeline and download its videos through Selenium."""
    browser=s_html()
    try:
        url = "https://www.meipai.com/squares/new_timeline?page=1&count=24&tid=13"
        response = get_req(url)
        get_video3(response,browser)
    finally:
        # Always quit the browser, even when fetching or downloading raises;
        # otherwise the Chrome process is left running.
        close_s(browser)




# Run the Selenium variant only when this file is executed as a script.
if __name__ == '__main__':
    main2()