"""Print the title, page URL and decoded video URL of meipai.com videos."""

import base64
import re
import sys
import threading

_SITE_ROOT = 'https://www.meipai.com'

_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) '
                  'Gecko/20130331 Firefox/21.0'
}


def Decrypt_video_url(content):
    """Decode an obfuscated ``data-video`` attribute value into a URL.

    The first four characters of ``content`` are a hexadecimal number
    written back to front.  The first four digits of that number's decimal
    form, ``p0 p1 t0 t1``, locate two runs of filler characters that were
    spliced into the Base64 text that follows the header:

    * ``p1`` characters starting at offset ``p0``;
    * then, in what remains, ``t1`` characters starting at offset
      ``len(remaining) - t0 - t1``.

    Both runs are removed and the result is Base64-decoded.

    Args:
        content: The raw attribute value (header plus obfuscated Base64).

    Returns:
        The decoded text (bytes decoded with the default UTF-8 codec).

    Raises:
        ValueError: If the header is not hexadecimal, yields fewer than four
            decimal digits, or points before the start of the payload; also
            (through its subclasses) if the payload is not valid Base64 or
            not valid UTF-8.
    """
    header, body = content[:4], content[4:]
    digits = str(int(header[::-1], 16))
    if len(digits) < 4:
        raise ValueError(
            'header {!r} does not yield four position digits'.format(header))
    head_pos, head_len, tail_off, tail_len = (int(d) for d in digits[:4])

    # Cut the filler out by position.  Removing it with str.replace() would
    # also delete every later occurrence of the same characters and corrupt
    # the Base64 text whenever the filler happens to recur.
    stage = body[:head_pos] + body[head_pos + head_len:]

    # The second run is addressed relative to the end of the remaining text.
    tail_pos = len(stage) - tail_off - tail_len
    if tail_pos < 0:
        raise ValueError('payload is shorter than its header requires')
    encoded = stage[:tail_pos] + stage[tail_pos + tail_len:]

    return base64.b64decode(encoded).decode()


def Page_text(url, timeout=10):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    Args:
        url: Address of the page to download.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``;
            without one a stalled server would block the caller forever.

    Returns:
        The ``text`` attribute of the response.
    """
    # Imported here rather than at module level so that the pure decoding
    # helper above stays importable on machines without ``requests``.
    import requests

    return requests.get(url, headers=_HEADERS, timeout=timeout).text


def Parse_url(video_title, url_tail):
    """Download one video page and print its title, page URL and video URL.

    Args:
        video_title: Title printed on the first output line.
        url_tail: Path of the video page, appended to the site root.

    Pages without a usable ``data-video`` attribute are reported on stderr
    and skipped instead of raising inside the worker thread.
    """
    page_url = _SITE_ROOT + url_tail
    video_page = Page_text(page_url)

    # The obfuscated video URL is stored in the first data-video attribute.
    match = re.search(r'data-video="(.*?)"', video_page, re.S)
    if match is None:
        print('no data-video attribute found: {}'.format(page_url),
              file=sys.stderr)
        return

    try:
        video_url = Decrypt_video_url(match.group(1))
    except ValueError as err:
        print('cannot decode video URL for {}: {}'.format(page_url, err),
              file=sys.stderr)
        return

    print("{}\n{}\n{}\n".format(video_title, page_url, video_url))


def Get_url(url):
    """Process every video listed on the index page at ``url``.

    Titles and page links are scraped from the index page, paired in order
    of appearance, and each pair is handled by ``Parse_url`` in its own
    thread.  The function returns once all of those threads have finished.

    Args:
        url: Address of the index page.
    """
    index_page = Page_text(url)

    # Title of each video.
    videos_title = re.findall(
        r'class="content-l-p pa" title="(.*?)">', index_page, re.S)
    # Path of each video's playback page.
    urls = re.findall(
        r'<div class="layer-black pa"></div>\n\s*<a hidefocus href="(.*?)"',
        index_page, re.S)

    workers = [
        threading.Thread(name='GetUrl', target=Parse_url,
                         args=(video_title, url_tail))
        for video_title, url_tail in zip(videos_title, urls)
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()


if __name__ == '__main__':
    Get_url('https://www.meipai.com/')