本次目標 http://www.qiqi.la/vod-detail-id-46194.html 目的,down魔道祖師,實現 前期分析文件獲得如下粗略步驟 1 進入二級頁面,找到 <iframe width="100%" height="480" src="https://cn2.zuixinbo.com/share/722caafb4825ef5d8670710fa29087cf" frameborder="0" allowfullscreen=""></iframe> 獲得網址 2 訪問 https://cn2.zuixinbo.com/share/722caafb4825ef5d8670710fa29087cf 需要帶上協議頭 Referer: https://cn2.zuixinbo.com/share/722caafb4825ef5d8670710fa29087cf User-Agent: Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36 返回另外一個頁面response_1 獲得文件標題 <title>重新壓制魔道祖師 前塵篇02 序章 誅邪(下)福利加長版 高清(480P).qlv</title> 3 在response_1 獲得:var main = "/20180710/4671_a5ef5a19/index.m3u8?sign=b0023d8b455da27a4294b38c7815f7b3"; 拼合網頁:https://cn2.zuixinbo.com/20180710/4671_a5ef5a19/index.m3u8?sign=b0023d8b455da27a4294b38c7815f7b3 訪問:獲得返回結果 #EXTM3U #EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=800000,RESOLUTION=1080x608 1000k/hls/index.m3u8 4 拼合 https://cn2.zuixinbo.com/20180710/4671_a5ef5a19/1000k/hls/index.m3u8 帶協議訪問 Referer: https://cn2.zuixinbo.com/share/722caafb4825ef5d8670710fa29087cf User-Agent: Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36 獲得.ts下載文件路徑 分段下載 5 拼合.ts文件
有空更新完整代碼
2018-10-21
開始編寫代碼
在重新寫代碼的過程中,發現直接在播放頁面就有所有的播放地址,只不過是用ucs2的
編碼轉換了一下,我們需要把其轉換成ansi編碼
2 OK,這下直接拿到播放地址,做一下格式化的工作,進行第2步解析,上面的第一步工作算是白費了一番心思
html
3 按照上面步驟依次完成,基本沒問題
# -*- coding:utf-8 -*-
# @time:2018-10-21 14:43
# @Auther:1043453579@qq.com
"""First version: download the .ts segments of HLS videos behind share pages.

Flow for one share URL:

1. fetch the share page, read its ``<title>`` and the ``var main = "..."``
   path of the master playlist;
2. fetch the master playlist and pick the ``1080x608`` variant playlist;
3. fetch the variant playlist and collect the segment names;
4. download every segment into a directory named after the page title.

The share URLs are read from ``2.txt`` (one ``label$url#`` entry per line).
"""
from urllib.parse import urljoin
from urllib.parse import urlsplit
from urllib.request import Request
from urllib.request import urlopen
import re, time, os
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor

static_url_1 = 'http://www.qiqi.la/vod-detail-id-46194.html'

# Sentinel string returned by ``A.take_middle_text`` when the arguments are
# unusable or the start marker is missing (kept for backward compatibility).
_NOT_FOUND_TEXT = '傳參錯誤或未找到傳參文本'

# Seconds to wait on a single HTTP request before giving up.
_TIMEOUT = 60


class A(object):
    """Scraper that walks share page -> playlists -> .ts segments."""

    def __init__(self, url, e=15):
        # ``url`` is accepted for compatibility with existing callers; the
        # share URL actually processed is the one passed to ``main``.
        self.header = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36'}
        self.path = os.getcwd()
        self.e = e
        # Default site root; ``main`` replaces it with the root of the share
        # URL it is given, because share links live on several hosts.
        self.static_url = 'https://cn2.zuixinbo.com'

    @staticmethod
    def _site_root(url):
        """Return ``scheme://host`` of *url*."""
        parts = urlsplit(url)
        return '{0}://{1}'.format(parts.scheme, parts.netloc)

    @staticmethod
    def _parse_segments(playlist_text):
        """Return the non-empty, non-comment lines of an m3u8 playlist."""
        stripped = (line.strip() for line in playlist_text.splitlines())
        return [line for line in stripped if line and not line.startswith('#')]

    @staticmethod
    def _share_url_from_line(line):
        """Extract the URL from a ``label$url#`` line; None if it has none."""
        tokens = line.split()
        if not tokens or '$' not in tokens[0]:
            return None
        return tokens[0].split('$')[1].split('#')[0] or None

    @staticmethod
    def _found(value):
        """Tell whether a ``take_middle_text`` result is a usable string."""
        return isinstance(value, str) and bool(value) and value != _NOT_FOUND_TEXT

    def _fetch(self, url, referer=''):
        """GET *url* with the browser headers and return the raw body bytes."""
        headers = self.header.copy()
        if referer:
            headers['Referer'] = referer
        # ``with`` closes the connection even when read() fails.
        with urlopen(Request(url=url, headers=headers), timeout=_TIMEOUT) as resp:
            return resp.read()

    def num_of_e(self, url_2):
        """Open the share page and return its title and master playlist path.

        Returns a dict with keys ``'var_main'``, ``'referer'`` and ``'標題'``.
        Raises ValueError when the page lacks the title or ``var main``.
        """
        res = self._fetch(url_2).decode()
        title = self.take_middle_text(res, '<title>', txt_e='</title>')
        match = re.search('var main = "(.*?)";', res)
        if match is None or not match.group(1):
            raise ValueError('no "var main" playlist path in {0}'.format(url_2))
        if not self._found(title):
            raise ValueError('no <title> found in {0}'.format(url_2))
        return {'var_main': match.group(1), 'referer': url_2, '標題': title}

    def open_3(self, url, referer='', **kwargs):
        """Fetch the master playlist and return the 1080x608 variant path.

        Returns ``{'url': [relative_variant_path], 'regerer1': master_url}``.
        Raises ValueError when the playlist has no 1080x608 entry.
        """
        url = self.static_url + url
        res = self._fetch(url, referer).decode()
        stem = self.take_middle_text(res, '1080x608', '.m3u8')
        if not self._found(stem):
            raise ValueError('no 1080x608 variant in playlist {0}'.format(url))
        return {'url': (stem + '.m3u8').split(), 'regerer1': url}

    def open_4(self, url, referer1='', **kwargs):
        """Fetch the variant playlist and return its segment names.

        *url* is the relative variant path (a one-element list as produced by
        ``open_3``, or a plain string); it is resolved against *referer1*.
        Returns ``{'ts_list': [...], 'url': absolute_variant_url}``.
        """
        relative = url[0] if isinstance(url, (list, tuple)) else url
        url = urljoin(referer1, relative)
        print(url)
        res = self._fetch(url, referer1).decode()
        return {'ts_list': self._parse_segments(res), 'url': url}

    def take_middle_text(self, txt, txt_s, txt_e='', seeks=0, seeke=0):
        """Extract text relative to the first occurrence of *txt_s* in *txt*.

        Exactly one mode is used, checked in this order:

        * ``txt_e``: return the text between *txt_s* and the next *txt_e*
          after it; return ``False`` when *txt_e* is not found.
        * ``seeks``: return up to *seeks* characters before *txt_s*.
        * ``seeke``: return up to *seeke* characters after *txt_s*.

        Returns the sentinel string '傳參錯誤或未找到傳參文本' when no mode is
        given, *txt_s* is absent, or the arguments are not strings.
        """
        if not (txt_e or seeks or seeke):
            return _NOT_FOUND_TEXT
        try:
            start = txt.find(txt_s)
            if start == -1:
                return _NOT_FOUND_TEXT
            body_start = start + len(txt_s)
            if txt_e:
                # Search only after the start marker so an earlier occurrence
                # of the end marker cannot produce an empty/garbage slice.
                end = txt.find(txt_e, body_start)
                if end == -1:
                    return False
                return txt[body_start:end]
            if seeks:
                # Clamp at 0: a negative index would wrap around the string.
                return txt[max(0, start - seeks):start]
            return txt[body_start:body_start + seeke]
        except (AttributeError, TypeError):
            return _NOT_FOUND_TEXT

    def down_ts(self, dict, path_1):
        """Download every segment listed in *dict* into directory *path_1*.

        *dict* is the mapping returned by ``open_4``. Segments whose file
        already exists are skipped; a failed segment is reported and the
        loop moves on to the next one.
        """
        playlist_url = dict['url']
        for name in dict['ts_list']:
            print(path_1, '這裏是path_1')
            # Use only the file name so query strings or sub-paths in the
            # segment URI do not end up in the local path.
            local_name = os.path.basename(urlsplit(name).path) or name
            path = os.path.join(path_1, local_name)
            print(path, '這裏是path_ts文件網址')
            if os.path.exists(path):
                print('已存在,跳過')
                continue
            try:
                res = self._fetch(urljoin(playlist_url, name))
                with open(path, 'wb') as f:
                    f.write(res)
                print('成功寫入一條')
            except Exception as err:
                # Best effort: keep going with the remaining segments.
                print('寫入失敗', err)

    def main(self, url):
        """Download all segments of the video behind share page *url*."""
        # Share links are spread over several hosts; playlist paths are
        # relative to the host of the share page itself.
        self.static_url = self._site_root(url)
        dict_1 = self.num_of_e(url)
        dict_2 = self.open_3(dict_1['var_main'], dict_1['referer'])
        dict_3 = self.open_4(dict_2['url'], dict_2['regerer1'])
        title = dict_1['標題']
        path = os.path.join(self.path, title)
        # Create the target directory when it does not exist yet.
        os.makedirs(path, exist_ok=True)
        self.down_ts(dict_3, path)


if __name__ == '__main__':
    a_1 = A(static_url_1, 15)
    with ProcessPoolExecutor(2) as ex, open('2.txt', 'r', encoding='utf8') as f:
        for i in f:
            a = A._share_url_from_line(i)
            if a is None:
                # Blank or malformed line: nothing to download.
                continue
            # .result() blocks, so episodes are processed one at a time.
            print(ex.submit(a_1.main, a).result())
2018-10-30併發
# -*- coding:utf-8 -*-
# @time:2018-10-21 14:43
# @Auther:1043453579@qq.com
"""Concurrent version: download the .ts segments of HLS videos behind share pages.

Flow for one share URL:

1. fetch the share page, read its ``<title>`` and the ``var main = "..."``
   path of the master playlist;
2. fetch the master playlist and pick the ``1080x608`` variant playlist;
3. fetch the variant playlist and collect the segment names;
4. download every segment into a directory named after the page title.

The share URLs are read from ``2.txt`` (one ``label$url#`` entry per line) and
handed to a pool of worker processes. Failed requests are appended to ``3.txt``.
"""
from urllib.parse import urljoin
from urllib.parse import urlsplit
from urllib.request import Request
from urllib.request import urlopen
import re, time, os
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor

static_url_1 = 'http://www.qiqi.la/vod-detail-id-46194.html'

# Sentinel string returned by ``A.take_middle_text`` when the arguments are
# unusable or the start marker is missing (kept for backward compatibility).
_NOT_FOUND_TEXT = '傳參錯誤或未找到傳參文本'


class A(object):
    """Scraper that walks share page -> playlists -> .ts segments."""

    def __init__(self):
        self.header = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36'}
        self.path = os.getcwd()
        # Site root of the share page being processed; set by ``main``.
        self.static_url = ''
        # Optional redis client holding proxy addresses as keys, e.g.
        # redis.Redis(host='127.0.0.1', port=6379, db=0). Not configured here.
        self.r = None

    @staticmethod
    def _site_root(url):
        """Return ``scheme://host`` of *url*."""
        parts = urlsplit(url)
        return '{0}://{1}'.format(parts.scheme, parts.netloc)

    @staticmethod
    def _parse_segments(playlist_text):
        """Return the non-empty, non-comment lines of an m3u8 playlist."""
        stripped = (line.strip() for line in playlist_text.splitlines())
        return [line for line in stripped if line and not line.startswith('#')]

    @staticmethod
    def _share_url_from_line(line):
        """Extract the URL from a ``label$url#`` line; None if it has none."""
        tokens = line.split()
        if not tokens or '$' not in tokens[0]:
            return None
        return tokens[0].split('$')[1].split('#')[0] or None

    @staticmethod
    def _found(value):
        """Tell whether a ``take_middle_text`` result is a usable string."""
        return isinstance(value, str) and bool(value) and value != _NOT_FOUND_TEXT

    def _headers(self, referer=''):
        """Return a fresh header dict, optionally with a Referer entry."""
        # Copy so the shared ``self.header`` is never mutated per request.
        headers = self.header.copy()
        if referer:
            headers['Referer'] = referer
        return headers

    def get_proxy(self):
        """Return a proxy mapping built from a random key of the redis client.

        Raises RuntimeError when no redis client has been assigned to
        ``self.r``.
        """
        client = getattr(self, 'r', None)
        if client is None:
            raise RuntimeError('no redis client configured on self.r')
        key = client.randomkey()
        if isinstance(key, bytes):
            key = key.decode()
        return {'http': key}

    def down_1(self, url, referer='', code=True):
        """GET *url*, retrying until a non-empty body is received.

        With ``code`` true the body is decoded to ``str``, otherwise the raw
        bytes are returned. Every failed attempt is printed, appended to
        ``3.txt`` and followed by a 10 second pause. There is no retry limit:
        a permanently failing URL keeps this call looping.
        """
        headers = self._headers(referer)
        while True:
            try:
                with urlopen(Request(url=url, headers=headers), timeout=60) as resp:
                    res = resp.read()
                if code:
                    res = res.decode()
                if not res:
                    raise ValueError('empty response')
                # Short pause between successful requests.
                time.sleep(1)
                return res
            except Exception:
                print('請求失敗', url)
                with open('3.txt', 'a+', encoding='utf8') as f:
                    f.write(url)
                    f.write('\n')
                time.sleep(10)

    def num_of_e(self, url_2):
        """Open the share page and return its title and master playlist path.

        Returns a dict with keys ``'var_main'``, ``'referer'`` and ``'標題'``.
        Raises ValueError when the page lacks the title or ``var main``.
        """
        res = self.down_1(url_2)
        title = self.take_middle_text(res, '<title>', txt_e='</title>')
        match = re.search('var main = "(.*?)";', res)
        if match is None or not match.group(1):
            raise ValueError('no "var main" playlist path in {0}'.format(url_2))
        if not self._found(title):
            raise ValueError('no <title> found in {0}'.format(url_2))
        return {'var_main': match.group(1), 'referer': url_2, '標題': title}

    def open_3(self, url, referer='', **kwargs):
        """Fetch the master playlist and return the 1080x608 variant path.

        Returns ``{'url': [relative_variant_path], 'regerer1': master_url}``.
        Raises ValueError when the playlist has no 1080x608 entry.
        """
        url = self.static_url + url
        res = self.down_1(url, referer=referer)
        stem = self.take_middle_text(res, '1080x608', '.m3u8')
        if not self._found(stem):
            raise ValueError('no 1080x608 variant in playlist {0}'.format(url))
        return {'url': (stem + '.m3u8').split(), 'regerer1': url}

    def open_4(self, url, referer1='', **kwargs):
        """Fetch the variant playlist and return its segment names.

        *url* is the relative variant path (a one-element list as produced by
        ``open_3``, or a plain string); it is resolved against *referer1*.
        Returns ``{'ts_list': [...], 'url': absolute_variant_url,
        'referer': referer1}``.
        """
        relative = url[0] if isinstance(url, (list, tuple)) else url
        url = urljoin(referer1, relative)
        print(url)
        res = self.down_1(url, referer=referer1)
        # 'referer' lets down_ts send the same Referer explicitly instead of
        # relying on a header left behind in shared state.
        return {'ts_list': self._parse_segments(res), 'url': url, 'referer': referer1}

    def take_middle_text(self, txt, txt_s, txt_e='', seeks=0, seeke=0):
        """Extract text relative to the first occurrence of *txt_s* in *txt*.

        Exactly one mode is used, checked in this order:

        * ``txt_e``: return the text between *txt_s* and the next *txt_e*
          after it; return ``False`` when *txt_e* is not found.
        * ``seeks``: return up to *seeks* characters before *txt_s*.
        * ``seeke``: return up to *seeke* characters after *txt_s*.

        Returns the sentinel string '傳參錯誤或未找到傳參文本' when no mode is
        given, *txt_s* is absent, or the arguments are not strings.
        """
        if not (txt_e or seeks or seeke):
            return _NOT_FOUND_TEXT
        try:
            start = txt.find(txt_s)
            if start == -1:
                return _NOT_FOUND_TEXT
            body_start = start + len(txt_s)
            if txt_e:
                # Search only after the start marker so an earlier occurrence
                # of the end marker cannot produce an empty/garbage slice.
                end = txt.find(txt_e, body_start)
                if end == -1:
                    return False
                return txt[body_start:end]
            if seeks:
                # Clamp at 0: a negative index would wrap around the string.
                return txt[max(0, start - seeks):start]
            return txt[body_start:body_start + seeke]
        except (AttributeError, TypeError):
            return _NOT_FOUND_TEXT

    def down_ts(self, dict, path_1):
        """Download every segment listed in *dict* into directory *path_1*.

        *dict* is the mapping returned by ``open_4``. Segments whose file
        already exists are skipped. A failed segment is printed, logged to
        ``3.txt`` as ``timestamp###error$$$segment_url`` and followed by a
        5 second pause; the loop then moves on to the next segment.
        """
        playlist_url = dict['url']
        headers = self._headers(dict.get('referer', ''))
        for name in dict['ts_list']:
            # Use only the file name so query strings or sub-paths in the
            # segment URI do not end up in the local path.
            local_name = os.path.basename(urlsplit(name).path) or name
            path = os.path.join(path_1, local_name)
            if os.path.exists(path):
                print('已存在,跳過', name)
                continue
            segment_url = urljoin(playlist_url, name)
            try:
                with urlopen(Request(url=segment_url, headers=headers), timeout=60) as resp:
                    res = resp.read()
                time.sleep(1)
                if not res:
                    raise ValueError('empty response')
                with open(path, 'wb') as f:
                    f.write(res)
                print('成功寫入一條', name)
            except Exception as e:
                stamp = '-'.join([str(part) for part in time.localtime()[0:6]])
                with open('3.txt', 'a+', encoding='utf8') as f:
                    # str(e): concatenating the exception object itself would
                    # raise TypeError inside this handler.
                    f.write(stamp + '###' + str(e) + '$$$' + segment_url)
                    f.write('\n')
                print('寫入失敗', name, e)
                time.sleep(5)

    def main(self, url):
        """Download all segments of the video behind share page *url*."""
        # Playlist paths are relative to the host of the share page itself.
        self.static_url = self._site_root(url)
        dict_1 = self.num_of_e(url)
        dict_2 = self.open_3(dict_1['var_main'], dict_1['referer'])
        dict_3 = self.open_4(dict_2['url'], dict_2['regerer1'])
        title = dict_1['標題']
        path = os.path.join(self.path, title)
        # Create the target directory when it does not exist yet.
        os.makedirs(path, exist_ok=True)
        self.down_ts(dict_3, path)


if __name__ == '__main__':
    a_1 = A()
    with open('2.txt', 'r', encoding='utf8') as f:
        share_urls = [u for u in (A._share_url_from_line(i) for i in f) if u]
    with ProcessPoolExecutor(3) as ex:
        futures = [(u, ex.submit(a_1.main, u)) for u in share_urls]
        for share_url, future in futures:
            # Surface worker failures instead of dropping them silently.
            error = future.exception()
            if error is not None:
                print('請求失敗', share_url, error)
# NOTE (translated from the original): the bug was in the URLs submitted
# from the web page.
第01集$https://cn2.zuixinbo.com/share/722caafb4825ef5d8670710fa29087cf# 第02集$https://cn2.zuixinbo.com/share/fbad540b2f3b5638a9be9aa6a4d8e450# 第03集$https://v-xunlei.com/share/c457d7ae48d08a6b84bc0b1b9bd7d474# 第04集$https://v-xunlei.com/share/8db1d4a631a6e9a24e2c0e842e1f1772# 第05集$https://v-xunlei.com/share/197f76fe309657064dbec74d9eea4be4# 第06集$https://v-xunlei.com/share/92b70a527191ca64ca2df1cc32142646# 第07集$https://v-xunlei.com/share/abc99d6b9938aa86d1f30f8ee0fd169f# 第08集$https://v-xunlei.com/share/22cdb13a83f73ccd1f79ffaf607b0621# 第09集$https://v-xunlei.com/share/aceacd5df18526f1d96ee1b9714e95eb# 第10集$https://v-6-cn.com/share/075b051ec3d22dac7b33f788da631fd4# 第11集$https://v-6-cn.com/share/4670c07872d5314c6ad6ffa633d4a059# 第12集$https://v-xunlei.com/share/2bba9f4124283edd644799e0cecd45ca# 第13集$https://v-cntv-cn.com/share/d87aa42cd08ba8612664a73dbdb64221# 第14集$https://v-cntv-cn.com/share/63ceea56ae1563b4477506246829b386# 第15集$https://v-cntv-cn.com/share/e8a69bf65aefc23d0f360ab695e9eac7