因爲常常在B站上學習,但無奈於家裏網絡太差,在線觀看卡頓嚴重,因而萌生了下載視頻的想法(如果只是單純想下載視頻,請用you-get庫)。廢話不多說,直接開幹。
(我發現好像不少人在爬bilibili視頻的時候都用到了某個API,然後還需要一個cid參數,這些在本文中都沒有用到。)(另外再說明一下,第3篇文章沒有通過審覈,要看的話請去公衆號,哈哈)
使用工具
目錄
(1) 用Chrome隨便打開一個視頻,==Ctrl+Shift+C==選擇視頻框嘗試獲取視頻的鏈接。結果發現獲取的鏈接地址爲:blob:https://www.bilibili.com/1987... ,這是一個blob URL(只在當前瀏覽器頁面內有效的對象引用,並非真正的文件地址),不能直接訪問。
(2) 網上查找資料後,這篇文章給了我靈感,思路:對網頁抓包,抓取到視頻分片的鏈接,再利用所抓到的鏈接信息進行定位。
(3) 可以定位到所有視頻分片的信息全部來源於 https://www.bilibili.com/vide...
(4) 對這部分json代碼進行解析(完整json數據太大,請自行去 B站 找到對應位置觀看),能夠發現:
==quality==參數是指視頻清晰度,112爲高清1080p+、80爲高清1080p、64爲高清、32爲清晰、16爲流暢。
==duration==參數是指視頻長度,單位爲秒。
==frameRate==參數爲幀率。
==SegmentBase==參數應該是初始化片段(Initialization)和片段索引(indexRange)在文件中的字節範圍,單位爲字節。
==deadline==參數是在url裏的參數,指示了鏈接失效的時間戳。
{ "code": 0, "message": "0", "ttl": 1, "data": { "from": "local", "result": "suee", "message": "", "quality": 64, "format": "flv720", "timelength": 1504366, "accept_format": "flv720,flv480,flv360", "accept_description": [ "高清 720P", "清晰 480P", "流暢 360P" ], "accept_quality": [ 64, 32, 16 ], "video_codecid": 7, "seek_param": "start", "seek_type": "offset", "dash": { "duration": 1505, "minBufferTime": 1.5, "min_buffer_time": 1.5, "video": [ { "id": 64, "baseUrl": "http://upos-sz-mirrorkodo.bilivideo.com/upgcxcode/03/88/98958803/98958803-1-30064.m4s?e=ig8euxZM2rNcNbdlhoNvNC8BqJIzNbfqXBvEqxTEto8BTrNvN0GvT90W5JZMkX_YN0MvXg8gNEV4NC8xNEV4N03eN0B5tZlqNxTEto8BTrNvNeZVuJ10Kj_g2UB02J0mN0B5tZlqNCNEto8BTrNvNC7MTX502C8f2jmMQJ6mqF2fka1mqx6gqj0eN0B599M=&uipk=5&nbs=1&deadline=1579449043&gen=playurl&os=kodobv&oi=1971869914&trid=9412dee30c4640c6907ef910ea2cb04cu&platform=pc&upsig=4b952dd652c9922b546b99e44756fe0a&uparams=e,uipk,nbs,deadline,gen,os,oi,trid,platform&mid=352741151", "base_url": "http://upos-sz-mirrorkodo.bilivideo.com/upgcxcode/03/88/98958803/98958803-1-30064.m4s?e=ig8euxZM2rNcNbdlhoNvNC8BqJIzNbfqXBvEqxTEto8BTrNvN0GvT90W5JZMkX_YN0MvXg8gNEV4NC8xNEV4N03eN0B5tZlqNxTEto8BTrNvNeZVuJ10Kj_g2UB02J0mN0B5tZlqNCNEto8BTrNvNC7MTX502C8f2jmMQJ6mqF2fka1mqx6gqj0eN0B599M=&uipk=5&nbs=1&deadline=1579449043&gen=playurl&os=kodobv&oi=1971869914&trid=9412dee30c4640c6907ef910ea2cb04cu&platform=pc&upsig=4b952dd652c9922b546b99e44756fe0a&uparams=e,uipk,nbs,deadline,gen,os,oi,trid,platform&mid=352741151", "backupUrl": [ 
"http://upos-sz-mirrorks3.bilivideo.com/upgcxcode/03/88/98958803/98958803-1-30064.m4s?e=ig8euxZM2rNcNbdlhoNvNC8BqJIzNbfqXBvEqxTEto8BTrNvN0GvT90W5JZMkX_YN0MvXg8gNEV4NC8xNEV4N03eN0B5tZlqNxTEto8BTrNvNeZVuJ10Kj_g2UB02J0mN0B5tZlqNCNEto8BTrNvNC7MTX502C8f2jmMQJ6mqF2fka1mqx6gqj0eN0B599M=&uipk=5&nbs=1&deadline=1579449043&gen=playurl&os=ks3bv&oi=1971869914&trid=9412dee30c4640c6907ef910ea2cb04cu&platform=pc&upsig=62448ee8270504e8e729d25fc402dc7f&uparams=e,uipk,nbs,deadline,gen,os,oi,trid,platform&mid=352741151" ], "backup_url": [ "http://upos-sz-mirrorks3.bilivideo.com/upgcxcode/03/88/98958803/98958803-1-30064.m4s?e=ig8euxZM2rNcNbdlhoNvNC8BqJIzNbfqXBvEqxTEto8BTrNvN0GvT90W5JZMkX_YN0MvXg8gNEV4NC8xNEV4N03eN0B5tZlqNxTEto8BTrNvNeZVuJ10Kj_g2UB02J0mN0B5tZlqNCNEto8BTrNvNC7MTX502C8f2jmMQJ6mqF2fka1mqx6gqj0eN0B599M=&uipk=5&nbs=1&deadline=1579449043&gen=playurl&os=ks3bv&oi=1971869914&trid=9412dee30c4640c6907ef910ea2cb04cu&platform=pc&upsig=62448ee8270504e8e729d25fc402dc7f&uparams=e,uipk,nbs,deadline,gen,os,oi,trid,platform&mid=352741151" ], "bandwidth": 359889, "mimeType": "video/mp4", "mime_type": "video/mp4", "codecs": "avc1.64001F", "width": 960, "height": 534, "frameRate": "25", "frame_rate": "25", "sar": "801:800", "startWithSap": 1, "start_with_sap": 1, "SegmentBase": { "Initialization": "0-995", "indexRange": "996-4639" }, "segment_base": { "initialization": "0-995", "index_range": "996-4639" }, "codecid": 7 }, // 此處省略部分數據 { "id": 30216, "baseUrl": "http://upos-hz-mirrorks3u.acgvideo.com/upgcxcode/03/88/98958803/98958803-1-30216.m4s?e=ig8euxZM2rNcNbdlhoNvNC8BqJIzNbfqXBvEqxTEto8BTrNvN0GvT90W5JZMkX_YN0MvXg8gNEV4NC8xNEV4N03eN0B5tZlqNxTEto8BTrNvNeZVuJ10Kj_g2UB02J0mN0B5tZlqNCNEto8BTrNvNC7MTX502C8f2jmMQJ6mqF2fka1mqx6gqj0eN0B599M=&uipk=5&nbs=1&deadline=1579449043&gen=playurl&os=ks3u&oi=1971869914&trid=9412dee30c4640c6907ef910ea2cb04cu&platform=pc&upsig=669eccff96c56f5586d174870a496b12&uparams=e,uipk,nbs,deadline,gen,os,oi,trid,platform&mid=352741151", "base_url": 
"http://upos-hz-mirrorks3u.acgvideo.com/upgcxcode/03/88/98958803/98958803-1-30216.m4s?e=ig8euxZM2rNcNbdlhoNvNC8BqJIzNbfqXBvEqxTEto8BTrNvN0GvT90W5JZMkX_YN0MvXg8gNEV4NC8xNEV4N03eN0B5tZlqNxTEto8BTrNvNeZVuJ10Kj_g2UB02J0mN0B5tZlqNCNEto8BTrNvNC7MTX502C8f2jmMQJ6mqF2fka1mqx6gqj0eN0B599M=&uipk=5&nbs=1&deadline=1579449043&gen=playurl&os=ks3u&oi=1971869914&trid=9412dee30c4640c6907ef910ea2cb04cu&platform=pc&upsig=669eccff96c56f5586d174870a496b12&uparams=e,uipk,nbs,deadline,gen,os,oi,trid,platform&mid=352741151", "backupUrl": [ "http://upos-sz-mirrorks3.bilivideo.com/upgcxcode/03/88/98958803/98958803-1-30216.m4s?e=ig8euxZM2rNcNbdlhoNvNC8BqJIzNbfqXBvEqxTEto8BTrNvN0GvT90W5JZMkX_YN0MvXg8gNEV4NC8xNEV4N03eN0B5tZlqNxTEto8BTrNvNeZVuJ10Kj_g2UB02J0mN0B5tZlqNCNEto8BTrNvNC7MTX502C8f2jmMQJ6mqF2fka1mqx6gqj0eN0B599M=&uipk=5&nbs=1&deadline=1579449043&gen=playurl&os=ks3bv&oi=1971869914&trid=9412dee30c4640c6907ef910ea2cb04cu&platform=pc&upsig=b6d7b3957f5dbcbf686f267851ec42dd&uparams=e,uipk,nbs,deadline,gen,os,oi,trid,platform&mid=352741151" ], "backup_url": [ "http://upos-sz-mirrorks3.bilivideo.com/upgcxcode/03/88/98958803/98958803-1-30216.m4s?e=ig8euxZM2rNcNbdlhoNvNC8BqJIzNbfqXBvEqxTEto8BTrNvN0GvT90W5JZMkX_YN0MvXg8gNEV4NC8xNEV4N03eN0B5tZlqNxTEto8BTrNvNeZVuJ10Kj_g2UB02J0mN0B5tZlqNCNEto8BTrNvNC7MTX502C8f2jmMQJ6mqF2fka1mqx6gqj0eN0B599M=&uipk=5&nbs=1&deadline=1579449043&gen=playurl&os=ks3bv&oi=1971869914&trid=9412dee30c4640c6907ef910ea2cb04cu&platform=pc&upsig=b6d7b3957f5dbcbf686f267851ec42dd&uparams=e,uipk,nbs,deadline,gen,os,oi,trid,platform&mid=352741151" ], "bandwidth": 67100, "mimeType": "audio/mp4", "mime_type": "audio/mp4", "codecs": "mp4a.40.2", "width": 0, "height": 0, "frameRate": "", "frame_rate": "", "sar": "", "startWithSap": 0, "start_with_sap": 0, "SegmentBase": { "Initialization": "0-907", "indexRange": "908-4551" }, "segment_base": { "initialization": "0-907", "index_range": "908-4551" }, "codecid": 0 } ] } }, "session": "da9c24388db43b3dfe81ebd676d5e41b", "videoFrame": { } } }
(1) 既然確定了上述鏈接就是咱們要請求的視頻鏈接,那麼嘗試直接發送GET請求看看。
結果……403錯誤,服務器拒絕訪問。
(2) 繼續回到Fiddler檢查抓包數據,發現同一鏈接反覆出現。仔細觀察返回碼發現:返回碼爲200時無數據、請求方式爲OPTIONS,返回碼爲206時有數據、請求方式爲GET。查閱[資料](https://www.cnblogs.com/heior...)得知,在獲取b站視頻之前需要先用OPTIONS方式向服務器請求分配資源,然後再用GET方式獲取視頻分片。
(3) 下面進行獲取一個視頻分片的測試。考慮到不管是這個OPTIONS請求還是GET請求,其Connection屬性都是keep-alive,因此使用requests.session()來保持會話。
import requests

# url1 is the video stream link, url2 is the audio stream link.
# (The original snippet assigned the video link to `url` but then used `url1`,
# which raised NameError; the name is now consistent.)
url1 = 'https://cn-hbwh2-cmcc-bcache-07.bilivideo.com/upgcxcode/03/88/98958803/98958803-1-30064.m4s?e=ig8euxZM2rNcNbdlhoNvNC8BqJIzNbfqXBvEqxTEto8BTrNvN0GvT90W5JZMkX_YN0MvXg8gNEV4NC8xNEV4N03eN0B5tZlqNxTEto8BTrNvNeZVuJ10Kj_g2UB02J0mN0B5tZlqNCNEto8BTrNvNC7MTX502C8f2jmMQJ6mqF2fka1mqx6gqj0eN0B599M=&uipk=5&nbs=1&deadline=1579518843&gen=playurl&os=bcache&oi=1971869869&trid=0cfd59d728114a54bd4747a01f87c9bbu&platform=pc&upsig=2a9e83f3258a7e2694bc83a5fbab8664&uparams=e,uipk,nbs,deadline,gen,os,oi,trid,platform&mid=352741151&origin_cdn=ks3'
url2 = 'http://upos-hz-mirrorks3u.acgvideo.com/upgcxcode/03/88/98958803/98958803-1-30216.m4s?e=ig8euxZM2rNcNbdlhoNvNC8BqJIzNbfqXBvEqxTEto8BTrNvN0GvT90W5JZMkX_YN0MvXg8gNEV4NC8xNEV4N03eN0B5tZlqNxTEto8BTrNvNeZVuJ10Kj_g2UB02J0mN0B5tZlqNCNEto8BTrNvNC7MTX502C8f2jmMQJ6mqF2fka1mqx6gqj0eN0B599M=&uipk=5&nbs=1&deadline=1579519469&gen=playurl&os=ks3u&oi=1971869869&trid=99ee525d6c7f4bc8a414a537797e31f3u&platform=pc&upsig=ce817a7120709c60ac43cf095de05c8d&uparams=e,uipk,nbs,deadline,gen,os,oi,trid,platform&mid=352741151'

# Headers sent with the preflight OPTIONS request.
# NOTE(review): the explicit 'Host' value (bcache-04) differs from the host in
# url1 (bcache-07); it is kept exactly as captured -- confirm it is intended.
headers1 = {
    'Host': 'cn-hbwh2-cmcc-bcache-04.bilivideo.com',
    'Connection': 'keep-alive',
    'Access-Control-Request-Method': 'GET',
    'Origin': 'https://www.bilibili.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3970.5 Safari/537.36',
    'Access-Control-Request-Headers': 'range',
    'Accept': '*/*',
    'Sec-Fetch-Site': 'cross-site',
    'Sec-Fetch-Mode': 'cors',
    'Referer': 'https://www.bilibili.com/video/av56643958?t=262',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9'
}

# Headers sent with the ranged GET request that fetches the data itself.
headers2 = {
    'Host': 'cn-hbwh2-cmcc-bcache-04.bilivideo.com',
    'Connection': 'keep-alive',
    'Origin': 'https://www.bilibili.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3970.5 Safari/537.36',
    'Accept': '*/*',
    'Sec-Fetch-Site': 'cross-site',
    'Sec-Fetch-Mode': 'cors',
    'Referer': 'https://www.bilibili.com/video/av56643958?t=262',
    'Accept-Encoding': 'identity',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Range': 'bytes=0-907'
}

# One session for both requests so the connection is reused.
session = requests.session()
session.options(url=url1, headers=headers1)
res = session.get(url=url1, headers=headers2)
print(res)

# The with-block flushes and closes the file on exit; no explicit
# flush()/close() is needed.
with open('test1.mp4', 'wb') as fp:
    fp.write(res.content)
發現是能夠成功下載視頻的,但是該視頻不能打開。
(4) 重新設置Range的範圍爲'Range': 'bytes=0-4639000' 後,視頻大小爲4.42MB,可以正常觀看時長爲59秒(視頻無聲音)。猜想:之前下載的908字節不足以構成一個畫面(該視頻分辨率爲960*534,故猜想應該是62.57kb構成一個畫面)。另外視頻無聲音,查找資料後得知,B站的視頻和音頻是分離的(此時聯想到json解析最後一組數據沒有分辨率,猜想可能是音頻的鏈接)。
驗證成功,最後一個連接爲音頻的連接
方法1:取消range參數直接一次下載整個視頻或音頻
方法2:分片下載,每次用Range請求512KB(代碼中爲1024*512字節)的資源,直到服務器返回416(請求的範圍已超出文件末尾)爲止;最後不足一片的部分可用 'Range': 'bytes=上一次末尾+1-' 獲取,從而實現分片下載。
代碼以下:
import json

import requests
from lxml import etree

# Silence the warnings urllib3 emits for the verify=False requests below.
requests.packages.urllib3.disable_warnings()

# Base headers shared by every request; never mutated after this point.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3970.5 Safari/537.36',
    'Referer': 'https://www.bilibili.com/'
}

# Number of bytes asked for per Range request (512 KiB).
CHUNK_SIZE = 1024 * 512


def _fix_mojibake(text):
    """Undo UTF-8 text that was mis-decoded as ISO-8859-1.

    Returns ``text`` unchanged when the round trip is impossible, i.e. when
    the text was already decoded correctly.
    """
    try:
        return text.encode('ISO-8859-1').decode('utf-8')
    except UnicodeError:
        return text


def _load_embedded_json(script_text):
    """Parse the first JSON object found inside an inline script body.

    Whatever precedes the first '{' (such as an assignment prefix) and
    whatever follows the object's closing brace is ignored.

    Raises:
        ValueError: if there is no '{' or the object is not valid JSON.
    """
    start = script_text.index('{')
    parsed, _ = json.JSONDecoder().raw_decode(script_text, start)
    return parsed


def _content_range_total(value):
    """Return the total size from a 'bytes a-b/total' header value, or None."""
    total = value.rpartition('/')[2].strip()
    return int(total) if total.isdigit() else None


def GetBiliVideo(homeurl, session=None):
    """Download the video and audio streams referenced by a video page.

    Args:
        homeurl: URL of the video page.
        session: optional requests session to reuse; a new one is created
            when omitted.

    The streams are written to '<title>_Video.mp4' and '<title>_Audio.mp4'
    in the current directory.
    """
    if session is None:
        session = requests.session()
    res = session.get(url=homeurl, headers=headers, verify=False)
    html = etree.HTML(res.content)
    # The third <script> in <head> is expected to carry the play-info JSON.
    videojson = _load_embedded_json(str(html.xpath('//head/script[3]/text()')[0]))
    # Video link and audio link.
    VideoURL = videojson['data']['dash']['video'][0]['baseUrl']
    AudioURl = videojson['data']['dash']['audio'][0]['baseUrl']
    print(videojson)
    # Name of the video resource, taken from the page's <h1 title="...">.
    name = _fix_mojibake(str(html.xpath("//h1/@title")[0]))
    # Download video and audio. The original first call omitted `homeurl`,
    # which is a required positional parameter, and raised TypeError.
    BiliBiliDownload(homeurl, url=VideoURL, name=name + '_Video', session=session)
    BiliBiliDownload(homeurl, url=AudioURl, name=name + '_Audio', session=session)


def BiliBiliDownload(homeurl, url, name, session=None):
    """Download ``url`` in CHUNK_SIZE pieces into ``name + '.mp4'``.

    Args:
        homeurl: page URL, sent as the Referer header.
        url: stream URL to fetch.
        name: output path without the '.mp4' suffix.
        session: optional requests session to reuse.

    Raises:
        requests.HTTPError: if the server answers with an error status other
            than 416 (the original looped forever in that case).
    """
    if session is None:
        session = requests.session()
    # Work on a copy so the Range header never leaks into the shared
    # module-level headers used for page requests.
    request_headers = dict(headers)
    request_headers['Referer'] = homeurl
    # Preflight OPTIONS request before the ranged GETs.
    session.options(url=url, headers=request_headers, verify=False)

    begin = 0
    # Opened once in 'wb' so a re-run overwrites instead of appending to an
    # older download.
    with open(name + '.mp4', 'wb') as fp:
        while True:
            request_headers['Range'] = 'bytes=%d-%d' % (begin, begin + CHUNK_SIZE - 1)
            res = session.get(url=url, headers=request_headers, verify=False)
            if res.status_code == 416:
                # Start offset is at or past the end: nothing left to fetch.
                break
            res.raise_for_status()
            fp.write(res.content)
            if res.status_code != 206 or not res.content:
                # Not a partial response (the whole body arrived at once), or
                # an empty one: finished.
                break
            begin += len(res.content)
            total = _content_range_total(res.headers.get('Content-Range', ''))
            if total is not None and begin >= total:
                break
合併視頻和音頻我查閱了不少資料,最終決定使用ffmpeg來完成這一操作。在合併之前,需要先安裝ffmpeg,詳情請參考這篇文章 ffmpeg安裝。如果運行過程中出現ffmpeg+一堆亂碼,可以參考這篇文章。
下面是合併音頻代碼:
# Combine audio and video (helper added by the article's author).
def combine_audio(video_file, audiio_file, out_file,
                  ffmpeg_bin='D:/python/ffmpeg-20200115-0dc0837-win64-static/bin/ffmpeg'):
    """Mux a video file and an audio file into ``out_file`` with ffmpeg.

    Args:
        video_file: path of the video input.
        audiio_file: path of the audio input (parameter name kept as-is for
            existing keyword callers).
        out_file: path of the muxed output.
        ffmpeg_bin: ffmpeg executable to run; defaults to the path that was
            previously hard-coded.

    Returns:
        True if ffmpeg exited with status 0, False if it exited non-zero or
        could not be started.
    """
    import subprocess

    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed through verbatim.
    cmd = [str(ffmpeg_bin), '-i', str(video_file), '-i', str(audiio_file),
           '-acodec', 'copy', str(out_file)]
    print(' '.join(cmd))
    try:
        # The original never stored this result, so its `res != 0` check
        # raised NameError and the function always returned False.
        res = subprocess.call(cmd)
    except OSError:
        # Executable missing or not runnable.
        return False
    if res != 0:
        return False
    print('Muxing Done')
    return True
# All paths below need to be full paths.
def CombineVideoAudio(videopath, audiopath, outpath):
    """Mux ``videopath`` and ``audiopath`` into ``outpath``.

    Delegates to ``ffmpeg.video.combine_audio``; its result is discarded.
    """
    mux = ffmpeg.video.combine_audio
    mux(videopath, audiopath, outpath)
如下是完整代碼:
import json
import os

import ffmpeg.video
import requests
from lxml import etree

# Silence the warnings urllib3 emits for the verify=False requests below.
requests.packages.urllib3.disable_warnings()

# Base headers shared by every request; never mutated after this point.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3970.5 Safari/537.36',
    'Referer': 'https://www.bilibili.com/'
}

# Number of bytes asked for per Range request (512 KiB).
CHUNK_SIZE = 1024 * 512


def _fix_mojibake(text):
    """Undo UTF-8 text that was mis-decoded as ISO-8859-1.

    Returns ``text`` unchanged when the round trip is impossible, i.e. when
    the text was already decoded correctly.
    """
    try:
        return text.encode('ISO-8859-1').decode('utf-8')
    except UnicodeError:
        return text


def _load_embedded_json(script_text):
    """Parse the first JSON object found inside an inline script body.

    Whatever precedes the first '{' (such as an assignment prefix) and
    whatever follows the object's closing brace is ignored, so no hard-coded
    slice offsets are needed.

    Raises:
        ValueError: if there is no '{' or the object is not valid JSON.
    """
    start = script_text.index('{')
    parsed, _ = json.JSONDecoder().raw_decode(script_text, start)
    return parsed


def _content_range_total(value):
    """Return the total size from a 'bytes a-b/total' header value, or None."""
    total = value.rpartition('/')[2].strip()
    return int(total) if total.isdigit() else None


def GetBiliVideo(homeurl, num, session=None, combine=False):
    """Download one episode of a video page into a folder named after it.

    Args:
        homeurl: URL of the episode page.
        num: zero-based index of the episode in the page's episode list.
        session: optional requests session to reuse; a new one is created
            when omitted.
        combine: when True and the streams are separate, mux them into
            '<episode>_output.mp4' via CombineVideoAudio. Off by default, as
            the original had this call commented out.
    """
    if session is None:
        session = requests.session()
    res = session.get(url=homeurl, headers=headers, verify=False)
    html = etree.HTML(res.content)
    # The third <script> in <head> is expected to carry the play-info JSON.
    videojson = _load_embedded_json(str(html.xpath('//head/script[3]/text()')[0]))
    # The fourth <script> is expected to carry the detail / episode list JSON.
    listjson = _load_embedded_json(
        _fix_mojibake(str(html.xpath('//head/script[4]/text()')[0])))

    # Video link and audio link.
    try:
        # Per the author's note: videos after 2018 ship video and audio as
        # separate streams.
        VideoURL = videojson['data']['dash']['video'][0]['baseUrl']
        AudioURl = videojson['data']['dash']['audio'][0]['baseUrl']
        has_separate_audio = True
    except (KeyError, IndexError, TypeError):
        # Per the author's note: videos before 2018 are a single flv stream.
        VideoURL = videojson['data']['durl'][0]['url']
        has_separate_audio = False

    # Folder name, taken from the page's <h1 title="...">.
    dirname = _fix_mojibake(str(html.xpath("//h1/@title")[0]))
    if not os.path.exists(dirname):
        os.makedirs(dirname)
        print('目錄文件建立成功!')

    # Name of this episode.
    name = listjson['videoData']['pages'][num]['part']
    print(name)
    base = os.path.join(os.getcwd(), dirname, name)
    video_path = base + '_Video.mp4'
    audio_path = base + '_Audio.mp3'

    # Download video and audio.
    print('正在下載 "'+name+'" 的視頻····')
    BiliBiliDownload(homeurl=homeurl, url=VideoURL, name=video_path, session=session)
    if has_separate_audio:
        print('正在下載 "'+name+'" 的音頻····')
        BiliBiliDownload(homeurl=homeurl, url=AudioURl, name=audio_path, session=session)
        if combine:
            # Full paths are required; the files live inside `dirname`.
            print('正在組合 "'+name+'" 的視頻和音頻····')
            CombineVideoAudio(video_path, audio_path, base + '_output.mp4')
    print(' "'+name+'" 下載完成!')


def BiliBiliDownload(homeurl, url, name, session=None):
    """Download ``url`` in CHUNK_SIZE pieces into the file ``name``.

    Args:
        homeurl: page URL, sent as the Referer header.
        url: stream URL to fetch.
        name: output file path.
        session: optional requests session to reuse.

    Raises:
        requests.HTTPError: if the server answers with an error status other
            than 416 (the original looped forever in that case).
    """
    if session is None:
        session = requests.session()
    # Work on a copy so the Range header never leaks into the shared
    # module-level headers used for page requests.
    request_headers = dict(headers)
    request_headers['Referer'] = homeurl
    # Preflight OPTIONS request before the ranged GETs.
    session.options(url=url, headers=request_headers, verify=False)

    begin = 0
    # Opened once in 'wb' so a re-run overwrites instead of appending to an
    # older download.
    with open(name, 'wb') as fp:
        while True:
            request_headers['Range'] = 'bytes=%d-%d' % (begin, begin + CHUNK_SIZE - 1)
            res = session.get(url=url, headers=request_headers, verify=False)
            if res.status_code == 416:
                # Start offset is at or past the end: nothing left to fetch.
                break
            res.raise_for_status()
            fp.write(res.content)
            if res.status_code != 206 or not res.content:
                # Not a partial response (the whole body arrived at once), or
                # an empty one: finished.
                break
            begin += len(res.content)
            total = _content_range_total(res.headers.get('Content-Range', ''))
            if total is not None and begin >= total:
                break


def CombineVideoAudio(videopath, audiopath, outpath):
    """Mux video and audio into ``outpath``, then delete the two inputs.

    The inputs are kept when ``ffmpeg.video.combine_audio`` explicitly
    reports failure by returning False, so a failed mux no longer destroys
    the downloads.

    Returns:
        False if muxing reported failure, True otherwise.
    """
    if ffmpeg.video.combine_audio(videopath, audiopath, outpath) is False:
        return False
    os.remove(videopath)
    os.remove(audiopath)
    return True


if __name__ == '__main__':
    # Example id: av44518113
    av = input('請輸入視頻號:').strip()
    url = 'https://www.bilibili.com/video/' + av
    # Episode selection (1-based, inclusive).
    range_start = input('從第幾集開始?')
    range_end = input('到第幾集結束?')
    try:
        first, last = int(range_start), int(range_end)
    except ValueError:
        # Non-numeric input: force the "invalid selection" branch.
        first, last = 1, 0
    # first must be >= 1, otherwise num = first - 1 would index from the end.
    if 1 <= first <= last:
        shared_session = requests.session()
        for i in range(first, last + 1):
            GetBiliVideo(url + '?p=' + str(i), i - 1, session=shared_session)
    else:
        print('選集不合法!')