python 下載bilibili視頻

說明:

1.清晰度的選擇需要登錄,暫時還沒做,目前下載的視頻清晰度都是默認的480P

2.進度條仿linux的,參考了一些博客修改了下,侵刪

3.其餘評論,彈幕之類的相關爬蟲代碼放在了 https://github.com/teleJa/bilibili

4.判斷sys.argv那個地方是由於一些爬蟲調用了該文件,如果覺得不方便,直接傳遞視頻番號進去就可以了

下載過程如圖

直接上代碼:

import json
import math
import os
import re
import sys

import requests
from lxml import etree
 10 class BLDSplider:  11     regex_cid = re.compile("\"cid\":(.{8})")  12 
 13     def __init__(self, aid):  14         self.aid = aid  15 
 16         self.origin_url = "https://www.bilibili.com/video/av{}?from=search&seid=9346373599622336536".format(aid)  17         self.headers = {  18             "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",  19  }  20 
 21         self.url = "https://api.bilibili.com/x/player/playurl?avid={}&cid={}&qn=0&type=&otype=json"
 22 
 23     def check_dir(self, author_name):  24         # 檢查目錄
 25         self.parent_path = "e:/bilibili/" + author_name + "/" + str(self.aid) + "/"
 26         if not os.path.exists(self.parent_path):  27  os.makedirs(self.parent_path)  28 
 29         self.video_name = self.parent_path + str(self.aid) + ".mp4"
 30 
 31     def parse_url(self, item):  32         cid = item["cid"]  33         print("aid:%s cid:%s" % (str(self.aid), cid))  34         title = item["title"]  35         print("title:%s" % title)  36 
 37         self.headers["Referer"] = self.origin_url  38         # 視頻
 39         response = requests.get(self.url.format(self.aid, cid), headers=self.headers)  40         if response.status_code == 200:  41             result = json.loads(response.content.decode())  42             durl = result["data"]["durl"][0]  43             video_url = durl["url"]  44             print("video_url:%s" % video_url)  45             # 視頻大小
 46             size = durl["size"]  47             print("size:%s,約%2.2fMB" % (size, size / (1024 * 1024)))  48             video_response = requests.get(video_url, headers=self.headers, stream=True)  49             if video_response.status_code == 200:  50                 with open(self.video_name, "wb") as file:  51                     buffer = 1024
 52                     count = 0  53                     while True:  54                         if count + buffer <= size:  55  file.write(video_response.raw.read(buffer))  56                             count += buffer  57                         else:  58                             file.write(video_response.raw.read(size % buffer))  59                             count += size % buffer  60                         file_size = os.path.getsize(self.video_name)  61                         # print("\r下載進度 %.2f %%" % (count * 100 / size), end="")
 62 
 63                         width = 50
 64                         percent = (count / size)  65                         use_num = int(percent * width)  66                         space_num = int(width - use_num)  67                         percent = percent * 100
 68                         print('\r進度:[%s%s] %d%%' % (use_num * '#', space_num * ' ', percent), file=sys.stdout,  69                               flush=True, end="")  70                         if size == count:  71                             break
 72                 print("\r\n")  73 
 74     # 獲取視頻相關參數
 75     def get_video_info(self):  76         response = requests.get(self.origin_url, headers=self.headers)  77         item = dict()  78         if response.status_code == 200:  79             # author
 80             html_element = etree.HTML(response.content.decode())  81             author = dict()  82             author_name = html_element.xpath(  83                 "/html/body/div[@id='app']/div[@class='v-wrap']/div[@class='r-con']/div[@id='v_upinfo']//a[@report-id='name']/text()")[  84  0]  85             # 一般是微博,微信公衆號等聯繫方式
 86             author_others = html_element.xpath(  87                 "/html/body/div[@id='app']/div[@class='v-wrap']/div[@class='r-con']/div[@id='v_upinfo']//div[@class='desc']/@title")[  88  0]  89             author["name"] = author_name  90             author["others"] = author_others  91             item["author"] = author  92 
 93             # cid
 94             cid = BLDSplider.regex_cid.findall(response.content.decode())[0]  95             item["cid"] = cid  96             info_url = "https://api.bilibili.com/x/web-interface/view?aid={}&cid={}".format(self.aid, cid)  97             info_response = requests.get(info_url, headers=self.headers)  98             if info_response.status_code == 200:  99                 data = json.loads(info_response.content.decode())["data"] 100                 # 視頻簡介
101                 desc = data["desc"] 102                 item["desc"] = desc 103 
104                 # title
105                 title = data["title"] 106                 item["title"] = title 107 
108                 stat = data["stat"] 109                 # 播放量
110                 view = stat["view"] 111                 item["view"] = view 112 
113                 # 彈幕
114                 danmaku = stat["danmaku"] 115                 item["danmaku"] = danmaku 116 
117                 # 評論
118                 reply = stat["reply"] 119                 item["reply"] = reply 120 
121                 # 硬幣
122                 coin = stat["coin"] 123                 item["coin"] = coin 124 
125                 # 點贊
126                 like = stat["like"] 127                 item["like"] = like 128 
129                 # 收藏
130                 favorite = stat["favorite"] 131                 item["favorite"] = favorite 132 
133                 # 分享
134                 share = stat["share"] 135                 item["share"] = share 136             self.check_dir(item["author"]["name"]) 137             # 視頻參數
138             with open(self.parent_path + "video_info.txt", "w") as file: 139                 file.write(json.dumps(item, ensure_ascii=False, indent=2)) 140             return item 141 
142     def run(self): 143         item = self.get_video_info() 144  self.parse_url(item) 145 
146 
def main():
    """Download the video whose av number is given as the first CLI argument.

    Falls back to a built-in sample av number when no non-empty argument
    is supplied, so the file also works when run without arguments.
    """
    # Sample av number used as the default.
    aid = 55036734
    if len(sys.argv) >= 2 and sys.argv[1]:
        aid = sys.argv[1]
    splider = BLDSplider(aid)
    splider.run()


if __name__ == '__main__':
    main()

相關文章
相關標籤/搜索