說明:
1. 選擇清晰度需要登錄,這部分暫時還沒做,目前下載的視頻清晰度都是默認的 480P。
2. 進度條仿照 Linux 的樣式,參考了一些博客後修改而成,如有侵權請聯繫刪除。
3. 其餘評論、彈幕之類的相關爬蟲代碼放在了 https://github.com/teleJa/bilibili
4. 判斷 sys.argv 的那段代碼是因為有其他爬蟲會調用該文件;如果覺得不方便,直接把視頻的 av 號傳進去就可以了。
下載過程如圖:
直接上代碼:
1 import requests 2 import re 3 import os 4 import json 5 import sys 6 import math 7 from lxml import etree 8
9
class BLDSplider:
    """Download one bilibili video (default quality) together with its metadata.

    Typical use is ``BLDSplider(aid).run()``: the video page and the
    ``web-interface/view`` API are queried for metadata, the metadata is
    written to ``video_info.txt`` and the first media segment returned by the
    ``playurl`` API is saved as ``<aid>.mp4`` in the same directory.
    """

    # The cid is a variable-length integer, so match digits rather than a
    # fixed number of arbitrary characters.
    regex_cid = re.compile(r'"cid":(\d+)')

    # Characters that are not allowed inside a Windows file or directory name.
    _unsafe_path_chars = re.compile(r'[\\/:*?"<>|]')

    # Number of bytes requested from the media stream per iteration.
    chunk_size = 64 * 1024

    def __init__(self, aid, base_dir="e:/bilibili/"):
        """Prepare the URLs and request headers for one video.

        Args:
            aid: The video's av number (int or numeric string).
            base_dir: Root directory under which ``<author>/<aid>/`` is
                created. Defaults to the historical ``e:/bilibili/``.
        """
        self.aid = aid
        self.base_dir = base_dir

        self.origin_url = "https://www.bilibili.com/video/av{}?from=search&seid=9346373599622336536".format(aid)
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
        }

        # Template filled with (aid, cid) in parse_url.
        self.url = "https://api.bilibili.com/x/player/playurl?avid={}&cid={}&qn=0&type=&otype=json"

    @staticmethod
    def _first(values, default):
        """Return the first element of *values*, or *default* when it is empty."""
        return values[0] if values else default

    @staticmethod
    def _progress_bar(count, size, width=50):
        """Render the one-line progress bar for *count* of *size* bytes.

        A non-positive *size* is rendered as complete instead of dividing by
        zero, and the ratio is capped at 100% so the bar never overflows.
        """
        fraction = min(count / size, 1.0) if size > 0 else 1.0
        use_num = int(fraction * width)
        space_num = width - use_num
        return '\r進度:[%s%s] %d%%' % (use_num * '#', space_num * ' ', fraction * 100)

    def check_dir(self, author_name):
        """Create ``<base_dir>/<author>/<aid>/`` and set the output paths.

        Sets ``self.parent_path`` (always ending in ``/``) and
        ``self.video_name``. Characters that cannot appear in a file name are
        replaced with ``_`` so an author name such as ``a/b`` does not create
        an unintended nested directory.
        """
        safe_author = self._unsafe_path_chars.sub("_", str(author_name)).strip() or "unknown"
        root = self.base_dir.rstrip("/\\") + "/" if self.base_dir else ""
        self.parent_path = root + safe_author + "/" + str(self.aid) + "/"
        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(self.parent_path, exist_ok=True)

        self.video_name = self.parent_path + str(self.aid) + ".mp4"

    def _download(self, video_url, size):
        """Stream *video_url* into ``self.video_name`` while drawing progress.

        Progress is based on the bytes actually received; *size* (the length
        reported by the playurl API) is only used to scale the bar and to
        detect a short download.
        """
        video_response = requests.get(video_url, headers=self.headers, stream=True)
        try:
            if video_response.status_code != 200:
                return
            received = 0
            with open(self.video_name, "wb") as file:
                for chunk in video_response.iter_content(chunk_size=self.chunk_size):
                    if not chunk:
                        continue
                    file.write(chunk)
                    received += len(chunk)
                    print(self._progress_bar(received, size), file=sys.stdout,
                          flush=True, end="")
            print("\r\n")
            if size and received != size:
                print("warning: expected %s bytes but received %s" % (size, received),
                      file=sys.stderr)
        finally:
            # A streamed response keeps its connection until it is closed.
            video_response.close()

    def parse_url(self, item):
        """Resolve the media URL for ``item["cid"]`` and download it.

        Args:
            item: Metadata dict as returned by get_video_info(); ``cid`` is
                required, ``title`` is only printed.

        NOTE: only the first entry of ``durl`` is downloaded.
        """
        cid = item["cid"]
        print("aid:%s cid:%s" % (str(self.aid), cid))
        title = item.get("title", "")
        print("title:%s" % title)

        self.headers["Referer"] = self.origin_url
        # Video
        response = requests.get(self.url.format(self.aid, cid), headers=self.headers)
        if response.status_code != 200:
            return
        result = json.loads(response.content.decode())
        durl = result["data"]["durl"][0]
        video_url = durl["url"]
        print("video_url:%s" % video_url)
        # Video size in bytes as reported by the API
        size = durl["size"]
        print("size:%s,約%2.2fMB" % (size, size / (1024 * 1024)))
        self._download(video_url, size)

    # Collect the video's metadata
    def get_video_info(self):
        """Scrape the video page and the view API into a metadata dict.

        Returns:
            dict: ``author`` and ``cid`` from the page plus, when the view API
            answers, ``desc``, ``title`` and the counters ``view``,
            ``danmaku``, ``reply``, ``coin``, ``like``, ``favorite`` and
            ``share``. The dict is empty when the page request fails and has
            no ``cid`` key when no cid is found in the page.

        Side effects: when a cid is found, creates the output directory via
        check_dir() and writes the dict to ``video_info.txt`` as UTF-8 JSON.
        """
        response = requests.get(self.origin_url, headers=self.headers)
        item = dict()
        if response.status_code != 200:
            return item

        page = response.content.decode()
        html_element = etree.HTML(page)
        up_info = "/html/body/div[@id='app']/div[@class='v-wrap']/div[@class='r-con']/div[@id='v_upinfo']"

        # author
        author = dict()
        author["name"] = self._first(
            html_element.xpath(up_info + "//a[@report-id='name']/text()"), "unknown")
        # Usually contact details such as a Weibo or WeChat account
        author["others"] = self._first(
            html_element.xpath(up_info + "//div[@class='desc']/@title"), "")
        item["author"] = author

        # cid
        cid = self._first(BLDSplider.regex_cid.findall(page), None)
        if cid is None:
            return item
        item["cid"] = cid

        info_url = "https://api.bilibili.com/x/web-interface/view?aid={}&cid={}".format(self.aid, cid)
        info_response = requests.get(info_url, headers=self.headers)
        if info_response.status_code == 200:
            data = json.loads(info_response.content.decode()).get("data")
            if data:
                # Description and title
                item["desc"] = data["desc"]
                item["title"] = data["title"]
                # Counters: views, danmaku, replies, coins, likes,
                # favorites, shares
                stat = data["stat"]
                for key in ("view", "danmaku", "reply", "coin", "like", "favorite", "share"):
                    item[key] = stat[key]

        self.check_dir(item["author"]["name"])
        # Video metadata; explicit UTF-8 because ensure_ascii=False emits
        # raw non-ASCII text that the platform default codec may not encode.
        with open(self.parent_path + "video_info.txt", "w", encoding="utf-8") as file:
            file.write(json.dumps(item, ensure_ascii=False, indent=2))
        return item

    def run(self):
        """Fetch the metadata and, if a cid was found, download the video."""
        item = self.get_video_info()
        if "cid" not in item:
            print("aid:%s video info unavailable, skip download" % self.aid,
                  file=sys.stderr)
            return
        self.parse_url(item)
146
def main():
    """Download the video named on the command line, or the sample video.

    The first command-line argument, when present and non-empty, is used as
    the av number; otherwise the built-in sample id is used.
    """
    # 55036734
    cli_args = sys.argv[1:]
    aid = cli_args[0] if cli_args and cli_args[0] else 55036734
    splider = BLDSplider(aid)
    splider.run()


if __name__ == '__main__':
    main()