爬取的內容不是那麼的多,但也夠用了,比葫蘆畫瓢就能畫出來自己想要的,nice
捋下思路:1. 進入頁面,用開發者工具抓取到自己需要的請求
2. 帶上請求頭,分析參數
3. 獲取自己的目標數據
4. 格式化json數據
5. 抓取自己需要的數據
6. 數據保存
# -*- coding: utf-8 -*-
# @Time    : 2019/1/7 10:58
# @Author  : zhangxinxin
# @Email   : 778786617@qq.com
# @Software: PyCharm
"""Scrape Bilibili's ``newlist`` web API and export selected fields to CSV."""
import csv
import json
import time


class BilBil(object):
    """Fetch pages from the Bilibili ``newlist`` endpoint and save them as CSV.

    Typical use is ``BilBil().run()``: each page is requested, the selected
    fields of every archive entry are accumulated in ``self.data_list`` and
    the accumulated rows are finally written to ``self.csv_path``.
    """

    # Keys copied from every archive entry, in CSV column order.
    FIELDS = ('aid', 'title', 'attribute', 'duration', 'pic', 'ctime')

    # Header row: ID, name, view count, comment count, thumbnail URL,
    # publish time.
    # NOTE(review): the 3rd and 4th labels say "view count" / "comment count"
    # but the columns are filled from 'attribute' and 'duration' -- confirm
    # against the API response which of the two (labels or keys) is intended.
    CSV_HEADER = ['番號', '名字', '觀看數', '評論數', '縮略圖地址', '發佈時間']

    # UTF-8 with BOM: independent of the platform locale and recognised by
    # spreadsheet applications, so non-ASCII titles cannot fail to encode.
    CSV_ENCODING = 'utf-8-sig'

    # Seconds to wait for the server before giving up on a request.
    REQUEST_TIMEOUT = 10

    # Message printed whenever a page could not be fetched or parsed.
    ERROR_MESSAGE = '錯誤,請從新請求!'

    def __init__(self, csv_path='BilBil.csv'):
        """Set up the request configuration and the empty result buffer.

        Args:
            csv_path: Destination of the CSV export written by ``save_csv``.
        """
        self.url = 'https://api.bilibili.com/x/web-interface/newlist'
        self.csv_path = csv_path
        # Raw response body of the most recent successful request.
        self.html = ''
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
            'Referer': 'https://www.bilibili.com/v/anime/serial/?spm_id_from=333.334.b_7072696d6172795f6d656e75.8',
            # Placeholder ("your cookie"): replace with a real cookie string.
            'Cookie': '您的cookie',
            'Host': 'api.bilibili.com',
        }
        # One list per archive entry, values ordered like FIELDS.
        self.data_list = []

    def get_html(self, params):
        """Request one page and store the response body in ``self.html``.

        Args:
            params: Query-string parameters sent with the GET request.

        Returns:
            True when a response body was stored, False when the request
            failed (the error is printed and ``self.html`` is left empty).
        """
        # Imported here so the parsing and CSV code can be imported without
        # the third-party HTTP dependency being installed.
        import requests

        # Reset first: a failed request must never leave the previous page's
        # body behind, otherwise it would be parsed a second time.
        self.html = ''
        try:
            response = requests.get(
                url=self.url,
                params=params,
                headers=self.headers,
                timeout=self.REQUEST_TIMEOUT,  # without it the call may hang
            )
        except Exception as e:  # best-effort boundary: report and move on
            print(e)
            print(self.ERROR_MESSAGE)
            return False
        self.html = response.text
        return True

    def parse_html(self):
        """Parse ``self.html`` as JSON and append its rows to ``self.data_list``.

        The rows are taken from ``data -> archives`` of the decoded document;
        for every entry the values of ``FIELDS`` are copied in order. A body
        that is empty, is not valid JSON or lacks the expected structure adds
        nothing: the error is printed and the page is skipped as a whole.

        Returns:
            True when the page was parsed, False when it was skipped.
        """
        try:
            payload = json.loads(self.html)
            rows = [
                [entry[key] for key in self.FIELDS]
                for entry in payload['data']['archives']
            ]
        except (ValueError, KeyError, TypeError) as e:
            print(e)
            print(self.ERROR_MESSAGE)
            return False
        self.data_list.extend(rows)
        print(self.data_list)
        return True

    def save_csv(self):
        """Write the header and every accumulated row to ``self.csv_path``.

        The file is overwritten on each call. It is fully flushed and closed
        when the ``with`` block exits, so no delay is needed afterwards.
        """
        with open(self.csv_path, 'w', newline='',
                  encoding=self.CSV_ENCODING) as f:
            writer = csv.writer(f)
            writer.writerow(self.CSV_HEADER)
            writer.writerows(self.data_list)

    def run(self, pages=10, delay=1):
        """Fetch ``pages`` pages, then export everything collected to CSV.

        Args:
            pages: Number of pages to request.
            delay: Seconds to sleep after each page.
        """
        try:
            for x in range(pages):
                # NOTE(review): 'pn' starts at 0 here while the progress
                # message counts from 1 -- confirm whether the API expects
                # 1-based page numbers.
                params = {
                    'rid': '33',
                    'type': '0',
                    'pn': x,
                    'jsonp': 'jsonp',
                    '_': '1546830252656'
                }
                ok = self.get_html(params) and self.parse_html()
                time.sleep(delay)
                if ok:
                    print('第{}頁數據爬取完畢'.format(x + 1))
        finally:
            # Written once at the end (instead of rewriting the whole file
            # after every page); 'finally' keeps the rows gathered so far
            # even if the loop is interrupted.
            self.save_csv()


if __name__ == '__main__':
    s = BilBil()
    s.run()