# default_exp getdata # 上面一行用於nbdev中聲明本模塊的名稱。必須是notebook的第一個Cell的第一行。
#hide from nbdev.showdoc import *
#export
# The wildcard import stays ahead of the explicit imports so that none of the
# names it brings in can shadow them.
from parser import *  # regex_parser

import csv
import datetime
import json
import logging
import os
import pprint
import re
import time

import requests
from bs4 import BeautifulSoup
# export
# Data source: the DXY (Ding Xiang Yuan) epidemic page.
url = "https://ncov.dxy.cn/ncovh5/view/pneumonia?from=singlemessage&isappinstalled=0"

# Browser-like User-Agent header sent with every request.
headers = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36'
}

# Date stamp embedded in the names of the saved files.
dateof = '20200207'
#export
def getweb(timeout=30):
    """Download the DXY page and return the area statistics embedded in it.

    The page carries its data as a JSON array inside the
    ``<script id="getAreaStat">`` element; that array is extracted with a
    regular expression and decoded.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The decoded JSON array (a list of per-province records).

    Raises:
        requests.RequestException: On connection problems, timeouts or a
            non-2xx HTTP status.
        ValueError: If the page has no ``getAreaStat`` script, the script
            holds no JSON array, or the array is not valid JSON.
    """
    # The context manager releases the pooled connections when done.
    with requests.session() as session:
        session.headers.update(headers)
        r = session.get(url, timeout=timeout)
        # Fail here on an error page instead of later while parsing it.
        r.raise_for_status()

    soup = BeautifulSoup(r.content, 'lxml')
    # Locate the province/city data.
    script = soup.find('script', attrs={'id': 'getAreaStat'})
    if script is None:
        raise ValueError("script#getAreaStat not found in " + url)

    # Greedy match: from the first '[' to the last ']' of the script element.
    area_information = re.search(r'\[(.*)\]', str(script))
    if area_information is None:
        raise ValueError("no JSON array inside script#getAreaStat")

    area = json.loads(area_information.group(0))
    return area
# export
# Write the per-province data to a file.
def saveprovice(area, date=None, outdir="data"):
    """Write one CSV row per province to ``<outdir>/prov_<date>.csv``.

    Args:
        area: Iterable of province records. Each record must provide the
            keys ``provinceName``, ``confirmedCount``, ``suspectedCount``,
            ``curedCount`` and ``deadCount``.
        date: Date stamp used in the file name. Defaults to the
            module-level ``dateof``.
        outdir: Directory that receives the file; created when missing.

    Raises:
        KeyError: If a record lacks one of the required keys.
        OSError: If the directory or file cannot be created.
    """
    if date is None:
        date = dateof
    os.makedirs(outdir, exist_ok=True)
    fprovince = outdir + "/" + "prov_" + date + ".csv"

    # utf-8 is explicit so the Chinese text does not depend on the platform
    # default encoding; newline="" leaves line endings to the csv writer.
    with open(fprovince, "w", encoding="utf-8", newline="") as fp:
        # NOTE(review): rows end with a bare "\r", exactly as before, so
        # existing readers of these files keep working - confirm whether
        # "\n" was actually intended.
        writer = csv.writer(fp, lineterminator="\r")
        writer.writerow(["省份", "確診", "疑似", "治癒", "死亡"])
        for a in area:
            writer.writerow([
                a['provinceName'],
                str(a['confirmedCount']),
                str(a['suspectedCount']),
                str(a['curedCount']),
                str(a['deadCount']),
            ])
    print("writed to " + fprovince + "\r\n")
# export
# Write the per-city data to a file.
def savecity(area, date=None, outdir="data"):
    """Write one CSV row per city to ``<outdir>/city_<date>.csv``.

    Args:
        area: Iterable of province records. Each record must provide
            ``provinceName`` and ``cities``; every entry of ``cities`` must
            provide ``cityName``, ``confirmedCount``, ``suspectedCount``,
            ``curedCount`` and ``deadCount``.
        date: Date stamp used in the file name. Defaults to the
            module-level ``dateof``.
        outdir: Directory that receives the file; created when missing.

    Raises:
        KeyError: If a record lacks one of the required keys.
        OSError: If the directory or file cannot be created.
    """
    if date is None:
        date = dateof
    os.makedirs(outdir, exist_ok=True)
    fcity = outdir + "/" + "city_" + date + ".csv"

    # utf-8 is explicit so the Chinese text does not depend on the platform
    # default encoding; newline="" leaves line endings to the csv writer.
    with open(fcity, "w", encoding="utf-8", newline="") as fc:
        # NOTE(review): rows end with a bare "\r", exactly as before, so
        # existing readers of these files keep working - confirm whether
        # "\n" was actually intended.
        writer = csv.writer(fc, lineterminator="\r")
        writer.writerow(["省份", "城市", "確診", "疑似", "治癒", "死亡"])
        for p in area:
            for c in p['cities']:
                writer.writerow([
                    p['provinceName'],
                    c['cityName'],
                    str(c['confirmedCount']),
                    str(c['suspectedCount']),
                    str(c['curedCount']),
                    str(c['deadCount']),
                ])
    print("writed to " + fcity + "\r\n")
這裏用到 Notebook 的魔法操作符(以 `!` 開頭執行 shell 命令),參考 IPython 官方文檔。
!ls -l data
總用量 176 -rw-r--r-- 1 supermap supermap 1445 2月 9 22:49 china.csv -rw-r--r-- 1 supermap supermap 11840 2月 7 23:02 city_20200207.csv -rw-r--r-- 1 supermap supermap 12156 2月 8 18:39 city_20200208.csv -rw-r--r-- 1 supermap supermap 12169 2月 10 18:20 city_20200209.csv -rw-r--r-- 1 supermap supermap 126285 2月 9 15:09 IMG_7082.JPG -rw-r--r-- 1 supermap supermap 780 2月 7 23:02 prov_20200207.csv -rw-r--r-- 1 supermap supermap 784 2月 8 18:39 prov_20200208.csv -rw-r--r-- 1 supermap supermap 790 2月 10 18:20 prov_20200209.csv
# Convert the notebooks into Python *.py code, saved in the sub-directory
# named after the project.
from nbdev.export import *
notebook2script()
Converted 00_digdata.ipynb. Converted 01_getdata.ipynb. Converted 10_charts.ipynb. Converted 10_china.ipynb. Converted index.ipynb.