給定一篇新聞的鏈接 newsUrl,獲取該新聞的所有信息:標題、作者、發佈單位、審核、來源,並將發佈時間轉換成 datetime 類型,整個過程包裝成一個簡單清晰的函數。
import re
from datetime import datetime

import requests
from bs4 import BeautifulSoup

# News article to scrape.
url = 'http://news.gzcc.cn/html/2019/xiaoyuanxinwen_0329/11095.html'
# Endpoint that reports the click count.
# NOTE(review): the id in this URL (11086) differs from the article id in
# `url` (11095) -- confirm which article the count is meant for.
click_url = 'http://oa.gzcc.cn/api.php?op=count&id=11086&modelid=80'

# Seconds to wait for a server response; without a timeout requests.get()
# can block indefinitely.
REQUEST_TIMEOUT = 10


def news_time(soup):
    """Print and return the publication time from the ``.show-info`` element.

    The timestamp is located by pattern rather than by fixed character
    offsets, so leading whitespace or a differently sized label in front of
    it does not break parsing.

    Args:
        soup: Parsed page exposing ``select()`` (e.g. a ``BeautifulSoup``).

    Returns:
        The publication time as a naive ``datetime``.

    Raises:
        IndexError: If the page has no ``.show-info`` element.
        ValueError: If no ``YYYY-MM-DD HH:MM:SS`` timestamp is present.
    """
    info = soup.select('.show-info')[0].text
    match = re.search(r'\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}', info)
    if match is None:
        raise ValueError('no publication timestamp found in .show-info')
    published = datetime.strptime(match.group(), '%Y-%m-%d %H:%M:%S')
    print(published)
    return published


def new_info(soup):
    """Print and return the author, reviewer and source fields.

    The ``.show-info`` text is split on whitespace; the date and the time
    occupy fields 0 and 1, so the three fields of interest are 2, 3 and 4.
    Each field is returned exactly as it appears on the page.

    Args:
        soup: Parsed page exposing ``select()`` (e.g. a ``BeautifulSoup``).

    Returns:
        A ``(author, check, source)`` tuple of strings.

    Raises:
        IndexError: If ``.show-info`` is missing or has fewer than five
            whitespace-separated fields.
    """
    fields = soup.select('.show-info')[0].text.split()
    author, check, source = fields[2], fields[3], fields[4]
    print(author)
    print(check)
    print(source)
    return author, check, source


def click_count(click_url):
    """Fetch the click counter, print it and return it.

    The code takes the number inside the last ``.html('<n>')`` call of the
    response body as the click count.

    Args:
        click_url: URL of the click-count endpoint.

    Returns:
        The click count as an ``int``.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
        ValueError: If the response contains no ``.html('<n>')`` call.
    """
    response = requests.get(click_url, timeout=REQUEST_TIMEOUT)
    hits = re.findall(r"\.html\('(\d+)'\)", response.text)
    if not hits:
        raise ValueError('no click count found in response')
    count = int(hits[-1])
    print("點擊次數:{}".format(count))
    return count


def news_content(soup):
    """Print and return the article body text from ``.show-content``.

    Args:
        soup: Parsed page exposing ``select()`` (e.g. a ``BeautifulSoup``).

    Returns:
        The text of the first ``.show-content`` element.

    Raises:
        IndexError: If the page has no ``.show-content`` element.
    """
    detail = soup.select('.show-content')[0].text
    print(detail)
    return detail


if __name__ == '__main__':
    # Fetch and parse the article only when run as a script, so importing
    # this module performs no network access.
    res = requests.get(url, timeout=REQUEST_TIMEOUT)
    res.encoding = 'utf-8'
    soup = BeautifulSoup(res.text, 'html.parser')

    new_info(soup)
    news_time(soup)
    click_count(click_url)
    news_content(soup)