功能:爬取百度熱搜的實時排行榜信息
程序設計:
代碼:
# Scrape the real-time hot-search ranking page from Baidu.
# Approach: requests (fetch the page) + bs4 (parse the HTML).
import requests
from bs4 import BeautifulSoup


def getHTML(url, timeout=10):
    """Fetch *url* and return the decoded response body.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        The page text, decoded with the encoding detected from the content,
        or an empty string if the request fails or returns an HTTP error
        status.
    """
    try:
        r = requests.get(
            url,
            headers={'User-Agent': 'Mozilla/5.0'},
            timeout=timeout,
        )
        r.raise_for_status()
        # Trust the encoding sniffed from the body over the HTTP header.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Best-effort fetch: callers treat "" as "nothing downloaded".
        return ""


def parseHTML(demo, file_path):
    """Extract rank/title pairs from the page HTML and write them to a file.

    Each entry is written as the ``str()`` of a two-key dict on its own line.
    The output file is always (re)created, even when no entries are found.

    Args:
        demo: HTML text of the ranking page.
        file_path: Destination path of the text file to write.
    """
    soup = BeautifulSoup(demo, "html.parser")
    # Rank cells are <td class="first">; titles are <a class="list-title">.
    num_list = soup.find_all('td', 'first')
    title_list = soup.find_all('a', 'list-title')

    # Explicit UTF-8 so the Chinese keys and titles can be written regardless
    # of the platform's default locale encoding; `with` guarantees the file
    # is closed even if a write fails.
    with open(file_path, "w", encoding="utf-8") as f:
        # zip stops at the shorter list, so a rank without a matching title
        # (or vice versa) is skipped rather than raising IndexError.
        for num_cell, title_link in zip(num_list, title_list):
            rank_span = num_cell.find('span')
            if rank_span is None:
                # Rank cell without the expected <span>: skip this row.
                continue
            info_dict = {
                '排名': rank_span.string,
                '標題': title_link.string,
            }
            f.write(str(info_dict) + '\n')
    print("爬取完畢!")


def main():
    """Download the Baidu ranking page and save the parsed entries to disk."""
    url = 'http://top.baidu.com/buzz?b=1&fr=20811'
    file_path = "D://百度實時熱搜排行.txt"
    demo = getHTML(url)
    parseHTML(demo, file_path)


# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
結果: