功能描述:爬取東方財富網和百度股票的信息,並將信息存入文件中。
程序設計:
import re
import sys

import bs4
import requests
from bs4 import BeautifulSoup

# A stock code is an exchange prefix ("sh" or "sz") followed by six digits.
_STOCK_CODE_RE = re.compile(r'[s][hz]\d{6}')


def getHTMLText(url, timeout=30):
    """Download *url* and return its body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot block the whole crawl.

    Returns:
        The decoded page text, or "" if the request fails or the server
        replies with an HTTP error status.
    """
    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
        # Decode with the encoding detected from the content itself.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        return ""


def getStockList(stock_list_url, slt):
    """Collect stock codes from the listing page into *slt*.

    Every ``<a target="_blank">`` element is inspected; when its ``href``
    contains something matching ``[s][hz]\\d{6}``, the matched code is
    appended to *slt*. Links without a usable ``href`` or without a code are
    skipped.

    Args:
        stock_list_url: URL of the page that lists the stocks.
        slt: List that receives the codes (mutated in place).

    Returns:
        Always "" (kept for compatibility with existing callers).
    """
    demo = getHTMLText(stock_list_url)
    soup = BeautifulSoup(demo, "html.parser")
    for a in soup.find_all('a', attrs={'target': '_blank'}):
        if not isinstance(a, bs4.element.Tag):
            continue
        href = a.attrs.get("href")
        if not isinstance(href, str):
            continue
        match = _STOCK_CODE_RE.search(href)
        if match is not None:
            slt.append(match.group(0))
    return ""


def _parseStockInfo(html):
    """Extract one stock's name and dt/dd fields from *html*, or return None."""
    soup = BeautifulSoup(html, 'html.parser')
    name_tag = soup.find('a', class_='bets-name')
    info_div = soup.find('div', class_='bets-content')
    if name_tag is None or info_div is None:
        return None

    # .text joins all strings under the tag; the first whitespace-separated
    # token is taken as the stock name.
    name_parts = name_tag.text.split()
    if not name_parts:
        return None

    dt_list = info_div.find_all('dt')  # field names
    dd_list = info_div.find_all('dd')  # field values
    if len(dd_list) < len(dt_list):
        # A name without a matching value means the page is malformed.
        return None

    info_dict = {'股票名稱': name_parts[0]}
    for dt, dd in zip(dt_list, dd_list):
        info_dict[dt.string] = dd.string
    return info_dict


def getStockInfo(stock_info_url, slt, file_path):
    """Fetch the detail page of every stock in *slt* and append it to a file.

    For each code the page ``stock_info_url + code + '.html'`` is downloaded
    and parsed. Each successfully parsed record is appended to *file_path* as
    one line containing the ``str()`` of the record dict. Pages that cannot
    be fetched or parsed are skipped. A percentage progress indicator is
    printed after every stock, whether or not it produced a record.

    Args:
        stock_info_url: URL prefix of the per-stock detail pages.
        slt: List of stock codes to process.
        file_path: Path of the output file (opened in append mode, UTF-8).

    Returns:
        Always "" (kept for compatibility with existing callers).
    """
    total = len(slt)
    for count, stock in enumerate(slt, start=1):
        url = stock_info_url + stock + '.html'
        info_dict = _parseStockInfo(getHTMLText(url))
        if info_dict is not None:
            try:
                # Explicit UTF-8 so the Chinese text does not depend on the
                # platform's default encoding.
                with open(file_path, "a", encoding="utf-8") as f:
                    f.write(str(info_dict) + '\n')
            except OSError as err:
                # Keep crawling, but do not hide the failure.
                print('\nfailed to write {}: {}'.format(file_path, err),
                      file=sys.stderr)
        # Progress indicator, rewritten in place on the same console line.
        print('\r當前進度:{:.2f}%'.format(count * 100 / total), end='')
    return ""


def main():
    """Crawl the stock list, then store every stock's details in a file."""
    stock_list_url = 'http://quote.eastmoney.com/stock_list.html'  # Eastmoney
    stock_info_url = 'https://gupiao.baidu.com/stock/'  # Baidu Stocks
    slt = []  # collected stock codes
    file_path = "D://股票爬蟲.txt"
    getStockList(stock_list_url, slt)
    getStockInfo(stock_info_url, slt, file_path)


if __name__ == "__main__":
    main()
效果顯示: