1、訪問網站20次
import requests
from bs4 import BeautifulSoup

# Visit the site 20 times and print the <head> of each response.
# The request is issued inside the loop so that every iteration performs a
# real visit; fetching once before the loop would only re-print one response.
url = "http://www.baidu.com"  # address of the site to visit

for i in range(20):  # the loop bound controls the number of visits
    # A timeout keeps one stalled connection from hanging the whole script.
    r = requests.get(url, timeout=30)
    r.encoding = "utf-8"
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(r.text, "html.parser")
    print(soup.head)
    print("\n")
    print(i + 1)  # 1-based count of visits made so far
    print("\n")
通過 for 循環來控制訪問次數,效果如下:
2、爬蟲函數
import requests
from bs4 import BeautifulSoup

# Rows collected by fillunivlist(); each entry holds the cell contents of one
# table row, in column order.
alluniv = []


def getHTMLText(url):
    """Fetch *url* and return the response body decoded as UTF-8.

    Returns the string "error" when the request fails or the server answers
    with an HTTP error status.
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = 'utf-8'
        return r.text
    except requests.RequestException:
        # Only request failures are treated as "page unavailable"; anything
        # else (e.g. KeyboardInterrupt) is allowed to propagate.
        return "error"


def fillunivlist(soup):
    """Append the cells of every <tr> in *soup* to the global ``alluniv``.

    Rows that contain no <td> element are skipped.
    """
    for tr in soup.find_all('tr'):
        ltd = tr.find_all('td')
        if not ltd:
            continue
        # td.string is None when a cell has no single text child; the
        # printing code copes with that.
        alluniv.append([td.string for td in ltd])


def _cell(row, index):
    """Return row[index] for printing, or "" if the cell is missing or None."""
    if index >= len(row) or row[index] is None:
        return ""
    return row[index]


def printunivlist(num):
    """Print a header line followed by at most *num* rows from ``alluniv``.

    Fewer rows are printed when fewer were collected. Missing or empty cells
    are printed as blanks instead of raising.
    """
    row_format = "{:^4}{:^10}{:^5}{:^8}{:^10}"
    print(row_format.format("排名", "學校名字", "省份", "總分", "培養規模"))
    # Slicing (rather than indexing 0..num-1) tolerates a short or empty list,
    # which is what a failed download produces.
    for u in alluniv[:max(num, 0)]:
        # Columns 0-3 and 6 of each row feed the five printed fields.
        print(row_format.format(*(_cell(u, i) for i in (0, 1, 2, 3, 6))))


def main(num):
    """Download the ranking page and print its first *num* table rows."""
    url = "http://www.zuihaodaxue.cn/zuihaodaxuepaiming2019.html"
    html = getHTMLText(url)
    soup = BeautifulSoup(html, "html.parser")
    fillunivlist(soup)
    printunivlist(num)


if __name__ == "__main__":
    main(100)
效果如下: