First, get the category link for each type of contest information:
import requests
from bs4 import BeautifulSoup

def get_type_url(url):
    web_data = requests.get(url)
    soup = BeautifulSoup(web_data.text, 'lxml')
    # each <a> under the left-hand menu is one contest category
    types = soup.select("#mn_P1_menu li a")
    for type_link in types:
        print(type_link.get_text())
        get_num(type_link.get("href"))
Then, get the total number of pages under each category link:
def get_num(url):
    web_data = requests.get(url)
    soup = BeautifulSoup(web_data.text, 'lxml')
    num = soup.select(".pg span")
    # some pages have no pagination and only one page, so handle the two cases separately
    if num:
        # the pagination label contains the total page count as its third space-separated field
        i = int(num[0].get_text().split(" ")[2])
        for w in range(1, i):
            print("第" + str(w) + "頁")
            urls = url + "index.php?page={}".format(w)
            get_message_url(urls)
    else:
        get_message_url(url)
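For reference, the `split(" ")[2]` indexing assumes the pagination label is a space-separated string whose third field is the total page count; the sample text below is a hypothetical illustration, not copied from the target site:

# hypothetical pagination label this parsing assumes, e.g. "1 / 27 頁"
label = "1 / 27 頁"
total_pages = int(label.split(" ")[2])   # -> 27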
Finally, get the details of each contest on every page:
def get_message_url(url):
    web_data = requests.get(url)
    soup = BeautifulSoup(web_data.text, 'lxml')
    titles = soup.select(".xld .xs2_tit a")
    views = soup.select("span.chakan")
    post_times = soup.select("div.list_info")
    # pair up the title, view-count and post-time nodes of each contest entry
    for title, view, post_time in zip(titles, views, post_times):
        data = {
            "標題": title.get_text(),                               # title
            "瀏覽量": view.get_text().strip(),                      # view count
            "發佈時間": post_time.get_text().strip().split(" ")[0],  # post date
            "連接": title.get("href")                               # link
        }
        print(data)
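A minimal driver sketch to tie the three steps together; the entry URL below is a placeholder for the site's contest list page, not the real address:

# usage sketch: start from the category menu page and walk every category and page
if __name__ == "__main__":
    start_url = "http://www.example.com/saishi/"   # hypothetical entry URL
    get_type_url(start_url)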