思路:用 selenium 打開頁面,通過 xpath 獲取標題,然後點擊下一頁,如此循環。
from copy import copy
from time import sleep

from selenium import webdriver
from selenium.webdriver.common.by import By


def get_pro_titles(page, per_page=20):
    """Collect post titles from the cnblogs.com home page, page by page.

    Opens https://www.cnblogs.com in headless Chrome, reads the title link
    text of the first ``per_page`` entries under ``#post_list``, then follows
    the "Next >" pager link until ``page`` pages have been read.

    Args:
        page: Number of pages to read; anything accepted by ``int()``.
        per_page: Number of post entries read from each page (default 20,
            the value the original script hard-coded).

    Returns:
        dict mapping the key ``'第{n}頁'`` (1-based page number) to the list
        of title strings read from that page, in on-page order.

    Raises:
        ValueError / TypeError: if ``page`` cannot be converted to ``int``.
        selenium.common.exceptions.NoSuchElementException: if an expected
            title element or the "Next >" link is not present.
    """
    # Validate before starting Chrome so bad input cannot leak a browser.
    page_count = int(page)

    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument("--disable-gpu")
    options.add_argument("--window-size=1280,800")

    all_title = {}
    driver = webdriver.Chrome(options=options)
    try:
        driver.get('https://www.cnblogs.com')
        for page_no in range(1, page_count + 1):
            # One fixed wait per page for the list to render; the title
            # lookups below hit the already-loaded DOM and need no delay.
            sleep(2)
            all_title['第{}頁'.format(page_no)] = [
                driver.find_element(
                    By.XPATH,
                    '//*[@id="post_list"]/div[{}]/div[2]/h3/a'.format(idx),
                ).text
                for idx in range(1, per_page + 1)
            ]

            # Nothing left to read: do not navigate past the last page.
            if page_no == page_count:
                break

            # Scroll to the bottom so the pager link is in view, then click.
            driver.execute_script('document.documentElement.scrollTop=10000;')
            sleep(2)
            driver.find_element(
                By.XPATH,
                '//div[@id="pager_bottom"]/div/div/a[text()="Next >"]',
            ).click()
    finally:
        # Always shut the browser down, even when a lookup fails mid-run.
        driver.quit()
    return all_title


print(get_pro_titles(2))