Python 爬蟲:煎蛋網妹子圖

使用 Headless Chrome 替代了 PhantomJS。

圖片保存到指定文件夾中。

import os
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

 6 chrome_options = Options()  7 chrome_options.add_argument('--headless')  8 chrome_options.add_argument('--disable-gpu')  9 driver = webdriver.Chrome(chrome_options=chrome_options) 10 dir = 'C:/spider-download/jandan-girls/'
11 img_urls = [] 12 page_urls = ["http://jandan.net/ooxx/page-{}#comments".format(str(i)) for i in range(5, 6)] 13 
def GetImgUrl(u):
    """Collect the image links found on one listing page.

    Loads ``u`` in the shared browser ``driver``, parses the resulting page
    source and appends every non-GIF image URL to the module-level
    ``img_urls`` list.

    Args:
        u: URL of the page to scrape.

    Returns:
        None. Results are accumulated in ``img_urls``.
    """
    driver.get(u)
    soup = BeautifulSoup(driver.page_source, 'lxml')

    for link in soup.select('a.view_img_link'):
        href = link.get('href')
        if not href:
            # An anchor without a target has nothing to download; previously
            # this raised TypeError when concatenating 'http:' with None.
            continue

        # Resolve against the page URL: a protocol-relative href
        # ('//host/img.jpg') inherits the page's scheme, while an href that
        # already has a scheme is left untouched.
        img_url = urljoin(u, href)

        # Skip GIFs by file extension. A plain substring test would also drop
        # other images whose name merely contains the letters "gif".
        if urlsplit(img_url).path.lower().endswith('.gif'):
            continue

        img_urls.append(img_url)

def DownloadImg():
    """Download every URL in ``img_urls`` into the ``dir`` folder.

    Progress is printed per image. Each file is named after the last 20
    characters of the URL's final path segment. A response with an HTTP
    error status is reported and skipped instead of being saved as an image.

    Returns:
        None.

    Raises:
        requests.RequestException: if a request fails at the network level
            (connection error, timeout).
        OSError: if the target folder or a file cannot be written.
    """
    # The target folder may not exist yet on a fresh machine.
    os.makedirs(dir, exist_ok=True)

    for n, url in enumerate(img_urls, start=1):
        print(str(n) + ' 張 ... ', end='')

        # Fetch before opening the file so a failed request does not leave an
        # empty file behind; the timeout keeps a stalled server from hanging
        # the whole run.
        response = requests.get(url, timeout=30)
        if not response.ok:
            print('FAILED (HTTP ' + str(response.status_code) + ')')
            continue

        # Take the last path segment first so a short file name can never
        # drag a '/' into the local file name, then keep at most 20 chars.
        filename = url.rsplit('/', 1)[-1][-20:]
        with open(os.path.join(dir, filename), 'wb') as f:
            f.write(response.content)
        print('OK!')
35 
# Script entry: scrape every listing page, then download the collected images.
try:
    for u in page_urls:
        GetImgUrl(u)
finally:
    # The browser is only needed while scraping; shut it down even if a page
    # fails so no headless Chrome process is left running.
    driver.quit()

print('*** 開始下載 ***')
DownloadImg()
print('*** 下載完成 ***')
相關文章
相關標籤/搜索
本站公眾號
   歡迎關注本站公眾號,獲取更多信息