爬蟲四 selenium + phantomjs & Headless Chrome

1、selenium操作谷歌瀏覽器

 1 from selenium import  webdriver
 2 import time
 3 
 4 '''模擬建立一個瀏覽器對象,經過對象操做瀏覽器'''
 5 browser = webdriver.Chrome()
 6 print(browser)
 7 
 8 # path =  r'‪D:\googlediver\chromedriver.exe'
 9 # browser = webdriver.Chrome(executable_path= path)
10 
11 url = 'https://www.baidu.com/'
12 browser.get(url)
13 time.sleep(3)
14 
15 '''查找輸入框'''
16 input = browser.find_element_by_id('kw')
17 '''輸入文字'''
18 input.send_keys('菊花')
19 '''查找搜索按鈕,並點擊'''
20 button = browser.find_element_by_id('su')
21 button.click()
22 time.sleep(3)
23 '''找到指定圖片點擊'''
24 img = browser.find_element_by_class_name('op-img-address-link-imgs')
25 img.click()
26 time.sleep(5)
27 
28 '''退出瀏覽器'''
29 # browser.quit()

2、selenium操作phantomjs

 1 from selenium import  webdriver
 2 import time
 3 
 4 '''建立瀏覽器對象,經過對象操做瀏覽器'''
 5 browser = webdriver.PhantomJS()
 6 time.sleep(3)
 7 
 8 '''打開百度'''
 9 # url = 'https://www.baidu.com/'
10 # browser.get(url)
11 # time.sleep(3)
12 
13 '''截圖'''
14 # browser.save_screenshot(r'image/baidu.png')
15 # time.sleep(2)
16 
17 '''查找輸入框'''
18 # input = browser.find_element_by_id('kw')
19 '''輸入文字'''
20 # input.send_keys('菊花')
21 # browser.save_screenshot(r'image/ju.png')
22 # time.sleep(2)
23 '''查找搜索按鈕,並點擊'''
24 # button = browser.find_element_by_id('su')
25 # button.click()
26 # time.sleep(3)
27 # browser.save_screenshot(r'image/hua.png')

3、phantomjs下拉滾動條

from selenium import webdriver
import os
import time

# Create a PhantomJS browser object; the page is driven through it.
browser = webdriver.PhantomJS()
try:
    time.sleep(3)

    # Make sure the output directory exists before anything is written into it.
    os.makedirs('image', exist_ok=True)

    url = 'https://dig.chouti.com/all/man/'
    browser.get(url)  # open the Chouti page
    time.sleep(3)
    browser.save_screenshot(r'image/chouti.png')  # screenshot before scrolling

    # Scroll down by assigning the body's scrollTop through injected JavaScript.
    js = 'document.body.scrollTop=10000'
    browser.execute_script(js)
    time.sleep(3)

    browser.save_screenshot(r'image/chouti2.png')  # screenshot after scrolling

    # Grab the current page source and save it to a file.
    html = browser.page_source
    with open(r'image/chouti.html', 'w', encoding='utf8') as fp:
        fp.write(html)
finally:
    # Always shut the PhantomJS process down, even if a step above failed.
    browser.quit()

4、例子-下拉式動態加載

from selenium import webdriver
import os
import time

# Create the PhantomJS browser object.
browser = webdriver.PhantomJS()
try:
    time.sleep(3)

    # Make sure the output directory exists before the page dumps are written.
    os.makedirs('html', exist_ok=True)

    # Architecture pictures listing page.
    url = 'http://sc.chinaz.com/tupian/tesejianzhutupian.html'

    # Open the page and save its source.
    browser.get(url)
    time.sleep(3)
    with open(r'html/jianzhu1.html', 'w', encoding='utf8') as fp:
        fp.write(browser.page_source)

    # Scroll down, then save the source a second time.
    js = 'document.body.scrollTop=10000'
    browser.execute_script(js)
    time.sleep(3)
    with open(r'html/jianzhu2.html', 'w', encoding='utf8') as fp:
        fp.write(browser.page_source)

    # Author's note: the page loads content dynamically, so the source saved
    # before scrolling and the source saved after scrolling are expected to differ.
finally:
    # Always shut the PhantomJS process down, even if a step above failed.
    browser.quit()

5、例子-點擊加載更多

from selenium import webdriver
import os
import time

# Create the PhantomJS browser object.
browser = webdriver.PhantomJS()
try:
    time.sleep(3)

    # Make sure the output directory exists before the page dumps are written.
    os.makedirs('html', exist_ok=True)

    # Douban "classic" movie listing.
    url = 'https://movie.douban.com/explore#!type=movie&tag=%E7%BB%8F%E5%85%B8&sort=recommend&page_limit=20&page_start=20'

    # Open the page and save its source.
    browser.get(url)
    time.sleep(3)
    with open(r'html/dianying1.html', 'w', encoding='utf8') as fp:
        fp.write(browser.page_source)

    # Click "load more", then save the source a second time.
    show_more = browser.find_element_by_class_name('more')  # the "load more" button
    show_more.click()
    time.sleep(3)
    with open(r'html/dianying2.html', 'w', encoding='utf8') as fp:
        fp.write(browser.page_source)
finally:
    # Always shut the PhantomJS process down, even if a step above failed.
    browser.quit()

6、headless chrome的使用

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import os
import time

# Build an options object so the browser starts in headless (no-UI) mode.
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')

# Location of the chromedriver binary.
# Raw string: the backslashes are path separators, not escape sequences
# (the non-raw form relied on the invalid escapes '\g' and '\c').
path = r'D:\googlediver\chromedriver.exe'

# Create the browser object.
# NOTE(review): executable_path= / chrome_options= are the keyword names the
# original listing uses; confirm they match the installed Selenium version.
browser = webdriver.Chrome(executable_path=path, chrome_options=chrome_options)
try:
    # Visit the page.
    url = 'http://www.baidu.com/'
    browser.get(url)
    time.sleep(3)

    # Make sure the output directory exists before saving the screenshot.
    os.makedirs('image', exist_ok=True)
    browser.save_screenshot('image/wu.png')
finally:
    # Quit even when a step above raised, so no headless Chrome is left running.
    browser.quit()
相關文章
相關標籤/搜索