selenium + 無頭瀏覽器

PhantomJS 是一款無頭瀏覽器，以前我們通過 selenium 操作 PhantomJS 來完成動態加載數據的加載，

如今 PhantomJS 已經停止更新，不過可以使用谷歌瀏覽器的無頭模式來代替 PhantomJS 完成上述操作。

使用谷歌無頭瀏覽器的示例代碼如下：

from time import sleep

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Configure Chrome to run headless (no visible window).
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')

# Pass the options via `options=`; the older `chrome_options=` keyword is
# deprecated and rejected by current Selenium releases.
bro = webdriver.Chrome(options=chrome_options)
try:
    bro.get('https://www.baidu.com')
    # Fixed pause to give the page time to finish rendering.
    sleep(3)
    print(bro.page_source)
    bro.save_screenshot('1.png')
finally:
    # Always end the session so no browser/driver process is left behind,
    # even if one of the steps above raises.
    bro.quit()

 

執行下拉滾動條操作

import os
import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Configure Chrome to run headless (no visible window).
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')

# The screenshots below are written into `baidu/`; create it up front so the
# saves do not fail just because the directory is missing.
os.makedirs('baidu', exist_ok=True)

# Pass the options via `options=`; the older `chrome_options=` keyword is
# deprecated and rejected by current Selenium releases.
bro = webdriver.Chrome(options=chrome_options)
try:
    bro.get(url='https://movie.douban.com/typerank?type_name=%E7%88%B1%E6%83%85&type=13&interval_id=100:90&action=')
    time.sleep(3)
    bro.save_screenshot('baidu/aiqing.png')

    # Have the browser run a small piece of JS that simulates scrolling
    # down to the bottom of the page.
    js = 'window.scrollBy(500,100000)'
    bro.execute_script(js)
    time.sleep(3)
    bro.save_screenshot('baidu/aiqing2.png')

    # Grab the page source after scrolling and save it to a file.
    html = bro.page_source
    with open('douban.html', 'w', encoding='utf8') as f:
        f.write(html)
finally:
    # Always end the session, even if a step above raises.
    bro.quit()

通過 selenium 加上下拉滾動條抓取懶加載圖片

import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Configure Chrome to run headless (no visible window).
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')

# Pass the options via `options=`; the older `chrome_options=` keyword is
# deprecated and rejected by current Selenium releases.
bro = webdriver.Chrome(options=chrome_options)
try:
    bro.get(url='http://sc.chinaz.com/tupian/ribenmeinv.html')
    time.sleep(2)

    # Snapshot of the page source before scrolling.
    with open('lanjiazai.html', 'w', encoding='utf8') as f:
        f.write(bro.page_source)

    # Scroll down, then wait before taking the second snapshot so the two
    # saved files can be compared.
    bro.execute_script('window.scrollBy(0,10000)')
    time.sleep(3)

    # Snapshot of the page source after scrolling.
    with open('lanjiazai2.html', 'w', encoding='utf8') as f:
        f.write(bro.page_source)

    time.sleep(1)
finally:
    # quit() ends the whole WebDriver session; close() would only close the
    # current window and could leave the driver process running.
    bro.quit()

QQ 空間登錄

from time import sleep

from selenium import webdriver
from selenium.webdriver.common.by import By

# NOTE(review): `executable_path=` is kept as in the original; recent
# Selenium releases expect a Service object instead - confirm against the
# installed Selenium version.
bro = webdriver.Chrome(executable_path='chromedriver.exe')
try:
    url = 'https://qzone.qq.com'
    bro.get(url)

    # The elements located below are looked up after switching into the
    # frame named 'login_frame'.
    bro.switch_to.frame('login_frame')

    # Click the 'switcher_plogin' element before filling in the fields
    # (presumably switches to account/password login - verify on the page).
    a_tag = bro.find_element(By.ID, 'switcher_plogin')
    a_tag.click()

    # find_element(By.ID, ...) replaces the find_element_by_id helpers,
    # which no longer exist in current Selenium releases.
    bro.find_element(By.ID, 'u').send_keys('328410948')
    bro.find_element(By.ID, 'p').send_keys('xxxxxx')
    bro.find_element(By.ID, 'login_button').click()
    sleep(2)

    # Source of the home page shown after a successful login.
    page_text = bro.page_source
finally:
    # Always end the session so the browser/driver is not left running.
    bro.quit()

規避監測

from selenium import webdriver

# Build the Chrome options with the 'enable-automation' switch excluded;
# per the original author's note, this is what evades automation detection.
option = webdriver.ChromeOptions()
option.add_experimental_option('excludeSwitches', ['enable-automation'])

# Start Chrome with those options and open the target site.
driver_kwargs = {
    'executable_path': 'chromedriver.exe',
    'options': option,
}
bro = webdriver.Chrome(**driver_kwargs)
bro.get('https://www.taobao.com/')
相關文章
相關標籤/搜索