自動化模塊selenium使用概括

時間 2019-11-09

原文鏈接

安裝

pip3 install selenium

測試是否成功

from selenium import webdriver

browser=webdriver.Chrome()

#若是能彈出一個空白的chrome瀏覽器頁面，說明配置成功

瀏覽器對象獲取

from selenium import webdriver

#browser=webdriver.Firefox()
browser=webdriver.Chrome()
#browser=webdriver.Edge()
#browser=webdriver.Safari()

print(type(browser))

#返回的是一個WebDriver對象
<class 'selenium.webdriver.chrome.webdriver.WebDriver'>

WebDriver對象的方法和屬性：

add_cookie(cookie_dict)：　　爲當前會話添加一個cookie，爲字典類型
back()：在瀏覽器歷史記錄中後退一步
forward()：在瀏覽器歷史上前進一步
close()：關閉當前窗口
create_web_element(element_id)：使用指定的id建立Web元素
delete_all_cookies()：刪除會話範圍內的全部cookie
delete_cookie(name)：刪除具備給定名稱的單個cookie
execute(driver_command,params=None)：發送command執行的命令
execute_async_script(script,*args)：異步執行當前窗口或框架中的JavaScript
execute_script(script,*args)：同步執行當前窗口或框架中的JavaScript

from selenium import webdriver
driver=webdriver.Chrome()
driver.get('https://www.baidu.com')
driver.execute_script("alert('are you sure');")

#它基本能夠實現JavaScript的全部功能 PS：可是沒有測出來如何獲取js執行結果（補充：在腳本中使用return即可取得返回值，例如 driver.execute_script('return document.title;')）

fullscreen_window()：調用窗口管理器特定的全屏操做
get(url)：在當前瀏覽器會話中加載網頁
get_cookie(name)：按名稱獲取單個cookie
get_cookies()：返回一組字典的cookies
get_log(log_type)：獲取給定日誌類型的日誌
get_screenshot_as_base64()：獲取當前窗口的屏幕截圖，做爲base64編碼的字符串
get_screenshot_as_file(filename)：將當前窗口中的截屏保存爲png圖形
get_screenshot_as_png()：獲取當前窗口的屏幕截圖做爲二進制數據
get_window_position(windowhandle='current')：獲取當前窗口的x,y位置
get_window_rect()：獲取窗口的x,y座標以及當前窗口的高度和寬度
get_window_size()：獲取當前窗口的高度和寬度
maximize_window()：最大化webdriver正在使用的當前窗口
minimize_window()：最小化webdriver正在使用的當前窗口
quit()：退出驅動程序並關閉每一個關聯的窗口
refresh()：刷新當前頁面
save_screenshot(filename)：將當前窗口的屏幕截圖保存爲PNG圖形文件
set_page_load_timeout(time_to_wait)：設置等待頁面加載完成的時間
set_script_timeout(time_to_wait)：設置腳本在執行期間等待的時間
set_window_position(x,y,windowHandle='current')：設置當前窗口的x,y位置
set_window_rect(x=None,y=None,width=None,height=None)：設置窗口的x,y座標以及當前窗口的高度和寬度
set_window_size(width,height,windowHandle='current')：設置當前窗口的高度和寬度
current_url：獲取當前頁面的URL
current_window_handle：返回當前窗口的句柄
desired_capabilities：返回驅動程序當前使用的所需功能
log_types：獲取可用日誌類型的列表
name：返回此實例的基礎瀏覽器的名稱
page_source：獲取當前頁面的源碼

switch_to：返回一個可將焦點切換到各類對象（frame、window、alert等）的對象，例如 driver.switch_to.alert
title：返回當前頁面的標題
window_handles：返回當前會話中全部窗口的句柄

from selenium import webdriver

browser=webdriver.Chrome()
browser.get('http://selenium-python.readthedocs.io')
browser.execute_script('window.open("https://www.baidu.com");')  #在標籤頁打開URL
browser.execute_script('window.open("https://www.taobao.com");')

browser.back()  #後退到前一個頁面
browser.set_page_load_timeout(5)
browser.forward()  #前進到下一個頁面
print(browser.name)
print(browser.title)
print(browser.current_url)
print(browser.current_window_handle)
print(browser.get_cookies())
print(type(browser))

#
chrome
Selenium with Python — Selenium Python Bindings 2 documentation
http://selenium-python.readthedocs.io/
CDwindow-243FD31239F20FCC0195DD522A60A0DA
[{'domain': '.readthedocs.io', 'expiry': 1530766561, 'httpOnly': False, 'name': '_gid', 'path': '/', 'secure': False, 'value': 'GA1.2.1126774326.1530680157'}, {'domain': '.readthedocs.io', 'expiry': 1593752161, 'httpOnly': False, 'name': '_ga', 'path': '/', 'secure': False, 'value': 'GA1.2.2096958532.1530680157'}, {'domain': '.readthedocs.io', 'expiry': 1530680217, 'httpOnly': False, 'name': '_gat_rtfd', 'path': '/', 'secure': False, 'value': '1'}]
<class 'selenium.webdriver.chrome.webdriver.WebDriver'>

頁面截圖

from selenium import webdriver

driver=webdriver.Chrome()
driver.get('http://www.python.org')
driver.save_screenshot('screenshot.png')  #保持頁面截圖到當前路徑
driver.quit()

將頁面滾動到底部：

from selenium import webdriver
driver=webdriver.Chrome()
driver.get('http://www.python.org')
#經過DOM中的window對象的scrollTo方法，將窗口位置滾動到指定位置，document.body.scrollHeight返回整個body的高度，因此頁面將滾動到頁面底部
driver.execute_script("window.scrollTo(0,document.body.scrollHeight);")

cookies操作：（PS：這段沒有測試）

from selenium import webdriver
driver=webdriver.Chrome()
driver.get('https://www.baidu.com')
print(driver.get_cookies())   #獲取全部cookies
driver.add_cookie({'name':'name','domain':'www.baidu.com','value':'germey'})   #添加cookie
print(driver.get_cookies())
driver.delete_all_cookies()
print(driver.get_cookies())

元素定位

class selenium.webdriver.common.by.By
有各種策略來定位頁面中的元素，你可以使用最適合你的情況的那一種。Selenium提供了以下方法來定位頁面中的元素：

find_element_by_id
find_element_by_name
find_element_by_xpath
find_element_by_link_text
find_element_by_partial_link_text
find_element_by_tag_name
find_element_by_class_name
find_element_by_css_selector
要查找多個元素（這些方法將返回一個列表）：

find_elements_by_name
find_elements_by_xpath
find_elements_by_link_text
find_elements_by_partial_link_text
find_elements_by_tag_name
find_elements_by_class_name
find_elements_by_css_selector
除了上面給出的公共方法以外，還有兩個私有方法可能對頁面對象中的定位器有用。這些是兩個私有方法：find_element和find_elements

（PS：這段也沒有測試，之後用到再測試）

from selenium import webdriver
from selenium.webdriver.common.by import By
driver=webdriver.Chrome()
driver.get('http://selenium-python.readthedocs.io/locating-elements.html#locating-elements')
data=driver.find_element(By.CLASS_NAME,'simple')
#driver.find_element(By.ID,'IDname') #經過ID屬性定位元素
#driver.find_element(By.CSS_SELECTOR,'cssname')#CSS選擇器定位元素
#driver.find_element(By.LINK_TEXT,'linktext') #鏈接文本定位元素
#driver.find_element(By.PARTIAL_LINK_TEXT,'linktext') #部分鏈接文本定位元素
#driver.find_element(By.NAME,'name') #屬性名定位元素
#driver.find_element(By.TAG_NAME,'tagname') #標籤名定位元素

print(data.text)  #打印元素文本內容

元素對象

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
opt=Options()
opt.add_argument('headless')
driver=webdriver.Chrome(chrome_options=opt)
driver.get('http://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.remote.webelement')
element=driver.find_element_by_id('module-selenium.webdriver.remote.webelement')
print(element)
print(type(element))

#返回一個webelement對象
<selenium.webdriver.remote.webelement.WebElement (session="dfaee65201abdf5a931306df6e7fe421", element="0.95256057244967-1")>
<class 'selenium.webdriver.remote.webelement.WebElement'>

selenium.webdriver.remote.webelement.WebElement爲一個DOM元素，它的方法和屬性包括：

clear() ：清除文本元素
click() ：單擊元素按鈕
get_attribute(name) ：獲取元素的給定屬性的屬性值
get_property(name) ：獲取元素的給定屬性
is_displayed() ：判斷元素是否顯示 PS：強調是是否顯示,不是是否存在，原文錯了

元素存在判斷

def isElementExist(driver, element):
    """Report whether a CSS selector matches an element on the current page.

    Args:
        driver: Browser object exposing ``find_element_by_css_selector``.
        element: CSS selector string to look up.

    Returns:
        True if the lookup succeeds, False if it raises an ordinary exception.
    """
    try:
        driver.find_element_by_css_selector(element)
    except Exception:
        # A failed lookup means "not there". Unlike a bare ``except``, this
        # lets KeyboardInterrupt and SystemExit propagate to the caller.
        return False
    return True


#   調用 driver是瀏覽器對象
    if isElementExist(driver, "[class='airy-ad-prompt-container']"):
        print("有")
    else:
        print("沒有")

is_enabled() ：判斷元素是否被啓用
is_selected() ：返回元素是否被選中
screenshot(filename) ：將當前元素的屏幕截圖保存到文件
send_keys() #發送元素值
submit() :提交表單
value_of_css_property() ：CSS屬性的值
id ：selenium使用的內部ID
location :元素在可渲染畫布中的位置
location_once_scrolled_into_view ：發現元素在屏幕視圖中的位置
rect ：返回包含元素大小和位置的字典
screenshot_as_base64 ：獲取當前元素的截屏，做爲base64編碼的字符串
size :獲取元素的大小
tag_name :獲取元素的tagName屬性
text ：獲取元素的文本

抓取網頁源碼，有了它剩下的就是正則表達式捕獲了

from selenium import webdriver

driver=webdriver.Chrome()
driver.get('http://www.cnblogs.com/zhangxinqi/')
element=driver.find_element_by_id('q')  #獲取輸入框元素
element.send_keys('python3之requests')  #發送元素
button=driver.find_element_by_id('btnZzk')  #獲取搜索按鈕
button.click()  #發送搜索動做
data=driver.page_source   #這裏返回html源碼

print(driver.current_url)   #打印URL
print(data)
print(type(element))
driver.close()

動做模擬

class selenium.webdriver.common.action_chains.ActionChains(driver)

在上面的實例中咱們針對的是某個節點元素的操做，若是要對沒有特定元素的對象操做如鼠標拖拽、鍵盤按鍵等，這些動做就稱爲動做鏈，selenium使用ActionChains()類來實現鼠標移動，鼠標按鈕操做，按鍵操做和上下文菜單交互，懸停和拖放等

click(on_element=None) ——單擊鼠標左鍵
click_and_hold(on_element=None) ——點擊鼠標左鍵，不鬆開
context_click(on_element=None) ——點擊鼠標右鍵
double_click(on_element=None) ——雙擊鼠標左鍵
drag_and_drop(source, target) ——拖拽到某個元素而後鬆開
drag_and_drop_by_offset(source, xoffset, yoffset) ——拖拽到某個座標而後鬆開
key_down(value, element=None) ——按下某個鍵盤上的鍵
key_up(value, element=None) ——鬆開某個鍵
move_by_offset(xoffset, yoffset) ——鼠標從當前位置移動到某個座標
move_to_element(to_element) ——鼠標移動到某個元素
move_to_element_with_offset(to_element, xoffset, yoffset) ——移動到距某個元素（左上角座標）多少距離的位置
perform() ——執行鏈中的全部動做
release(on_element=None) ——在某個元素位置鬆開鼠標左鍵
send_keys(*keys_to_send) ——發送某個鍵到當前焦點的元素
send_keys_to_element(element, *keys_to_send) ——發送某個鍵到指定元素

拖拽到指定目標（PS：沒有測試）

element = driver.find_element_by_name("source")
target = driver.find_element_by_name("target")
 
from selenium.webdriver import ActionChains
action_chains = ActionChains(driver)
action_chains.drag_and_drop(element, target).perform()

鼠標操做（PS：沒有測試）

menu = driver.find_element_by_css_selector(".nav") #獲取element對象
hidden_submenu = driver.find_element_by_css_selector(".nav #submenu1")  #獲取點擊對象
#建立鼠標對象
actions = ActionChains(driver)
#移動鼠標到對象
actions.move_to_element(menu)
#點擊對象
actions.click(hidden_submenu)
#執行操做
actions.perform()

彈出對話框

class selenium.webdriver.common.alert.Alert(driver)

Alert內置支持處理彈窗對話框，方法：

accept() ：確認彈窗，用法：Alert(driver).accept()
authenticate(username,password) ：將用戶名和密碼發送到authenticated對話框，隱含點擊肯定，用法：driver.switch_to.alert.authenticate('username','password')
dismiss() ：取消確認
send_keys(keysToSend) ：將文本（按鍵序列）發送到彈窗的輸入框，keysToSend爲要發送的文本
text ：獲取Alert的文本

（PS：沒有測試）

import time
from selenium import webdriver
from selenium.webdriver.common.alert import Alert

# Demo: raise a JavaScript alert, read its message, then confirm it.
driver = webdriver.Chrome()
driver.get('https://www.baidu.com')
driver.execute_script("alert('肯定');")  # open the alert dialog
time.sleep(2)  # fixed pause before the dialog is queried
# Keep one Alert object for both steps. accept() returns nothing, so binding
# its result to a name (as in `alert = Alert(driver).accept()`) only stores None.
alert = Alert(driver)
print(alert.text)  # message shown in the dialog
alert.accept()  # click the dialog's confirm button

鍵盤操做

class selenium.webdriver.common.keys.Keys

selenium提供一個keys包來模擬全部的按鍵操做，下面咱們介紹下一些經常使用的按鍵操做：

回車鍵：Keys.ENTER
退格鍵（Backspace，向前刪除）：Keys.BACK_SPACE
空格鍵：Keys.SPACE
製表鍵：Keys.TAB
退出鍵（Esc）：Keys.ESCAPE
刷新鍵：Keys.F5
全選（ctrl+A）：send_keys(Keys.CONTROL,'a') #組合鍵須要用send_keys方法操做
複製（ctrl+C）：send_keys(Keys.CONTROL,'c')
剪切（ctrl+X）：send_keys(Keys.CONTROL,'x')
粘貼（ctrl+V）：send_keys(Keys.CONTROL,'v')

實現點擊頁面從python的pypi頁面下載selenium源碼包：（PS：沒有測試）

import requests
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains

# Demo: search PyPI for selenium, click through to the download page, read the
# source archive's URL, then fetch the archive with requests.
driver = webdriver.Chrome()
try:
    driver.get('https://pypi.org/')

    element = driver.find_element_by_id('search')  # search input box
    element.send_keys('selenium')  # type the package name
    element.send_keys(Keys.ENTER)  # submit with the Enter key

    element_a = driver.find_element_by_link_text('selenium')  # package link in the results
    # Hover over the link, then left-click it
    ActionChains(driver).move_to_element(element_a).click(element_a).perform()

    element_down = driver.find_element_by_link_text('Download files')  # download tab link
    ActionChains(driver).move_to_element(element_down).click(element_down).perform()

    element_selenium = driver.find_element_by_link_text('selenium-3.13.0.tar.gz')  # archive link
    data = element_selenium.get_attribute('href')  # archive URL
finally:
    # The browser is only needed to discover the URL; release it even when a
    # lookup above fails.
    driver.quit()

# Download before opening the output file so a failed request does not leave
# an empty or bogus archive behind.
response = requests.get(data)
response.raise_for_status()  # do not save an HTTP error page as the archive
with open('selenium-3.13.0.tar.gz', 'wb') as f:
    f.write(response.content)  # the with-block closes the file

延時等待

目前，大多數Web應用程序都在使用AJAX技術。當瀏覽器加載頁面時，該頁面中的元素可能以不一樣的時間間隔加載。這使定位元素變得困難：若是DOM中還沒有存在元素，則locate函數將引起ElementNotVisibleException異常。使用等待，咱們能夠解決這個問題。等待在執行的操做之間提供了一些鬆弛 - 主要是使用元素定位元素或任何其餘操做。

Selenium Webdriver提供兩種類型的等待 - 隱式和顯式。顯式等待使WebDriver等待某個條件發生，而後再繼續執行。在嘗試查找元素時，隱式等待會使WebDriver輪詢DOM一段時間。

顯式等待：

顯式等待是根據定義的代碼，用於在進一步執行代碼以前等待某個條件發生，它提供了一些便捷方法，能夠編寫在僅須要等待的代碼上，實現方法須要WebDriverWait與ExpectedCondition結合使用：

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Firefox()
driver.get("http://somedomain/url_that_delays_loading")
try:
    element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, "myDynamicElement"))
    )
finally:
    driver.quit()

在拋出TimeoutException異常以前將等待10秒或者在10秒內發現了查找的元素。 WebDriverWait 默認狀況下會每500毫秒調用一次ExpectedCondition直到結果成功返回。 ExpectedCondition成功的返回結果是一個布爾類型的true或是不爲null的返回值。中文文檔 https://selenium-python-zh.readthedocs.io/en/latest/getting-started.html#id2

其餘等待條件：

title_is ：標題是某內容
title_contains ：標題包含某內容
presence_of_element_located ：節點加載出來，傳入定位元組，如(By.ID, 'p')
visibility_of_element_located ：節點可見，傳入定位元組
visibility_of ：可見，傳入節點對象
presence_of_all_elements_located ：全部節點加載出來
text_to_be_present_in_element ：某個節點文本包含某文字
text_to_be_present_in_element_value ：某個節點值包含某文字
frame_to_be_available_and_switch_to_it ：加載並切換
invisibility_of_element_located ：節點不可見
element_to_be_clickable ：節點可點擊
staleness_of ：判斷一個節點是否仍在DOM，可判斷頁面是否已經刷新
element_to_be_selected ：節點已被選中，傳節點對象
element_located_to_be_selected ：節點已被選中，傳入定位元組
element_selection_state_to_be ：傳入節點對象以及狀態，相等返回True，不然返回False
element_located_selection_state_to_be ：傳入定位元組以及狀態，相等返回True，不然返回False
alert_is_present ：是否出現警告

中文文檔 https://selenium-python-zh.readthedocs.io/en/latest/waits.html

摘自 https://www.cnblogs.com/zhangxinqi/p/9259808.html

最後附上一份截圖源碼，改了好一會才完成，原始版，未整理

#!/usr/bin/python3
# 截圖測試
#
from selenium import webdriver
import unittest
import os, sys, time
import exescript
from PIL import Image


def image_merge(li_path, img_width, img_height):
    """Stack screenshots vertically and crop the result to the content area.

    The images are pasted top to bottom onto one canvas as wide as the widest
    image. The canvas is saved as ``data/xq.jpg``, reopened, cropped and saved
    again as ``data/xq_cut.jpg``.

    Args:
        li_path: Image file paths in top-to-bottom order; paths that do not
            exist are skipped.
        img_width: Width of the content to keep. The crop removes
            ``(merged_width - img_width) / 2 - 100`` pixels from each side.
        img_height: Height of the cropped output, measured from the top.

    Raises:
        ValueError: If none of the given paths exists.
    """
    print("圖片處理")
    existing = [path for path in li_path if os.path.exists(path)]
    if not existing:
        raise ValueError("no existing image files to merge")

    # Canvas size: width of the widest image, sum of all heights.
    max_width = 0
    total_height = 0
    for img_path in existing:
        # Context manager closes the file handle once the size has been read.
        with Image.open(img_path) as img:
            width, height = img.size
        max_width = max(max_width, width)
        total_height += height

    # Fill value 255 is kept from the original code.
    # NOTE(review): for "RGB" a single integer may not mean white; confirm.
    new_img = Image.new("RGB", (max_width, total_height), 255)

    # Paste each image directly below the previous one.
    y = 0
    for img_path in existing:
        with Image.open(img_path) as img:
            new_img.paste(img, (0, y))
            y += img.size[1]

    print(new_img.size)
    new_img.save("data/xq.jpg", "JPEG", quality=95)

    # The crop works on the re-read JPEG, as the original code did.
    with Image.open("data/xq.jpg") as img1:
        print("寬度", img1.width)
        print("圖片寬度", img_width)
        # Margin cut from each side; the -100 keeps 100px around the content.
        # NOTE(review): negative when the merged image is less than 200px
        # wider than img_width, which makes the crop box exceed the image.
        cut = ((img1.width - img_width) / 2) - 100
        print("白邊", cut)
        ok_scr = img1.width - cut

        img_cut_size = (cut, 0, ok_scr, img_height)
        img2 = img1.crop(img_cut_size)
        img2.save("data/xq_cut.jpg", "JPEG", quality=95)


# Timestamps at second and day resolution; this section only prints them.
current_time = time.strftime("%Y-%m-%d-%H_%M_%S")
current_time1 = time.strftime("%Y-%m-%d")
print(current_time)
print(current_time1)

# Raw string: the backslash is a literal path separator, so the value no
# longer depends on "\c" being an unrecognised escape sequence.
dpath = r"config\chromedriver.exe"
options = webdriver.ChromeOptions()
# NOTE(review): the double quotes are part of the argument value; confirm they
# do not end up inside the User-Agent header.
options.add_argument(
    'user-agent="Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"'
)
options.add_argument("--headless")
# options.add_argument("headless")  # headless browser
# Switch the language to Chinese
# options.add_argument('lang=zh_CN.UTF-8')
# NOTE(review): "en-us" is not of the form name=value; presumably
# "--lang=en-US" was intended. Left unchanged; confirm.
options.add_argument("en-us")
# Google's documentation mentions this switch as a workaround for a bug
options.add_argument("--disable-gpu")
# Disable all plugins to speed things up. The switch must start with two ASCII
# hyphens; a typographic dash makes it an unknown switch.
options.add_argument("--disable-plugins")
# If chromedriver is reachable via an environment variable the first argument
# can be omitted; otherwise pass its path
driver = webdriver.Chrome(executable_path=dpath, chrome_options=options)
# driver = webdriver.Chrome(executable_path=dpath)
url = "https://www.amazon.com/dp/B01L1F1OV6"
# Browser window width and height
w_width = 1920
w_height = 1080
driver.set_window_size(w_width, w_height)

driver.get(url)

# 必須打印圖片路徑HTMLTestRunner才能捕獲而且生成路徑，\image\**\\**.png 是獲取路徑的條件,必須這樣的目錄
# 設置存儲圖片路徑，測試結果圖片能夠按照天天進行區分
# 經過if進行斷言判斷
# driver.get("https://baidu.com/")
# # 新建立路徑「.」表示當前整個.py文件的路徑所在的位置，「\\」路徑分割符，其中的一個是「\」表示轉義字符
# pic_path = ".\\result\\image\\" + current_time1 + "\\" + current_time + ".png"
# print(pic_path)
# time.sleep(5)
# print(driver.title)
# 截取當前url頁面的圖片，並將截取的圖片保存在指定的路徑下面（pic_path），注：如下兩種方法均可以
# driver.save_screenshot(pic_path)

# Original author's note: scripts can be injected, but a value does not always
# come back from them.
time.sleep(5)  # presumably lets the page finish loading; TODO confirm
# exejs = exescript.ExeJs(driver)
# js_th ='return document.getElementsByClassName("a-section a-spacing-extra-large bucket")[0].clientHeight.toString()'
# exejs.exeWrap(js_th)
th = driver.execute_script("return document.title;")
print(th)
# Jump to the "aplus" anchor by setting the URL fragment
print("滾動到指定位置")
js = 'window.location.hash="aplus"'
driver.execute_script(js)
# A compound class name like this is not allowed here:
# ts1 = driver.find_element_by_class_name("a-section.a-spacing-extra-large.bucket")
li_img = []  # screenshot file paths, in capture order
try:
    dw = driver.find_element_by_id("dpx-aplus-3p-product-description_feature_div")
    # print("定位1", dw.size)
    # print("定位1", dw.location)  # works
    dw1_height = dw.location["y"]  # y coordinate of the description container
    # print("定位1", dw.location_once_scrolled_into_view)

    xq = driver.find_element_by_css_selector(
        "[class='a-section a-spacing-extra-large bucket']"
    )
    # print("定位2", xq.size)
    xq_width = xq.size["width"]
    xq_height = xq.size["height"]
    all_height = xq_height  # height that still has to be captured
    driver.save_screenshot("data/sc_1.png")
    li_img.append("data/sc_1.png")
    # Height of the browser's visible area
    th_see_height = driver.execute_script("return window.innerHeight;")
    print("可視高度", th_see_height)
    # Size of the detail content itself
    xq_ok = driver.find_element_by_css_selector("[class='aplus-v2 desktop celwidget']")
    # print("定位2", xq.size)
    xq_ok_width = xq_ok.size["width"]
    print("展現高度", xq_ok_width)

    # When the detail section is taller than the visible area, keep scrolling
    # one viewport at a time and take another screenshot after each scroll.
    num = 2
    while all_height >= th_see_height:
        print("滾動到指定位置")
        # Screenshot number `num` starts (num - 1) viewports below the
        # container top. A constant offset would make every screenshot after
        # the second one repeat the same region.
        th = dw1_height + th_see_height * (num - 1)
        js = "window.scroll(0," + str(th) + ")"
        driver.execute_script(js)
        t_path = "data/sc_" + str(num) + ".png"
        driver.save_screenshot(t_path)
        li_img.append(t_path)
        all_height = all_height - th_see_height
        if all_height <= th_see_height:
            break
        num += 1

    print("截圖次數", num)
    print("截圖完成,數據以下")
    print("路徑", li_img)
    print("尺寸寬", xq_width)
    print("尺寸高", xq_height)
    image_merge(li_img, xq_ok_width, xq_height)
except Exception as result:
    print("檢測出異常{}".format(result))
finally:
    time.sleep(1)
    # quit() closes every window and ends the driver session, whereas close()
    # only closes the current window.
    driver.quit()