Python+Selenium----處理登陸圖片驗證碼

時間 2019-11-17

標籤 python+selenium python selenium 處理登陸圖片驗證碼欄目 Python 简体版

原文原文鏈接

1. 說明

在做自動化測試的時候，常常會遇到登陸，其中比較麻煩的就是驗證碼的處理。如今比較常用的是圖形驗證碼，每次刷新獲得的驗證碼都不一致，因此，通常來講，獲取驗證碼圖片有兩種方式：

（1）拿到驗證碼的圖片鏈接：src="http://cli.cncaq.com//login/to_getvericode/52"，可是這種方式有時候行不通。因爲有時候會發現當前頁面顯示的驗證碼和通過提取出來的 URL 鏈接打開的驗證碼內容是不同的，其內容不斷發生變化。
（2）利用 selenium 先進行可視區域的截屏，然後定位驗證碼元素的位置以及大小，再利用 PIL 模塊中的 Image 進行裁剪，獲得驗證碼圖片，最後送往驗證碼識別模塊或者打碼平臺處理。

瀏覽器

2. 代碼

方法一：獲取驗證碼圖片地址，下載到本地，然後進行圖文識別，獲得驗證碼中的內容（可是由於同一個地址每次訪問得到的驗證碼也不同，因此當前場景並不適用）

import random    # 導入 random(隨機數) 模塊
from selenium_demo3_test.utils.file import *   #引入下載圖片函數所在的py文件

from selenium.webdriver.common.by import By  # locator constants for find_element()

# Read the ``src`` attribute of the captcha <img> element (looked up by id).
# find_element(By.ID, ...) is used because the find_element_by_id() shortcut
# was removed in Selenium 4.3.
yanzhengma_src = driver.find_element(By.ID, 'imgvercodeLogin').get_attribute('src')
# The captcha address has no file extension (it ends in /52), so one is
# appended to make the later download easier.
img_url = yanzhengma_src + '.png'
# Random integer in [0, 100000], used as the local file name.
file_name = random.randint(0, 100000)
# Directory the captcha image is saved into (a relative path, so presumably
# resolved against the directory the script is run from).
file_path = 'img\\login'
# Download the image: (URL to download, file name to save as, target directory).
save_img(img_url, file_name, file_path)

下載文件方法：

import urllib.request
import os
import random    # 導入 random(隨機數) 模塊

#（要下載的文件地址，保存的文件名，保存地址）
def save_img(img_url,file_name,file_path):
    """Download ``img_url`` and store it as ``<file_path>/<file_name><suffix>``.

    ``<suffix>`` is the file extension found in the *path* component of the
    URL, so a query string or fragment (``.../a.png?v=1``) never leaks into
    the local file name.  ``file_path`` is created when it does not exist.

    Args:
        img_url: Address of the file to download.
        file_name: Name (without extension) to save the file under; any
            value that ``str.format`` can render, e.g. an int.
        file_path: Directory to save into.

    Returns:
        None.  Failures are not raised: they are reported with ``print`` and
        swallowed, so callers cannot tell success from failure by the result.
    """
    # Local import keeps this block self-contained; only the path part of the
    # URL is wanted when working out the extension.
    from urllib.parse import urlsplit

    try:
        if not os.path.exists(file_path):
            print('文件夾',file_path,'不存在，從新創建')
        # exist_ok avoids failing if the directory appears between the check
        # above and this call.
        os.makedirs(file_path, exist_ok=True)
        # Extension of the URL path only (query string / fragment excluded).
        file_suffix = os.path.splitext(urlsplit(img_url).path)[1]
        # Full local path of the downloaded file.
        filename = os.path.join(file_path, '{}{}'.format(file_name, file_suffix))
        urllib.request.urlretrieve(img_url, filename=filename)
        print('********************************文件保存成功')
    except IOError as e:
        print('文件操做失敗',e)
    except Exception as e:
        print('錯誤 ：',e)

方法二：截屏，然後裁剪出驗證碼，再進行圖片識別

# Take a screenshot, crop out the captcha, and type the recognised text into the
# captcha input box.  Arguments after the driver: (save directory, captcha
# element id, captcha input element id).
jietu_xieru(driver,'img\\login\\','imgvercodeLogin','verfieldUserText')

截圖並裁剪圖片以及圖文識別的方法：

from  PIL import Image
import random          #導入 random(隨機數) 模塊
import pytesseract     #導入識別驗證碼信息包
import time

#截圖，裁剪圖片並返回驗證碼圖片名稱
# _save_url 保存路徑 ；yuansu 驗證碼元素標識
def image_cj(driver,_save_url,yuansu):
    """Screenshot the browser and crop out the element whose id is ``yuansu``.

    Two files are written, both named by plain string concatenation onto
    ``_save_url`` (so ``_save_url`` must already end with a path separator):
    ``<n>.png`` holds the screenshot and ``<n>副本.png`` holds the cropped
    element, where ``<n>`` is a random integer in [0, 100000].

    Args:
        driver: Selenium WebDriver with the target page loaded.
        _save_url: Directory prefix the two images are saved under.
        yuansu: ``id`` attribute of the captcha element.

    Returns:
        The file name (without directory) of the cropped image, or ``None``
        when any step raised; the error is printed rather than propagated.
    """
    # Function-scope import keeps this block self-contained.
    from selenium.webdriver.common.by import By

    try:
        stem = str(random.randint(0, 100000))
        screenshot_path = _save_url + stem + '.png'
        driver.get_screenshot_as_file(screenshot_path)
        # find_element(By.ID, ...) replaces find_element_by_id(), which was
        # removed in Selenium 4.3.
        captcha = driver.find_element(By.ID, yuansu)

        # NOTE(review): ``location`` is relative to the whole page, whereas
        # ``location_once_scrolled_into_view`` is relative to the visible
        # area.  The crop below presumably only lines up while the page is
        # not scrolled and the screenshot is not scaled -- confirm.
        position = captcha.location
        dimensions = captcha.size
        left = int(position['x'])
        top = int(position['y'])
        right = left + dimensions['width']
        bottom = top + dimensions['height']

        yanzhengma_file_name = stem + '副本.png'
        # Context manager closes the screenshot file once the crop is saved;
        # previously the handle was left open.
        with Image.open(screenshot_path) as screenshot:
            cropped = screenshot.crop((left, top, right, bottom))
            cropped.save(_save_url + yanzhengma_file_name)
        return yanzhengma_file_name
    except Exception as e:
        print('錯誤 ：', e)
        return None



# 獲取驗證碼圖片中信息（保存地址，要識別的圖片名稱）
def image_text(_save_url,yanzhengma_file_name,
               base_dir='F:\\Python\\workspace\\selenium_demo3_test\\test\\case\\PT\\',
               tesseract_cmd=r'C:\Program Files (x86)\Tesseract-OCR\tesseract'):
    """Run OCR on a saved captcha image and return the recognised text.

    The image is read from ``base_dir + _save_url + yanzhengma_file_name``
    (plain string concatenation, so each directory part must end with a path
    separator).

    Args:
        _save_url: Directory prefix the image was saved under.
        yanzhengma_file_name: File name of the image to recognise.
        base_dir: Prefix prepended to ``_save_url``.  Defaults to the
            machine-specific project directory that used to be hard-coded
            here; pass ``''`` to use ``_save_url`` as given.
        tesseract_cmd: Path of the Tesseract executable.  Defaults to the
            Windows install location that used to be hard-coded here.

    Returns:
        The string produced by ``pytesseract.image_to_string``, unmodified.

    Note:
        ``pytesseract.pytesseract.tesseract_cmd`` is a module-level setting,
        so the value assigned here stays in effect after the call.
    """
    pytesseract.pytesseract.tesseract_cmd = tesseract_cmd
    image_path = base_dir + _save_url + yanzhengma_file_name
    # Close the image file as soon as OCR is done instead of leaking the handle.
    with Image.open(image_path) as image:
        text = pytesseract.image_to_string(image)
    print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$圖片中的內容爲：', text)
    return text



#截圖並寫入驗證碼（保存地址，驗證碼元素，驗證碼輸入框元素）
def jietu_xieru(driver,_save_url,yuansu,yanzhma_text):
    """Screenshot the page, OCR the captcha and type it into the input box.

    Args:
        driver: Selenium WebDriver with the login form visible.
        _save_url: Directory prefix the screenshot and cropped captcha are
            saved under.
        yuansu: ``id`` attribute of the captcha image element.
        yanzhma_text: ``id`` attribute of the captcha input element.  This
            argument used to be ignored in favour of a hard-coded id.

    Returns:
        None.  The function sleeps for two seconds after typing.
    """
    # Function-scope import keeps this block self-contained.
    from selenium.webdriver.common.by import By

    # Screenshot the current page and crop the captcha out of it; the cropped
    # image is saved as "<n>副本.png" and its file name is returned.
    yanzhengma_file_name = image_cj(driver, _save_url, yuansu)
    # Recognise the text in the cropped captcha image.
    text = image_text(_save_url, yanzhengma_file_name)
    # OCR output commonly carries trailing whitespace/newlines; strip it so
    # that send_keys() does not type a newline (i.e. press Enter) into the
    # form.  find_element(By.ID, ...) replaces find_element_by_id(), which
    # was removed in Selenium 4.3.
    driver.find_element(By.ID, yanzhma_text).send_keys(text.strip())
    time.sleep(2)

3. 登陸

from selenium import webdriver  #引入瀏覽器驅動
import time
from selenium.webdriver.common.action_chains import ActionChains  # 引入 ActionChains 類進行鼠標事情操做
import pytesseract   #導入識別驗證碼信息包
from PIL import Image
#from .utils.log import logger    引入日誌模塊
import random    # 導入 random(隨機數) 模塊
from selenium_demo3_test.utils.file import *   #引入下載圖片函數所在的py文件
from selenium_demo3_test.utils.image import *   #引入圖片操做
from  selenium_demo3_test.utils.llqi import *   #引入瀏覽器操做
#coding=utf-8


from selenium.webdriver.common.by import By  # locator constants for find_element()

# Start the browser and open the site.
driver = llq_qudong('Chrome')
open_url(driver,'http://www.cncaq.com/')

# The find_element_by_id()/find_element_by_xpath() shortcuts were removed in
# Selenium 4.3, so every lookup goes through find_element(By.<strategy>, ...).
denlu = driver.find_element(By.ID, 'top_login_a')  # the "log in" link
ActionChains(driver).click(denlu).perform()        # click it to open the login pop-up
driver.find_element(By.ID, 'loginNameText').send_keys('188XXXXXXXX')
driver.find_element(By.ID, 'passwordText').send_keys('111111')
time.sleep(2)

# Text shown in the user area once the login has succeeded.
user_name = '用戶1'

# OCR is not 100% reliable, so a failed login is assumed to mean a wrongly
# recognised captcha: keep re-reading the captcha and re-submitting until the
# expected user name appears.  NOTE(review): there is no retry limit, so this
# loops forever if the login fails for any other reason.
while True:
    # Screenshot, crop the captcha, and type the recognised text into the
    # captcha input box: (save directory, captcha element id, input element id).
    jietu_xieru(driver, 'img\\login\\', 'imgvercodeLogin', 'verfieldUserText')
    driver.find_element(By.XPATH, '//*[@id="loginForm"]/div[6]/button').click()  # submit the form
    _user_name = driver.find_element(By.XPATH, '//*[@id="userWrap"]/div/p').get_attribute('innerHTML')
    if _user_name == user_name:
        break

print('#############################################登陸成功#############################################')

相關標籤/搜索

每日一句

每一个你不满意的现在，都有一个你没有努力的曾经。