驗證碼破解 | Selenium模擬登錄微博

模擬登錄微博相對來講,並不難。驗證碼是常規的5個隨機數字字母的組合,識別起來也比較容易。主要是用到許多Selenium中的知識,如定位標籤、輸入信息、點擊等。如對Selenium的使用並不熟悉,請先移駕《Python爬蟲 | Selenium詳解》。相信你再來看本篇必定能夠看懂。

 

破解微博登錄的思路:

(1)使用webdriver打開微博網頁;

(2)輸入用戶名和密碼,點擊登陸;

(3)對第二步的結果進行判斷:

  • 情況一:用戶名或者密碼錯誤
  • 情況二:登陸成功
  • 情況三:出現驗證碼圖片,需識別
  • 情況四:其他錯誤

(4)本例中增加了登陸成功後獲取cookies的情況。

import requests
from requests import RequestException
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from chaojiying import Chaojiying


# Chaojiying (captcha-solving service) account settings: user name, password
# and software ID. They are passed to the Chaojiying client constructor.
# Fill in your own values before running; the empty strings are placeholders
# that keep the module importable.
CHAOJIYING_USERNAME = ''
CHAOJIYING_PASSWORD = ''
CHAOJIYING_SOFT_ID = ''
# Captcha type code sent to Chaojiying together with each captcha image.
CHAOJIYING_KIND = 1006


class LoginWeibo():
    """Log in to weibo.com through a Selenium-driven Chrome browser.

    The flow is: open the page, type the credentials, click the login
    button, then decide between "wrong credentials", "logged in" and
    "captcha required". Captcha images are sent to the Chaojiying service
    for recognition.
    """

    def __init__(self, username, password,
                 driver_path=r'D:\download\pythonRelated\chromedriver.exe'):
        """
        Start Chrome and prepare the captcha client.

        :param username: Weibo account name typed into the login form
        :param password: Weibo password typed into the login form
        :param driver_path: filesystem path of the chromedriver executable
        """
        self.url = 'https://www.weibo.com'
        # NOTE(review): `executable_path` is deprecated in Selenium 4 in favour
        # of a Service object -- confirm against the installed Selenium version.
        self.browser = webdriver.Chrome(executable_path=driver_path)
        self.wait = WebDriverWait(self.browser, 20)
        self.username = username
        self.password = password
        self.chaojiying = Chaojiying(CHAOJIYING_USERNAME, CHAOJIYING_PASSWORD, CHAOJIYING_SOFT_ID)

    def close(self):
        """
        Quit the browser. Call this once the instance is no longer needed.
        :return: None
        """
        self.browser.quit()

    def open(self):
        """
        Load the login page and type the user name and password.
        :return: None
        """
        self.browser.get(self.url)
        username = self.wait.until(EC.presence_of_element_located((By.ID, 'loginname')))
        password = self.wait.until(EC.presence_of_element_located((By.NAME, 'password')))
        username.send_keys(self.username)
        password.send_keys(self.password)

    def get_click_button(self):
        """
        Wait until the login button is clickable and return it.
        :return: the button element
        """
        # Target markup:
        # <a href="javascript:void(0)" class="W_btn_a btn_32px "
        #    action-type="btn_submit" node-type="submitBtn" tabindex="6">
        return self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'W_btn_a')))

    def login_successfully(self):
        """
        Check whether the login succeeded.
        :return: True if the mail icon shows up within 5 seconds, else False
        """
        # Only visible after a successful login:
        # <em class="W_ficon ficon_mail S_ficon">I</em>
        try:
            return bool(
                WebDriverWait(self.browser, 5).until(EC.presence_of_element_located((By.CSS_SELECTOR, '.ficon_mail')))
            )
        except TimeoutException:
            return False

    def get_click_image(self, name='captcha.png'):
        """
        Download the captcha image and save a copy to disk.
        :param name: file name the image bytes are written to
        :return: the image bytes, or None if the captcha element is not
                 found or the download fails
        """
        # Target markup:
        # <img width="95" height="34" action-type="btn_change_verifycode"
        #      node-type="verifycode_image" src="https://login.sina.com.cn/cgi/pin.php?...">
        try:
            element = self.wait.until(
                EC.presence_of_element_located((By.XPATH, '//img[@action-type="btn_change_verifycode"]')))
        except (TimeoutException, NoSuchElementException):
            # wait.until() signals a missing element with TimeoutException.
            print('未找到驗證碼圖片')
            return None

        image_url = element.get_attribute('src')
        # NOTE(review): the image is fetched by a separate HTTP request, outside
        # the browser session -- confirm it matches the one the page shows.
        response = get_html(image_url)
        if response is None:
            print('驗證碼圖片下載失敗')
            return None

        image = response.content
        with open(name, 'wb') as f:
            f.write(image)
        return image

    def password_error(self):
        """
        Check whether the page reports a wrong user name or password.
        :return: True if the error popup appears within 5 seconds with the
                 expected message, else False
        """
        try:
            element = WebDriverWait(self.browser, 5).until(
                EC.presence_of_element_located((By.XPATH, '//div[@class="W_layer W_layer_pop"]/div/p/span[2]')))
        except TimeoutException:
            return False
        message = element.text
        print(message)
        # NOTE(review): must equal the site's message text exactly -- confirm.
        return message == '用戶名或密碼錯誤。'

    def get_cookies(self):
        """
        Read the cookies of the current browser session.
        :return: whatever browser.get_cookies() returns
        """
        cookies = self.browser.get_cookies()
        print(cookies)
        return cookies

    def _success_result(self):
        """Build the status-1 result that carries the session cookies."""
        print('登陸成功')
        return {
            'status': 1,
            'content': self.get_cookies()
        }

    def login(self, max_attempts=3):
        """
        Run the whole login flow, retrying when the captcha answer is rejected.

        :param max_attempts: maximum number of full login attempts
        :return: dict with 'status' and 'content':
                 status 1 -- logged in, content is the cookies;
                 status 2 -- wrong user name or password;
                 status 3 -- login failed (no captcha image available, or
                 every attempt was rejected)
        """
        for _ in range(max_attempts):
            # 1. Open the page and type the credentials.
            self.open()

            # 2. Click the login button.
            self.get_click_button().click()

            if self.password_error():
                print('用戶名或密碼錯誤')
                return {
                    'status': 2,
                    'content': '用戶名或密碼錯誤'
                }
            if self.login_successfully():
                return self._success_result()

            # 3. Neither an error nor a success: a captcha may be required.
            image = self.get_click_image()
            if image is None:
                break

            # Have the captcha recognised.
            result = self.chaojiying.post_pic(image, CHAOJIYING_KIND)
            print(result)

            # Type the recognised text into the captcha field:
            # <input type="text" class="W_input " maxlength="6" name="verifycode" ...>
            verifycode = self.wait.until(EC.presence_of_element_located((By.NAME, 'verifycode')))
            verifycode.send_keys(result['pic_str'])

            # Submit again.
            self.get_click_button().click()
            if self.login_successfully():
                return self._success_result()

            # Wrong answer: report it to the service, then start over.
            self.chaojiying.report_error(result['pic_id'])

        print('登陸失敗')
        return {
            'status': 3,
            'content': '登陸失敗'
        }


def get_html(url, timeout=10):
    """
    Fetch a URL with a browser-like User-Agent.

    :param url: address to request
    :param timeout: seconds to wait for the server before giving up;
                    without it requests.get() can block indefinitely
    :return: the response object when the status code is 200, otherwise
             None (also None on any requests exception)
    """
    # Send a User-Agent header so the request looks like it comes from a browser.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except RequestException:
        return None
    if response.status_code != 200:
        return None
    # Use the encoding detected from the body when the text is decoded.
    response.encoding = response.apparent_encoding
    return response


# Script entry point: run one login with the placeholder credentials.
if __name__ == "__main__":
    result = LoginWeibo(username='username', password='password').login()

 

本篇博文僅供學習交流相關的爬蟲知識,請勿過度使用,若有任何糾紛,與本人無關。(瑟瑟發抖)

相關文章
相關標籤/搜索