模擬登錄微博相對來講並不難。驗證碼是常規的5個隨機數字、字母的組合,識別起來也比較容易。主要會用到許多Selenium中的知識,如定位標籤、輸入信息、點擊等。如對Selenium的使用並不熟悉,請先移駕《Python爬蟲 | Selenium詳解》,相信你再來看本篇必定能夠看懂。
破解微博登錄的思路:
(1)使用webdriver打開微博網頁;
(2)輸入用戶名和密碼,點擊登錄;
(3)對第二步的結果進行判斷(密碼錯誤、登錄成功,或需要輸入驗證碼);
(4)本例中增加了登錄成功後獲取cookies的功能。
import requests
from requests import RequestException
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from chaojiying import Chaojiying

# Chaojiying username, password and software ID (used to recognise captchas).
# NOTE(review): the values were left blank in the published source; fill in
# real credentials before running.
CHAOJIYING_USERNAME = ''
CHAOJIYING_PASSWORD = ''
CHAOJIYING_SOFT_ID = ''
# Captcha kind code passed to Chaojiying.post_pic().
CHAOJIYING_KIND = 1006


class LoginWeibo:
    """Log in to Weibo through a Selenium-driven Chrome browser.

    The flow is: open the site, type the credentials, click the login button,
    then check for a wrong-password popup, a successful login, or a captcha
    that has to be recognised (via Chaojiying) and submitted.

    NOTE(review): this class never closes the browser (a ``__del__`` calling
    ``browser.close()`` was commented out in the original); callers should
    close ``self.browser`` themselves when they are done with it.
    """

    def __init__(self, username, password):
        """
        Start Chrome and store the credentials.

        :param username: Weibo account name typed into the login form.
        :param password: Weibo password typed into the login form.
        """
        self.url = 'https://www.weibo.com'
        # Raw string: the backslashes of the Windows path must stay literal
        # instead of being parsed as (invalid) escape sequences.
        self.browser = webdriver.Chrome(
            executable_path=r'D:\download\pythonRelated\chromedriver.exe')
        # Default explicit wait (20 s) used for most element lookups.
        self.wait = WebDriverWait(self.browser, 20)
        self.username = username
        self.password = password
        self.chaojiying = Chaojiying(
            CHAOJIYING_USERNAME, CHAOJIYING_PASSWORD, CHAOJIYING_SOFT_ID)

    def open(self):
        """
        Open the page and type the username and password.

        :return: None
        """
        self.browser.get(self.url)
        username = self.wait.until(
            EC.presence_of_element_located((By.ID, 'loginname')))
        password = self.wait.until(
            EC.presence_of_element_located((By.NAME, 'password')))
        username.send_keys(self.username)
        password.send_keys(self.password)

    def get_click_button(self):
        """
        Find the login button.

        :return: the clickable login button element.
        """
        # Target markup:
        # <a href="javascript:void(0)" class="W_btn_a btn_32px "
        #    action-type="btn_submit" node-type="submitBtn" tabindex="6">...</a>
        return self.wait.until(
            EC.element_to_be_clickable((By.CLASS_NAME, 'W_btn_a')))

    def login_successfully(self):
        """
        Tell whether the login succeeded.

        :return: True if the mail icon shows up within 5 seconds, else False.
        """
        # Only visible after a successful login:
        # <em class="W_ficon ficon_mail S_ficon">I</em>
        try:
            return bool(
                WebDriverWait(self.browser, 5).until(
                    EC.presence_of_element_located(
                        (By.CSS_SELECTOR, '.ficon_mail'))))
        except TimeoutException:
            return False

    def get_click_image(self, name='captcha.png'):
        """
        Download the captcha image and save it to disk.

        :param name: path of the file the image bytes are written to.
        :return: the image bytes, or None when the captcha element does not
            appear or the image cannot be downloaded.
        """
        # Target markup:
        # <img width="95" height="34" action-type="btn_change_verifycode"
        #      node-type="verifycode_image"
        #      src="https://login.sina.com.cn/cgi/pin.php?r=...&s=0&p=...">
        try:
            element = self.wait.until(
                EC.presence_of_element_located(
                    (By.XPATH, '//img[@action-type="btn_change_verifycode"]')))
            image_url = element.get_attribute('src')
        except (NoSuchElementException, TimeoutException):
            # WebDriverWait.until() signals a missing element with
            # TimeoutException, so it has to be caught here as well.
            print('')
            return None

        # get_html() returns None on a non-200 status or a request error.
        response = get_html(image_url)
        if response is None:
            return None

        image = response.content
        with open(name, 'wb') as f:
            f.write(image)
        return image

    def password_error(self):
        """
        Tell whether the wrong-password popup is shown.

        :return: True if the popup text equals the expected error message,
            False otherwise (including when no popup appears within 5 s).
        """
        try:
            element = WebDriverWait(self.browser, 5).until(
                EC.presence_of_element_located(
                    (By.XPATH,
                     '//div[@class="W_layer W_layer_pop"]/div/p/span[2]')))
        except TimeoutException:
            return False

        message = element.text
        print(message)
        # NOTE(review): compared verbatim with the page text; confirm this
        # literal matches the wording the site actually displays.
        return message == '用戶名或密碼錯誤。'

    def get_cookies(self):
        """
        Fetch the cookies of the current browser session.

        :return: the value of ``self.browser.get_cookies()``.
        """
        cookies = self.browser.get_cookies()
        print(cookies)
        return cookies

    def login(self):
        """
        Run the whole login flow.

        :return: a dict with ``status`` and ``content``:
            status 1 - logged in, content is the cookies;
            status 2 - wrong username or password;
            status 3 - the captcha image could not be obtained.
            When a recognised captcha is rejected, the error is reported to
            Chaojiying and the whole flow is retried (recursively, without
            an upper bound), returning the result of the retry.
        """
        # 1. Open the site and type the username and password.
        self.open()
        # 2. Click the login button.
        self.get_click_button().click()

        if self.password_error():
            print('用戶名或密碼錯誤')
            return {
                'status': 2,
                'content': '用戶名或密碼錯誤'
            }

        if self.login_successfully():
            print('登陸成功')
            # Cookies that belong to the logged-in account.
            return {
                'status': 1,
                'content': self.get_cookies()
            }

        # Sometimes a captcha is required: fetch the captcha image.
        image = self.get_click_image()
        if image is None:
            # Nothing to send to the recognition service.
            return {
                'status': 3,
                'content': '登陸失敗'
            }

        # Recognise the captcha.
        result = self.chaojiying.post_pic(image, CHAOJIYING_KIND)
        print(result)

        # Type the captcha into
        # <input type="text" class="W_input " maxlength="6" name="verifycode"
        #        node-type="verifycode" tabindex="3">
        verifycode = self.wait.until(
            EC.presence_of_element_located((By.NAME, 'verifycode')))
        verifycode.send_keys(result['pic_str'])

        # Click the login button again.
        self.get_click_button().click()

        if self.login_successfully():
            print('登陸成功')
            return {
                'status': 1,
                'content': self.get_cookies()
            }

        # Wrong recognition: report it, then retry. The result of the retry
        # must be returned, otherwise the caller receives None.
        self.chaojiying.report_error(result['pic_id'])
        return self.login()
def get_html(url, timeout=10):
    """
    Fetch a URL with a browser-like User-Agent.

    :param url: address to request.
    :param timeout: seconds to wait for the server before giving up; without
        a timeout ``requests.get`` may block indefinitely.
    :return: the ``requests`` response when the status code is 200,
        otherwise None (also None on any ``RequestException``).
    """
    # Send a User-Agent header so the request looks like it comes from a
    # browser.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except RequestException:
        return None

    if response.status_code != 200:
        return None

    response.encoding = response.apparent_encoding
    return response


if __name__ == '__main__':
    result = LoginWeibo('username', 'password').login()
本篇博文僅供學習交流相關的爬蟲知識,請勿過度使用,如有任何糾紛,與本人無關。(瑟瑟發抖)