一:alex更濃的雞湯
alex爲我們分享了他自己的職業發展歷程,爲我們點出一些職場上的重要關注點,非常受益,尤其是那一句‘你爲什麼不受到關注’印象深刻,最後alex指出思維的升級和改變對自己的重要意義,深受啓發,感謝!
二:Selenium基礎知識點的學習
Selenium是一個第三方模塊,可以完全模擬用戶在瀏覽器上的操作(在瀏覽器上點點點)。
1,安裝
- pip install selenium
2,優缺點
-無需查看和確定請求頭、請求體等數據細節,直接模擬人點擊瀏覽器的行爲
-但效率不高
3,依賴驅動:
Firefox
https://github.com/mozilla/geckodriver/releases
Chrome
http://chromedriver.storage.googleapis.com/index.html
4,與selenium相關的基本操作
from selenium import webdriver
from selenium.webdriver.common.by import By

# Demo script: drive a real Chrome window through the dig.chouti.com login
# form and print the cookies the browser holds afterwards.
#
# Configure the driver. The chromedriver binary must be downloaded manually
# and placed in a known directory, otherwise starting the browser fails.
# NOTE(review): recent Selenium 4 releases expect ``options=`` / ``service=``
# here instead of a positional path and ``chrome_options=`` -- confirm against
# the installed Selenium version.
option = webdriver.ChromeOptions()
driver = webdriver.Chrome('/Users/wupeiqi/drivers/chromedriver', chrome_options=option)

# 1. Open the target page in the controlled browser.
driver.get("https://dig.chouti.com/all/hot/recent/1")

# Elements are located with find_element(By.XPATH, ...) rather than the
# find_element_by_xpath helper: the generic form is available on both old and
# new Selenium releases, while the helper was removed from newer ones.

# 2. Locate the login button.
btn_login = driver.find_element(By.XPATH, '//*[@id="login-link-a"]')

# 3. Click it.
btn_login.click()

# 4. Locate the phone-number input.
input_user = driver.find_element(By.XPATH, '//*[@id="mobile"]')

# 5. Locate the password input.
input_pwd = driver.find_element(By.XPATH, '//*[@id="mbpwd"]')

# NOTE(review): the credentials below are hard-coded in the script; consider
# reading them from the environment instead.
# 6. Type the user name.
input_user.send_keys('13121758648')

# 7. Type the password.
input_pwd.send_keys('woshiniba')

# 8. Click the submit button of the login dialog.
input_submit = driver.find_element(
    By.XPATH,
    '//*[@id="footer-band"]/div[5]/div/div/div[1]/div[2]/div[4]/div[2]/div/span[1]')
input_submit.click()

print(driver.get_cookies())

# 9. Follow a news link (disabled in the original).
# news = driver.find_element(By.XPATH, '//*[@id="newsContent20646261"]/div[1]/a[1]')
# # news.click()
# driver.execute_script("arguments[0].click();", news)

# 10. Browser management (disabled in the original).
# driver.close()
三:破解路飛官網滑動驗證碼
peiqi老師爲我們帶來了精彩的講解,從__main__的主函數調用開始,先講了圖片的截取和距離的測算,接下來分析了怎麼模擬人類行爲的滑動過程,通過速度和加速度的控制實現,並且會故意製造匹配之後的小幅振動行爲,最後點擊確定就可以破解該驗證碼,重點是像素的選擇和速度的調節,感謝!
import os
import random
import shutil
import time

from PIL import Image
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

# Factor between the element coordinates Selenium reports and the pixels of
# the saved screenshot. The original hard-coded 2 both when cropping and when
# converting the measured distance back into a mouse offset.
# NOTE(review): presumably a 2x (HiDPI/Retina) display -- confirm for the
# machine this runs on.
SCREENSHOT_SCALE = 2


def get_snap(driver):
    """Save a screenshot of the page to ``full_snap.png`` and return it as a PIL image."""
    driver.save_screenshot('full_snap.png')
    page_snap_obj = Image.open('full_snap.png')
    return page_snap_obj


def get_image(driver):
    """Crop the captcha canvas out of a fresh page screenshot.

    The cropped picture is also written to ``code.png``.

    Returns:
        The cropped PIL image.
    """
    img = driver.find_element(By.CLASS_NAME, 'geetest_canvas_img')
    time.sleep(2)  # fixed pause kept from the original before reading geometry

    location = img.location
    size = img.size
    left = location['x']
    top = location['y']
    right = left + size['width']
    bottom = top + size['height']

    page_snap_obj = get_snap(driver)
    # Element geometry is scaled up to screenshot pixels before cropping.
    image_obj = page_snap_obj.crop((
        left * SCREENSHOT_SCALE,
        top * SCREENSHOT_SCALE,
        right * SCREENSHOT_SCALE,
        bottom * SCREENSHOT_SCALE,
    ))
    with open('code.png', 'wb') as f:
        image_obj.save(f, format='png')
    return image_obj


def get_distance(image1, image2):
    """Measure the horizontal offset between the first two differing regions.

    Every column in which at least one pixel differs by 70 or more on any of
    the first three channels is collected. The result is the distance (in
    image pixels) from the first differing column to the first differing
    column that is not part of the initial contiguous run.

    Args:
        image1: Reference image; its size drives the scan.
        image2: Image compared against ``image1``; indexed with the same
            coordinates.

    Returns:
        The offset in pixels as an int.

    Raises:
        ValueError: If the images do not differ, or if all differing columns
            form one contiguous run (no second region to measure to).
    """
    threshold = 70
    width, height = image1.size
    # load() is hoisted out of the loops; the original called it per pixel.
    pixels1 = image1.load()
    pixels2 = image2.load()

    diff_columns = []
    for x in range(width):
        for y in range(height):
            rgb1 = pixels1[x, y]
            rgb2 = pixels2[x, y]
            if any(abs(rgb1[c] - rgb2[c]) >= threshold for c in range(3)):
                # One differing pixel is enough to mark the column.
                diff_columns.append(x)
                break

    if not diff_columns:
        raise ValueError('images do not differ; no offset can be measured')

    first = diff_columns[0]
    for offset, column in enumerate(diff_columns):
        if column != first + offset:
            stop = column
            break
    else:
        # The original fell through with stop = 0 and returned a meaningless
        # negative width in this case.
        raise ValueError('only one differing region found; no offset can be measured')

    width = stop - first
    print(stop, first, width)
    return width


def get_tracks(distance):
    """Build the list of per-step x offsets for the drag.

    The distance is first padded by a random 0-5 px so the forward pass
    overshoots; a second pass then produces the steps back.

    Args:
        distance: Target offset to cover.

    Returns:
        A dict with ``'forward_tracks'`` (steps of the forward pass) and
        ``'back_tracks'`` (steps of the return pass, in reversed order); each
        step is ``round()`` of the computed displacement.
    """
    exceed_distance = random.randint(0, 5)
    distance += exceed_distance  # overshoot first, slide back afterwards

    # Forward pass: accelerate up to 3/5 of the way, then decelerate.
    v = 0
    t = 0.2
    forward_tracks = []
    current = 0
    mid = distance * 3 / 5
    while current < distance:
        if current < mid:
            a = random.randint(1, 3)
        else:
            a = -random.randint(1, 3)
        s = v * t + 0.5 * a * (t ** 2)
        v = v + a * t
        current += s
        forward_tracks.append(round(s))

    # Return pass: move back until the overshoot has been covered.
    v = 0
    t = 0.2
    back_tracks = []
    current = 0
    mid = distance * 4 / 5
    while abs(current) < exceed_distance:
        if current < mid:
            a = random.randint(1, 3)
        else:
            # The original called random.randint(-3, -5), an empty range that
            # raises ValueError whenever this branch runs (e.g. for a negative
            # padded distance). The bounds are now in valid order.
            a = -random.randint(-5, -3)
        s = -v * t - 0.5 * a * (t ** 2)
        v = v + a * t
        current += s
        back_tracks.append(round(s))

    return {'forward_tracks': forward_tracks, 'back_tracks': list(reversed(back_tracks))}


def crack(driver):
    """Run the slider-verification steps on the page currently loaded in ``driver``."""
    # 1. Click the button so the picture without the gap is shown.
    button = driver.find_element(By.XPATH, '//*[@id="embed-captcha"]/div/div[2]/div[1]/div[3]')
    button.click()

    # 2. Capture the picture without the gap.
    image1 = get_image(driver)

    # 3. Click the slider button so the picture with the gap is shown.
    button = driver.find_element(By.CLASS_NAME, 'geetest_slider_button')
    button.click()

    # 4. Capture the picture with the gap.
    image2 = get_image(driver)

    # 5. Compare the two pictures pixel by pixel to find the offset.
    distance = get_distance(image1, image2)
    print(distance)

    # 6. Turn the offset (screenshot pixels) into a list of mouse steps.
    tracks = get_tracks(int(distance / SCREENSHOT_SCALE))

    # 7. Press the slider, replay the forward steps, then the return steps.
    button = driver.find_element(By.CLASS_NAME, 'geetest_slider_button')
    ActionChains(driver).click_and_hold(button).perform()

    for track in tracks['forward_tracks']:
        ActionChains(driver).move_by_offset(xoffset=track, yoffset=0).perform()

    # Short pause after the overshoot before sliding back.
    time.sleep(0.5)
    for back_track in tracks['back_tracks']:
        ActionChains(driver).move_by_offset(xoffset=back_track, yoffset=0).perform()

    # Small back-and-forth wiggle around the final position.
    ActionChains(driver).move_by_offset(xoffset=3, yoffset=0).perform()
    ActionChains(driver).move_by_offset(xoffset=-3, yoffset=0).perform()

    # Hold briefly, then release the mouse button.
    time.sleep(0.5)
    ActionChains(driver).release().perform()


def login_luffy(username, password):
    """Open the luffycity login page, fill in the form and run ``crack``.

    Args:
        username: Account name typed into the first input.
        password: Password typed into the second input.
    """
    # NOTE(review): recent Selenium 4 releases expect ``service=`` instead of
    # a positional driver path -- confirm against the installed version.
    driver = webdriver.Chrome('/Users/wupeiqi/drivers/chromedriver')
    try:
        driver.set_window_size(960, 800)

        # 1. Fill in account name and password.
        driver.implicitly_wait(3)
        driver.get('https://www.luffycity.com/login')
        input_username = driver.find_element(
            By.XPATH, '//*[@id="router-view"]/div/div/div[2]/div[2]/input[1]')
        input_pwd = driver.find_element(
            By.XPATH, '//*[@id="router-view"]/div/div/div[2]/div[2]/input[2]')
        input_username.send_keys(username)
        input_pwd.send_keys(password)

        # 2. Run the slider verification.
        crack(driver)

        time.sleep(10)  # wait a while so the login can complete
    finally:
        # The original left this as ``pass`` and never released the browser
        # or the chromedriver process.
        driver.quit()


if __name__ == '__main__':
    # NOTE(review): credentials are hard-coded; consider reading them from
    # the environment.
    login_luffy(username='wupeiqi', password='123123123')
四:總結
通過selenium模擬人類單擊瀏覽器的行爲,破解滑動驗證碼,讓我又get到了爬蟲的一個本領。首先需要掌握selenium點擊行爲的一般模式,最後可以好好地參考peiqi老師的代碼,作爲模板用到以後的工作中,很有幫助,謝謝!下一步想再學學其他驗證碼的破解方式,多多益善!