以前寫過關於 puppeteer 的相關文章:
puppeteer 初探
沒想到你是這樣的 SSR
前一段時間,LZ又接到一個需求,要爬取某快遞公司網站的訂單數據,起初以爲不就是爬一下數據嘛,雖然nodejs玩的不是特別溜,但爬一些簡單數據還是難不倒我這種戰五渣的。
當我打開網站,輸入數據,準備來一波頁面結構分析的時候,忽然間跳出來一個滑塊驗證碼。臥槽......
WTF,你讓我爬個鳥啊.....
盯着滑塊驗證碼瞅了兩天,終於我得出一個結論:滑塊驗證碼阻止了人類文明的進步!
每天早上產品色笑眯眯來問我進度的時候,我的心裏都是崩潰的
難受歸難受,但業務還是要做的。最後,我想到了之前用puppeteer開發的模擬cas(單點登錄)功能——它用來解決我司某些應用在開發、測試環境下的自動登錄問題。現在我就以一種情況爲例,來看下怎麼用node+puppeteer高效地破解滑塊驗證碼。
以前有一兄弟在掘金上寫過用puppeteer破解滑塊驗證碼, 接下來咱們就用一些另外的思路去破解
這裏我們也以前端網爲例:
const puppeteer = require("puppeteer");
const fs = require("fs");
const path = require("path");
const pixels = require("image-pixels");
const resemble = require("resemblejs");
// Puppeteer page handle; assigned inside run() and read by its nested helpers.
let page = null;
// File that receives the PNG exported from the `.geetest_canvas_bg` canvas.
const bgImg = path.resolve(__dirname, "bg.png");
// File that receives the PNG exported from the `.geetest_canvas_fullbg` canvas.
const fullbgImg = path.resolve(__dirname, "fullbg.png");
/**
 * Opens the qdfuns.com login dialog, triggers the Geetest slider captcha,
 * measures the gap position by diffing the two captcha canvases and drags the
 * slider there. A rejected attempt refreshes the puzzle and tries again, up
 * to a fixed number of attempts.
 *
 * Uses the module-level `puppeteer`, `fs`, `path`, `pixels`, `resemble`,
 * `bgImg` and `fullbgImg`, and assigns the module-level `page`.
 * The browser window is left open when the function returns.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the diff image cannot be produced, no gap is found in
 *   the scanned region, or the slider button cannot be located.
 */
async function run() {
  // Row stride (in pixels) used to index the RGBA buffer of the diff image.
  const CANVAS_WIDTH = 260;
  // Extra pixels added to the measured distance on the final mouse move.
  const OVERSHOOT = 30;
  // Upper bound on drag attempts so a persistently failing solve terminates.
  const MAX_ATTEMPTS = 5;
  // Result-box fragments that mean "try again".
  // NOTE(review): the page presumably renders Simplified Chinese; both script
  // variants are matched because the literals may have been converted.
  const FAILURE_HINTS = [
    "怪物吃了拼圖",
    "怪物吃了拼图",
    "拖動滑塊將懸浮圖像正確拼合",
    "拖动滑块将悬浮图像正确拼合"
  ];
  // Result-box fragments that mean the captcha was accepted.
  const SUCCESS_HINTS = ["速度超過", "速度超过"];
  const diffImg = path.resolve(__dirname, `diff.png`);

  // Plain timer-based pause; does not depend on any Puppeteer wait helper.
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Strips the data-URL header and decodes the base64 payload.
  const decodeDataUrl = (dataUrl) =>
    Buffer.from(dataUrl.replace(/^data:image\/\w+;base64,/, ""), "base64");

  // Compares the two saved canvases and resolves with the diff image buffer
  // only once resemble has actually finished.
  const buildDiff = () =>
    new Promise((resolve, reject) => {
      resemble(bgImg)
        .compareTo(fullbgImg)
        .ignoreColors()
        .onComplete((result) => {
          if (result.error) {
            reject(new Error(`resemble comparison failed: ${result.error}`));
            return;
          }
          try {
            resolve(result.getBuffer());
          } catch (err) {
            reject(err);
          }
        });
    });

  // Measures how far the slider has to travel.
  async function getDistance() {
    // Export both captcha canvases from the page.
    const { bg, fullbg } = await page.evaluate(() => {
      const fullbg = document.querySelector(".geetest_canvas_fullbg");
      const bg = document.querySelector(".geetest_canvas_bg");
      return {
        bg: bg.toDataURL(),
        fullbg: fullbg.toDataURL()
      };
    });
    fs.writeFileSync(bgImg, decodeDataUrl(bg));
    fs.writeFileSync(fullbgImg, decodeDataUrl(fullbg));

    // Diff the gap image against the full background; the file must exist
    // before it is read back, so the comparison is awaited.
    fs.writeFileSync(diffImg, await buildDiff());
    const { data } = await pixels(diffImg, {
      cache: false
    });

    // For each scanned row record the first column holding a pure magenta
    // pixel (R=255, G=0, B=255) — presumably resemble's diff highlight
    // color; confirm against the resemble output settings.
    const columns = [];
    for (let row = 10; row < 150; row++) {
      for (let col = 80; col < 220; col++) {
        const p = (CANVAS_WIDTH * row + col) << 2;
        if (data[p] === 255 && data[p + 1] === 0 && data[p + 2] === 255) {
          columns.push(col);
          break;
        }
      }
    }
    if (columns.length === 0) {
      // Math.min() of nothing is Infinity, which is not a usable distance.
      throw new Error("No gap pixels found in the diff image");
    }
    // The left-most marked column is taken as the distance to slide.
    return Math.min(...columns);
  }

  const browser = await puppeteer.launch({
    headless: false
  });
  page = await browser.newPage();
  // Open the site and bring up the captcha.
  await page.goto("https://www.qdfuns.com/");
  await page.waitForSelector(".hand");
  await page.click("a[data-type=login]");
  const geetest_btn = ".geetest_btn";
  await page.waitForSelector(geetest_btn);
  await page.click(geetest_btn);
  await sleep(1000);

  let distance = await getDistance();
  const button = await page.$(".geetest_slider_button");
  const box = button ? await button.boundingBox() : null;
  if (!box) {
    throw new Error("Slider button .geetest_slider_button not found or not visible");
  }
  const axleX = Math.floor(box.x + box.width / 2);
  const axleY = Math.floor(box.y + box.height / 2);

  // Drags the slider in several uneven legs and returns the result-box text.
  async function btnSlider(distance) {
    // [x offset from the button's left edge, mouse steps, pause afterwards]
    const legs = [
      [distance / 4, 20, 200],
      [distance / 3, 18, 350],
      [distance / 2, 15, 400],
      [(distance / 3) * 2, 15, 350],
      [(distance / 4) * 3, 10, 350],
      [distance + OVERSHOOT, 10, 300]
    ];
    await page.mouse.move(axleX, axleY);
    await page.mouse.down();
    await sleep(200);
    for (const [offset, steps, pause] of legs) {
      await page.mouse.move(box.x + offset, axleY, { steps });
      await sleep(pause);
    }
    await page.mouse.up();
    await sleep(1000);
    return page.evaluate(() => {
      return document.querySelector(".geetest_result_box").innerText;
    });
  }

  for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
    const text = await btnSlider(distance);
    console.log(text);
    if (!text) {
      return;
    }
    const failed = FAILURE_HINTS.some((hint) => text.includes(hint));
    if (!failed) {
      if (SUCCESS_HINTS.some((hint) => text.includes(hint))) {
        console.log("success");
      }
      return;
    }
    if (attempt === MAX_ATTEMPTS) {
      console.log(`giving up after ${MAX_ATTEMPTS} attempts`);
      return;
    }
    // On failure request a new puzzle and measure it again.
    await sleep(2000);
    await page.click(".geetest_refresh_1");
    await sleep(1000);
    distance = await getDistance();
  }
}
// Start the demo; report a failed run instead of leaving the rejection unhandled.
run().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
複製代碼
執行該程序,控制檯輸出如下(運氣好的話,可能一次就通過了,具體要看中間的處理過程怎麼優化求解)
這裏面,須要注意如下幾點
缺口圖中存在干擾缺口,resemble在比對的時候會得到兩個缺口,目前還沒有一個很好的辦法來確定到底哪一個缺口是我們所需要的(下面我們會提到一個針對該問題的方法來避開該干擾項)
滑動的時候須要控制下滑動速度,具體怎麼個滑動法,那就仁者見仁智者見智了
你覺得這樣就結束了
很多情況下滑塊驗證碼並不會給我們完整的背景圖,這時候我們該怎麼有效地去定位缺口呢?在這裏我們可以使用gm把背景圖片模糊一下,然後再用resemblejs
去比對這兩張圖片。但是此時圖片會有很多地方比對出不同,這時我們可以獲取小滑塊圖片距離父元素的位置,藉此來減小像素比對範圍(這可以有效解決我們上面所提到的干擾項問題)
const puppeteer = require("puppeteer");
const fs = require("fs");
const path = require("path");
const pixels = require("image-pixels");
const resemble = require("resemblejs");
const gm = require("gm");
// Puppeteer page handle; assigned inside run() and read by its nested helpers.
let page = null;
// File that receives the PNG exported from the `.td-bg-img` element.
const bgImg = path.resolve(__dirname, "bg.png");
// Output file of the gm blur pass over bgImg.
const bgBlurImg = path.resolve(__dirname, "bgBlur.png");
// Output file of the resemble comparison between bgImg and bgBlurImg.
const bgDiffImg = path.resolve(__dirname, "bgDiff.png");
/**
 * Opens the Tongdun sliding-puzzle demo, exports the captcha background,
 * diffs it against a blurred copy of itself to find the gap, and drags the
 * slider by the measured distance. There is no retry after a failed attempt.
 *
 * Uses the module-level `puppeteer`, `fs`, `pixels`, `resemble`, `gm`,
 * `bgImg`, `bgBlurImg`, `bgDiffImg` and `getMoreNum`, and assigns the
 * module-level `page`. The browser window is left open on return.
 *
 * @returns {Promise<void>}
 * @throws {Error} When blurring or diffing fails, no gap is found in the
 *   scanned band, or the slider trigger cannot be located.
 */
async function run() {
  // Row stride (in pixels) used to index the RGBA buffer of the diff image.
  const IMAGE_WIDTH = 320;
  // Number of rows scanned below the slider piece's top offset.
  const BAND_HEIGHT = 44;
  // Extra pixels added to the measured distance on the final mouse move.
  const OVERSHOOT = 20;

  // Plain timer-based pause; does not depend on any Puppeteer wait helper.
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Writes a blurred copy of bgImg and resolves only after gm has finished.
  const blurBackground = () =>
    new Promise((resolve, reject) => {
      gm(bgImg)
        .blur(1)
        .write(bgBlurImg, function(err) {
          if (err) {
            reject(err);
            return;
          }
          console.log("done");
          resolve();
        });
    });

  // Compares the original with its blurred copy and resolves with the diff
  // image buffer once resemble has finished.
  const buildDiff = () =>
    new Promise((resolve, reject) => {
      resemble(bgImg)
        .compareTo(bgBlurImg)
        .ignoreColors()
        .onComplete((result) => {
          if (result.error) {
            reject(new Error(`resemble comparison failed: ${result.error}`));
            return;
          }
          try {
            resolve(result.getBuffer());
          } catch (err) {
            reject(err);
          }
        });
    });

  const browser = await puppeteer.launch({
    headless: false
  });
  page = await browser.newPage();
  await page.goto(
    "https://x.tongdun.cn/onlineExperience/slidingPuzzle?source=baidu&plan=%E5%8F%8D%E6%AC%BA%E8%AF%88&unit=%E6%99%BA%E8%83%BD%E9%AA%8C%E8%AF%81&keyword=%E6%99%BA%E8%83%BD%E9%AA%8C%E8%AF%81%E7%A0%81&e_creative=24659987438&e_adposition=cl1&e_keywordid=101045415224&e_keywordid2=101045415224&audience=236369"
  );
  await page.waitForSelector("#loginBtn");
  await page.click("#loginBtn");
  const slidetrigger = ".td-pop-slidetrigger";
  await page.waitForSelector(slidetrigger);
  await page.click(slidetrigger);
  await sleep(1000);
  const slideIdentity = ".td-pop-slide-identity";
  await page.waitForSelector(slideIdentity);
  // Top offset of the small slider piece; used to narrow the rows compared.
  const top = await page.evaluate(() => {
    const identity = document.querySelector(".td-pop-slide-identity");
    return identity.offsetTop;
  });

  // Measures how far the slider has to travel.
  async function getDistance() {
    // Export the background image that contains the gap.
    const { bg } = await page.evaluate(() => {
      const bg = document.querySelector(".td-bg-img");
      return {
        bg: bg.toDataURL()
      };
    });
    const bgDataBuffer = Buffer.from(
      bg.replace(/^data:image\/\w+;base64,/, ""),
      "base64"
    );
    fs.writeFileSync(bgImg, bgDataBuffer);

    // Each stage reads the previous stage's file, so they must run in order.
    await blurBackground();
    fs.writeFileSync(bgDiffImg, await buildDiff());
    const { data } = await pixels(bgDiffImg, {
      cache: false
    });

    // Within the slider's band, record for each row the first column holding
    // a pure magenta pixel (R=255, G=0, B=255) — presumably resemble's diff
    // highlight color; confirm against the resemble output settings.
    const columns = [];
    for (let row = top; row < top + BAND_HEIGHT; row++) {
      for (let col = 60; col < 320; col++) {
        const p = (IMAGE_WIDTH * row + col) << 2;
        if (data[p] === 255 && data[p + 1] === 0 && data[p + 2] === 255) {
          columns.push(col);
          break;
        }
      }
    }
    // The most frequent first-hit column is taken as the gap position.
    const { maxStr } = getMoreNum(columns);
    if (maxStr === undefined) {
      // Number(undefined) is NaN, which is not a usable distance.
      throw new Error("No gap pixels found in the diff image");
    }
    return Number(maxStr);
  }

  const distance = await getDistance();
  const button = await page.$(slidetrigger);
  const box = button ? await button.boundingBox() : null;
  if (!box) {
    throw new Error(`Slider trigger ${slidetrigger} not found or not visible`);
  }
  const axleX = Math.floor(box.x + box.width / 2);
  const axleY = Math.floor(box.y + box.height / 2);
  console.log(distance, "distance");
  console.log(box.x + distance);
  await btnSlider(distance);

  // Drags the slider in several uneven legs.
  async function btnSlider(distance) {
    // [x offset from the trigger's left edge, mouse steps, pause afterwards]
    const legs = [
      [distance / 4, 20, 200],
      [distance / 3, 18, 350],
      [distance / 2, 15, 400],
      [(distance / 3) * 2, 15, 350],
      [(distance / 4) * 3, 10, 350],
      [distance + OVERSHOOT, 10, 300]
    ];
    await page.mouse.move(axleX, axleY);
    await page.mouse.down();
    await sleep(200);
    for (const [offset, steps, pause] of legs) {
      await page.mouse.move(box.x + offset, axleY, { steps });
      await sleep(pause);
    }
    await page.mouse.up();
    await sleep(1000);
  }
}
// Start the demo; report a failed run instead of leaving the rejection unhandled.
run().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
/**
 * Finds the most frequent value in `arr`.
 *
 * Values are tallied by their property-key (string) form, so `1` and `"1"`
 * count as the same value. On a tie the first key in object key order wins,
 * which for non-negative integer values is the smallest one.
 *
 * @param {Array<number|string>} arr - Values to tally.
 * @returns {{max: number, maxStr: (string|undefined)}} `max` is the highest
 *   occurrence count (0 for an empty array); `maxStr` is the winning value
 *   as a string, or `undefined` when `arr` is empty.
 */
function getMoreNum(arr) {
  // Null-prototype object: keeps object key ordering without inherited keys.
  const counts = Object.create(null);
  for (const value of arr) {
    counts[value] = (counts[value] || 0) + 1;
  }

  let max = 0;
  let maxStr;
  for (const [key, count] of Object.entries(counts)) {
    // Strict comparison keeps the first key among equally frequent values.
    if (count > max) {
      max = count;
      maxStr = key;
    }
  }
  return { max, maxStr };
}
複製代碼
該示例沒添加錯誤以後重滑邏輯
此種方法存在的問題
自己模糊化背景圖片再進行像素比較,成功率較低,需優化(也可以對比對後的圖片通過其灰度值來鎖定區域)
以上,我們介紹了兩種破解滑塊驗證碼的方法。此外,LZ還嘗試了使用圖片二值化方法來進行圖片缺口的定位,該方法的成功率遠高於第二種方法,具體實現方法就不寫了,讀者可以自行探索哈。
示例代碼都可在 github查看