項目有個需求是需要捕獲某個頁面的數據(後端完成)。由於之前用過 PhantomJS,所以就毫不猶豫地選擇了它。關於 PhantomJS 的介紹、安裝和簡單使用,百度很容易找到,這裏就不再贅述了。之後就開始大刀闊斧地寫起代碼來,興致沖沖地利用網上找到的截取某網頁的代碼(見附錄1)測試。
'use strict';

/*
 * PhantomJS script: open a single page and render it to a PNG file.
 *
 * Exit status: 0 when the page was opened and rendered, 1 when the page
 * could not be opened or rendering reported failure.
 */
var page = require('webpage').create(),
    system = require('system'),
    args = system.args,
    page_url = 'http://www.oschina.net/',
    filename = '../phantomjs_temp/capture0.png';

/**
 * Open `url` and, once PhantomJS reports the load as successful, render it.
 *
 * @param {string} url        Address of the page to capture.
 * @param {string} filename   Path of the image file to write.
 * @param {Function} [callback] Forwarded to onPageReady; never invoked here.
 */
function capture(url, filename, callback) {
    console.log("ready to capture");
    page.open(url, function (status) {
        if ("success" === status) {
            console.log("open page succeed");
            onPageReady(url, filename, callback);
        } else {
            console.log("open page failed");
            closePhantom(1);
        }
    });
}

/**
 * Render the loaded page to `filename` and shut PhantomJS down.
 *
 * @param {string} url        Unused; kept for signature compatibility.
 * @param {string} filename   Path of the image file to write.
 * @param {Function} [callback] Unused; kept for signature compatibility.
 */
function onPageReady(url, filename, callback) {
    // NOTE(review): page.render is expected to return false on failure;
    // anything else is treated as success so older builds keep working.
    var rendered = page.render(filename);
    closePhantom(rendered === false ? 1 : 0);
}

/**
 * Close the page and terminate the PhantomJS process.
 *
 * @param {number} [exitCode=0] Process exit status; pass a non-zero value
 *     to signal failure to the caller (shell script, backend job, ...).
 */
function closePhantom(exitCode) {
    console.log("page is closing...");
    page.close();
    console.log("phantom is closing...");
    phantom.exit(exitCode || 0);
}

capture(page_url, filename);
沒問題。然後發現了問題:我在 oschina 明明已經登錄了,爲什麼截圖裏是未登錄狀態呢(項目需求中要截圖的頁面也有權限驗證)?原因是 PhantomJS 是獨立的瀏覽器進程,不會共享本機瀏覽器的 cookie。查閱相關資料之後,找到了解決方案。 方案1:將用戶登錄的 cookie 加入到 PhantomJS 中(登錄 oschina 後打開瀏覽器控制檯,查看請求裏面的 cookie,其中有一條名爲 oscid),如下圖。代碼如下:
'use strict';

/*
 * PhantomJS script: capture a page as a logged-in user by injecting the
 * session cookie before the page is opened.
 *
 * Exit status: 0 when the page was opened and rendered, 1 when the page
 * could not be opened or rendering reported failure.
 */
var page = require('webpage').create(),
    system = require('system'),
    args = system.args,
    page_url = 'http://www.oschina.net/',
    filename = '../phantomjs_temp/capture1.png';

/**
 * Open `url` and, once PhantomJS reports the load as successful, render it.
 *
 * @param {string} url        Address of the page to capture.
 * @param {string} filename   Path of the image file to write.
 * @param {Function} [callback] Forwarded to onPageReady; never invoked here.
 */
function capture(url, filename, callback) {
    console.log("ready to capture");
    page.open(url, function (status) {
        if ("success" === status) {
            console.log("open page succeed");
            onPageReady(url, filename, callback);
        } else {
            console.log("open page failed");
            closePhantom(1);
        }
    });
}

/**
 * Render the loaded page to `filename` and shut PhantomJS down.
 *
 * @param {string} url        Unused; kept for signature compatibility.
 * @param {string} filename   Path of the image file to write.
 * @param {Function} [callback] Unused; kept for signature compatibility.
 */
function onPageReady(url, filename, callback) {
    // NOTE(review): page.render is expected to return false on failure;
    // anything else is treated as success so older builds keep working.
    var rendered = page.render(filename);
    closePhantom(rendered === false ? 1 : 0);
}

/**
 * Close the page and terminate the PhantomJS process.
 *
 * @param {number} [exitCode=0] Process exit status; pass a non-zero value
 *     to signal failure to the caller.
 */
function closePhantom(exitCode) {
    console.log("page is closing...");
    page.close();
    console.log("phantom is closing...");
    phantom.exit(exitCode || 0);
}

// Session cookie copied from a logged-in browser; it must be registered
// before page.open so that the very first request carries it.
var cookieAdded = phantom.addCookie({
    "name": "oscid",
    "value": "mljV7ERwRhP3eH62HnFisZP1qaXlr2txLKufSq%2FUuhCTXQq%2B1RKVm0vp96Iu7MfX6O9lOOYfQG3DmlglDvlk8YvI0DSaPefEGJtGLkSfdZQ%2F5qN340KTUg0PiaZwDvHaucuWHExhfuavuZfodZNJKtGWRFkZxL6V",
    "domain": 'www.oschina.net'
});
if (!cookieAdded) {
    // Keep going (best effort), but make the likely cause of a
    // logged-out screenshot visible in the output.
    console.log("add cookie failed, the page will be captured without login state");
}

capture(page_url, filename);
運行,bingo,完成。 方案2:打開 oschina 的登錄頁面,用 PhantomJS 模擬登錄過程,然後截圖,依然沒問題。
'use strict';

/*
 * PhantomJS script: log in through the site's login form, wait for the
 * login round-trip, then capture the target page with the session that the
 * login established.
 *
 * Exit status: 0 when the target page was opened and rendered, 1 when the
 * login page or the target page could not be opened or rendering failed.
 */
var page = require('webpage').create(),
    system = require('system'),
    args = system.args,
    page_url = 'http://www.oschina.net/',
    login_url = 'https://www.oschina.net/home/login?goto_page=http%3A%2F%2Fwww.oschina.net%2F',
    filename = '../phantomjs_temp/capture2.png',
    // Time allowed for the login request and redirect to finish (ms).
    login_wait_ms = 15000;

/**
 * Open the login page, fill in the credentials inside the page context and
 * submit the form; after `login_wait_ms` continue with print_cookies().
 */
function login() {
    page.open(login_url, function (status) {
        if ("success" !== status) {
            // Without this branch the script would never exit on failure.
            console.log("open login page failed");
            closePhantom(1);
            return;
        }
        // Runs inside the page: it cannot see variables of this script.
        page.evaluate(function () {
            document.querySelector("#userMail").value = 'your user name';
            document.querySelector("#userPassword").value = 'your password';
            document.querySelector(".btn-login").click();
        });
        // Pass the function itself; a string argument would be eval'ed.
        setTimeout(print_cookies, login_wait_ms);
    });
}

/**
 * Open `url` and, once PhantomJS reports the load as successful, render it.
 *
 * @param {string} url        Address of the page to capture.
 * @param {string} filename   Path of the image file to write.
 * @param {Function} [callback] Forwarded to onPageReady; never invoked here.
 */
function capture(url, filename, callback) {
    console.log("ready to capture");
    page.open(url, function (status) {
        if ("success" === status) {
            console.log("open page succeed");
            onPageReady(url, filename, callback);
        } else {
            console.log("open page failed");
            closePhantom(1);
        }
    });
}

/**
 * Log every cookie currently visible to the page (one JSON object per
 * line), then start the capture of `page_url`.
 */
function print_cookies() {
    console.log("running print_cookies");
    var cookies = page.cookies;
    // Indexed loop: for...in would also walk inherited/extra properties.
    for (var i = 0; i < cookies.length; i++) {
        console.log(JSON.stringify(cookies[i]));
    }
    capture(page_url, filename);
}

/**
 * Render the loaded page to `filename` and shut PhantomJS down.
 *
 * @param {string} url        Unused; kept for signature compatibility.
 * @param {string} filename   Path of the image file to write.
 * @param {Function} [callback] Unused; kept for signature compatibility.
 */
function onPageReady(url, filename, callback) {
    // NOTE(review): page.render is expected to return false on failure;
    // anything else is treated as success so older builds keep working.
    var rendered = page.render(filename);
    closePhantom(rendered === false ? 1 : 0);
}

/**
 * Close the page and terminate the PhantomJS process.
 *
 * @param {number} [exitCode=0] Process exit status; pass a non-zero value
 *     to signal failure to the caller.
 */
function closePhantom(exitCode) {
    console.log("page is closing...");
    page.close();
    console.log("phantom is closing...");
    phantom.exit(exitCode || 0);
}

login();
到這裏已經完成了一大部分工作了,長長地出口氣吧~
但是我的項目裏還有另一個問題:異步請求特別多。截圖的時候雖然頁面加載完成了,但是部分異步請求的數據還沒返回,沒有渲染到頁面裏,所以截圖裏會有部分區域仍處於 loading 狀態。
找了很多資料,有個拙劣的解決方法,就是在截圖前再 wait 一段時間(自己根據實際情況約定,幾秒到幾分鐘都可以)。但是這明顯不合理:時間定得太短,可能還是有上面的問題;定得太長,可能頁面早就等着你截圖了,你還在那傻傻地 wait,多不合適啊。最合適的時機莫過於頁面所有資源和元素都完成了返回和渲染的那一刻,這個時刻怎麼得到呢?
這時候就發現百度好坑:搜索的結果全是重複的,還不能解決這個問題。所以我就把目光放到了 QQ 羣裏,這裏感謝 highchart 中文站長的幫助,他告訴我可以用 document.readyState 是否等於 'complete' 來判斷,測試一下確實可以。然而多次測試後發現之前的成功只是巧合:當我在服務器端給異步請求的處理方法增加 sleep 阻塞後,這部分就又回到了 loading 狀態(readyState 爲 'complete' 只表示文檔及其靜態資源加載完成,並不會等待之後才發起的異步請求),氣氣氣氣氣。。。
多次翻閱資料,終於在 stackoverflow 上面找到個類似的問題: http://stackoverflow.com/questions/11340038/phantomjs-not-waiting-for-full-page-load
最後 Dave 的方法解決了我的問題,就是用 page.onResourceRequested 和 page.onResourceReceived:前者是 page 發送請求時執行的 callback,後者是 page 接收到響應時執行的 callback。API:http://phantomjs.org/api/webpage/
每次 requested 的時候增加一個請求,每次 received 的時候減少一個請求,當所有請求都得到響應了,那麼它們的差值不就是 0 了嗎?
懷着忐忑的心情測試了一下,oh,yeah!終於解決了,代碼如下:
/*
 * PhantomJS script: capture a page only after every network request issued
 * by the page (including asynchronous ones) has been answered.
 *
 * Every request id is recorded when the request is sent and removed once
 * its response has completed, failed or timed out. The page is rendered as
 * soon as a periodic check finds no outstanding request ids.
 *
 * Exit status: 0 when the page was rendered, 1 when the page could not be
 * opened, requests were still pending after all checks, or rendering failed.
 */
var page = require('webpage').create(),
    system = require('system'),
    args = system.args,
    page_url = 'url***********',
    filename = '../phantomjs_temp/' + Math.random() + '.png',
    countTotal = 1000,   // maximum number of readiness checks
    seconds = 1000,      // interval between readiness checks (ms)
    settleDelay = 100,   // delay before a finished request is forgotten (ms)
    requestIDArr = [];   // ids of requests that have not been answered yet

/**
 * Open `url` and start polling for the moment all requests are answered.
 *
 * @param {string} url        Address of the page to capture.
 * @param {string} filename   Path of the image file to write.
 * @param {Function} [callback] Forwarded down the chain; never invoked here.
 */
function capture(url, filename, callback) {
    console.log("ready to capture");
    page.open(url, function (status) {
        if ("success" === status) {
            console.log("open page succeed");
            checkReadyState(url, filename, callback);
        } else {
            console.log("open page failed");
            closePhantom(1);
        }
    });
}

/**
 * Wait `seconds` ms, then render if no request is outstanding; otherwise
 * schedule another check, giving up once `countTotal` checks were exceeded.
 *
 * @param {string} url        Forwarded to onPageReady.
 * @param {string} filename   Forwarded to onPageReady.
 * @param {Function} [callback] Forwarded to onPageReady.
 * @param {number} [count=0]  Number of checks already performed.
 */
function checkReadyState(url, filename, callback, count) {
    var attempt = count || 0;
    console.log("this is the " + attempt + "time check ready state");
    setTimeout(function () {
        if (requestIDArr.length === 0) {
            onPageReady(url, filename, callback);
            return;
        }
        console.log("still waiting for resoinse id is " + requestIDArr.join(","));
        if (attempt > countTotal) {
            console.log("has tryed " + (countTotal * seconds / 1000) + " seconds,but still failed get correct data");
            closePhantom(1);
            return;
        }
        checkReadyState(url, filename, callback, attempt + 1);
    }, seconds);
}

/**
 * The page is fully loaded (data from asynchronous requests has been
 * rendered as well): size the capture area to the ".main" element when it
 * exists, render to `filename` and shut PhantomJS down.
 *
 * @param {string} url        Unused; kept for signature compatibility.
 * @param {string} filename   Path of the image file to write.
 * @param {Function} [callback] Unused; kept for signature compatibility.
 */
function onPageReady(url, filename, callback) {
    // Runs inside the page; returns null when there is no ".main" element
    // so that the configured sizes are kept instead of throwing.
    var scroll = page.evaluate(function () {
        var mainDiv = document.querySelector(".main");
        if (!mainDiv) {
            return null;
        }
        return {"height": mainDiv.scrollHeight, "width": mainDiv.scrollWidth};
    }) || {};

    // Assign whole objects: writing a single property of page.clipRect or
    // page.viewportSize changes only a copy and is not applied to the page.
    var clip = page.clipRect,
        viewport = page.viewportSize;
    page.clipRect = {
        top: clip.top,
        left: clip.left,
        width: scroll.width || clip.width,
        height: scroll.height || clip.height
    };
    page.viewportSize = {
        width: scroll.width || viewport.width,
        height: viewport.height
    };

    // NOTE(review): page.render is expected to return false on failure;
    // anything else is treated as success so older builds keep working.
    var rendered = page.render(filename);
    closePhantom(rendered === false ? 1 : 0);
}

/**
 * Close the page and terminate the PhantomJS process.
 *
 * @param {number} [exitCode=0] Process exit status; pass a non-zero value
 *     to signal failure to the caller.
 */
function closePhantom(exitCode) {
    console.log("page is closing...");
    page.close();
    console.log("phantom is closing...");
    phantom.exit(exitCode || 0);
}

page.viewportSize = { width: 400, height: 550 };
page.clipRect = { top: 95, left: 191, width: 1100, height: 2200 };

// A request was sent: remember its id until it is answered.
page.onResourceRequested = function (request) {
    requestIDArr.push(request.id);
    console.log("add is ", request.id);
};

// This callback fires once per stage ("start", then "end") of a response;
// only the "end" stage means the response has been fully received.
page.onResourceReceived = function (response) {
    if (response.stage === 'end') {
        spliceRequestID(response.id);
    }
};

// Failed and timed-out requests are finished too; clearing them here is
// harmless if an "end" stage also arrives, because removal is idempotent.
page.onResourceError = function (resourceError) {
    spliceRequestID(resourceError.id);
};
page.onResourceTimeout = function (request) {
    spliceRequestID(request.id);
};

/**
 * Forget request `id` after `settleDelay` ms. Unknown or already removed
 * ids are ignored, so the function may be called more than once per id.
 *
 * The short delay is kept from the original implementation; presumably it
 * gives the page a moment to issue follow-up requests before the pending
 * list can be observed empty.
 *
 * @param {number} id Request id as reported by the resource callbacks.
 */
function spliceRequestID(id) {
    setTimeout(function () {
        var index = requestIDArr.indexOf(id);
        if (index >= 0) {
            requestIDArr.splice(index, 1);
            console.log("delete is ", id);
        }
    }, settleDelay);
}

phantom.addCookie({"name":"JSESSIONID","value":"00AF0CF1FB333A5268A9CD5C8FF0487A","domain":'192.168.12.35','path':'/local_adreport/'});
capture(page_url, filename);
至此,整個探究就結束了。可能後面還會遇到其他問題,但是一樣需要耐心解決。