java+phantomjs實現動態網頁抓取

1.下載地址:http://phantomjs.org/download.html

2.java代碼

/**
 * Runs the PhantomJS script against {@code url} and stores everything the
 * script prints on standard output in the {@code HTML} field.
 *
 * <p>{@code HTML} is reset to the empty string first and stays empty when
 * {@code url} is null/empty or when the process cannot be started or read.
 *
 * @param url address of the page to load; passed to the script as its
 *            single command-line argument
 */
public void getHtml(String url)
{
    HTML = "";
    if (url == null || url.isEmpty()) {
        // Nothing to fetch; do not spawn PhantomJS with a missing argument.
        return;
    }

    String jsPath = "C:\\phantomjs\\examples\\myjs.js";
    String exePath = "C:\\phantomjs\\bin\\phantomjs.exe";
    System.out.println(jsPath);
    System.out.println(exePath);

    // Pass every argument separately: a single command string is split on
    // whitespace, so a URL containing a space would be cut into several args.
    ProcessBuilder builder = new ProcessBuilder(exePath, jsPath, url);
    // Forward the child's stderr to ours so an unread pipe can never fill up
    // and stall the child while we are reading its stdout.
    builder.redirectError(ProcessBuilder.Redirect.INHERIT);

    Process p = null;
    try {
        p = builder.start();

        StringBuilder sbf = new StringBuilder();
        // PhantomJS writes its console output as UTF-8 by default, so decode
        // explicitly instead of relying on the platform charset.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(
                p.getInputStream(), java.nio.charset.StandardCharsets.UTF_8))) {
            String tmp;
            while ((tmp = br.readLine()) != null) {
                // Keep the line break: gluing lines together can merge tokens
                // that were only separated by a newline in the markup.
                sbf.append(tmp).append('\n');
            }
        }
        HTML = sbf.toString();

        // stdout is at EOF; reap the child so it does not linger.
        int exitCode = p.waitFor();
        if (exitCode != 0) {
            System.err.println("phantomjs exited with code " + exitCode);
        }
    } catch (IOException e) {
        e.printStackTrace();
    } catch (InterruptedException e) {
        // Restore the interrupt flag for callers further up the stack.
        Thread.currentThread().interrupt();
        e.printStackTrace();
    } finally {
        if (p != null) {
            // Releases the process's streams; harmless if it already exited.
            p.destroy();
        }
    }
}

 3.js代碼

   

// PhantomJS script: loads the URL given as the first command-line argument
// and prints the page's markup to standard output.
//
// Usage: phantomjs <this-script> <url>
//
// NOTE: `var` is used on purpose; PhantomJS's JavaScript engine is ES5-only.
var page = require('webpage').create();
var system = require('system');

// How long to let the page's own scripts run after the load event before the
// markup is captured (milliseconds).
var RENDER_DELAY_MS = 6000;

// Requests whose URL contains one of these fragments are aborted to save
// time on slow resources (e.g. Baidu ads).
var blockedUrlParts = ['baidu.com'];

// Skip images to speed up loading.
page.settings.loadImages = false;
// Give up on any single resource after 10 seconds.
page.settings.resourceTimeout = 10000;
// Viewport used for screenshots; has little use when no screenshot is taken.
page.viewportSize = { width: 1280, height: 800 };

page.onResourceRequested = function (requestData, networkRequest) {
  for (var i = 0; i < blockedUrlParts.length; i += 1) {
    if (requestData.url.indexOf(blockedUrlParts[i]) !== -1) {
      networkRequest.abort();
      return;
    }
  }
};

if (system.args.length < 2) {
  // Report on stderr so the message is not mistaken for page markup.
  system.stderr.writeLine('Usage: phantomjs <script> <url>');
  phantom.exit(1);
} else {
  var address = system.args[1];

  page.open(address, function (status) {
    if (status !== 'success') {
      console.log('FAIL to load the address');
      phantom.exit(1);
      return;
    }

    // Capture the markup only after the delay so dynamically generated
    // content is included, and exit only from inside the timer: exiting
    // right after scheduling it would cancel the wait entirely.
    setTimeout(function () {
      console.log(page.content);
      phantom.exit();
    }, RENDER_DELAY_MS);
  });
}
相關文章
相關標籤/搜索