抓jsoup_01_方案代碼

一、方案選擇:

  1.1、HttpClient庫 獲取 原始的 json數據

  1.2、JSON庫 取得 我們需要的HTML內容

  1.3、使用 jsoup 解析 我們取得的HTML內容

 

二、不直接使用 jsoup,原因:

  2.1、它會自動補全 HTML的頭和尾(<html/><body/>等),jsoup中沒有這個

    處理方法:手動指定 Parser.xmlParser()

  2.2、如果屬性 沒有用 2個雙引號包裹起來,它會將 這2個雙引號補全... 這個功能 在jsoup裏面無法關閉...

 

三、示例代碼:

  3.1、工具類

package z_utils;

import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;

/**
 * Small static helper around Apache HttpClient 4.x for issuing one-off
 * POST and GET requests and returning the response body as a string.
 *
 * <p>Each call creates its own {@link DefaultHttpClient} and shuts its
 * connection manager down before returning, so no connections are left
 * open regardless of the response status or of any exception thrown.
 */
public class TzHttpClient
{
    /**
     * Charset used to encode request bodies and as the fallback when the
     * server response does not declare one in its Content-Type header.
     */
    private static final String DEFAULT_CHARSET = "UTF-8";

    /**
     * Demo entry point: POSTs (without a body) to a sample URL and prints
     * whatever {@link #PostZ} returns.
     */
    public static void main(String[] args) throws Exception
    {
        String strRtn = PostZ(
            "http://ajax.mianbao99.com/vod-showlist-id-8-order-time-c-3719-p-2.html",
            null,
            true);
        System.out.println(strRtn);
    }

// ***

    /**
     * Sends an HTTP POST request, optionally with a JSON body.
     *
     * @param _strUrl         target URL
     * @param _strParam       request body sent as {@code application/json},
     *                        or {@code null} to send no body
     * @param _bNeedResponse  when {@code false} the response body is not read
     *                        and {@code null} is returned
     * @return the response body when the status is 200 and
     *         {@code _bNeedResponse} is {@code true}; otherwise {@code null}
     * @throws Exception if building or executing the request fails
     */
    @SuppressWarnings("deprecation")
    public static String PostZ(String _strUrl, String _strParam, boolean _bNeedResponse) throws Exception
    {
        DefaultHttpClient httpClient = new DefaultHttpClient();
        try
        {
            HttpPost method = new HttpPost(_strUrl);
            if (null != _strParam)
            {
                // Encode the body as UTF-8 so non-ASCII (e.g. Chinese) text survives.
                // Content-Encoding is deliberately NOT set: that header names a
                // compression coding (gzip, deflate, ...), not a character set.
                StringEntity entity = new StringEntity(_strParam, DEFAULT_CHARSET);
                entity.setContentType("application/json");
                method.setEntity(entity);
            }

            HttpResponse result = httpClient.execute(method);
            if (result.getStatusLine().getStatusCode() != HttpStatus.SC_OK)
                return null;
            if (! _bNeedResponse)
                return null;
            return readBody(result);
        }
        finally
        {
            // Releases the connection even when the body was never consumed.
            httpClient.getConnectionManager().shutdown();
        }
    }

    /**
     * Sends an HTTP GET request.
     *
     * @param _strUrl target URL
     * @return the response body when the status is 200; otherwise
     *         {@code null} (a failure message is printed to stdout)
     * @throws Exception if building or executing the request fails
     */
    @SuppressWarnings("deprecation")
    public static String GetZ(String _strUrl) throws Exception
    {
        DefaultHttpClient client = new DefaultHttpClient();
        try
        {
            HttpGet request = new HttpGet(_strUrl);
            HttpResponse response = client.execute(request);

            if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK)
                return readBody(response);

            System.out.println("get請求提交失敗:" + _strUrl);
            return null;
        }
        finally
        {
            // Releases the connection even when the body was never consumed.
            client.getConnectionManager().shutdown();
        }
    }

    /**
     * Reads the response entity as text, falling back to UTF-8 when the
     * response does not declare a charset.
     *
     * @return the body text, or {@code null} if the response has no entity
     */
    private static String readBody(HttpResponse response) throws Exception
    {
        if (response.getEntity() == null)
            return null;
        return EntityUtils.toString(response.getEntity(), DEFAULT_CHARSET);
    }
}

  3.2、測試 功能代碼

package test;

import org.jsoup.Connection;
///import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.*;
import org.jsoup.parser.Parser;

import net.sf.json.*;
import z_utils.TzHttpClient;

/**
 * Demo: fetches a JSON document over HTTP, extracts the HTML fragment
 * stored under its "ajaxtxt" key, parses that fragment with jsoup and
 * prints the parsed markup.
 */
public class Ttest01
{
    /** JSON key whose value is read as the HTML fragment. */
    private static final String AJAX_TEXT_KEY = "ajaxtxt";

    public static void main(String[] args) throws Exception
    {
        String strJson = TzHttpClient.GetZ("http://ajax.mianbao99.com/vod-showlist-id-8-order-time-c-3719-p-2.html");
        // GetZ returns null when the request does not succeed; there is
        // nothing to parse in that case.
        if (strJson == null)
            return;

        JSONObject jsonObj = JSONObject.fromObject(strJson);
        if (! jsonObj.containsKey(AJAX_TEXT_KEY))
            return;

        String strAjaxtxt = jsonObj.getString(AJAX_TEXT_KEY);
        // The XML parser is chosen explicitly instead of jsoup's default
        // HTML parser, which would wrap the fragment in <html>/<body>.
        Document doc = Jsoup.parse(strAjaxtxt, "", Parser.xmlParser());
        System.out.println(doc.html());
    }

}

 

四、

相關文章
相關標籤/搜索