HttpClient+jsoup登陸+解析 163郵箱

找了幾個,只有這個靠譜,用的是 HttpClient 4,另外還需要 commons-lang 和 jsoup 包(代碼中的日誌還用到了 slf4j)。

http://jsoup.org/
 

 

 

http://www.oschina.net/code/snippet_128625_12592?p=2

 

————————————————————————————————————————————————————————————

 

如題:
只用 jsoup 解析頁面非常方便,但是用 jsoup 做登陸就比較麻煩,反正我不知道怎麼做。
HttpClient 做登陸比較方便,所以用 HttpClient 模擬登陸獲取 html 內容,再用 jsoup 做解析,是一個非常完美的組合。
把代碼裡的帳號替換成自己的 163 郵箱試一下吧。

 

HttpClientHelper 封裝

 

 

import java.io.IOException;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;

import org.apache.commons.lang.StringUtils;
import org.apache.http.Header;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.conn.ClientConnectionManager;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.scheme.SchemeRegistry;
import org.apache.http.conn.ssl.SSLSocketFactory;
import org.apache.http.cookie.Cookie;
import org.apache.http.impl.client.BasicCookieStore;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicHeader;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.protocol.BasicHttpContext;
import org.apache.http.protocol.HttpContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * HttpClient 封裝
 * 
 * 
@author  bangis.wangdf
 
*/
public  class HttpClientHelper {

     private  static Logger    LOG              = LoggerFactory.getLogger(HttpClientHelper. class);
     private HttpClient       httpclient       =  new DefaultHttpClient();
     private HttpContext      localContext     =  new BasicHttpContext();
     private BasicCookieStore basicCookieStore =  new BasicCookieStore();                           //  cookie存儲用來完成登陸後記錄相關信息

     private  int              TIME_OUT         = 3;                                               //  鏈接超時時間

     public HttpClientHelper() {
        instance();
    }

     /**
     * 啓用cookie存儲
     
*/
     private  void instance() {
        httpclient.getParams().setIntParameter("http.socket.timeout", TIME_OUT * 1000);
        localContext.setAttribute("http.cookie-store", basicCookieStore); //  Cookie存儲
    }

     /**
     * 
@param  ssl boolean=true 支持https網址,false同默認構造
     
*/
     public HttpClientHelper( boolean ssl) {
        instance();
         if (ssl) {
             try {
                X509TrustManager tm =  new X509TrustManager() {

                     public  void checkClientTrusted(X509Certificate[] xcs, String string)  throws CertificateException {
                    }

                     public  void checkServerTrusted(X509Certificate[] xcs, String string)  throws CertificateException {
                    }

                     public X509Certificate[] getAcceptedIssuers() {
                         return  null;
                    }
                };
                SSLContext ctx = SSLContext.getInstance("TLS");
                ctx.init( nullnew TrustManager[] { tm },  null);
                SSLSocketFactory ssf =  new SSLSocketFactory(ctx);
                ClientConnectionManager ccm = httpclient.getConnectionManager();
                SchemeRegistry sr = ccm.getSchemeRegistry();
                sr.register( new Scheme("https", ssf, 443));
            }  catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

     /**
     * 
@param  url
     * 
@param  headers 指定headers
     * 
@return
     
*/
     public HttpResult get(String url, Header... headers) {
        HttpResponse response;
        HttpGet httpget =  new HttpGet(url);
         if (headers !=  null) {
             for (Header h : headers) {
                httpget.addHeader(h);
            }
        }  else { //  如不指定則使用默認
            Header header =  new BasicHeader(
                                            "User-Agent",
                                            "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1;  .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; InfoPath.2)");
            httpget.addHeader(header);
        }
        HttpResult httpResult = HttpResult.empty();
         try {
            response = httpclient.execute(httpget, localContext);
            httpResult =  new HttpResult(localContext, response);
        }  catch (IOException e) {
            LOG.error(" get ", e);
            httpget.abort();
        }
         return httpResult;
    }

     public HttpResult post(String url, Map<String, String> data, Header... headers) {
        HttpResponse response;
        HttpPost httppost =  new HttpPost(url);
        String contentType =  null;
         if (headers !=  null) {
             int size = headers.length;
             for ( int i = 0; i < size; ++i) {
                Header h = (Header) headers[i];
                 if (!(h.getName().startsWith("$x-param"))) {
                    httppost.addHeader(h);
                }
                 if ("Content-Type".equalsIgnoreCase(h.getName())) {
                    contentType = h.getValue();
                }
            }

        }
         if (contentType !=  null) {
            httppost.setHeader("Content-Type", contentType);
        }  else  if (data !=  null) {
            httppost.setHeader("Content-Type", "application/x-www-form-urlencoded");
        }

        List<NameValuePair> formParams =  new ArrayList<NameValuePair>();
         for (String key : data.keySet()) {
            formParams.add( new BasicNameValuePair(key, (String) data.get(key)));
        }
        HttpResult httpResult = HttpResult.empty();
         try {
            UrlEncodedFormEntity entity =  new UrlEncodedFormEntity(formParams, "UTF-8");
            httppost.setEntity(entity);
            response = httpclient.execute(httppost, localContext);
            httpResult =  new HttpResult(localContext, response);
        }  catch (IOException e) {
            LOG.error(" post ", e);
            httppost.abort();
        }  finally {
        }
         return httpResult;
    }

     public String getCookie(String name, String... domain) {
        String dm = "";
         if (domain !=  null && domain.length >= 1) {
            dm = domain[0];
        }
         for (Cookie c : basicCookieStore.getCookies()) {
             if (StringUtils.equals(name, c.getName()) && StringUtils.equals(dm, c.getDomain())) {
                 return c.getValue();
            }
        }
         return  null;
    }

     public  void pringCookieAll() {
         for (Cookie c : basicCookieStore.getCookies()) {
            System.out.println(c);
        }
    }
}

 

對HttpClient返回的結果進一步封裝 

 

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import java.util.zip.InflaterInputStream;

import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;

import org.apache.commons.lang.StringUtils;
import org.apache.http.Header;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.conn.ClientConnectionManager;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.scheme.SchemeRegistry;
import org.apache.http.conn.ssl.SSLSocketFactory;
import org.apache.http.cookie.Cookie;
import org.apache.http.impl.client.BasicCookieStore;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicHeader;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.protocol.BasicHttpContext;
import org.apache.http.protocol.HttpContext;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * 對HttpClient返回的結果進一步封裝
 * 
@author  bangis.wangdf
 *
 
*/
public  class HttpResult {
    
     private  static Logger LOG = LoggerFactory.getLogger(HttpResult. class);
    
     private  static Pattern headerCharsetPattern = Pattern.compile(
            "charset=((gb2312)|(gbk)|(utf-8))", 2);
     private  static Pattern pattern = Pattern
            .compile(
                    "<meta[^>]*content=(['\"])?[^>]*charset=((gb2312)|(gbk)|(utf-8))\\1[^>]*>",
                    2);
     private String headerCharset;
     private String headerContentType;
     private String headerContentEncoding;
     private List<Header> headers;
     private String metaCharset;
     private  byte[] response;
     private String responseUrl;
     private  int statuCode = -1;
     private  static  final  int BUFFER_SIZE = 4096;

     public  static HttpResult empty() {
         return  new HttpResult();
    }

     public String getHeaderCharset() {
         return  this.headerCharset;
    }

     public String getHeaderContentType() {
         return  this.headerContentType;
    }

     public  final List<Header> getHeaders() {
         return  this.headers;
    }

     public String getHtml() {
         try {
             return getText();
        }  catch (UnsupportedEncodingException e) {
            LOG.error("[AGDS-SPIDER]" + e.getMessage(), e);
        }
         return "";
    }
    
     public String getHtml(String encoding) {
         try {
             return getText(encoding);
        }  catch (UnsupportedEncodingException e) {
            LOG.error("[AGDS-SPIDER]" + e.getMessage(), e);
        }
         return "";
    }

     public String getMetaCharset() {
         return  this.metaCharset;
    }

     public  byte[] getResponse() {
         return Arrays.copyOf( this.response,  this.response.length);
    }

     public String getResponseUrl() {
         return  this.responseUrl;
    }

     public  int getStatuCode() {
         return  this.statuCode;
    }

     public String getText()  throws UnsupportedEncodingException {
         return getText("");
    }

     public String getText(String encoding)  throws UnsupportedEncodingException {
         if ( this.response ==  null){
             return "";
        }
        String encodingStr = encoding;
         if (StringUtils.isBlank(encoding)){
            encodingStr =  this.metaCharset;
        }

         if (StringUtils.isBlank(encoding)){
            encodingStr =  this.headerCharset;
        }

         if (StringUtils.isBlank(encoding)){
            encodingStr = "UTF-8";
        }

         return  new String( this.response, encodingStr);
    }

     private String getCharsetFromMeta() {
        StringBuilder builder =  new StringBuilder();
        String charset = "";
         for ( int i = 0; (i <  this.response.length) && ("".equals(charset)); ++i) {
             char c = ( charthis.response[i];
             switch (c) {
             case '<':
                builder.delete(0, builder.length());
                builder.append(c);
                 break;
             case '>':
                 if (builder.length() > 0){
                    builder.append(c);
                }
                String meta = builder.toString();

                 if (meta.toLowerCase().startsWith("<meta")){
                    charset = getCharsetFromMeta(meta);
                }
                 break;
             case '=':
             default:
                 if (builder.length() > 0){
                    builder.append(c);
                }
            }

        }

         return charset;
    }

     private String getCharsetFromMeta(String meta) {
         if (StringUtils.isBlank(meta)){
             return "";
        }
        Matcher m = pattern.matcher(meta);
         if (m.find()){
             return m.group(2);
        }
         return "";
    }

     private  void getHttpHeaders(HttpResponse httpResponse) {
        String headerName = "";
        String headerValue = "";
         int index = -1;

        Header[] rspHeaders = httpResponse.getAllHeaders();
         for ( int i = 0; i < rspHeaders.length; ++i) {
            Header header = rspHeaders[i];
             this.headers.add(header);

            headerName = header.getName();
             if ("Content-Type".equalsIgnoreCase(headerName)) {
                headerValue = header.getValue();
                index = headerValue.indexOf(';');
                 if (index > 0){
                     this.headerContentType = headerValue.substring(0, index);
                }
                Matcher m = headerCharsetPattern.matcher(headerValue);
                 if (m.find()){
                     this.headerCharset = m.group(1);
                }
            }

             if ("Content-Encoding".equalsIgnoreCase(headerName)){
                 this.headerContentEncoding = header.getValue();
            }
        }
    }

     private  void getResponseUrl(HttpContext httpContext) {
        HttpHost target = (HttpHost) httpContext
                .getAttribute("http.target_host");

        HttpUriRequest req = (HttpUriRequest) httpContext
                .getAttribute("http.request");

         this.responseUrl = target.toString() + req.getURI().toString();
    }

     public HttpResult(HttpContext httpContext, HttpResponse httpResponse) {
         this.headers =  new ArrayList<Header>();

         this.statuCode = httpResponse.getStatusLine().getStatusCode();

         if (httpContext !=  null) {
            getResponseUrl(httpContext);
        }

         if (httpResponse !=  null) {
            getHttpHeaders(httpResponse);
             try {
                 if (("gzip".equalsIgnoreCase( this.headerContentEncoding))
                        || ("deflate".equalsIgnoreCase( this.headerContentEncoding))) {
                    GZIPInputStream is =  new GZIPInputStream(httpResponse.getEntity().getContent());
                    ByteArrayOutputStream os =  new ByteArrayOutputStream();
                     byte[] buffer =  new  byte[BUFFER_SIZE];
                     int count = 0;
                     while ((count = is.read(buffer)) > 0){
                        os.write(buffer, 0, count);
                    }
                     this.response = os.toByteArray();
                    os.close();
                    is.close();
                } else{
                     this.response = EntityUtils.toByteArray(httpResponse.getEntity());
                }
            }  catch (Exception e) {
                LOG.error("[AGDS-SPIDER]" + e.getMessage(), e);
            }
             if ( this.response !=  null){
                 this.metaCharset = getCharsetFromMeta();
            }
        }
    }

     private HttpResult() {
    }
}

 

Mail163Test 

import java.text.MessageFormat;
import java.util.HashMap;
import java.util.Map;

import org.apache.http.Header;
import org.apache.http.message.BasicHeader;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

public  class Mail163Test {
     public  static  final String SESSION_INIT = "http://mail.163.com";
     public  static  final String LOGIN_URL = "https://ssl.mail.163.com/entry/coremail/fcg/ntesdoor2?df=webmail163&from=web&funcid=loginone&iframe=1&language=-1&net=t&passtype=1&product=mail163&race=-2_-2_-2_db&style=-1&uid=";
     public  static  final String MAIL_LIST_URL = "http://twebmail.mail.163.com/js4/s?sid={0}&func=mbox:listMessages";
     /**
     * 
@param  args
     
*/
     public  static  void main(String[] args) {
        HttpClientHelper hc =  new HttpClientHelper( true);
        HttpResult lr = hc.get(SESSION_INIT); //  目的是獲得 csrfToken 相似
        
//  拼裝登陸信息
        Map<String, String> data =  new HashMap<String, String>();
        data.put("url2", "http://mail.163.com/errorpage/err_163.htm");
        data.put("savelogin", "0");
        data.put("username", "bangis");
        data.put("password", "*******");
        lr = hc.post(LOGIN_URL, data,setHeader()); //  執行登陸
        Document doc = Jsoup.parse(lr.getHtml());
        String sessionId=doc.select("script").html().split("=")[2];
        sessionId = sessionId.substring(0,sessionId.length()-2);
        data.clear();
        data.put("var", "<?xml version=\"1.0\"?><object><int name=\"fid\">1</int><boolean name=\"skipLockedFolders\">false</boolean><string name=\"order\">date</string><boolean name=\"desc\">true</boolean><int name=\"start\">0</int><int name=\"limit\">50</int><boolean name=\"topFirst\">true</boolean><boolean name=\"returnTotal\">true</boolean><boolean name=\"returnTag\">true</boolean></object>");
        lr = hc.post(MessageFormat.format(MAIL_LIST_URL, sessionId),
                data,setQueryHeader(sessionId)); //  執行登陸
        System.out.println(lr.getHtml());
    }
    
     public  static Header[] setHeader() {
        Header[] result = { 
                 new BasicHeader("User-Agent","Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)"), 
                 new BasicHeader("Accept-Encoding","gzip, deflate"),
                 new BasicHeader("Accept-Language","zh-CN"),
                 new BasicHeader("Cache-Control","no-cache"),
                 new BasicHeader("Connection","Keep-Alive"),
                 new BasicHeader("Content-Type","application/x-www-form-urlencoded"),
                 new BasicHeader("Host","ssl.mail.163.com"),
                 new BasicHeader("Referer","http://mail.163.com/"),
                 new BasicHeader("Accept","text/html, application/xhtml+xml, */*")
                
        };
         return result;
    }
     public  static Header[] setQueryHeader(String sessionId) {
        Header[] result = { 
                 new BasicHeader("User-Agent","Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)"), 
                 new BasicHeader("Accept-Encoding","gzip, deflate"),
                 new BasicHeader("Accept-Language","zh-CN"),
                 new BasicHeader("Cache-Control","no-cache"),
                 new BasicHeader("Connection","Keep-Alive"),
                 new BasicHeader("Content-Type","application/x-www-form-urlencoded"),
                 new BasicHeader("Host","twebmail.mail.163.com"),
                 new BasicHeader("Referer","http://twebmail.mail.163.com/js4/index.jsp?sid="+sessionId),
                 new BasicHeader("Accept","text/javascript")
                
        };
         return result;     } }
相關文章
相關標籤/搜索