很久沒有寫博客了,上一篇博客《模擬登陸smartqq》的瀏覽量達到了2000+,也許大家對這些很好奇吧。由於種種原因,之前我把smartqq的百度雲鏈接關閉了;至於原因,是因為核心代碼已經給出了,拒絕伸手黨,這也是促進爬蟲事業進步的一大原則性問題。好了,廢話不多說,上代碼!
package com.login;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
/**
* 來自一個有理想的爬蟲工程師
* 爬蟲工程師羣:142351055
* qq:1069478446
* @author lotenr
*
*/
public class Login {
private String iamgeImg="D:\\jj";
String redirect_uri="";
CloseableHttpClient https = HttpClients.createDefault();
public static void main(String[] args)
{
System.setProperty ("jsse.enableSNIExtension", "false");
Login l=new Login();
l.initpage();
String appid=l.getPng1();
if(!"".equals(appid))
{
l.getPng2(appid);
}
for(int i=0;;i++)
{
int cf=l.checklogin(appid);
if(cf==3)
{
System.out.println("已在手機端確認");
break;
}
if(cf==2)
{
appid=l.getPng1();
if(!"".equals(appid))
{
l.getPng2(appid);
}
}
if(cf==1)
{
continue;
}
try {
Thread.sleep(13000);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
l.login();apache
}
/**
* 初始化頁面
*/
public void initpage()
{
HttpGet httpPost=new HttpGet("https://wx.qq.com/");app
String html="";
try {
HttpResponse response = https.execute(httpPost);
HttpEntity entitySort = response.getEntity();
html=EntityUtils.toString(entitySort, "utf-8");
//System.out.println(html);
} catch (ClientProtocolException e) {分佈式
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* 下載二維碼之獲取參數
*/
public String getPng1()
{
String url="https://login.wx.qq.com/jslogin?appid=wx782c26e4c19acffb&redirect_uri=https%3A%2F%2Fwx.qq.com%2Fcgi-bin%2Fmmwebwx-bin%2Fwebwxnewloginpage&fun=new&lang=zh_CN&_="+System.currentTimeMillis();
HttpGet httpPost=new HttpGet(url);ui
String html="";
try {
HttpResponse response = https.execute(httpPost);
HttpEntity entitySort = response.getEntity();
html=EntityUtils.toString(entitySort, "utf-8");
System.out.println(html);
if(html.indexOf("window.QRLogin.code = 200")!=-1)
{
return html.replace("window.QRLogin.code = 200; window.QRLogin.uuid = \"", "").replace("\";", "");
}
} catch (ClientProtocolException e) {this
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
return "";
}
/**
* 下載二維碼
* @param appid
* @return
*/
public void getPng2(String appid)
{
String url="https://login.weixin.qq.com/qrcode/"+appid;
HttpGet httpget = new HttpGet(url);
System.out.println("獲取二維碼:Executing request " + httpget.getURI());//開始
String html="";
FileOutputStream fos;
try {
HttpResponse response = https.execute(httpget);
System.out.println(response.getStatusLine());
InputStream inputStream = response.getEntity().getContent();
File file = new File(this.iamgeImg);
if (!file.exists()) {
file.mkdirs();
}
fos = new FileOutputStream("D:\\jj\\test.jpg");
byte[] data = new byte[1024];
int len = 0;
while ((len = inputStream.read(data)) != -1) {
fos.write(data, 0, len);
}
} catch (ClientProtocolException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
System.out.println(html);
}
public int checklogin(String appid)
{
String url="https://login.wx.qq.com/cgi-bin/mmwebwx-bin/login?loginicon=true&uuid="+appid+"&tip=0&r=123&_="+System.currentTimeMillis();
System.out.println(url);
HttpGet httpPost=new HttpGet(url);
httpPost.setHeader("Host", "login.wx.qq.com");
httpPost.setHeader("Pragma", "no-cache");
httpPost.setHeader("Referer", "https://wx.qq.com/");
httpPost.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36");
httpPost.setHeader("Connection", "keep-alive");
int timeout = 200000;
// System.out.println("Executing request " +
// httpget.getRequestLine());
RequestConfig config = RequestConfig.custom().setSocketTimeout(timeout).setConnectTimeout(timeout)
.setConnectionRequestTimeout(timeout).build();
httpPost.setConfig(config);
String html="";
try {
HttpResponse response = https.execute(httpPost);
HttpEntity entitySort = response.getEntity();
html=EntityUtils.toString(entitySort, "utf-8");
System.out.println(html);
if(html.indexOf("408")!=-1)
{
return 1;
}
if(html.indexOf("400")!=-1)
{url
return 2;
}
if(html.indexOf("200")!=-1)
{
int start=html.indexOf("https");
html=html.substring(start).replace("\";", "");
this.redirect_uri=html;
System.out.println(this.redirect_uri);
return 3;
}
} catch (ClientProtocolException e) {.net
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
return 0;
}
public void login()
{
HttpGet httpPost=new HttpGet(this.redirect_uri);
httpPost.setHeader("Host", "wx.qq.com");
httpPost.setHeader("Pragma", "no-cache");
httpPost.setHeader("Referer", "https://wx.qq.com/?&lang=zh_CN");
httpPost.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36");
httpPost.setHeader("Connection", "keep-alive");
String html="";
try {
HttpResponse response = https.execute(httpPost);
HttpEntity entitySort = response.getEntity();
html=EntityUtils.toString(entitySort, "utf-8");
System.out.println(html);
} catch (ClientProtocolException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
} } 好,到這裏,登陸功能已經實現了,是否是很簡單呢?其實爬蟲的難度並非在爬上,而是在爬蟲系統的設計上,如何讓爬蟲高效,健壯,可分佈式,使用起來跟家方便纔是難點