之前想要爬取人人網上的一些信息,由於需要登錄,因此就從網上直接拷貝了一份代碼來實現人人登錄,但發現搜索到的大部分代碼都沒有實現驗證碼驗證。故自己添加了驗證碼驗證,當然自己偷懶,沒有解析圖片,而是直接把圖片保存到本地,還需要手動輸入驗證碼。當然登錄成功之後,就可以爬取大量其他人的信息,這裏只返回了自己的頁面的源碼。
public class RenrenLogin {
// Value sent as the "email" field of the login form (placeholder; replace with a real account).
public static String userName = "xxxx@126.com";
// Value sent as the "password" field of the login form (placeholder).
public static String password = "xxxxx";
// Value sent as the "origURL" field of the login form.
public static String redirectURL = "http://www.renren.com/home";
// Endpoint the login form is POSTed to.
public static String renRenLoginURL = "http://www.renren.com/PLogin.do";
// Response of the login POST; set by login() and read by getRedirectLocation().
private HttpResponse response;
// Response of the captcha image request; set by getPic().
private HttpResponse resPic;
// URL the captcha image is downloaded from. The random value is appended as a real number:
// previously "Math.random()" was part of the string literal and was sent verbatim.
// NOTE(review): rnd is presumably a cache-buster; it is computed once, when the class is loaded.
public static String picUrl = "http://icode.renren.com/getcode.do?t=web_login&rnd=" + Math.random();
// Single client shared by the captcha, login and page requests made by this object.
private HttpClient httpClient = new DefaultHttpClient();
/**
 * Program entry point: creates a {@code RenrenLogin} and runs its login-and-print flow.
 *
 * @param args command-line arguments (unused)
 * @throws IOException propagated from {@code printText()}
 */
public static void main(String []args) throws IOException{
    new RenrenLogin().printText();
}
/**
 * Downloads the captcha image from {@code picUrl} and writes it to {@code F:\new.png}.
 *
 * <p>The HTTP response is kept in {@code resPic}. I/O failures are printed and otherwise
 * ignored, so the caller continues even when no image could be saved.
 */
private void getPic(){
    InputStream is = null;
    FileOutputStream out = null;
    try {
        HttpGet httpget = new HttpGet(picUrl);
        resPic = httpClient.execute(httpget);
        HttpEntity entity = resPic.getEntity();
        if (entity == null) {
            // A response without a body has nothing to save; avoid dereferencing null.
            System.err.println("Captcha response contained no body; no image was saved.");
            return;
        }
        is = entity.getContent();
        out = new FileOutputStream(new File("F:\\new.png"));
        byte[] buffer = new byte[1024];
        int read;
        while ((read = is.read(buffer)) != -1) {
            out.write(buffer, 0, read);
        }
        out.flush();
    } catch (IllegalStateException e1) {
        e1.printStackTrace();
    } catch (IOException e1) {
        e1.printStackTrace();
    } finally {
        // Close both streams on every path, including failures part-way through the copy.
        // Closing the entity stream is also what hands the connection back to the client,
        // which the subsequent login request on the same client depends on.
        if (is != null) {
            try {
                is.close();
            } catch (IOException closeError) {
                closeError.printStackTrace();
            }
        }
        if (out != null) {
            try {
                out.close();
            } catch (IOException closeError) {
                closeError.printStackTrace();
            }
        }
    }
}
/**
 * Logs in by POSTing the login form to {@code renRenLoginURL}, including a captcha code
 * typed in by the user.
 *
 * <p>The captcha image is first downloaded with {@code getPic()}; the user is then prompted on
 * standard output and the code is read from standard input. The HTTP response is stored in
 * {@code response} so that the redirect target can be read from it afterwards.
 *
 * @return {@code true} if the POST completed and the response carries a {@code Location}
 *         header; {@code false} if no captcha could be read, the request failed, or the
 *         response has no {@code Location} header
 * @throws IOException if reading the captcha code from standard input fails
 */
public boolean login() throws IOException{
    HttpPost httpPost = new HttpPost(renRenLoginURL);
    List<NameValuePair> nvps = new ArrayList<NameValuePair>();
    nvps.add(new BasicNameValuePair("origURL", redirectURL));
    nvps.add(new BasicNameValuePair("domain", "renren.com"));
    nvps.add(new BasicNameValuePair("isplogin", "false"));
    nvps.add(new BasicNameValuePair("formName", ""));
    nvps.add(new BasicNameValuePair("method", "post"));
    nvps.add(new BasicNameValuePair("submit", "登陸人人網"));
    nvps.add(new BasicNameValuePair("email", userName));
    nvps.add(new BasicNameValuePair("password", password));
    nvps.add(new BasicNameValuePair("autoLogin", "true"));
    BufferedReader strin = new BufferedReader(new InputStreamReader(System.in));
    // Download the captcha image to local disk; the user has to open it and read the code.
    getPic();
    System.out.println("請到F盤查看圖片驗證碼並在控制檯中輸入:");
    String captcha = strin.readLine();
    if (captcha == null) {
        // readLine() returns null at end of input; without a code there is nothing to submit.
        System.err.println("No captcha code could be read from standard input; login aborted.");
        return false;
    }
    nvps.add(new BasicNameValuePair("icode", captcha));
    nvps.add(new BasicNameValuePair("key_id", "1"));
    nvps.add(new BasicNameValuePair("captcha_type", "web_login"));
    try {
        httpPost.setEntity(new UrlEncodedFormEntity(nvps, HTTP.UTF_8));
        response = httpClient.execute(httpPost);
        System.out.println(response.getStatusLine());
        System.out.println(response.getParams());
        Header locationHeader = response.getFirstHeader("Location");
        if (locationHeader == null) {
            // No redirect came back. This used to surface as a NullPointerException that the
            // catch below turned into "false"; the outcome is the same, stated explicitly.
            System.err.println("Login response has no Location header; treating login as failed.");
            return false;
        }
        System.out.println(locationHeader.getValue());
    } catch (Exception e) {
        e.printStackTrace();
        return false;
    } finally {
        // Only the headers are needed, so the request is aborted rather than consumed.
        httpPost.abort();
    }
    return true;
}
/**
 * Fetches the given URL with a GET request and returns the response body as text.
 *
 * <p>The connection manager of the shared client is shut down afterwards, whether or not the
 * request succeeded.
 *
 * @param redirectLocation URL to fetch
 * @return the response body, or {@code null} if the request failed (the error is printed)
 */
public String getText(String redirectLocation){
    HttpGet pageRequest = new HttpGet(redirectLocation);
    ResponseHandler<String> bodyHandler = new BasicResponseHandler();
    try {
        return httpClient.execute(pageRequest, bodyHandler);
    } catch (Exception failure) {
        failure.printStackTrace();
        return null;
    } finally {
        pageRequest.abort();
        httpClient.getConnectionManager().shutdown();
    }
}
/**
 * Reads the redirect target from the stored login response.
 *
 * @return the value of the first {@code Location} header of {@code response}, or {@code null}
 *         when the response has no such header
 */
private String getRedirectLocation() {
    Header redirectHeader = response.getFirstHeader("Location");
    return redirectHeader == null ? null : redirectHeader.getValue();
}
private void printText() throws IOException{
if(login()){
String redirectLocation = getRedirectLocation();
if (redirectLocation != null) {
System.out.println(getText(redirectLocation));
}
}
} dom