Java實例——基於jsoup的簡單爬蟲


添加代理示例

import java.io.IOException;
import java.net.Authenticator;
import java.net.InetSocketAddress;
import java.net.PasswordAuthentication;
import java.net.Proxy;
import java.util.Random;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

public class Demo{代理

// 代理隧道驗證信息htm

final static String ProxyUser = "16KASDA";ip

final static String ProxyPass = "1231321";

// 代理服務器

final static String ProxyHost = "t.16yun.cn";

final static Integer ProxyPort = 31111;

// 設置IP切換頭

final static String ProxyHeadKey = "Proxy-Tunnel";

public static String getUrlProxyContent(String url)

{

Authenticator.setDefault(new Authenticator() {

public PasswordAuthentication getPasswordAuthentication()

{

return new PasswordAuthentication(ProxyUser, ProxyPass.toCharArray());

}

});

// 設置Proxy-Tunnel

Random random = new Random();

int tunnel = random.nextInt(10000);

String ProxyHeadVal = String.valueOf(tunnel);

Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(ProxyHost, ProxyPort));

try

{

// 處理異常、其餘參數

Document doc = Jsoup.connect(url).timeout(3000).header(ProxyHeadKey, ProxyHeadVal).proxy(proxy).get();

if(doc != null) {

System.out.println(doc.body().html());

}

}

catch (IOException e)

{

e.printStackTrace();

}

return null;

}

public static void main(String[] args) throws Exception

{

// 要訪問的目標頁面

String targetUrl = "http://httpbin.org/ip";

getUrlProxyContent(targetUrl);

}}

相關文章
相關標籤/搜索