看到不少人問怎樣過濾網頁信息之類的問題。其實用 HTMLParser 很方便。最近閒著沒事,想做個透過代理訪問網頁的軟件,第一步是從各個免費代理網站提取代理,這裡就用到了 HTMLParser:
package com.pmjava.search;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import org.htmlparser.Parser;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.tags.TableTag;
import org.htmlparser.util.NodeList;

public
class
Search
...
{
13![](http://static.javashuo.com/static/loading.gif)
14![](http://static.javashuo.com/static/loading.gif)
/** *//**
15
* @param args
16
* @author Qing
17
* @throws Exception
18
*/
19![](http://static.javashuo.com/static/loading.gif)
public static void main(String[] args) throws Exception ...{
20
String[] url= new String[4] ;
21
url[0] = "[url]http://www.cnproxy.com/proxy1.html[/url]";
22
String currentUrl = url[0] ;
23
String[] encoding = new String[4] ;
24
encoding[1] = "gb2312";
25
String currentEncoding = encoding[1] ;
26
Parser parser = new Parser() ;
27
parser.setURL(currentUrl) ;
28
parser.setEncoding(currentEncoding) ;
29
NodeClassFilter f=new NodeClassFilter(TableTag.class);
30
NodeList nodelist = parser.extractAllNodesThatMatch(f);
31
String list=null;
32
String []Temp ;
33
String []Temp1;
34![](http://static.javashuo.com/static/loading.gif)
if (nodelist.size()>0)...{
35![](http://static.javashuo.com/static/loading.gif)
for (int i = 0; i < nodelist.size(); i++) ...{
36
TableTag linkTag = (TableTag)nodelist.elementAt(i);
37
list=linkTag.getChildrenHTML().replace("<tr>", "").replace("<td>", "").replace("<td>", "").replace("</td>", "").replace("</tr>", "").replace("<SCRIPT type=text/javascript>document.write(", "").replace(")</SCRIPT>", "").replace("p_w_picpathURl=","").replace("<td width=\"140\">IP:Port<td width=\"40\">Type<td width=\"90\">Speed<td width=\"160\"> Country/Area","").replace("", "");
38
}
39
File file=new File("f://2.txt");
40
FileWriter writer=new FileWriter(file,true);
41
writer.write(list);
42
writer.close();
43
String readFile,writerFile = null,t,t1;
44
FileReader br=new FileReader(file);
45
BufferedReader bufread = new BufferedReader(br);
46
String port;
47![](http://static.javashuo.com/static/loading.gif)
String []port2=...{z,m,k,l,d,x,i,w,q,b};
48![](http://static.javashuo.com/static/loading.gif)
while ((readFile = bufread.readLine()) != null) ...{
49
if(readFile.length()>1)
50![](http://static.javashuo.com/static/loading.gif)
...{
51
Temp=readFile.split("HTTP");
52
int a=Temp[0].trim().indexOf(":");
53
port=Temp[0].trim().substring(a,Temp[0].trim().length()).replace("\"", "").replace("+", "").replace(":","");
54
char []port1=port.toCharArray();
55
String temp1 = null,temp2 = "";
56
for(int j=0;j<port1.length;j++)
57![](http://static.javashuo.com/static/loading.gif)
...{
58
System.out.println(port1[j]);
59
for(int e=0;e<port2.length;e++)
60![](http://static.javashuo.com/static/loading.gif)
...{
61
if(String.valueOf(port1[j]).equals(port2[e]))
62![](http://static.javashuo.com/static/loading.gif)
...{
63
temp2=temp2+temp1;
64
}
65
}
66
}
67
}
68
}
69
bufread.close();
70
br.close();
71
}
72
73
74
}
75![](http://static.javashuo.com/static/loading.gif)
76![](http://static.javashuo.com/static/loading.gif)
77![](http://static.javashuo.com/static/loading.gif)
78
}
79