import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Minimal web scraper: downloads a page and prints every substring that
 * matches a regular expression (by default, things that look like e-mail
 * addresses).
 */
public class RegexWeb {

    /** Page fetched when no URL is given on the command line. */
    private static final String DEFAULT_URL = "http://tieba.baidu.com/p/2314539885";

    /** Pattern used when no regex is given on the command line (e-mail-like tokens). */
    private static final String DEFAULT_REGEX = "(\\w)+(\\.\\w+)*@(\\w)+((\\.\\w{2,3}){1,3})";

    /** Connect and read timeout, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 1000 * 10;

    /**
     * Entry point.
     *
     * @param args optional; {@code args[0]} overrides the URL to fetch and
     *             {@code args[1]} overrides the regular expression. Missing
     *             values fall back to the built-in defaults.
     * @throws Exception if the URL is malformed or the page cannot be read
     */
    public static void main(String[] args) throws Exception {
        String str_url = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;
        String regex = (args != null && args.length > 1) ? args[1] : DEFAULT_REGEX;
        regexForWeb(str_url, regex);
    }

    /**
     * Reads the resource at {@code str_url} line by line and prints each
     * non-overlapping match of {@code regex} on its own line of standard output.
     *
     * @param str_url address of the resource to fetch
     * @param regex   regular expression to search for within each line
     * @throws IOException if the URL is malformed, or connecting/reading fails
     * @throws java.util.regex.PatternSyntaxException if {@code regex} is invalid
     */
    private static void regexForWeb(String str_url, String regex) throws IOException {
        // Compile first so an invalid expression fails before any network traffic.
        Pattern p = Pattern.compile(regex);

        URL url = new URL(str_url);
        URLConnection conn = url.openConnection();
        conn.setConnectTimeout(TIMEOUT_MILLIS);
        // Without a read timeout a stalled server would block readLine() forever.
        conn.setReadTimeout(TIMEOUT_MILLIS);

        // try-with-resources closes the reader (and the underlying stream)
        // even when reading throws part-way through.
        try (BufferedReader buf = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), charsetOf(conn)))) {
            String line;
            while ((line = buf.readLine()) != null) {
                Matcher m = p.matcher(line);
                // find() scans for partial matches anywhere in the line.
                while (m.find()) {
                    System.out.println(m.group());
                }
            }
        }
    }

    /**
     * Picks the charset for decoding the response: the {@code charset}
     * parameter of the Content-Type header when present and supported,
     * otherwise UTF-8 (instead of the unpredictable platform default).
     */
    private static Charset charsetOf(URLConnection conn) {
        String contentType = conn.getContentType();
        if (contentType != null) {
            String key = "charset=";
            for (String param : contentType.split(";")) {
                String trimmed = param.trim();
                if (trimmed.regionMatches(true, 0, key, 0, key.length())) {
                    String name = trimmed.substring(key.length()).replace("\"", "").trim();
                    try {
                        return Charset.forName(name);
                    } catch (IllegalArgumentException ignored) {
                        // Unknown or malformed charset label: use the default below.
                    }
                }
            }
        }
        return StandardCharsets.UTF_8;
    }
}