11年前正抓緊高考,記得當時對計算機特別着迷,基本每週都要買一份電腦報,介紹計算機硬件、軟件方面的東西,上課也偷偷地拿出來看。
無意中接觸到了互聯網開發語言java,便下載了一些尚學堂的基礎視頻教程,主講老師馬士兵,講課特幽默,聽他講課也是一種享受,從那時就走上了IT之路。
最近想做一個數據採集器,需要用到正則表達式,也想回味一下當年馬士兵老師講課的視頻,便整理了以下java正則表達式學習筆記:
1.正則表達式基礎
2.郵件地址頁面抓取
3.代碼統計
正則表達式基礎:
1 public static void main(String[] args) { 2 //簡單認識java正則表達式 3 p("abc".matches("..."));//一個"."表示一個字符 4 p("a8729a".replaceAll("\\d", "-"));//替換,java裏面用兩個\\表明一個\ 5 6 //編譯後執行 7 Pattern p = Pattern.compile("[a-z]{3}"); 8 Matcher m = p.matcher("fgh"); 9 p(m.matches()); 10 p("fgh".matches("[a-z]{3}"));//上面能夠這樣寫 11 12 13 //初步認識 . * + ? 14 p("a".matches("."));//.表示一個字符 15 p("aa".matches("aa")); 16 p("aaaa".matches("a*"));//*表示0個或多個 17 p("".matches("a*")); 18 p("aaaa".matches("a+"));//+表示1個或多個 19 p("aaaa".matches("a?"));//?表示0個或1個 20 p("".matches("a?")); 21 p("a".matches("a?")); 22 p("214523145234532".matches("\\d{3,100}"));//數字 3位至100位 23 p("192.168.0.aaa".matches("\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}"));//ip地址驗證 24 p("192".matches("[0-2][0-9][0-9]")); 25 26 27 //範圍 28 p("a".matches("[abc]"));//取其中abc裏面的一個字符 29 p("a".matches("[^abc]"));//非abc任意字符 30 p("A".matches("[a-zA-Z]")); 31 p("A".matches("[a-z]|[A-Z]"));//或 32 p("A".matches("[a-z[A-Z]]")); 33 p("R".matches("[A-Z&&[RFG]]"));//取交集 34 35 36 //類型 認識\s \w \d \b \ 37 /** 38 \s:表示\t\n\x0B\f\r and \S:表示非\s 39 \w:表示 [a-zA-Z_0-9]構成單詞字符 and \W:表示非\w 40 \d:表示0-9 and \D:表示非0-9 41 */ 42 p(" \n\r\t".matches("\\s{4}")); 43 p(" ".matches("\\S")); 44 p("a_8".matches("\\w{3}")); 45 p("abc888&^%".matches("[a-z]{1,3}\\d+[&^#%]+")); 46 p("\\".matches("\\\\")); 47 48 49 //POSIX Style linux操做系統標準寫法 50 p("a".matches("\\p{Lower}")); 51 52 //邊界處理 ^ $ \b:單詞邊界 (^位於[]是取反) 53 p("hello sir".matches("^h.*"));//^ 開頭的爲h 54 p("hello sir".matches(".*ir$"));//$ 前面有0-多個而且以ir結尾 55 p("hello sir".matches("^h[a-z]{1,3}o\\b.*")); 56 p("hellosir".matches("^h[a-z]{1,3}o\\b.*")); 57 //whilte lines 58 p(" \n".matches("^[\\s&&[^\\n]]*\\n$")); 59 p("aaa 8888c".matches(".*\\d{4}.")); 60 p("aaa 8888c".matches(".*\\b\\d{4}.")); 61 p("aaa8888c".matches(".*\\d{4}.")); 62 p("aaa8888c".matches(".*\\b\\d{4}.")); 63 64 65 //email 66 p("asdfasdfsafsf@dsdfsdf.com".matches("[\\w[.-]]+@[\\w[.-]]+\\.[\\w]+")); 67 68 /*//查找方式 matches find lookingAt 69 Pattern p = 
Pattern.compile("\\d{3,5}"); 70 String s = "123-34345-234-00"; 71 Matcher m = p.matcher(s); 72 p(m.matches()); 73 m.reset(); 74 p(m.find()); 75 p(m.start() + "-" + m.end()); 76 p(m.find()); 77 p(m.start() + "-" + m.end()); 78 p(m.find()); 79 p(m.start() + "-" + m.end()); 80 p(m.find()); 81 //p(m.start() + "-" + m.end()); 82 p(m.lookingAt()); 83 p(m.lookingAt()); 84 p(m.lookingAt()); 85 p(m.lookingAt());*/ 86 87 88 //字符串替換 replacement 89 /* 90 Pattern p = Pattern.compile("java", Pattern.CASE_INSENSITIVE); 91 Matcher m = p.matcher("java Java JAVa JaVa IloveJAVA you hateJava afasdfasdf"); 92 StringBuffer buf = new StringBuffer(); 93 int i=0; 94 while(m.find()) { 95 i++; 96 if(i%2 == 0) { 97 m.appendReplacement(buf, "java"); 98 } else { 99 m.appendReplacement(buf, "JAVA"); 100 } 101 } 102 m.appendTail(buf); 103 p(buf); 104 */ 105 106 //分組 group 107 /* 108 Pattern p = Pattern.compile("(\\d{3,5})([a-z]{2})"); 109 String s = "123aa-34345bb-234cc-00"; 110 Matcher m = p.matcher(s); 111 while(m.find()) { 112 p(m.group()); 113 } 114 */ 115 116 //qulifiers 117 /* 118 Pattern p = Pattern.compile(".{3,10}+[0-9]"); 119 String s = "aaaa5bbbb68"; 120 Matcher m = p.matcher(s); 121 if(m.find()) 122 p(m.start() + "-" + m.end()); 123 else 124 p("not match!"); 125 */ 126 127 //non-capturing groups 128 /* 129 Pattern p = Pattern.compile(".{3}(?=a)"); 130 String s = "444a66b"; 131 Matcher m = p.matcher(s); 132 while(m.find()) { 133 p(m.group()); 134 } 135 */ 136 137 //back refenrences 138 /* 139 Pattern p = Pattern.compile("(\\d(\\d))\\2"); 140 String s = "122"; 141 Matcher m = p.matcher(s); 142 p(m.matches()); 143 */ 144 145 //flags簡寫 146 //Pattern p = Pattern.compile("java", Pattern.CASE_INSENSITIVE); 147 // p("Java".matches("(?i)(java)")); 148 } 149 150 public static void p(Object o) { 151 System.out.println(o); 152 }
郵件地址頁面抓取
1 public static void main(String[] args) { 2 try { 3 BufferedReader br = new BufferedReader(new FileReader("D:\\share\\courseware\\1043633.html")); 4 String line = ""; 5 while((line=br.readLine()) != null) { 6 parse(line); 7 } 8 } catch (FileNotFoundException e) { 9 // TODO Auto-generated catch block 10 e.printStackTrace(); 11 } catch (IOException e) { 12 // TODO Auto-generated catch block 13 e.printStackTrace(); 14 } 15 } 16 17 private static void parse(String line) { 18 Pattern p = Pattern.compile("[\\w[.-]]+@[\\w[.-]]+\\.[\\w]+"); 19 Matcher m = p.matcher(line); 20 while(m.find()) { 21 System.out.println(m.group()); 22 } 23 }
代碼統計
1 static long normalLines = 0; 2 static long commentLines = 0; 3 static long whiteLines = 0; 4 5 public static void main(String[] args) { 6 File f = new File("D:\\share\\JavaProjects\\TankWar1.9.11\\src"); 7 File[] codeFiles = f.listFiles(); 8 for(File child : codeFiles){ 9 if(child.getName().matches(".*\\.java$")) { 10 parse(child); 11 } 12 } 13 14 System.out.println("normalLines:" + normalLines); 15 System.out.println("commentLines:" + commentLines); 16 System.out.println("whiteLines:" + whiteLines); 17 18 } 19 20 private static void parse(File f) { 21 BufferedReader br = null; 22 boolean comment = false; 23 try { 24 br = new BufferedReader(new FileReader(f)); 25 String line = ""; 26 while((line = br.readLine()) != null) { 27 line = line.trim(); 28 if(line.matches("^[\\s&&[^\\n]]*$")) { 29 whiteLines ++; 30 } else if (line.startsWith("/*") && !line.endsWith("*/")) { 31 commentLines ++; 32 comment = true; 33 } else if (line.startsWith("/*") && line.endsWith("*/")) { 34 commentLines ++; 35 } else if (true == comment) { 36 commentLines ++; 37 if(line.endsWith("*/")) { 38 comment = false; 39 } 40 } else if (line.startsWith("//")) { 41 commentLines ++; 42 } else { 43 normalLines ++; 44 } 45 } 46 } catch (FileNotFoundException e) { 47 e.printStackTrace(); 48 } catch (IOException e) { 49 e.printStackTrace(); 50 } finally { 51 if(br != null) { 52 try { 53 br.close(); 54 br = null; 55 } catch (IOException e) { 56 e.printStackTrace(); 57 } 58 } 59 } 60 }
我的網站:https://www.liyuan3210.com