正則表達式

時間 2019-11-17
原文原文鏈接
  1 package com.yyq;
  2 
  3 import java.io.BufferedReader;
  4 import java.io.FileNotFoundException;
  5 import java.io.FileReader;
  6 import java.util.Arrays;
  7 import java.util.TreeSet;
  8 import java.util.regex.*; 
  9 /*
 10  *     重要的，必須掌握的 校驗。
 11  *  對郵件地址的校驗。。
 12  *  網頁爬蟲（蜘蛛）
 13  *  
 14  */
 15 /*
 16  * 正則表達式的功能。
 17  * 1.匹配：matches() 真假
 18  * 2.切割：split() 切割後的
 19  * 3.替換：replace replaceAll() 替換後的
 20  * 4.獲取： 將字符串中的符合規則的字串取出
 21  * 1， 將正則表達式封裝成對象。
 22  * 2， 讓正則對象和要操作的字符串相關聯。
 23  * 3， 通過引擎對符合規則的字串進行操作，比如取出。
 24  * 其實 String 類中的 matches 方法，用的就是 Pattern 和 Matcher
 25  * 對象來完成的，只不過被 String 的方法封裝後，用起來較爲簡單，但是功能卻單一。
 26  *
 27  * 
 28  * 正則表達式： 符合一定規則的表達式。
 29  * 作用：專門用於操作字符串
 30  * 特點： 用一些特定的符號來表示一些代碼操作，這樣就簡化了書寫。
 31  * 學習正則表達式 就是學習符號的使用。
 32  *  matches 方法： 用規則匹配整個字符串，只要有一處不匹配，就返回 false
 33      好處：簡化對字符串的複雜操作
 34      弊端：閱讀性差
 35  */
 36 /*
 37  *     對QQ號碼進行校驗，
 38  * 要求：長度 5~15 位，不能以 0 開頭，只能是數字。
 39  * 具體操作功能： 匹配： String 的 matches 方法
 40  * [] 校驗的特點： 只校驗一個位置上的字符，
 41  * [a-zA-Z] 所有英文字母。
 42  * [a-z&&[^m-p]]
 43  * [a-z&&[^bc]]
 44  * 字符類 
 45         [abc] a、b 或 c（簡單類） 
 46         [^abc] 任何字符，除了 a、b 或 c（否定） 
 47         [a-zA-Z] a 到 z 或 A 到 Z，兩頭的字母包括在內（範圍） 
 48         [a-d[m-p]] a 到 d 或 m 到 p：[a-dm-p]（並集） 
 49         [a-z&&[def]] d、e 或 f（交集） 
 50         [a-z&&[^bc]] a 到 z，除了 b 和 c：[ad-z]（減去） 
 51         [a-z&&[^m-p]] a 到 z，而非 m 到 p：[a-lq-z]（減去） 
 52           
 53         預定義字符類 
 54         . 任何字符（與行結束符可能匹配也可能不匹配） 
 55         \d 數字：[0-9] 
 56         \D 非數字： [^0-9] 
 57         \s 空白字符：[ \t\n\x0B\f\r] 
 58         \S 非空白字符：[^\s] 
 59         \w 單詞字符：[a-zA-Z_0-9] 
 60         \W 非單詞字符：[^\w] 
 61 
 62  * Greedy 數量詞 
 63         X? X，一次或一次也沒有 
 64         X* X，零次或多次 
 65         X+ X，一次或多次 
 66         X{n} X，恰好 n 次 
 67         X{n,} X，至少 n 次 
 68         X{n,m} X，至少 n 次，但是不超過 m 次 
 69         特殊構造（非捕獲） 
 70         (?:X) X，作爲非捕獲組 
 71         (?idmsux-idmsux)  Nothing，但是將匹配標誌 i d m s u x on - off 
 72         (?idmsux-idmsux:X)   X，作爲帶有給定標誌 i d m s u x on - off 的非捕獲組 
 73         (?=X) X，通過零寬度的正 lookahead 
 74         (?!X) X，通過零寬度的負 lookahead 
 75         (?<=X) X，通過零寬度的正 lookbehind 
 76         (?<!X) X，通過零寬度的負 lookbehind 
 77         (?>X) X，作爲獨立的非捕獲組 
 78     邊界匹配器 
 79     ^ 行的開頭 
 80     $ 行的結尾 
 81     \b 單詞邊界 
 82     \B 非單詞邊界 
 83     \A 輸入的開頭 
 84     \G 上一個匹配的結尾 
 85     \Z 輸入的結尾，僅用於最後的結束符（如果有的話） 
 86     \z 輸入的結尾 
 87  */
 88 
 89 public class MyDemo {
 90     private static String ss = "1111qqq11111111";
 91     public static void main(String[] args) throws Exception {
 92         //splitDemo();
 93         //String str = "aaaaadddffdfdfsdddfaffqwerqsddddafaf"; // 將疊詞替換成#號
 94         //replaceAllDemo(str,"(.)\\1+","$1");  // 將多個變成一個， 獲取組 使用$符號
 95         // 拿前一個第一個組 $1 沒有大括號
 96         //String str = "wer4564664646ssdffff4646546466dddf46498ddd54"; // 將疊詞替換成#號
 97         //replaceAllDemo(str,"\\d{5,}","#");
 98         //checkMain();
 99         getMails();
100     }
101     // 到底用四種功能中的哪個呢？ 或者使用哪幾個呢？？
102     // 1.匹配：判斷對錯
103     // 2.替換
104     // 3.切割： 獲取規則之外的字串
105     // 4.獲取：獲取符合規則的字串。
106     
107     // 192.068.001.254 102.049.023.013 10.10.10.10 2.2.2.2 8.109.90.30
108     // 將ip 地址進行地址段順序的排序。
109     // 還按照字符串天然順序，只要讓他們每一段都是3位便可
110     /*
111      * 1. 按照每一段須要的最多的0進行補齊，那麼每一段就會至少有三位。
112      * 2. 將每一段只保留三位，這樣全部的ip地址都是每一段三位。
113      */
114     // 對郵件地址的校驗
115     public static void checkMain(){
116         String mail = "1015604150@qq.com.cn.abc";
117         // 較爲精確的匹配。
118         String reg = "[a-zA-Z0-9_]{6,12}@[a-zA-Z0-9]+(\\.[a-zA-Z]+){1,3}";
119         //相對不精確的匹配。
120         //reg = "\\w+@\\w(\\.\\w)+";
121         System.out.println(mail.matches(reg));
122     }
123     public static void test_2(){
124         String ip = "192.068.001.254 102.049.023.013 10.10.10.10 2.2.2.2 8.109.90.30";
125 
126         ip = ip.replaceAll("(\\d+)", "00$1");
127         ip = ip.replaceAll("0*(\\d{3})", "$1");
128         ip = ip.replaceAll("0*(\\d+)", "$1");
129         String [] arr = ip.split(" +");
130         /*Arrays.sort(arr);  // 數組排序。
131         for(String s : arr){
132             System.out.println(s);
133         }*/
134         TreeSet<String> ts = new TreeSet<String>();
135         for(String s:arr){
136             ts.add(s);
137         }
138         for(String s : ts){
139             System.out.println(s);
140         }
141     }
142     public static void test_1(){
143         String str = "我我...........我我要...要要...學學學....編編編.編..程....程";
144         /*
145          * 操做： 替換 將一個字符串編程另外一個字符串。
146          * 1.先將點去掉
147          * 2.將重複內容編程單個內容。
148          */
149         str = str.replaceAll("\\.+", "");
150         
151         // 去除疊詞  （.）\\1+
152         str = str.replaceAll("(.)\\1+", "$1");
153         System.out.println(str);
154     }
155     // 按照疊詞進行切割。
156     public static void splitDemo(){
157         //String str = "zhangsan.lisi.wangwu";
158         //String reg = "[ ]+"; // 按照多個空格來進行切割。
159         //String reg = "\\."; // 按照. 進行切割
160         //String str = "C:\\abc\\a.txt";
161         //String reg = "\\\\";
162         String str = "abcaaaaabccdef";
163         String reg = "(.)\\1+"; // 疊詞進行切割  (要重用的部分叫作組 使用 （） 包含，從1 開始編號。) (.)\\1
164         // 爲了可讓規則的結果能夠被重用，能夠將規則封裝成一個捕獲組，用（）完成，組的出現都有編號，//
165         // 從1開始，想要使用已有的組能夠經過\\n 來使用組。
166         
167         String [] arr = str.split(reg);
168         for(String s:arr){
169             System.out.println(s);
170         }
171     }
172     public static void checkTel(){
173         //匹配手機號段 15***，13***，18***
174         String str = "18753377360";
175         String regex = "[1][538]\\d{9}";
176         if(str.matches(regex)){
177             System.out.println("手機號正確");
178         }
179         else{
180             System.out.println("手機號錯誤");
181         }
182     }
183     public static void demo(){
184         String str = "b1111";
185         //該只能校驗兩個位置。
186         String reg = "[a-zA-Z]\\d+";
187         boolean b = str.matches(reg);
188         System.out.println(b);
189     }
190     public static void checkQQ_2(){
191         String qq = "9188536";
192         String regex = "[1-9]\\d{4,14}";
193         if(qq.matches(regex)){
194             System.out.println("it is ok!!!");
195         }
196         else{
197             System.out.println("youw");
198         }
199     }
200     public static void checkQQ(){
201         String qq = ss;
202         int len = qq.length();
203         System.out.println(qq.length());
204         if(!(len>15 || len<5)){
205             if(!qq.startsWith("0")){
206                 try{
207                     long l = Long.parseLong(qq);
208                     System.out.println("qq:" + l);
209                 }
210                 catch(NumberFormatException e){
211                     
212                 }
213                 /*char[] arr = qq.toCharArray();
214                 boolean b = false;
215                 for(int x = 0;x<arr.length;x++){
216                     if(!(arr[x]>='0' && arr[x]<='9')){
217                     
218                         b = true;
219                         break;
220                     }
221                 }
222                 if(!b){
223                     System.out.println("qq:"+qq);
224                 }
225                 else{
226                     System.out.println("QQ號中出現了非法字符");
227                 }
228             
229             */
230             }
231             else{
232                 System.out.println("不能夠0開頭");
233             }
234         }
235         else{
236             System.out.println("密碼長度不正確。");
237         }
238     }
239     public static void replaceAllDemo(String str,String reg,String newChar){
240         str = str.replaceAll(reg, newChar);
241         System.out.println(str);
242     }
243     
244     public static void getDemo(){
245         String str = "ming tian jiu yao fang jia le";
246         String reg = "\\b[a-zA-Z]{4}\\b";
247         //str = "123456789";
248         //String reg ="[1-9]\\d{4,14}";
249         //1. 將正則表達式封裝成對象。
250         Pattern p = Pattern.compile(reg);
251         //2.讓正則對象和要做用字符串相關連。 獲取匹配器對象。
252         Matcher m = p.matcher(str);
253         //m.find();  // 將規則做用到字符串上，並進行符合規則的查找。
254         //m.find();
255         //System.out.println(m.matches());
256         // group() 方法，用於獲取匹配後的結果。
257         // 只有找了，才能去。
258         while(m.find()){
259         System.out.println(m.group());
260         }
261     }
262     // 編寫網絡爬蟲  
263     /*
264      * 使用獲取功能  使用pattern 和matcher
265      *  爬網頁上的
266      */
267     public static void getMails() throws Exception{
268         BufferedReader bufr = new BufferedReader(new  FileReader("E:\\myjavaee\\aaa\\mail.txt"));
269         String line = null;
270         //1.編寫正則表達式
271         String reg = "[a-zA-Z0-9_]{6,14}@[a-zA-Z0-9]+(\\.[a-zA-Z]+){1,3}";
272         //2.將正則表達式封裝成對象
273         Pattern p = Pattern.compile(reg);
274         
275         while((line = bufr.readLine())!=null){
276             Matcher m = p.matcher(line);
277             while(m.find()){
278                 System.out.println(m.group());
279             }
280         }
281     }
282 }