編譯原理-詞法分析（lexical analysis）初識（續）

時間 2021-08-15
標籤 java 正則表達式 ide this spa orm blog token get 數學欄目 Java 简体版
原文原文鏈接
這一篇與前一篇的思路相同，只不過這個例子可以初步處理多行註釋的問題。
原來的思路是這樣的：
使用 Java 的正則表達式功能，並對 Tiny 源程序代碼作了一定的前提假設，所完成的 Tiny 語言詞法分析程序，其實就是模仿教材中的最終輸出文件，解析出每個 Token。
默認每一個 Token 之間都以空格「 」隔開，所以可用 Java 中的正則表達式將每一行依此規律拆分爲一個個 Token，而後再對每個 Token 進行類別匹配（也用到正則表達式），最後按類別打印輸出。不過，這種實現無法處理多行註釋的問題。
對教材中的源程序樣例代碼修改如下（加了很多空格，變成 sample2.tny）：
掃描之後的輸出結果：
代碼實現：
  
  
           
  
  
   
   
            
   
   package lexical_analysis; 
   
   
            
   
    
   
   
            
   
   import java.io.BufferedReader; 
   
   
            
   
   import java.io.FileReader; 
   
   
            
   
   import java.util.regex.Pattern; 
   
   
            
   
    
   
   
            
   
   /**
    * Line-oriented lexical analyser (scanner) for the TINY teaching language.
    *
    * <p>The scanner echoes every source line prefixed with its line number and
    * then prints one indented line per recognised token. Tokens are expected to
    * be delimited by whitespace, by a semicolon, or by the start of a
    * <code>{ ... }</code> comment; each delimited chunk is classified as a
    * whole (reserved word, operator, identifier or number). Chunks that match
    * no category are silently ignored.
    *
    * <p>Comments may span several lines; the "inside a comment" state is kept
    * in {@link #commentFlag} between lines.
    */
   public class OriginalLexicalAnalyser {

       // Token category codes. They are not referenced by the scanner itself.
       private static final int RESERVE_DWORD = 1;
       private static final int ARITHMETIC_SYMBOLS = 2;
       private static final int ID = 3;
       private static final int NUM = 4;

       // Reserved words of the TINY language ("else" was missing originally).
       private static final String[] RESERVED_WORDS = {
           "read", "if", "then", "else", "repeat", "until", "write", "end"
       };

       // Arithmetic, assignment and comparison operators.
       private static final String[] ARITHMETIC_SYMBOL_TABLE = {
           "+", "-", "*", "/", "%", ":=", "=", "<", ">", "<=", ">="
       };

       // Compiled once: a token is a NUM if it consists solely of digits.
       private static final Pattern NUM_PATTERN = Pattern.compile("\\d+");

       // Compiled once: a token is an ID if it consists solely of ASCII letters.
       private static final Pattern ID_PATTERN = Pattern.compile("[a-zA-Z]+");

       // Input stream of the source program; closed by scan().
       private BufferedReader sourceFile;

       // Number of the source line currently being scanned (1-based once reading starts).
       private int lineCount = 0;

       // True while the scanner is inside a { ... } comment, possibly across lines.
       boolean commentFlag = false;

       /**
        * Creates an analyser reading from the given file.
        *
        * @param sourceFilePath path of the TINY source file to scan
        * @throws Exception if the file cannot be opened
        */
       public OriginalLexicalAnalyser(String sourceFilePath) throws Exception {
           // Open the source program for buffered, line-by-line reading.
           this.sourceFile = new BufferedReader(new FileReader(sourceFilePath));
       }

       /**
        * Scans the whole source file and prints the listing to standard output.
        *
        * <p>Each source line is echoed as {@code "%2d: %s"}, followed by its
        * tokens. After the last line a single {@code EOF} entry is printed with
        * the next line number. The underlying reader is closed when scanning
        * finishes (normally or exceptionally), so an instance can be scanned
        * only once.
        *
        * @throws Exception if reading the source file fails
        */
       public void scan() throws Exception {
           // try-with-resources guarantees the reader is released.
           try (BufferedReader reader = this.sourceFile) {
               String eachLine;
               while ((eachLine = reader.readLine()) != null) {
                   ++lineCount;
                   System.out.printf("%2d: %s\n", lineCount, eachLine);
                   scanLine(eachLine);
               }
           }
           // EOF is reported once, at the real end of input, not on every "end" token.
           System.out.printf("%2d: %s\n", ++lineCount, "EOF");
       }

       /**
        * Splits one source line into tokens and prints each of them.
        *
        * @param line the source line, without its line terminator
        */
       private void scanLine(String line) {
           int len = line.length();
           int start = 0; // index where the pending (not yet printed) token begins
           int pos = 0;   // index of the character being examined

           while (pos < len) {
               if (commentFlag) {
                   // Inside a comment: jump to the closing brace, if it is on this line.
                   int close = line.indexOf('}', pos);
                   if (close < 0) {
                       return; // comment continues on the next line
                   }
                   commentFlag = false;
                   pos = close + 1;
                   start = pos; // nothing inside the comment belongs to a token
                   continue;
               }

               char c = line.charAt(pos);
               if (c == '{') {
                   // A comment starts: flush whatever token precedes it.
                   printToken(line.substring(start, pos));
                   commentFlag = true;
                   pos++;
               } else if (c == ';') {
                   // The semicolon ends the pending token and is a token itself.
                   printToken(line.substring(start, pos));
                   printToken(";");
                   pos++;
                   start = pos;
               } else if (Character.isWhitespace(c)) {
                   printToken(line.substring(start, pos));
                   pos++;
                   start = pos;
               } else {
                   pos++;
               }
           }

           // Flush a token that runs up to the end of the line.
           if (!commentFlag && start < len) {
               printToken(line.substring(start));
           }
       }

       /**
        * Returns the index just past the first closing brace in the line and
        * leaves comment mode; if the line holds no closing brace, returns the
        * index of the last character and stays in comment mode.
        *
        * @param eachLine the line to search
        * @return index after the first '}' or {@code eachLine.length() - 1}
        */
       private int processComment(String eachLine) {
           int close = eachLine.indexOf('}');
           if (close >= 0) {
               commentFlag = false;
               return close + 1;
           }
           return eachLine.length() - 1;
       }

       /**
        * Classifies a token and prints it with the current line number.
        * Empty tokens and tokens matching no category print nothing.
        *
        * @param token the raw token text
        */
       private void printToken(String token) {
           if (token == null || token.isEmpty()) {
               return;
           }
           if (isArithmeticSymbol(token)) {            // operator
               System.out.println("    " + lineCount + ": " + token);
           } else if (isReservedWord(token)) {         // reserved word
               System.out.println("    " + lineCount + ": " + "reserved word: " + token);
           } else if (";".equals(token)) {             // statement separator
               System.out.println("    " + lineCount + ": " + token);
           } else if (isID(token)) {                   // user-defined identifier
               System.out.println("    " + lineCount + ": " + "ID, name= " + token);
           } else if (isNum(token)) {                  // numeric literal
               System.out.println("    " + lineCount + ": " + "NUM, val= " + token);
           }
       }

       /**
        * Tells whether the token is a reserved word.
        *
        * @param token the token text
        * @return {@code true} if the token equals one of the reserved words
        */
       private boolean isReservedWord(String token) {
           for (String word : RESERVED_WORDS) {
               if (word.equals(token)) {
                   return true;
               }
           }
           return false;
       }

       /**
        * Tells whether the token is an operator symbol.
        *
        * @param token the token text
        * @return {@code true} if the token equals one of the operator symbols
        */
       private boolean isArithmeticSymbol(String token) {
           for (String symbol : ARITHMETIC_SYMBOL_TABLE) {
               if (symbol.equals(token)) {
                   return true;
               }
           }
           return false;
       }

       /**
        * Tells whether the token is a number (one or more digits).
        *
        * @param token the token text
        * @return {@code true} if the whole token consists of digits
        */
       private boolean isNum(String token) {
           return NUM_PATTERN.matcher(token).matches();
       }

       /**
        * Tells whether the token is an identifier (one or more ASCII letters).
        *
        * @param token the token text
        * @return {@code true} if the whole token consists of letters a-z / A-Z
        */
       private boolean isID(String token) {
           return ID_PATTERN.matcher(token).matches();
       }

       /**
        * Entry point of the lexical analyser.
        *
        * @param args optional; {@code args[0]} is the source file path, which
        *             defaults to {@code sample2.tny} when absent
        * @throws Exception if the file cannot be opened or read
        */
       public static void main(String[] args) throws Exception {
           String sourceFilePath = (args != null && args.length > 0) ? args[0] : "sample2.tny";
           OriginalLexicalAnalyser lexicalAnalyser = new OriginalLexicalAnalyser(sourceFilePath);
           lexicalAnalyser.scan();
       }

   }