編譯原理-詞法分析(lexical analysis)初識(續)

這一篇與前一篇的思路是一樣的,只不過這個例子可以初步處理多行註釋的問題。

原來的思路是這樣的:

使用 Java 的正則表達式功能,並對 Tiny 源程序代碼設定了一定的前提條件,完成 Tiny 語言的詞法分析;其實就是模仿教材中的最終輸出文件,解析出每個 Token。

默認每一個 Token 之間都以空格「 」隔開,所以可用 Java 中的正則表達式將每一行依此規律拆分爲一個個 Token,然後再對每個 Token 進行類別匹配(也用到正則表達式),最後按類別打印輸出。但是前一篇的實現不能處理多行註釋的問題。

對教材中的源程序樣例代碼修改如下(加了很多空格,變成 sample2.tny):

掃描之後的輸出結果:

 

代碼實現:

  
  
           
  
  
package lexical_analysis;

import java.io.BufferedReader;
import java.io.FileReader;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;

  7. public class OriginalLexicalAnalyser { 
  8.  
  9.     private static final int RESERVE_DWORD = 1
  10.     private static final int ARITHMETIC_SYMBOLS = 2
  11.     private static final int ID = 3
  12.     private static final int NUM = 4
  13.  
  14.     // 保留字 
  15.     private String[] reservedWords = new String[] { "read""if""then"
  16.                                                     "repeat""until""write",  
  17.                                                     "end" }; 
  18.     // 數學運算符 
  19.     private String[] arithmeticSymbols = new String[] { "+""-""*""/"
  20.                                                         "%"":=""=""<",  
  21.                                                         ">""<="">=" }; 
  22.     // 源程序文件輸入流 
  23.     private BufferedReader sourceFile; 
  24.     // 代碼行數 
  25.     private int lineCount = 0
  26.     boolean commentFlag = false
  27.  
  28.     public OriginalLexicalAnalyser(String sourceFilePath) throws Exception { 
  29.         // 建立並加載源程序文件輸入流 
  30.         this.sourceFile = new BufferedReader(new FileReader(sourceFilePath)); 
  31.     } 
  32.  
  33.     public void scan() throws Exception { 
  34.         String eachLine = ""
  35.  
  36.         while ((eachLine = this.sourceFile.readLine()) != null) { 
  37.             ++lineCount; 
  38.             System.out.printf("%2d: %s\n", lineCount, eachLine); 
  39.              
  40.             int start = 0
  41.             int end = 0
  42.             int lineLen = eachLine.length(); 
  43.  
  44.             String nextChar; 
  45.             String token = ""
  46.              
  47.             if("}".equals(eachLine)) { 
  48.                 commentFlag = false
  49.                 printToken(eachLine); 
  50.                 continue
  51.             } 
  52.  
  53.             while (end < lineLen - 1) { 
  54.                 nextChar = eachLine.substring(end, end + 1); 
  55.                  
  56.                 // 上一行是多行註釋開始,即 { 
  57.                 if (commentFlag == true) { 
  58.                     end = processComment(eachLine); 
  59.                     token = eachLine.substring(start, end); 
  60.                     printToken(token); 
  61.                      
  62.                 } else { 
  63.                     if (" ".equals(nextChar)) { 
  64.                         token = eachLine.substring(start, end); 
  65.                         printToken(token); 
  66.                         start = end + 1
  67.                         end = start; 
  68.                     } else if (";".equals(nextChar)) { 
  69.                         token = eachLine.substring(start, end); 
  70.                         printToken(token); 
  71.                         printToken(";"); 
  72.                         break
  73.                     } else if("{".equals(nextChar)){ 
  74.                         commentFlag = true
  75.                         start = end + 1
  76.                         end = start; 
  77.                     } else { 
  78.                         end++; 
  79.                     } 
  80.                 } 
  81.             } 
  82.         } 
  83.     } 
  84.  
  85.     private int processComment(String eachLine) { 
  86.         String ch; 
  87.         int start = 0
  88.         int lineLen = eachLine.length(); 
  89.         for (int i = 1; i < lineLen; ++i) { 
  90.             ch = eachLine.substring(start, i); 
  91.             start++; 
  92.             if ("}".equals(ch)) { 
  93.                 commentFlag = false
  94.                 return i; 
  95.             } 
  96.         } 
  97.         return lineLen - 1
  98.     } 
  99.      
  100.     private void printToken(String token) { 
  101.         if(isArithmeticSymbol(token)) {     // 數學運算符         
  102.             System.out.println("    " + lineCount + ": " + token); 
  103.         } else if(isReservedWord(token)) {  // 保留字 
  104.             if(lineCount == 7) { 
  105.                 System.out.println("==========" + token + "==="); 
  106.             } 
  107.             System.out.println("    " + lineCount + ": " + "reserved word: " + token); 
  108.             // 源程序文件結束符 
  109.             if("end".equals(token)) { 
  110.                 System.out.printf("%2d: %s\n", ++lineCount, "EOF"); 
  111.             } 
  112.         } else if(";".equals(token)) {      // 行結束符,即分號 
  113.             System.out.println("    " + lineCount + ": " + token); 
  114.         }  else if(isID(token)) {           // 自定義標識符ID 
  115.             System.out.println("    " + lineCount + ": " + "ID, name= " + token); 
  116.         } else if(isNum(token)) {           // 數值NUM 
  117.             System.out.println("    " + lineCount + ": " + "NUM, val= " + token); 
  118.         } 
  119.     } 
  120.  
  121.     /** 
  122.      * 判斷是否爲「保留字」 
  123.      * @param token 
  124.      * @return 
  125.      */ 
  126.     private boolean isReservedWord(String token) { 
  127.         int size = this.reservedWords.length; 
  128.         for(int i = 0; i < size; i++) { 
  129.             if(token.equals(reservedWords[i])) { 
  130.                 return true
  131.             } 
  132.         } 
  133.         return false
  134.     } 
  135.      
  136.     /** 
  137.      * 判斷是否爲「數學運算符」 
  138.      * @param token 
  139.      * @return 
  140.      */ 
  141.     private boolean isArithmeticSymbol(String token) { 
  142.         int size = this.arithmeticSymbols.length; 
  143.         for(int i = 0; i < size; i++) { 
  144.             if(token.equals(arithmeticSymbols[i])) { 
  145.                 return true
  146.             } 
  147.         } 
  148.         return false
  149.     } 
  150.      
  151.     /** 
  152.      * 判斷是否爲「數值NUM」 
  153.      * @param token 
  154.      * @return 
  155.      */ 
  156.     private boolean isNum(String token) { 
  157.         boolean flag = Pattern.matches("\\d+?", token); 
  158.         return flag; 
  159.     } 
  160.      
  161.     /** 
  162.      * 判斷是否爲「ID」 
  163.      * @param token 
  164.      * @return 
  165.      */ 
  166.     private boolean isID(String token) { 
  167.         boolean flag = Pattern.matches("[a-zA-Z]+?", token); 
  168.         return flag; 
  169.     } 
  170.      
  171.  
  172.     /** 
  173.      * 「詞法分析程序」的啓動入口 
  174.      * @param args 
  175.      */ 
  176.     public static void main(String[] args) throws Exception { 
  177.         String sourceFilePath = "sample2.tny"
  178.         OriginalLexicalAnalyser lexicalAnalyser = new OriginalLexicalAnalyser(sourceFilePath); 
  179.         lexicalAnalyser.scan(); 
  180.     } 
  181.  
相關文章
相關標籤/搜索