Java 實現《編譯原理》簡單詞法分析功能

時間 2019-12-06

原文鏈接

Java 實現《編譯原理》簡單詞法分析功能

簡易詞法分析功能

要求及功能

（1）讀取一個 txt 程序文件（最後的 # 作為結束標誌，不可省去）：

{
  int a, b;
  a = 10;
  if(a>=1){
    b = a + 20;
  }
}
#
複製代碼

（2）詞法識別分析表：

單詞類別	單詞自身值	內部編碼
關鍵字	int、for、while、do、return、break、continue	1
標識符	除關鍵字外的以字母開頭，後跟字母、數字的字符序列	2
常數	無符號整型數	3
運算符	+、-、*、/、>、<、=、>=、<=、!=	4
界限符	,、;、{、}、(、)	5
換行符	\n	6

（3）輸出結果：

(5,{)
(6,\n)
(1,int)
(2,a)
(5,,)
(2,b)
(5,;)
(6,\n)
(2,a)
(4,=)
(3,10)
(5,;)
(6,\n)
(2,if)
(5,()
(2,a)
(4,>=)
(3,1)
(5,))
(5,{)
(6,\n)
(2,b)
(4,=)
(2,a)
(4,+)
(3,20)
(5,;)
(6,\n)
(5,})
(6,\n)
(5,})
(6,\n)
(0,#)
複製代碼

並保存成新的 txt 文件。

編程實現

（1）程序文件目錄：

（2）Word.java 文件：

package com.java997.analyzer.lexical;

/**
 * One token produced by the lexical scanner: a numeric category code paired
 * with the text that was recognised.
 *
 * @author XiaoPengwei
 * @since 2019-06-13
 */
public class Word {

    /** Text of the scanned token. */
    private String text;

    /** Category code assigned to the token. */
    private int code;

    /**
     * Stores the text of the scanned token.
     *
     * @param word the token text
     */
    public void setWord(String word) {
        text = word;
    }

    /**
     * Returns the text of the scanned token.
     *
     * @return the token text, or {@code null} if none has been set
     */
    public String getWord() {
        return text;
    }

    /**
     * Stores the category code of the token.
     *
     * @param typeNum the category code
     */
    public void setTypeNum(int typeNum) {
        code = typeNum;
    }

    /**
     * Returns the category code of the token.
     *
     * @return the category code, {@code 0} if none has been set
     */
    public int getTypeNum() {
        return code;
    }
}

複製代碼

（3）CodeScanner.java 文件：

package com.java997.analyzer.lexical;

/**
 * Hand-written character scanner that splits a source text into tokens.
 *
 * <p>Category codes produced by {@link #scan()}:
 * <ul>
 *   <li>0 - the {@code #} end marker (also reported once the input runs out)</li>
 *   <li>1 - keyword</li>
 *   <li>2 - identifier</li>
 *   <li>3 - unsigned integer constant</li>
 *   <li>4 - operator</li>
 *   <li>5 - delimiter</li>
 *   <li>6 - line feed</li>
 *   <li>-1 - unrecognised character</li>
 * </ul>
 *
 * @author XiaoPengwei
 * @since 2019-06-13
 */
public class CodeScanner {

    /** Character that ends a program; also delivered by {@link #m_getch()} past the end of the input. */
    private static final char END_MARK = '#';

    /**
     * Reserved words. Every entry is reported with category code 1.
     * "if" is intentionally absent: the token table of this analyzer lists only
     * these seven words, so "if" is scanned as an ordinary identifier.
     */
    private static final String[] KEYWORDS = {"int", "for", "while", "do", "return", "break", "continue"};

    /** Two-character comparison operators consulted by {@link #isLogicTab()}. */
    private static final String[] LOGIC_OPERATORS = {"==", ">=", "<=", "!="};

    /** Characters being scanned; the array passed to the constructor is used as-is, not copied. */
    private final char[] input;

    /** Text of the token currently being assembled. */
    private final StringBuilder token = new StringBuilder();

    /** Index of the next character to read; may run past the end once the input is exhausted. */
    private int p_input = 0;

    /** Character most recently read. */
    private char ch;

    /**
     * Creates a scanner over the given characters.
     *
     * @param input the source text to tokenize
     * @throws NullPointerException if {@code input} is {@code null}
     */
    public CodeScanner(char[] input) {
        if (input == null) {
            throw new NullPointerException("input");
        }
        this.input = input;
    }

    /**
     * Reads the next character into the current-character slot.
     *
     * <p>Past the end of the input the end marker {@code #} is delivered
     * instead of repeating the last character, so every scanning loop
     * terminates. The read position advances on every call, which keeps a
     * following {@link #retract()} symmetric even at the end of the input.
     *
     * @return the character that was read
     */
    public char m_getch() {
        ch = p_input < input.length ? input[p_input] : END_MARK;
        p_input++;
        return ch;
    }

    /**
     * Skips blanks, tabs and carriage returns, leaving the first other
     * character as the current character. Line feeds are not skipped
     * because they are tokens of their own.
     */
    public void getbc() {
        while (ch == ' ' || ch == '\t' || ch == '\r') {
            m_getch();
        }
    }

    /** Appends the current character to the token being assembled. */
    public void concat() {
        token.append(ch);
    }

    /** Steps the read position back by one character so that it is read again; no-op at position 0. */
    public void retract() {
        if (p_input > 0) {
            p_input--;
        }
    }

    /**
     * Tells whether the current character is an ASCII letter.
     *
     * @return {@code true} for {@code a-z} and {@code A-Z}
     */
    public boolean isLetter() {
        return ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z';
    }

    /**
     * Tells whether the current character is an ASCII digit.
     *
     * @return {@code true} for {@code 0-9}
     */
    public boolean isDigit() {
        return ch >= '0' && ch <= '9';
    }

    /**
     * Classifies the assembled token as keyword or identifier.
     *
     * @return 1 if the token is one of the reserved words, otherwise 2
     */
    public int isKey() {
        String candidate = token.toString().trim();
        for (String keyword : KEYWORDS) {
            if (keyword.equals(candidate)) {
                return 1;
            }
        }
        return 2;
    }

    /**
     * Tells whether the current character can start a comparison or
     * assignment operator.
     *
     * @return {@code true} for {@code >}, {@code <}, {@code =} and {@code !}
     */
    public Boolean isLogicChar() {
        return ch == '>' || ch == '<' || ch == '=' || ch == '!';
    }

    /**
     * Looks the assembled token up in the table of two-character comparison
     * operators ({@code ==}, {@code >=}, {@code <=}, {@code !=}).
     * Not used by {@link #scan()}; kept for callers of the public API.
     *
     * @return the 1-based position of the token in that table, or 4 when the
     *         token is not in the table
     */
    public int isLogicTab() {
        String candidate = token.toString().trim();
        for (int i = 0; i < LOGIC_OPERATORS.length; i++) {
            if (LOGIC_OPERATORS[i].equals(candidate)) {
                return i + 1;
            }
        }
        return 4;
    }

    /**
     * Scans and returns the next token.
     *
     * <p>Once the input is exhausted every further call returns the end
     * marker token (category 0, text {@code #}).
     *
     * @return the recognised token; category -1 with an error text for a
     *         character that belongs to no token class
     */
    public Word scan() {
        token.setLength(0);
        m_getch();
        getbc();

        if (isLetter()) {
            // Identifier or keyword: a letter followed by letters and digits.
            do {
                concat();
                m_getch();
            } while (isLetter() || isDigit());
            retract();
            return newWord(isKey(), token.toString());
        }

        if (isLogicChar()) {
            // One of > < = !, optionally followed by a single '='. Anything
            // else after the first character belongs to the next token.
            concat();
            m_getch();
            if (ch == '=') {
                concat();
            } else {
                retract();
            }
            return newWord(4, token.toString());
        }

        if (isDigit()) {
            // Unsigned integer constant.
            do {
                concat();
                m_getch();
            } while (isDigit());
            retract();
            return newWord(3, token.toString());
        }

        switch (ch) {
            case ',':
            case ';':
            case '{':
            case '}':
            case '(':
            case ')':
                return newWord(5, String.valueOf(ch));
            case '+':
            case '-':
            case '*':
            case '/':
                return newWord(4, String.valueOf(ch));
            case '\n':
                // Reported as the two visible characters backslash + n.
                return newWord(6, "\\n");
            case END_MARK:
                return newWord(0, "#");
            default:
                concat();
                return newWord(-1, "ERROR INFO: WORD = \"" + token.toString().trim() + "\"");
        }
    }

    /** Builds a token with the given category code and text. */
    private static Word newWord(int typeNum, String text) {
        Word result = new Word();
        result.setTypeNum(typeNum);
        result.setWord(text);
        return result;
    }
}
複製代碼

（4）MainAnalyzer.java 文件：

package com.java997.analyzer.lexical;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Scanner;

/**
 * Command-line driver: reads a source file, runs the lexical scanner over
 * it, prints every token and saves the token list to an output file.
 *
 * @author XiaoPengwei
 * @since 2019-06-13
 */
public class MainAnalyzer {

    /** Character that marks the end of the program text. */
    private static final char END_MARK = '#';

    /** Input path used by {@link #main(String[])} when no argument is given. */
    private static final String DEFAULT_INPUT =
            "D:\\analyzer\\src\\main\\java\\com\\java997\\analyzer\\lexical\\input.txt";

    /** Output path used by {@link #main(String[])} when fewer than two arguments are given. */
    private static final String DEFAULT_OUTPUT =
            "D:\\analyzer\\src\\main\\java\\com\\java997\\analyzer\\lexical\\output.txt";

    private final File inputFile;
    private final File outputFile;
    private String fileContent;
    private final ArrayList<Word> list = new ArrayList<>();

    /**
     * Creates an analyzer for the given files.
     *
     * @param input  path of the source text file to read
     * @param output path of the file the token list is written to
     */
    public MainAnalyzer(String input, String output) {
        inputFile = new File(input);
        outputFile = new File(output);
    }

    /**
     * Reads the input file line by line, echoing each line to standard
     * output, and joins the lines with {@code \n}.
     *
     * @return the file content; an empty string if the file cannot be opened
     */
    public String getContent() {
        StringBuilder content = new StringBuilder();
        try (Scanner reader = new Scanner(inputFile)) {
            while (reader.hasNextLine()) {
                String line = reader.nextLine();
                content.append(line).append('\n');
                System.out.println(line);
            }
            System.out.println("Successful reading of files：" + inputFile.getName());
        } catch (FileNotFoundException e) {
            System.err.println("Cannot read input file: " + inputFile.getPath());
            e.printStackTrace();
        }
        fileContent = content.toString();
        return fileContent;
    }

    /**
     * Tokenizes the given text, prints every token as {@code (code,text)}
     * and saves the tokens through {@link #saveResult()}.
     *
     * <p>Scanning stops at the first token with category code 0, i.e. the
     * {@code #} end marker. If the text contains no end marker one is
     * appended, so the loop always terminates. Results of a previous call
     * are discarded.
     *
     * @param fileContent the program text; {@code null} is treated as empty
     */
    public void analyze(String fileContent) {
        String source = fileContent == null ? "" : fileContent;
        if (source.indexOf(END_MARK) < 0) {
            // Without the marker the scan loop below would have no stop condition.
            source = source + END_MARK;
        }

        list.clear();
        CodeScanner scanner = new CodeScanner(source.toCharArray());
        System.out.println("The result:");
        Word word;
        do {
            word = scanner.scan();
            System.out.println(format(word));
            list.add(word);
        } while (word.getTypeNum() != 0);

        saveResult();
    }

    /**
     * Writes the collected tokens to the output file, one
     * {@code (code,text)} entry per line, in the same format that
     * {@link #analyze(String)} prints. Opening the writer creates the file
     * if it does not exist and truncates it otherwise.
     */
    public void saveResult() {
        try (Writer writer = new FileWriter(outputFile)) {
            for (Word word : list) {
                writer.write(format(word) + "\n");
            }
        } catch (IOException e) {
            System.err.println("Cannot write output file: " + outputFile.getPath());
            e.printStackTrace();
        }
    }

    /** Renders a token as {@code (code,text)}. */
    private static String format(Word word) {
        return "(" + word.getTypeNum() + "," + word.getWord() + ")";
    }

    /**
     * Entry point.
     *
     * @param args optional: {@code args[0]} is the input file path and
     *             {@code args[1]} the output file path; the built-in default
     *             paths are used for whichever is missing. The input file
     *             must exist; the output file is created if necessary.
     */
    public static void main(String[] args) {
        String input = args.length > 0 ? args[0] : DEFAULT_INPUT;
        String output = args.length > 1 ? args[1] : DEFAULT_OUTPUT;

        MainAnalyzer analyzer = new MainAnalyzer(input, output);
        analyzer.analyze(analyzer.getContent());
    }
}
複製代碼

（5）input.txt 文件：

{
  int a, b;
  a = 10;
  if(a>=1){
    b = a + 20;
  }
}
#
複製代碼

執行測試

相關標籤/搜索

每日一句

每一个你不满意的现在，都有一个你没有努力的曾经。