簡易詞法分析功能
(1)讀取一個 txt 程序文件(最後的 # 做爲結束標誌,不可省去)
{
int a, b;
a = 10;
if(a>=1){
b = a + 20;
}
}
複製代碼
(2)詞法識別分析表
單詞類別 | 單詞自身值 | 內部編碼 |
---|---|---|
關鍵字 | int、for、while、do、return、break、continue | 1 |
標識符 | 除關鍵字外的以字母開頭,後跟字母、數字的字符序列 | 2 |
常數 | 無符號整型數 | 3 |
運算符 | +、-、*、/、>、<、=、>=、<=、!= | 4 |
界限符 | ,、;、{、}、(、) | 5 |
換行符 | \n | 6 |
(3)輸出結果:
(5,{)
(6,\n)
(1,int)
(2,a)
(5,,)
(2,b)
(5,;)
(6,\n)
(2,a)
(4,=)
(3,10)
(5,;)
(6,\n)
(2,if)
(5,()
(2,a)
(4,>=)
(3,1)
(5,))
(5,{)
(6,\n)
(2,b)
(4,=)
(2,a)
(4,+)
(3,20)
(5,;)
(6,\n)
(5,})
(6,\n)
(5,})
(6,\n)
(0,#)
複製代碼
並保存成新的 txt 文件
(1)程序文件目錄:
(2)Word.java 文件:
package com.java997.analyzer.lexical;
/**
 * Value holder for one token recognised by the lexical scanner: a numeric
 * type code paired with the scanned text.
 *
 * @author XiaoPengwei
 * @since 2019-06-13
 */
public class Word {

    /** Scanned text of the token. */
    private String word;

    /** Numeric category (type code) of the token. */
    private int typeNum;

    /**
     * @return the type code of this token
     */
    public int getTypeNum() {
        return this.typeNum;
    }

    /**
     * @param typeNum the type code to store
     */
    public void setTypeNum(final int typeNum) {
        this.typeNum = typeNum;
    }

    /**
     * @return the scanned text of this token
     */
    public String getWord() {
        return this.word;
    }

    /**
     * @param word the scanned text to store
     */
    public void setWord(final String word) {
        this.word = word;
    }
}
複製代碼
(3)CodeScanner.java 文件:
package com.java997.analyzer.lexical;
/**
 * Character-level scanner that splits source text into {@link Word} tokens.
 *
 * <p>Type codes produced by {@link #scan()}:
 * 0 end marker ({@code #}) or end of input, 1 keyword, 2 identifier,
 * 3 unsigned integer constant, 4 operator, 5 delimiter, 6 line feed,
 * -1 unrecognised input.
 *
 * @author XiaoPengwei
 * @since 2019-06-13
 */
public class CodeScanner {

    private static final int TYPE_END = 0;
    private static final int TYPE_KEYWORD = 1;
    private static final int TYPE_IDENTIFIER = 2;
    private static final int TYPE_CONSTANT = 3;
    private static final int TYPE_OPERATOR = 4;
    private static final int TYPE_DELIMITER = 5;
    private static final int TYPE_NEWLINE = 6;
    private static final int TYPE_ERROR = -1;

    /** Value placed in {@code ch} once every input character has been consumed. */
    private static final char END_OF_INPUT = '\0';

    /** Reserved words; every one of them is reported with the keyword type code. */
    private static final String[] KEYWORDS = {
        "int", "for", "while", "do", "return", "break", "continue"
    };

    /** Two-character operators. */
    private static final String[] LOGIC_OPERATORS = {"==", ">=", "<=", "!="};

    /** Characters being scanned. */
    private final char[] input;

    /** Text of the token currently being assembled. */
    private final StringBuilder token = new StringBuilder();

    /** Index of the next character to read from {@code input}. */
    private int p_input = 0;

    /** Most recently read character. */
    private char ch;

    /**
     * Creates a scanner over the given characters.
     *
     * @param input the source text; {@code null} is treated as empty input
     */
    public CodeScanner(char[] input) {
        this.input = input == null ? new char[0] : input;
    }

    /**
     * Reads the next character into the current-character slot.
     *
     * <p>Past the last character the current character becomes {@code '\0'} and the
     * read position is parked one step beyond the end, so that a following
     * {@link #retract()} stays balanced and the last real character is never re-read.
     *
     * @return the character just read, or {@code '\0'} when the input is exhausted
     */
    public char m_getch() {
        if (p_input < input.length) {
            ch = input[p_input];
            p_input++;
        } else {
            ch = END_OF_INPUT;
            p_input = input.length + 1;
        }
        return ch;
    }

    /**
     * Skips blanks: while the current character is a space, tab or carriage return,
     * reads the next one.
     */
    public void getbc() {
        while (ch == ' ' || ch == '\t' || ch == '\r') {
            m_getch();
        }
    }

    /**
     * Appends the current character to the token being assembled.
     */
    public void concat() {
        token.append(ch);
    }

    /**
     * Moves the read position back by one character (never below zero).
     */
    public void retract() {
        if (p_input > 0) {
            p_input--;
        }
    }

    /**
     * @return {@code true} if the current character is an ASCII letter
     */
    public boolean isLetter() {
        return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
    }

    /**
     * @return {@code true} if the current character is an ASCII digit
     */
    public boolean isDigit() {
        return ch >= '0' && ch <= '9';
    }

    /**
     * Classifies the assembled token as keyword or identifier.
     *
     * @return 1 if the token is one of the reserved words, otherwise 2
     */
    public int isKey() {
        String candidate = tokenText();
        for (String keyword : KEYWORDS) {
            if (keyword.equals(candidate)) {
                return TYPE_KEYWORD;
            }
        }
        return TYPE_IDENTIFIER;
    }

    /**
     * @return {@code true} if the current character can start a comparison or
     *         assignment operator ({@code >}, {@code <}, {@code =} or {@code !})
     */
    public Boolean isLogicChar() {
        return ch == '>' || ch == '<' || ch == '=' || ch == '!';
    }

    /**
     * Looks the assembled token up in the two-character operator table.
     *
     * <p>NOTE(review): the value for {@code "!="} (position 4) is the same as the
     * "not found" value, so callers cannot tell the two apart. The method is not
     * used inside this class; its return values are kept as they were.
     *
     * @return the 1-based position of the token in the table
     *         ({@code ==}, {@code >=}, {@code <=}, {@code !=}), or 4 if it is not listed
     */
    public int isLogicTab() {
        String candidate = tokenText();
        for (int i = 0; i < LOGIC_OPERATORS.length; i++) {
            if (LOGIC_OPERATORS[i].equals(candidate)) {
                return i + 1;
            }
        }
        return TYPE_OPERATOR;
    }

    /**
     * Scans and returns the next token.
     *
     * <p>Leading spaces, tabs and carriage returns are skipped. Reaching the end of
     * the input yields the same token as the {@code #} end marker, so a caller that
     * loops until type code 0 always terminates.
     *
     * @return the next token; type code -1 carries an error description as its text
     */
    public Word scan() {
        token.setLength(0);
        m_getch();
        getbc();

        // The read position only moves past input.length when m_getch() ran out of characters.
        if (p_input > input.length) {
            return newWord(TYPE_END, "#");
        }

        if (isLetter()) {
            while (isLetter() || isDigit()) {
                concat();
                m_getch();
            }
            retract();
            return newWord(isKey(), tokenText());
        }

        if (isDigit()) {
            while (isDigit()) {
                concat();
                m_getch();
            }
            retract();
            return newWord(TYPE_CONSTANT, tokenText());
        }

        if (isLogicChar()) {
            return scanOperator();
        }

        switch (ch) {
            case ',':
            case ';':
            case '{':
            case '}':
            case '(':
            case ')':
                return newWord(TYPE_DELIMITER, String.valueOf(ch));
            case '+':
            case '-':
            case '*':
            case '/':
                return newWord(TYPE_OPERATOR, String.valueOf(ch));
            case '\n':
                return newWord(TYPE_NEWLINE, "\\n");
            case '#':
                return newWord(TYPE_END, "#");
            default:
                concat();
                return newWord(TYPE_ERROR, "ERROR INFO: WORD = \"" + tokenText() + "\"");
        }
    }

    /**
     * Scans an operator starting with {@code >}, {@code <}, {@code =} or {@code !}.
     * Takes two characters only when the pair is a listed operator; a lone {@code !}
     * is reported as an error because it is not an operator on its own.
     */
    private Word scanOperator() {
        boolean startsWithBang = ch == '!';
        concat();
        m_getch();
        if (isTwoCharOperator(tokenText() + ch)) {
            concat();
            return newWord(TYPE_OPERATOR, tokenText());
        }
        retract();
        if (startsWithBang) {
            return newWord(TYPE_ERROR, "ERROR INFO: WORD = \"" + tokenText() + "\"");
        }
        return newWord(TYPE_OPERATOR, tokenText());
    }

    /** Tells whether the given text is one of the two-character operators. */
    private static boolean isTwoCharOperator(String text) {
        for (String operator : LOGIC_OPERATORS) {
            if (operator.equals(text)) {
                return true;
            }
        }
        return false;
    }

    /** Returns the assembled token text with surrounding blanks and control characters removed. */
    private String tokenText() {
        return token.toString().trim();
    }

    /** Builds a token with the given type code and text. */
    private static Word newWord(int typeNum, String text) {
        Word result = new Word();
        result.setTypeNum(typeNum);
        result.setWord(text);
        return result;
    }
}
複製代碼
(4)MainAnalyzer.java 文件:
package com.java997.analyzer.lexical;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Scanner;
/**
 * Entry point of the lexical analyzer: reads a source file, scans it into tokens,
 * prints them and saves them to an output file.
 *
 * @author XiaoPengwei
 * @since 2019-06-13
 */
public class MainAnalyzer {

    /** Input path used by {@link #main(String[])} when no argument is given. */
    private static final String DEFAULT_INPUT =
            "D:\\analyzer\\src\\main\\java\\com\\java997\\analyzer\\lexical\\input.txt";

    /** Output path used by {@link #main(String[])} when fewer than two arguments are given. */
    private static final String DEFAULT_OUTPUT =
            "D:\\analyzer\\src\\main\\java\\com\\java997\\analyzer\\lexical\\output.txt";

    /** Character that marks the end of the program text. */
    private static final char END_MARK = '#';

    private final File inputFile;
    private final File outputFile;
    private String fileContent;
    private final ArrayList<Word> list = new ArrayList<>();

    /**
     * Creates an analyzer for the given files.
     *
     * @param input  path of the source file to read
     * @param output path of the file the token list is written to
     * @author XiaoPengwei
     */
    public MainAnalyzer(String input, String output) {
        inputFile = new File(input);
        outputFile = new File(output);
    }

    /**
     * Reads the input file line by line, echoing each line to standard output and
     * joining the lines with {@code '\n'}.
     *
     * @return the file content, or an empty string if the file cannot be opened
     *         (the failure is reported on standard error)
     */
    public String getContent() {
        StringBuilder content = new StringBuilder();
        try (Scanner reader = new Scanner(inputFile)) {
            while (reader.hasNextLine()) {
                String line = reader.nextLine();
                content.append(line).append('\n');
                System.out.println(line);
            }
            System.out.println("Successful reading of files:" + inputFile.getName());
        } catch (FileNotFoundException e) {
            System.err.println("Cannot read input file " + inputFile.getAbsolutePath()
                    + ": " + e.getMessage());
        }
        fileContent = content.toString();
        return fileContent;
    }

    /**
     * Scans the given program text, printing every token and collecting it, until
     * the token with type code 0 has been produced; then saves the collected tokens
     * with {@link #saveResult()}.
     *
     * <p>Tokens collected by an earlier call are discarded first.
     *
     * @param fileContent the program text; {@code null} is treated as empty
     */
    public void analyze(String fileContent) {
        String source = fileContent == null ? "" : fileContent;
        // Guarantee the scan loop sees an end marker so it always terminates,
        // even when the text omits the trailing '#'.
        if (source.indexOf(END_MARK) < 0) {
            source = source + END_MARK;
        }

        list.clear();
        CodeScanner scanner = new CodeScanner(source.toCharArray());
        System.out.println("The result:");
        Word word;
        do {
            word = scanner.scan();
            System.out.println(format(word));
            list.add(word);
        } while (word.getTypeNum() != 0);
        saveResult();
    }

    /**
     * Writes the collected tokens to the output file, one {@code (type,text)} pair
     * per line, in the same format that is printed to standard output. Opening the
     * writer creates the file or overwrites an existing one; an I/O failure is
     * reported on standard error.
     */
    public void saveResult() {
        try (Writer writer = new FileWriter(outputFile)) {
            for (Word word : list) {
                writer.write(format(word));
                writer.write('\n');
            }
        } catch (IOException e) {
            System.err.println("Cannot write output file " + outputFile.getAbsolutePath()
                    + ": " + e.getMessage());
        }
    }

    /** Renders a token as {@code (type,text)}. */
    private static String format(Word word) {
        return "(" + word.getTypeNum() + "," + word.getWord() + ")";
    }

    /**
     * Runs the analyzer.
     *
     * @param args optional: {@code args[0]} input path, {@code args[1]} output path;
     *             the built-in default paths are used for whichever is missing.
     *             The input file must exist; the output file is created by the program.
     */
    public static void main(String[] args) {
        String input = args.length > 0 ? args[0] : DEFAULT_INPUT;
        String output = args.length > 1 ? args[1] : DEFAULT_OUTPUT;
        MainAnalyzer analyzer = new MainAnalyzer(input, output);
        analyzer.analyze(analyzer.getContent());
    }
}
複製代碼
(5)input.txt 文件:
{
int a, b;
a = 10;
if(a>=1){
b = a + 20;
}
}
#
複製代碼