繼續完成前面一篇「設計有窮自動機DFA實現C++簡單程序的詞法分析、掃描(編譯原理實驗)」中詞法分析掃描程序剩下的功能:去除多餘的空行、空格和註釋,對源程序進行壓縮。
按實驗要求(如下),這裏需要考慮下面帶星號*的第(3)(5)點:
實驗中用到的C++源程序如下圖:
思路:
其實也就是將源程序中的多餘空格、註釋、換行等都刪除,整理成單單一行的源代碼。
每次對掃描程序獲取到的Token進行判斷,根據上一個Token的類型(有關鍵字、標識符、數值、字符串、特殊符號)決定當前Token是否可以與上一個Token緊鄰,也即不加任何空格。
例如上面截圖中倒數第二行中的 else 和 cout 兩個關鍵字之間就必須有空格分開,不然代碼就會出錯了。針對上面這個簡單的C++源程序,觀察其DFA圖能夠得出如下特色:
一、關鍵字與標識符不能緊鄰,例如 int i中間必須有空格
二、關鍵字與關鍵字也不能緊鄰,如上所述
三、另外關鍵字與字符串也不能緊鄰
對於以上樣例輸入,先進行詞法分析,而後將得到的Token壓縮並保存在StringBuilder對象中,再寫入到一個新的文件,最後再次對壓縮後的文件進行掃描,判斷壓縮前後的掃描結果是否一致。
程序輸出結果(包括壓縮後的源代碼)如下:
根據上面這三個特點,代碼實現如下(高亮部分是與上一篇源代碼不同之處):
- package lexical_analysis;
- import java.io.BufferedReader;
- import java.io.File;
- import java.io.FileOutputStream;
- import java.io.FileReader;
- import java.io.PrintWriter;
/**
 * DFA-based scanner for a small subset of C++ that also "compresses" the
 * scanned source: comments and redundant white space are dropped and the
 * remaining tokens are concatenated into a single line.
 *
 * <p>While scanning, every source line and every recognised token is echoed
 * to standard output. The compressed text is accumulated in memory and can
 * be written out with {@link #printCompressedFile(String)}.
 *
 * <p>A single space is kept between two tokens only when they were separated
 * in the original source AND gluing them together would change how they lex
 * (two words/numbers, or two symbols that would form another operator or a
 * comment opener). In addition, a keyword is always separated from a
 * following string literal.
 *
 * <p>NOTE(review): the whole program is emitted on one line, including
 * preprocessor directives such as {@code #include}; a real C++ compiler
 * requires a directive to end at a line break. That is the stated design of
 * this exercise and is left unchanged.
 */
public class Scanner_2 {

    /** States of the scanning automaton. */
    private enum State {
        START, NUM, ID, EQ, NE, LT, GT, SLASH,
        LINE_COMMENT, BLOCK_COMMENT, BLOCK_COMMENT_STAR, STR, DONE
    }

    /** Category of a token; {@code Initial} means "no token seen yet". */
    private enum TokenType {
        Initial, ID, Num, Special, Str, KeyWord
    }

    /** Words reported as keywords. */
    private static final String[] KEY_WORDS = {
        "include", "define", "iostream", "int", "float", "double",
        "main", "if", "else", "for", "while", "do", "goto", "switch",
        "case", "static", "cin", "cout"
    };

    /** Tokens reported as special symbols. */
    private static final String[] SPECIAL = {
        "{", "}", "[", "]", "(", ")",
        "#", ",", ".", ";", ":", "\\",
        "'", "\"", ">>", "<<", "!=", "=",
        "==", "<=", ">=", "++", "--",
        "<", ">", "!"
    };

    /** Tokens reported as arithmetic operators. */
    private static final String[] ARITHMETIC = {"+", "-", "*", "/", "%"};

    /**
     * Character pairs that must not become adjacent when the source had
     * white space or a comment between them, because together they would
     * lex as a different operator or as a comment opener. The last entry
     * keeps the single space of a blank character literal (quote, space,
     * quote) from being dropped.
     */
    private static final String[] GLUED_PAIRS = {
        "++", "--", "<<", ">>", "<=", ">=", "==", "!=",
        "+=", "-=", "*=", "/=", "%=", "&=", "|=", "^=",
        "&&", "||", "->", "::", "//", "/*", "''"
    };

    /** Reader over the file currently being scanned. */
    private BufferedReader sourceFile;
    /** Category of the most recently emitted token. */
    private TokenType preType = TokenType.Initial;
    /** True when white space or a comment preceded the token being emitted. */
    private boolean gapBeforeToken = false;
    /** Compressed source text produced by the current/last scan. */
    private final StringBuilder compressedStr = new StringBuilder();
    /** Number of valid characters in {@link #lineBuf}. */
    private int bufSize = 0;
    /** Characters of the current line, followed by a restored '\n'. */
    private char[] lineBuf = new char[0];
    /** 1-based number of the current line. */
    private int lineNum = 0;
    /** Index in {@link #lineBuf} of the next character to hand out. */
    private int charPos = 0;
    /** Set once the end of the input file has been reached. */
    private boolean isEOF = false;

    /**
     * Resets all per-scan state so that one instance can scan several files
     * in a row without results of an earlier scan leaking into the next.
     */
    private void initial() {
        bufSize = 0;
        lineBuf = new char[0];
        lineNum = 0;
        charPos = 0;
        isEOF = false;
        preType = TokenType.Initial;
        gapBeforeToken = false;
        compressedStr.setLength(0);
    }

    /**
     * Scans the given file token by token until end of file and closes the
     * file afterwards, even when scanning fails.
     *
     * @param originalFile path of the file to scan
     * @throws Exception if the file cannot be opened or read
     */
    private void scanning(String originalFile) throws Exception {
        this.initial();
        this.sourceFile = new BufferedReader(new FileReader(originalFile));
        try {
            while (!isEOF) {
                getToken();
            }
        } finally {
            this.sourceFile.close();
        }
        System.out.println("========================> end scanning ...");
    }

    /**
     * Returns the next input character, loading (and echoing) a new line
     * when the current one is exhausted.
     *
     * <p>{@code readLine()} strips line terminators, so a '\n' is appended
     * to every line: it separates tokens across lines, terminates line
     * comments, and makes empty lines safe to index.
     *
     * @return the next character, or '\0' with {@link #isEOF} set at end of file
     * @throws Exception if reading fails
     */
    private char getNextChar() throws Exception {
        if (charPos >= bufSize) {
            String line = sourceFile.readLine();
            if (line == null) {
                isEOF = true;
                return '\0';
            }
            lineNum++;
            System.out.println(lineNum + ": " + line);
            lineBuf = (line + '\n').toCharArray();
            bufSize = lineBuf.length;
            charPos = 0;
        }
        return lineBuf[charPos++];
    }

    /**
     * Pushes back the last {@code step} characters of the current line.
     * Does nothing once end of file has been reached.
     *
     * @param step number of characters to push back
     */
    private void unGetNextChar(int step) {
        if (!isEOF) {
            charPos = Math.max(0, charPos - step);
        }
    }

    /**
     * Scans one token, skipping any white space and comments in front of
     * it, and passes it to {@link #printToken(String)}.
     *
     * @return the token text, or an empty string if end of file was reached
     *         before any token started
     * @throws Exception if reading fails
     */
    private String getToken() throws Exception {
        StringBuilder text = new StringBuilder();
        State state = State.START;
        boolean gap = false;      // white space or a comment was skipped
        boolean escaped = false;  // previous string character was a backslash

        while (state != State.DONE) {
            char c = getNextChar();
            if (isEOF) {
                // The EOF sentinel is never part of a token; flush what we have.
                if (state == State.SLASH) {
                    text.append('/');
                }
                break;
            }
            boolean keep = true;
            switch (state) {
            case START:
                if (isDigit(c)) {
                    state = State.NUM;
                } else if (isLetter(c) || c == '.') {
                    // '.' starts a word so that "iostream.h" yields ".h"
                    state = State.ID;
                } else if (Character.isWhitespace(c)) {
                    keep = false;
                    gap = true;
                } else if (c == '!') {
                    state = State.NE;
                } else if (c == '=') {
                    state = State.EQ;
                } else if (c == '<') {
                    state = State.LT;
                } else if (c == '>') {
                    state = State.GT;
                } else if (c == '/') {
                    // Could be a comment; the slash is restored later if not.
                    state = State.SLASH;
                    keep = false;
                } else if (c == '"') {
                    state = State.STR;
                } else {
                    state = State.DONE; // any other character is a token by itself
                }
                break;
            case NUM:
                if (!isDigit(c)) {
                    state = State.DONE;
                    unGetNextChar(1);
                    keep = false;
                }
                break;
            case ID:
                if (!isLetter(c) && !isDigit(c)) {
                    state = State.DONE;
                    unGetNextChar(1);
                    keep = false;
                }
                break;
            case NE:
            case EQ:
                // "!=" / "==" when followed by '=', otherwise the single character
                state = State.DONE;
                if (c != '=') {
                    unGetNextChar(1);
                    keep = false;
                }
                break;
            case LT:
                state = State.DONE;
                if (c != '=' && c != '<') {
                    unGetNextChar(1);
                    keep = false;
                }
                break;
            case GT:
                state = State.DONE;
                if (c != '=' && c != '>') {
                    unGetNextChar(1);
                    keep = false;
                }
                break;
            case STR:
                if (c == '\n') {
                    // Unterminated literal: stop at the line end instead of
                    // swallowing the following lines.
                    state = State.DONE;
                    keep = false;
                } else if (escaped) {
                    escaped = false;
                } else if (c == '\\') {
                    escaped = true;
                } else if (c == '"') {
                    state = State.DONE;
                }
                break;
            case SLASH:
                keep = false;
                if (c == '/') {
                    state = State.LINE_COMMENT;
                } else if (c == '*') {
                    state = State.BLOCK_COMMENT;
                } else {
                    // A plain division operator: emit the '/' that was held back.
                    text.append('/');
                    unGetNextChar(1);
                    state = State.DONE;
                }
                break;
            case LINE_COMMENT:
                keep = false;
                if (c == '\n') {
                    // Comment finished: keep scanning, do not emit an empty token.
                    state = State.START;
                    gap = true;
                }
                break;
            case BLOCK_COMMENT:
                keep = false;
                if (c == '*') {
                    state = State.BLOCK_COMMENT_STAR;
                }
                break;
            case BLOCK_COMMENT_STAR:
                keep = false;
                if (c == '/') {
                    state = State.START;
                    gap = true;
                } else if (c != '*') {
                    state = State.BLOCK_COMMENT;
                }
                break;
            default:
                System.out.println(lineNum + " >> Scanner Bug : state = " + state);
                state = State.DONE;
                keep = false;
                break;
            }
            if (keep) {
                text.append(c);
            }
        }

        String token = text.toString();
        if (token.length() > 0) {
            gapBeforeToken = gap;
            printToken(token);
        }
        return token;
    }

    /**
     * Tells whether {@code c} may appear in an identifier: an ASCII letter
     * or an underscore.
     */
    private boolean isLetter(char c) {
        return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '_';
    }

    /** Tells whether {@code c} is an ASCII decimal digit. */
    private boolean isDigit(char c) {
        return '0' <= c && c <= '9';
    }

    /** Tells whether {@code token} is a non-empty run of decimal digits. */
    private boolean isNum(String token) {
        if (token.length() == 0) {
            return false;
        }
        for (int i = 0; i < token.length(); i++) {
            if (!isDigit(token.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /** Tells whether {@code token} is one of the special symbols. */
    private boolean isSpecial(String token) {
        return contains(SPECIAL, token);
    }

    /** Tells whether {@code token} is an arithmetic operator. */
    private boolean isArithmetic(String token) {
        return contains(ARITHMETIC, token);
    }

    /** Tells whether {@code token} is a keyword. */
    private boolean isKeyWord(String token) {
        return contains(KEY_WORDS, token);
    }

    /** Linear search of {@code value} in {@code table}. */
    private static boolean contains(String[] table, String value) {
        for (int i = 0; i < table.length; i++) {
            if (table[i].equals(value)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Decides whether a space must be written before {@code token} in the
     * compressed output.
     *
     * @param token text of the token about to be appended (non-empty)
     * @param type  category of that token
     * @return true if a separating space is required
     */
    private boolean needsSeparator(String token, TokenType type) {
        if (compressedStr.length() == 0) {
            return false;
        }
        if (preType == TokenType.KeyWord && type == TokenType.Str) {
            return true;
        }
        if (!gapBeforeToken) {
            // Tokens that touched in the source can always touch in the output.
            return false;
        }
        char last = compressedStr.charAt(compressedStr.length() - 1);
        char first = token.charAt(0);
        boolean lastIsWord = isLetter(last) || isDigit(last);
        boolean firstIsWord = isLetter(first) || isDigit(first);
        if (lastIsWord && firstIsWord) {
            return true; // e.g. "int i", "case 1", "return 0", "else cout"
        }
        return contains(GLUED_PAIRS, "" + last + first);
    }

    /**
     * Prints the token together with its category and appends it to the
     * compressed output, preceded by a space where one is required.
     * Empty tokens are ignored.
     *
     * @param token the token text
     */
    private void printToken(String token) {
        if (token == null || token.length() == 0) {
            return;
        }
        String category;
        TokenType type;
        char first = token.charAt(0);
        if (isKeyWord(token)) {
            category = "關鍵字";
            type = TokenType.KeyWord;
        } else if (isSpecial(token)) {
            category = "特殊符號";
            type = TokenType.Special;
        } else if (isArithmetic(token)) {
            category = "算術運算符";
            type = TokenType.Special;
        } else if (isNum(token)) {
            category = "數值";
            type = TokenType.Num;
        } else if (first == '"') {
            category = "字符串";
            type = TokenType.Str;
        } else if (isLetter(first) || first == '.') {
            category = "標識符";
            type = TokenType.ID;
        } else {
            // Any other punctuation ('&', '|', '?', ...) is a symbol, not a name.
            category = "特殊符號";
            type = TokenType.Special;
        }
        System.out.printf("%4d: %s --- %s\n", lineNum, token, category);
        if (needsSeparator(token, type)) {
            this.compressedStr.append(' ');
        }
        this.compressedStr.append(token);
        preType = type;
    }

    /**
     * Prints the compressed source and writes it to the given file. The file
     * is always closed before this method returns.
     *
     * @param compressedFile path of the file to create or overwrite
     * @throws Exception if the file cannot be created or written
     */
    public void printCompressedFile(String compressedFile) throws Exception {
        System.out.println(this.compressedStr);
        PrintWriter writer = new PrintWriter(
                new FileOutputStream(new File(compressedFile)));
        try {
            writer.write(this.compressedStr.toString());
            writer.flush();
            // PrintWriter swallows I/O errors; surface them explicitly.
            if (writer.checkError()) {
                throw new java.io.IOException("failed to write " + compressedFile);
            }
        } finally {
            writer.close();
        }
    }

    /**
     * Demo: scans {@code cppSrc.cpp}, writes the compressed source to
     * {@code afterCompressed.cpp}, then scans the compressed file so both
     * token listings can be compared.
     */
    public static void main(String[] args) throws Exception {
        Scanner_2 scanner = new Scanner_2();
        System.out.println("掃描未壓縮源代碼文件 >> ");
        scanner.scanning("cppSrc.cpp");
        System.out.println("\n壓縮以後的源代碼 >> ");
        scanner.printCompressedFile("afterCompressed.cpp");
        System.out.println("\n掃描壓縮後的源代碼文件 >> ");
        scanner.scanning("afterCompressed.cpp");
    }
}