正在看極客時間宮文學老師的《編譯原理之美》,用 Swift Playground 寫了一個第二課「int age >= 45」的詞法解析 DEMO。
爲了保持與原課程代碼一致,DEMO 用了順序結構,看起來有點散亂😂,後面我再抽時間優化一下。
//識別:「int age >= 45」
import Foundation
/// States of the lexer's finite automaton.
///
/// Each case's raw value is its own name, so `rawValue` can be used
/// directly when a state needs to be shown as text.
enum DfaState: String {
    /// No token is currently being recognized.
    case Initial
    /// Inside an identifier.
    case Id
    /// Inside an integer literal.
    case IntLiteral
    /// A single `>` has been read.
    case GT
    /// `>=` has been read.
    case GE
}
/// Categories of tokens the lexer can produce.
///
/// Each case's raw value is its own name; the raw value is what gets
/// printed when a token's type is reported.
enum TokenType: String {
    /// A name such as `int` or `age`.
    case Identifier
    /// A run of digits such as `45`.
    case IntLiteral
    /// The `>` operator.
    case GT
    /// The `>=` operator.
    case GE
}
/// Reports whether `ch` is a letter, as defined by `Character.isLetter`.
///
/// - Parameter ch: The character to classify.
/// - Returns: `true` when `ch` is a letter, otherwise `false`.
func isAlpha(_ ch: Character) -> Bool {
    let letter: Bool = ch.isLetter
    return letter
}
/// Reports whether `ch` is an ASCII decimal digit (`0` through `9`).
///
/// `Character.isNumber` on its own also accepts fractions, circled numbers
/// and digits from other scripts (for example "⅚" or "๒"), which must not
/// be lexed as part of an integer literal, so the check is limited to ASCII.
///
/// - Parameter ch: The character to classify.
/// - Returns: `true` only for the characters `0`...`9`.
func isDigit(_ ch: Character) -> Bool {
    return ch.isASCII && ch.isNumber
}
/// A lexical token: its category plus the text it was read from.
class Token {
    /// Category of the token; `nil` until a type has been assigned.
    var type: TokenType? = nil
    /// Source text of the token; starts out empty.
    var tokenText = ""
}
// Shared lexer state used by the functions below.

/// Current state of the finite automaton.
var newState: DfaState = .Initial
/// Token currently being built.
var token: Token = Token()
/// Text accumulated for the current token.
/// NOTE: `String` is a value type, so this is an independent copy of
/// `token.tokenText`; appending here does not update `token.tokenText`.
var tokenText: String = token.tokenText
// FIXME: assign the statement to be recognized here
let str: String = "int age >= 45"
/// Discards the token in progress and starts over with an empty one.
///
/// Replaces the global `token` with a fresh `Token` and resets the global
/// `tokenText` to that token's (empty) text. The global `newState` is not
/// touched; callers that want to reset the automaton assign the returned
/// state themselves.
///
/// Marked `@discardableResult` because some callers only need the reset
/// and would otherwise trigger an "unused result" warning.
///
/// - Returns: Always `DfaState.Initial`.
@discardableResult
func initToken() -> DfaState {
    let fresh = Token()
    token = fresh
    tokenText = fresh.tokenText
    return .Initial
}
/// Splits `string` on spaces, lexes each fragment and prints the result.
///
/// For every non-empty fragment this calls `readToken`, prints the global
/// `tokenText` and the raw value of `token.type` ("undefined" when no type
/// was assigned), then resets the token and the automaton state.
///
/// - Parameter string: The source text to tokenize.
func read(_ string: String) {
    // `split` drops empty pieces, so leading, trailing or repeated spaces
    // no longer produce bogus empty "undefined" tokens.
    for fragment in string.split(separator: " ") {
        readToken(String(fragment))
        print("token is: \n" + tokenText + "\ntype is:\n" + (token.type?.rawValue ?? "undefined"))
        // Reset both the token and the automaton so that state left over
        // from this fragment cannot leak into the next one.
        newState = initToken()
    }
}
/// Lexes one space-free fragment of the input.
///
/// The first character (or `nil` for an empty fragment) is handed to
/// `checkType` to pick the starting state; every remaining character is
/// then fed to `readLeft` in order.
///
/// - Parameter string: The fragment to process.
func readToken(_ string: String) {
    checkType(string.first)
    // Iterating an empty remainder is a no-op, so no explicit emptiness
    // check is needed.
    string.dropFirst().forEach { remaining in
        readLeft(remaining)
    }
}
/// Starts a new token from its first character.
///
/// Depending on `ch`, sets the global `newState` and `token.type` and
/// appends `ch` to the global `tokenText`:
/// - a letter starts an identifier,
/// - a digit starts an integer literal,
/// - `>` starts a greater-than operator.
///
/// When `ch` is `nil` or is none of the above, no token is started and
/// `newState` is reset to `.Initial` so that a state left over from a
/// previous token is not applied to the characters that follow.
///
/// - Parameter ch: The first character of the token, or `nil` if there is none.
func checkType(_ ch: Character?) {
    guard let ch = ch else {
        newState = .Initial
        return
    }

    // Decide which state/type pair this first character opens, if any.
    let start: (state: DfaState, type: TokenType)?
    if isAlpha(ch) {
        start = (.Id, .Identifier)          // first character is a letter
    } else if isDigit(ch) {
        start = (.IntLiteral, .IntLiteral)  // first character is a digit
    } else if ch == ">" {
        start = (.GT, .GT)                  // first character is '>'
    } else {
        start = nil
    }

    guard let opening = start else {
        // Unrecognized character: make sure no stale state survives.
        newState = .Initial
        return
    }
    newState = opening.state
    token.type = opening.type
    tokenText.append(ch)
}
/// Prints the token in progress (if it has any text), resets the token and
/// the automaton, then tries to start a new token from `ch`.
private func finishTokenAndRestart(with ch: Character) {
    if !tokenText.isEmpty {
        // Same report format that `read` uses for the last token of a fragment.
        print("token is: \n" + tokenText + "\ntype is:\n" + (token.type?.rawValue ?? "undefined"))
    }
    newState = initToken()
    checkType(ch)
}

/// Feeds one character that follows a token's first character into the automaton.
///
/// While `ch` continues the current token it is appended to the global
/// `tokenText`. When `ch` cannot continue the current token, the finished
/// token is printed, the token is reset and `ch` is re-examined with
/// `checkType` as the possible start of the next token. State transitions
/// are written to the global `newState` so they are visible to the next call.
///
/// - Parameter ch: The next input character.
func readLeft(_ ch: Character) {
    switch newState {
    case .Initial:
        // No token in progress: try to begin one with this character.
        finishTokenAndRestart(with: ch)
    case .Id:
        if isAlpha(ch) || isDigit(ch) {
            tokenText.append(ch) // stay in the identifier state
        } else {
            finishTokenAndRestart(with: ch) // leave the identifier state
        }
    case .GT:
        if ch == "=" {
            token.type = .GE // '>' followed by '=' becomes '>='
            newState = .GE
            tokenText.append(ch)
        } else {
            finishTokenAndRestart(with: ch) // leave the GT state
        }
    case .GE:
        // '>=' is complete; whatever follows belongs to another token.
        finishTokenAndRestart(with: ch)
    case .IntLiteral:
        if isDigit(ch) {
            tokenText.append(ch) // stay in the integer-literal state
        } else {
            finishTokenAndRestart(with: ch) // leave the integer-literal state
        }
    }
}
// Run the demo: tokenize the sample statement `str` and print each token.
read(str)
輸出結果:
token is:
int
type is:
Identifier
token is:
age
type is:
Identifier
token is:
>=
type is:
GE
token is:
45
type is:
IntLiteral
END