詞法分析
1.主要可以識別部分C語言的關鍵字、運算符、分界符、標識符、常量(包括整型常量,浮點數常量),並能處理註釋、部分複合運算符(如>=等)。
1)標識符:可由字母,數字和下劃線組成。標識符必須以字母或下劃線開頭,大,小寫的字母分別認爲是兩個不同的字符。
常量:包括整型常量和浮點數常量。
2)標識符(id)和常量(num)通過如下正規式定義:
id=(letter|dline)(letter|digit|dline)*
num=digit digit* (. digit*)?
2.單詞符號的種別編碼方案:
單詞符號 | 種別碼 | 單詞符號 | 種別碼 | 單詞符號 | 種別碼 | 單詞符號 | 種別碼 |
auto | 1 | short | 23 | . | 45 | ? | 67 |
break | 2 | signed | 24 | ! | 46 | | | 68 |
case | 3 | sizeof | 25 | ~ | 47 | , | 69 |
char | 4 | static | 26 | + | 48 | ; | 70 |
const | 5 | struct | 27 | ++ | 49 | : | 71 |
continue | 6 | switch | 28 | - | 50 | \ | 72 |
default | 7 | typedef | 29 | -- | 51 | -= | 73 |
do | 8 | union | 30 | * | 52 | += | 74 |
double | 9 | unsigned | 31 | & | 53 | *= | 75 |
else | 10 | void | 32 | && | 54 | /= | 76 |
enum | 11 | volatile | 33 | / | 55 | %= | 77 |
extern | 12 | while | 34 | % | 56 | >>= | 78 |
float | 13 | _bool | 35 | < | 57 | <<= | 79 |
for | 14 | _Complex | 36 | > | 58 | &= | 80 |
goto | 15 | _Imaginary | 37 | << | 59 | ^= | 81 |
if | 16 | ( | 38 | >> | 60 | |= | 82 |
inline | 17 | ) | 39 | <= | 61 | = | 83 |
int | 18 | [ | 40 | >= | 62 | 標識符(id) | 84 |
long | 19 | ] | 41 | == | 63 | 常量(num) | 85 |
register | 20 | { | 42 | != | 64 | | |
restrict | 21 | } | 43 | ^ | 65 | | |
return | 22 | -> | 44 | || | 66 | | |
3.詞法分析程序的算法思想
從字符串表示的源程序中識別出具有獨立意義的單詞符號,其基本思想是根據掃描到的單詞符號的第一個字符的種類,拼出相應的單詞符號。
語法分析:
1.所識別的C語言上下文無關文法用擴充的BNF表示如下:
E_E –> id = E ;
E –> T { +T | -T }
T –> F { *F | /F }
F –> ( E ) | id | num
S –> P Q ;
P –> int | float | double | long
Q –> id { , id }
2.語法分析程序的算法思想
有順序地掃描具有獨立意義的單詞符號,對於每個單詞按照上面文法進行歸約,如果是不符合上述文法的單詞則報錯並做相應的錯誤恢復,直至結束。
語義分析:
1.語義分析程序算法思想
採用遞歸下降語法制導翻譯的方法,對算術表達式、賦值語句在語法分析的基礎上對於每個文法的歸約做相應的語義處理。在聲明語句裏做的主要動作是將遇到的每個新的變量填入變量表中,在簡單賦值語句裏做的主要語義處理是當遇到變量時即查變量表,對每次歸約做相應的加,減,乘,除的動作並生成相應的四元式序列。
可執行程序代碼:
/*
 * Toy front end for a small C subset: a lexer (scaner), a recursive-descent
 * parser for declarations and simple assignment statements, and a
 * syntax-directed translator that prints quadruples.
 *
 * Grammar handled by the parser:
 *   E_E -> id = E ;
 *   E   -> T { +T | -T }
 *   T   -> F { *F | /F }
 *   F   -> ( E ) | id | num
 *   S   -> P Q ;          P -> char | double | float | int | long | short
 *   Q   -> id { , id }
 *
 * Input is read from stdin up to (not including) the first '#'.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Sentinel that terminates the keyword table. */
#define KEY_WORD_END "waiting for your expanding"

enum {
    INPUT_LEN   = 255,  /* size of the source buffer (see scanf width in main) */
    NAME_LEN    = 64,   /* size of every name/operand buffer                   */
    MAX_QUADS   = 100,  /* capacity of the quadruple table                     */
    MAX_SYMBOLS = 100   /* capacity of the symbol table                        */
};

/* Token codes the parser inspects; the lexer's switch carries the rest. */
enum {
    TK_DO     = 8,
    TK_CHAR   = 4,
    TK_DOUBLE = 9,
    TK_FLOAT  = 13,
    TK_FOR    = 14,
    TK_IF     = 16,
    TK_INT    = 18,
    TK_LONG   = 19,
    TK_SHORT  = 23,
    TK_WHILE  = 34,
    TK_ID     = 84,
    TK_NUM    = 85,
    TK_ERROR  = -1,
    TK_OVER   = 1000
};

/* 1 while no error has been reported, 0 afterwards. */
int flag = 1;

/* Generated quadruples: result = ag1 op ag2. */
struct {
    char result[NAME_LEN];
    char ag1[NAME_LEN];
    char op[NAME_LEN];
    char ag2[NAME_LEN];
} quad[MAX_QUADS];

/* One token: its code and its spelling. */
typedef struct {
    int typenum;
    const char *word;
} WORD;

static WORD initial_word = { 0, "" };
WORD *oneword = &initial_word;   /* current look-ahead token */

char input[INPUT_LEN];           /* source text, NUL-terminated      */
char token[INPUT_LEN] = "";      /* spelling of the identifier/number */
int p_input;                     /* read cursor into input            */
int p_token;                     /* write cursor into token           */
char ch;                         /* most recently read character      */

/* Keywords; a keyword's token code is its index + 1. */
const char *rwtab[] = {
    "auto", "break", "case", "char", "const", "continue",
    "default", "do", "double", "else", "enum", "extern", "float",
    "for", "goto", "if", "inline", "int", "long", "register", "restrict",
    "return", "short", "signed", "sizeof", "static", "struct", "switch",
    "typedef", "union", "unsigned", "void", "volatile", "while",
    "_bool", "_Complex", "_Imaginary", KEY_WORD_END
};

WORD *scaner(void);
void retract(void);
void all_s(void);
char *E(void);

/* Bounded copy into a NAME_LEN buffer; always NUL-terminates. */
static void copy_name(char *dst, const char *src)
{
    (void)snprintf(dst, NAME_LEN, "%s", src);
}

/* True when the current token is spelled exactly `text`.
 * Compares contents: pointer equality of string literals is unspecified. */
static int word_is(const char *text)
{
    return strcmp(oneword->word, text) == 0;
}

/* ------------------------------------------------------ semantic analysis */

char table[MAX_SYMBOLS][NAME_LEN];  /* declared variable names */
int t_n = 0;                        /* number of entries in table */

/* Record a declared variable name. */
void add(const char *t_a)
{
    if (t_n >= MAX_SYMBOLS) {
        printf("error,symbol table full\n");
        flag = 0;
        return;
    }
    copy_name(table[t_n], t_a);
    t_n++;
}

/* Return 1 if `name` has been declared, 0 otherwise. */
int lookup(const char *name)
{
    int i;

    for (i = 0; i < t_n; i++) {
        if (strcmp(name, table[i]) == 0) {
            return 1;
        }
    }
    return 0;
}

int k = 0;   /* number of temporaries created so far */
int ek = 0;  /* number of quadruples emitted so far  */

/* Return the name of a fresh temporary ("t1", "t2", ...).
 * The result lives in a static buffer: copy it before the next call. */
char *newtemp(void)
{
    static char name[NAME_LEN];

    k++;
    (void)snprintf(name, sizeof name, "t%d", k);
    return name;
}

/* Store (while there is room) and print the quadruple r = a1 o a2. */
void emit(const char *r, const char *a1, const char *o, const char *a2)
{
    if (ek < MAX_QUADS) {
        copy_name(quad[ek].result, r);
        copy_name(quad[ek].ag1, a1);
        copy_name(quad[ek].op, o);
        copy_name(quad[ek].ag2, a2);
    }
    ek++;
    printf("(%d) %s = %s %s %s\n", ek, r, a1, o, a2);
}

/* Hand an operand name back to the caller through static storage, so that
 * F/T/E never return the address of a local array. Every caller copies the
 * result immediately, so one shared slot is enough. */
static char *give_back(const char *place)
{
    static char slot[NAME_LEN];

    copy_name(slot, place);
    return slot;
}

/* If the current token is a stray identifier/number, report and skip it.
 * Returns 1 when a token was skipped. */
static int skip_extra_operand(void)
{
    if (oneword->typenum == TK_ID || oneword->typenum == TK_NUM) {
        printf("error,extra id \n");
        flag = 0;
        oneword = scaner();
        return 1;
    }
    return 0;
}

/* True when the current token cannot start an operand after an operator. */
static int is_misplaced_operator(void)
{
    return word_is("*") || word_is("-") || word_is("+") || word_is("/") ||
           word_is(")");
}

/* -------------------------------------------------- assignment statements */

/* F -> ( E ) | id | num.  Returns the operand's name ("" or "null" on error). */
char *F(void)
{
    char r[NAME_LEN];

    if (oneword->typenum == TK_ID || oneword->typenum == TK_NUM) {
        copy_name(r, oneword->word);
        if (oneword->typenum == TK_ID && lookup(oneword->word) == 0) {
            printf("%s no defind\n", oneword->word);
            flag = 0;
        }
        oneword = scaner();
        return give_back(r);
    }
    if (word_is(";") || oneword->typenum == TK_OVER) {
        printf("miss id\n");
        flag = 0;
        return give_back("");
    }
    if (word_is("*") || word_is("+") || word_is("-") || word_is("/")) {
        /* Operand missing; leave the operator for T/E to consume. */
        printf("error,extra char\n");
        flag = 0;
        return give_back("");
    }
    if (!word_is("(")) {
        printf("error,extra char \n");
        flag = 0;
        oneword = scaner();
        return give_back("");
    }

    oneword = scaner();
    copy_name(r, E());
    if (word_is(")")) {
        oneword = scaner();
        return give_back(r);
    }
    /* Keep the current token: the caller decides what to do with it. */
    printf("error,miss ) in F()\n");
    flag = 0;
    return give_back("null");
}

/* T -> F { *F | /F }.  Emits one quadruple per operator. */
char *T(void)
{
    char r[NAME_LEN];
    char a1[NAME_LEN];
    char a2[NAME_LEN];
    char op[NAME_LEN];

    copy_name(a1, F());
    copy_name(r, a1);
    skip_extra_operand();

    while (word_is("*") || word_is("/")) {
        copy_name(op, oneword->word);
        oneword = scaner();
        while (is_misplaced_operator()) {
            printf("error,extra char \n");
            flag = 0;
            oneword = scaner();
        }
        copy_name(a2, F());
        copy_name(r, newtemp());
        emit(r, a1, op, a2);
        copy_name(a1, r);
        skip_extra_operand();
    }
    return give_back(r);
}

/* E -> T { +T | -T }.  Emits one quadruple per operator. */
char *E(void)
{
    char r[NAME_LEN];
    char a1[NAME_LEN];
    char a2[NAME_LEN];
    char op[NAME_LEN];

    copy_name(a1, T());
    copy_name(r, a1);
    skip_extra_operand();

    while (word_is("+") || word_is("-")) {
        copy_name(op, oneword->word);
        oneword = scaner();
        if (is_misplaced_operator()) {
            printf("error,extra char \n");
            flag = 0;
            oneword = scaner();
        }
        copy_name(a2, T());
        copy_name(r, newtemp());
        emit(r, a1, op, a2);
        copy_name(a1, r);
        if (!skip_extra_operand() && word_is("(")) {
            printf("error,extra or ( in E1()\n");
            flag = 0;
            oneword = scaner();
        }
    }
    return give_back(r);
}

/* E_E -> id = E   (the trailing ';' is consumed by all_s1). */
void E_E(void)
{
    char r[NAME_LEN];
    char a1[NAME_LEN];

    if (oneword->typenum != TK_ID) {
        printf("error,miss letter\n");
        flag = 0;
        return;
    }
    if (lookup(oneword->word) == 0) {
        printf("%s no defind\n", oneword->word);
        flag = 0;  /* an undeclared target is an error, as it is in F() */
    }
    copy_name(r, oneword->word);
    oneword = scaner();

    if (word_is("=")) {
        oneword = scaner();
    } else {
        /* Recover by treating the current token as the start of E. */
        printf("error,miss =\n");
        flag = 0;
    }
    if (word_is(")")) {
        printf("error,extra ) in E_E()\n");
        flag = 0;
        oneword = scaner();
    }
    copy_name(a1, E());
    emit(r, a1, "", "");
}

/* ----------------------------------------------------------- declarations */

/* Q ; -> id { , id } ;   Declared names are entered into the symbol table. */
void S_1(void)
{
    if (oneword->typenum != TK_ID) {
        /* Recover by leaving the offending token in place. */
        printf("error,miss element in defind\n");
        flag = 0;
    } else {
        add(oneword->word);
        oneword = scaner();
    }

    if (word_is("(")) {
        printf("error\n");
        flag = 0;
        return;
    }

    while (word_is(",")) {
        oneword = scaner();
        if (oneword->typenum != TK_ID) {
            printf("error,miss id in defind\n");
            flag = 0;
        } else {
            add(oneword->word);
            oneword = scaner();
        }
    }
    if (!word_is(";")) {
        printf("error,miss ;\n");
        flag = 0;
    }
    oneword = scaner();
}

/* True when `code` is a type keyword that starts a declaration. */
static int starts_declaration(int code)
{
    return code == TK_CHAR || code == TK_DOUBLE || code == TK_FLOAT ||
           code == TK_INT || code == TK_LONG || code == TK_SHORT;
}

/* S -> P Q ;   (declaration statement) */
void S(void)
{
    if (starts_declaration(oneword->typenum)) {
        oneword = scaner();
        S_1();
    } else {
        printf("error,in defind\n");
        flag = 0;
    }
}

/* ------------------------------------------------------ statement sequence */

/* True when `code` can begin a statement this parser looks at. */
static int starts_statement(int code)
{
    return code == TK_ID || code == TK_FOR || code == TK_IF ||
           code == TK_WHILE || code == TK_DO || starts_declaration(code);
}

/* Parse a run of declarations and assignment statements. */
void all_s1(void)
{
    int i = oneword->typenum;

    if (!starts_statement(i)) {
        printf("error,no sentence in all_s1\n");
        flag = 0;
        return;
    }
    while (starts_statement(i)) {
        if (starts_declaration(i)) {
            S();
        } else if (i == TK_ID) {
            E_E();
            if (word_is(";")) {
                oneword = scaner();
            } else {
                printf("error\n");
                flag = 0;
            }
        } else {
            /* for/if/while/do are recognised but not supported; skip the
             * keyword so the loop always makes progress. */
            printf("error,no sentence in all_s1\n");
            flag = 0;
            oneword = scaner();
        }
        i = oneword->typenum;
    }
}

/* Parse either "{ statements }" or a bare statement sequence. */
void all_s(void)
{
    if (!word_is("{")) {
        all_s1();
        return;
    }
    oneword = scaner();
    all_s1();
    if (word_is("}")) {
        oneword = scaner();
    } else {
        printf("error,miss } in all_s\n");
        flag = 0;
    }
}

/* Read the source up to '#', parse it, and report success if no error. */
int main(void)
{
    /* The width must stay INPUT_LEN - 1 so the terminator always fits. */
    if (scanf("%254[^#]", input) != 1) {
        input[0] = '\0';
    }
    p_input = 0;
    oneword = scaner();
    all_s();
    if (flag == 1) {
        printf("success\n");
    }
    return 0;
}

/* ------------------------------------------------------------------ lexer */

/* Read the next character into ch; yields '\0' at and beyond the end. */
char m_getch(void)
{
    ch = (p_input >= 0 && p_input < INPUT_LEN) ? input[p_input] : '\0';
    p_input = p_input + 1;
    return ch;
}

/* Skip blanks, tabs and line breaks, leaving the next character in ch. */
void getbc(void)
{
    while (ch == ' ' || ch == '\n' || ch == '\t' || ch == '\r') {
        m_getch();
    }
}

/* Append ch to token (silently dropped if token is full). */
void concat(void)
{
    if (p_token < (int)sizeof token - 1) {
        token[p_token] = ch;
        p_token = p_token + 1;
        token[p_token] = '\0';
    }
}

/* 1 if ch is an underscore. */
int dline(void)
{
    return ch == '_';
}

/* 1 if ch is an ASCII letter. */
int letter(void)
{
    return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
}

/* 1 if ch is an ASCII digit. */
int digit(void)
{
    return ch >= '0' && ch <= '9';
}

/* Return the keyword code of token, or TK_ID if it is not a keyword. */
int reserve(void)
{
    int i;

    for (i = 0; strcmp(rwtab[i], KEY_WORD_END) != 0; i++) {
        if (strcmp(rwtab[i], token) == 0) {
            return i + 1;
        }
    }
    return TK_ID;
}

/* Un-read one character. */
void retract(void)
{
    if (p_input > 0) {
        p_input = p_input - 1;
    }
}

/* Placeholder kept from the original program. */
char *dtb(void)
{
    return NULL;
}

/* Fill the single token slot and return it. The parser only ever holds one
 * token at a time, so no per-token allocation is needed. */
static WORD *make_word(int code, const char *text)
{
    static WORD slot;

    slot.typenum = code;
    slot.word = text;
    return &slot;
}

/* Consume the next character if it equals `expected`; otherwise leave it. */
static int next_is(char expected)
{
    m_getch();
    if (ch == expected) {
        return 1;
    }
    retract();
    return 0;
}

/* Skip the rest of a line comment; stops at the end of input as well. */
static void skip_line_comment(void)
{
    do {
        m_getch();
    } while (ch != '\n' && ch != '\0');
    if (ch == '\0') {
        retract();  /* leave the terminator for the next read */
    }
}

/* Skip a block comment whose opening has been consumed.
 * Returns 1 when the closing was found, 0 when the input ended first. */
static int skip_block_comment(void)
{
    char prev = '\0';

    for (;;) {
        m_getch();
        if (ch == '\0') {
            retract();
            return 0;
        }
        if (prev == '*' && ch == '/') {
            return 1;
        }
        prev = ch;
    }
}

/* Return the next token. Comments are skipped. For identifiers and numbers
 * the spelling points at the shared `token` buffer, so it is only valid
 * until the next call. */
WORD *scaner(void)
{
    for (;;) {
        p_token = 0;
        token[0] = '\0';
        m_getch();
        getbc();

        if (letter() || dline()) {          /* identifier or keyword */
            while (letter() || digit() || dline()) {
                concat();
                m_getch();
            }
            retract();
            return make_word(reserve(), token);
        }

        if (digit()) {                      /* integer or floating constant */
            while (digit()) {
                concat();
                m_getch();
            }
            if (ch == '.') {
                concat();
                m_getch();
                while (digit()) {
                    concat();
                    m_getch();
                }
            }
            retract();
            return make_word(TK_NUM, token);
        }

        switch (ch) {
        case '(': return make_word(38, "(");
        case ')': return make_word(39, ")");
        case '[': return make_word(40, "[");
        case ']': return make_word(41, "]");
        case '{': return make_word(42, "{");
        case '}': return make_word(43, "}");
        case '.': return make_word(45, ".");
        case '~': return make_word(47, "~");
        case '?': return make_word(67, "?");
        case ',': return make_word(69, ",");
        case ';': return make_word(70, ";");
        case ':': return make_word(71, ":");
        case '\\': return make_word(72, "\\");

        case '-':
            if (next_is('>')) return make_word(44, "->");
            if (next_is('-')) return make_word(51, "--");
            if (next_is('=')) return make_word(73, "-=");
            return make_word(50, "-");

        case '!':
            if (next_is('=')) return make_word(64, "!=");
            return make_word(46, "!");

        case '+':
            if (next_is('=')) return make_word(74, "+=");
            if (next_is('+')) return make_word(49, "++");
            return make_word(48, "+");

        case '*':
            if (next_is('=')) return make_word(75, "*=");
            return make_word(52, "*");

        case '&':
            if (next_is('&')) return make_word(54, "&&");
            if (next_is('=')) return make_word(80, "&=");
            return make_word(53, "&");

        case '/':
            if (next_is('/')) {
                skip_line_comment();
                continue;               /* scan the token after the comment */
            }
            if (next_is('*')) {
                if (!skip_block_comment()) {
                    printf("error,unterminated comment\n");
                    flag = 0;
                    return make_word(TK_OVER, "OVER");
                }
                continue;
            }
            if (next_is('=')) return make_word(76, "/=");
            return make_word(55, "/");

        case '%':
            if (next_is('=')) return make_word(77, "%=");
            return make_word(56, "%");

        case '<':
            if (next_is('=')) return make_word(61, "<=");
            if (next_is('<')) {
                if (next_is('=')) return make_word(79, "<<=");
                return make_word(59, "<<");
            }
            return make_word(57, "<");

        case '>':
            if (next_is('=')) return make_word(62, ">=");
            if (next_is('>')) {
                if (next_is('=')) return make_word(78, ">>=");
                return make_word(60, ">>");
            }
            return make_word(58, ">");

        case '=':
            if (next_is('=')) return make_word(63, "==");
            return make_word(83, "=");

        case '^':
            if (next_is('=')) return make_word(81, "^=");
            return make_word(65, "^");

        case '|':
            if (next_is('|')) return make_word(66, "||");
            if (next_is('=')) return make_word(82, "|=");
            return make_word(68, "|");

        case '\0':
        case '#':
            return make_word(TK_OVER, "OVER");

        default:
            return make_word(TK_ERROR, "ERROR");
        }
    }
}
測試結果與分析:
操作說明:1鍵盤輸入; 2可以輸入多行; 3以#號結束。
測試數據 1: int a;#
輸出:success
分析:輸入爲正確的聲明語句,因此輸出success表正確輸入,#是結束符。
測試數據 2:
int a, ;
int ;
int #
輸出:
error,miss id in defind
error,miss element in defind
error,miss element in defind
error,miss ;
分析:第一行的輸入中少了一個變量名,在第二行的輸入中也是少了一個變量名,在第三行的輸入中少了一個變量名和一個‘;’符,程序報了4個錯,並做了相應的錯誤恢復。
測試數據 3:a=2+-3;#
輸出:
a no defind
error,extra char
(1) t1 = 2 + 3
(2) a = t1
分析:變量名a沒有提前聲明就使用,程序報了1個錯;‘+’號和‘-’號之間少了一個變量,程序再報一個錯,並做了錯誤恢復,輸出2條四元式序列。
測試數據 4:
int a;
a 2+3;#
輸出:
error,miss =
(1) t1 = a + 3
(2) a = t1
分析:變量之後缺少‘=’號,程序報錯,並做了錯誤恢復。
測試數據 5:
int a;
a= ; #
輸出:
miss id
(1) a =
分析:賦值語句的‘=’後面至少應該有一個變量,輸入裏‘=’後面是‘;’號,表明語句結束,所以至少缺少一個變量名,程序報錯並做了錯誤恢復。
測試數據 6:
int a;
a=3+((a+4;#
輸出:
error,extra char
(1) t1 = a + 4
error,miss ) in F()
error,miss ) in F()
(2) t2 = 3 + null
(3) a = t2
分析:輸入的賦值語句裏少了2個‘)’號,因此程序報2個錯。
測試數據 7:
int a;
a=3+4*(5+6));#
輸出:
(1) t1 = 5 + 6
(2) t2 = 4 * t1
(3) t3 = 3 + t2
(4) a = t3
error
分析:輸入的賦值語句最後多了1個‘)’號,程序報了一個錯。賦值語句a=3+4*(5+6)的計算順序是先計算括號裏的加法,再計算括號外的乘法,最後計算括號外的加法,輸出結果表明計算順序是正確的。
測試數據 8:
int a;
a=a+4 4;#
輸出:
error,extra id
(1) t1 = a + 4
(2) a = t1
分析:賦值語句的最後多了1個數字,程序報了1個錯。
測試數據 9:
int a;
a=3+4*(5+7);#
輸出:
(1) t1 = 5 + 7
(2) t2 = 4 * t1
(3) t3 = 3 + t2
(4) a = t3
success
分析:輸入的是正確的聲明語句和賦值語句,程序輸出的四元式序列也是正確的。
測試數據 10:
int a;
a=(3+5)-4*+)*4+99*8;
int b;
a b+3;
a=b*(b+c)+4;
a=b 3;#
輸出:
(1) t1 = 3 + 5
error,extra char
error,extra char
error,extra char
(2) t2 = 4 * 4
(3) t3 = t1 - t2
(4) t4 = 99 * 8
(5) t5 = t3 + t4
(6) a = t5
error,miss =
(7) t6 = b + 3
(8) a = t6
c no defind
(9) t7 = b + c
(10) t8 = b * t7
(11) t9 = t8 + 4
(12) a = t9
error,extra id
(13) a = b
分析:這裏輸入了多條聲明語句和多條賦值語句,聲明語句和賦值語句的順序也有交叉,其中第一條賦值語句裏有3個錯誤,第二條賦值語句裏少了一個‘=’號,第三條賦值語句裏的變量c沒有聲明,第四條賦值語句有一個錯誤,程序做了相應的錯誤恢復和四元式序列的輸出。