上週幫一個剛剛找到自己方向的好友做了一個編譯原理的課程實驗,要求是做一個詞法分析器,具體要求如下:
對下述文法和單詞表定義的語言設計編制一個語法分析器。
(1)單詞符號及種別表
| 單詞符號 | 種別編碼 | 單詞值 |
| --- | --- | --- |
| `main` | 1 | |
| `int` | 2 | |
| `float` | 3 | |
| `double` | 4 | |
| `char` | 5 | |
| `if` | 6 | |
| `else` | 7 | |
| `do` | 8 | |
| `while` | 9 | |
| `l(l\|d)*` | 10 | 內部字符串 |
| `( +\|-\|ε ) dd*(.dd* \| ε)( e ( +\|-\|ε ) dd*\|ε)` | 20 | 二進制數值表示 |
| `=` | 21 | |
| `+` | 22 | |
| `-` | 23 | |
| `*` | 24 | |
| `/` | 25 | |
| `(` | 26 | |
| `)` | 27 | |
| `{` | 28 | |
| `}` | 29 | |
| `,` | 30 | |
| `;` | 31 | |
| `>` | 32 | |
| `>=` | 33 | |
| `<` | 34 | |
| `<=` | 35 | |
| `==` | 36 | |
| `!=` | 37 | |
(2)語法結構定義
<表達式> ::= <項>{ +<項>|-<項>}
<項> ::= <因子>{*<因子>|/<因子>}
<因子> ::=ID|num|(<表達式>)
num::= ( +|-|ε ) 數字數字*(.數字數字* | ε)( e ( +|-|ε ) 數字數字*|ε)
ID::=字母(字母|數字)*
字母::=a|b|c…|z|A|B|C…|Z
數字::=0|1|2…|9
原本不是個很難的程序,但我還是寫得較爲複雜。有這方面心得的朋友可以和我聯繫,一起探討一下吧~~~下面把自己簡陋的代碼貼出來:
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
/* Filler character written into `token` before each scan. */
#define INVALUECHAR '.'
/* Size of the input line buffer, in characters. */
#define MAXCHARNUM 80
/* Size of the `token` buffer; the terminating NUL has to fit in here too. */
#define TOKENLENGTH 15
/* Number of entries in the keyword table `rwtab`. */
#define RETABLENGTH 9
/*
 * Status codes used throughout this file.
 * NOTE: TRUE is 0 and FALSE is -1, so results must be compared explicitly
 * (`TRUE == f()`); using them directly as C booleans inverts the meaning.
 * FALSE is parenthesized so it expands safely inside any expression.
 */
#define TRUE 0
#define FALSE (-1)

/*
 * syn: category code of the most recently scanned token (set by Analyze and
 *      NumberOP, read by main).
 * sum: printed by main instead of the lexeme when a number token is reported
 *      and sum is non-zero; nothing in this file assigns it a non-zero value.
 */
int syn, sum = 0;
/* Lexeme of the most recently scanned token, NUL-terminated by Analyze. */
char token[TOKENLENGTH] = { '\0' };
/* Keyword table; the keyword at index n has category code n + 1. */
const char *rwtab[RETABLENGTH] = {
    "main", "int", "float", "double", "char", "if", "else", "do", "while"
};

int IsNumber(char num);                                           /* TRUE if num is a decimal digit */
int GetSum(int i);                                                /* integer value of the first i characters of token */
int HoldE(int *local, char *string, int num, int *i);             /* scan the exponent part of a number, starting at 'e' */
int NumberOP(int *local, char *string, int num, int *i, char c);  /* handle the first non-digit after a number's integer part */
int Analyze(char *string, int num);                               /* scan the longest token starting at string[num] */
- int main()
- {
- int p = 0, i, j;
- char string[MAXCHARNUM] = { '\0' };
- printf("please input a string:\n");
- //讀取輸入的字符串,超過長度限定就讓用戶從新輸入
- do {
- scanf("%c", &string[p++]);
- if (MAXCHARNUM <= p) {
- printf("\nYou have input more than 80 characters!\n");
- printf("Please input again:\n");
- string[0] = '\0';
- p = 0;
- }
- }while ('\n' != string[p-1]); //字符串以'\n'結束
- //遍歷輸入的字符串,忽略回車和空格
- for (i = 0; i < p; i++) {
- if ((' ' == string[i]) || ('\n' == string[i]))
- continue;
- else {
- j = Analyze(string, i); //當出現錯誤時Analyze返回FALSE,正確時返回找到字串的最後一個字符的位置
- if (FALSE == j) {
- printf("\nYour input is wrong!\n");
- return 1;
- }
- i = j; //將i賦值爲查找到字串的最後一個字符的位置
- switch (syn) {
- //-1爲出現非法字符,0爲正常結束,11爲整型數的sum輸出
- case -1: { printf("\nYou have input illegal characters!\nSo it ended!\n");
- break; }
- case 0: { break; }
- case 20: { if (0 != sum) {
- printf("( %-5d%15d )\n", syn, sum);
- sum = 0;
- }
- else
- printf("( %-5d%15s )\n", syn, token);
- break; }
- default: { printf("( %-5d%15s )\n", syn, token);
- break; }
- }
- }
- }
- return 0;
- }
- //判斷num是否爲數字
- int IsNumber(char num)
- {
- if (('0' <= num) && ('9' >= num))
- return TRUE;
- else
- return FALSE;
- }
- //求token前i個數字的加權和
- int GetSum(int i)
- {
- int sum, j;
- char a[TOKENLENGTH];
- for (j = 0; j < i; j++)
- a[j] = token[j];
- a[j] = '\0';
- sum = atoi(a);
- return sum;
- }
- //讀取浮點數時遇到e的處理
- int HoldE(int *local, char *string, int num, int *i)
- {
- char ch = 'e';
- token[(*i)++] = ch;
- ch = string[++num];
- //e的後一位爲數字
- if (TRUE == IsNumber(ch)) {
- token[(*i)++] = ch;
- ch = string[++num];
- while (TRUE == IsNumber(ch)) {
- token[(*i)++] = ch;
- ch = string[++num];
- }
- *local = num - 1; //改變string中下標須要指向的位置
- return TRUE;
- }
- //e的後一位爲'+'或'-'
- else if (('+' == ch) || ('-' == ch)) {
- token[(*i)++] = ch;
- ch = string[++num];
- if (TRUE != IsNumber(ch))
- return FALSE;
- while (TRUE == IsNumber(ch)) {
- token[(*i)++] = ch;
- ch = string[++num];
- }
- *local = num - 1;
- return TRUE;
- }
- //其餘狀況就出現錯誤
- else
- return FALSE;
- }
- //讀取浮點數時遇到第一個非數字的字符的處理
- int NumberOP(int *local, char *string, int num, int *i, char c)
- {
- int result = TRUE;
- char ch = c;
- //這個非數字字符爲小數點
- if ('.' == ch) {
- token[(*i)++] = ch;
- ch = string[++num];
- //小數點後一位若是不是數字就出現錯誤
- if (TRUE != IsNumber(ch))
- return FALSE;
- while (TRUE == IsNumber(ch)) {
- token[(*i)++] = ch;
- ch = string[++num];
- }
- //小數點以後一串數字事後遇到e
- if ('e' == ch) {
- result = HoldE(local, string, num, i);
- if (FALSE == result)
- return FALSE;
- }
- else
- *local = num - 1;
- }
- //這個字符爲e
- else if ('e' == ch) {
- result = HoldE(local, string, num, i);
- if (FALSE == result)
- return FALSE;
- }
- //其餘狀況就不用再讀取了,直接修改local並返回
- else
- *local = num - 1;
- syn = 20;
- return TRUE;
- }
- //查詢最長字串
- int Analyze(char *string, int num)
- {
- int m, i = 0, local = num, n, flag = 0;
- int result = TRUE;
- char ch;
- for (m = 0; m < TOKENLENGTH; m++)
- token[m++] = INVALUECHAR;
- ch = string[num];
- //第一種狀況爲字符
- if (((ch <= 'z') && (ch >= 'a')) || ((ch <= 'Z') && (ch >= 'A'))) {
- while(((ch <= 'z') && (ch >= 'a')) || (( ch <= 'Z') && (ch >= 'A')) || (TRUE == IsNumber(ch))) {
- token[i++] = ch;
- //token長度限定
- if (i >= (TOKENLENGTH - 1))
- return FALSE;
- else
- ch = string[++num];
- }
- local = num - 1;
- //syn預設爲l(l | d)*的形式
- syn = 10;
- token[i] = '\0';
- //遍歷關鍵字數組,若是找到就修改syn並
- for (n = 0; n < RETABLENGTH; n++)
- if (0 == strcmp(token, rwtab[n])) {
- syn = n + 1;
- break;
- }
- }
- //第二種狀況爲遇到'+'、'-'或數字
- else if (('+' == ch) || ('-' == ch) || (TRUE == IsNumber(ch))) {
- token[i++] = ch;
- //若是爲數字就用flag標記一下
- if (TRUE == IsNumber(ch))
- flag = 1;
- ch = string[++num];
- //下一位也爲數字
- if (TRUE == IsNumber(ch)) {
- token[i++] = ch;
- ch = string[++num];
- while (TRUE == IsNumber(ch)) {
- token[i++] = ch;
- ch = string[++num];
- }
- //遇到第一個
- result = NumberOP(&local, string, num, &i, ch);
- if (FALSE == result)
- return FALSE;
- }
- //下一位不爲數字要分狀況,當第一位爲數字時執行下列操做,
- //第一位爲'+'、'-'時,下一位必須爲數字
- else if (1 == flag) {
- result = NumberOP(&local, string, num, &i, ch);
- if (FALSE == result)
- return FALSE;
- }
- //當第一位爲'+'、'-'時,下一位不爲數字證實'+'、'-'已是最長的字串了
- else {
- if ('+' == token[i-1])
- syn = 22;
- else
- syn = 23;
- }
- }
- else {
- switch (ch) {
- case '=': { token[i++] = ch;
- ch = string[++num];
- if ('=' == ch) {
- syn = 36;
- token[i++] = ch;
- local = num;
- break;
- }
- syn = 21;
- break; }
- case '*': { syn = 24;
- token[i++] = ch;
- break; }
- case '/': { syn = 25;
- token[i++] = ch;
- break; }
- case '(': { syn = 26;
- token[i++] = ch;
- break; }
- case ')': { syn = 27;
- token[i++] = ch;
- break; }
- case '{': { syn = 28;
- token[i++] = ch;
- break; }
- case '}': { syn = 29;
- token[i++] = ch;
- break; }
- case ',': { syn = 30;
- token[i++] = ch;
- break; }
- case ';': { syn = 31;
- token[i++] = ch;
- break; }
- case '>': { token[i++] = ch;
- ch = string[++num];
- if ('=' == ch) {
- syn = 33;
- token[i++] = ch;
- local = num;
- break;
- }
- syn = 32;
- break; }
- case '<': { token[i++] = ch;
- ch = string[++num];
- if ('=' == ch) {
- syn = 35;
- token[i++] = ch;
- local = num;
- break;
- }
- syn = 34;
- break; }
- case '!': { token[i++] = ch;
- ch = string[++num];
- if ('=' != ch) {
- return FALSE;
- }
- syn = 37;
- token[i++] = ch;
- local = num;
- break; }
- case '\n': { syn = 0;
- token[i++] = ch;
- break; }
- default: { syn = -1;
- break; }
- }
- }
- token[i] = '\0';
- return local;
- }