實驗一 《實現C語言小子集程序的詞法分析》實驗要求
以表1的小語言爲例實現詞法分析
設計單詞屬性值,各種表格(表示標識符表、常量表),單詞符號及機內表示
編碼實現詞法分析程序
採用文本輸入和輸出的方式。程序從名爲「test.txt」的文件中讀入代碼,將詞法分析的結果保存到「output.txt」中。
要求實現:
(1)對正確源程序的識別;
(2)對包含有註釋//和/* */的源程序的識別;
(3)對包含錯誤標識符的源程序的識別。
測試樣例:test.txt
int i = 3;
int 5j = 10;
int m = max(i, j);
while(i<m) do
{
i = i+ 1;
}
void max(int x, int y)
{
int 3temp = 0;
if(x > y)
temp = x;
else
temp = y;
return temp;
}
輸出:output.txt
<26,->, <1,i>, <16,->, <2,3>, <23,->,
<26,->, LexicalError, <16,->, <2,10>, <23,->,
<26,->, <1,m>, <16,->, <1,max>, <17,->, <1,i>, <24,->, <1,j>, <18,->, <23,->,
<31,->, <17,->, <1,i>, <8,->, <1,m>, <18,->, <32,->,
<21,->,
<1,i>, <16,->, <1,i>, <3,->, <2,1>,<23,->,
<22,->,
<25,->, <1,max>, <17,->, <26,->, <1,x>, <24,->, <26,->, <1,y>, <18,->,
<21,->,
<26,->, LexicalError ,<16,->, <2,0>, <23,->,
<29,->, <17,->, <1,x>, <10,->, <1,y>, <18,->,
<1,temp>, <16,->, <1,x>, <23,->,
<30,->
<1,temp>, <16,->, <1,y>,
/*
 * Lexical analyser for a small subset of C.
 * @auth: finch
 * @time: 2017/4/1
 *
 * Reads the source text from "test.txt", writes one row of tokens per
 * source line to "output.txt" and echoes a similar trace to stdout.
 * Tokens are written as "<code,value>," where value is "-" for every
 * token that carries no attribute.
 */
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define BASIZE 20
#define EOS '\0'
#define True 1
#define False 0
#define key_num 9       /* number of entries in keyWord[] */
#define buff_size 1024  /* capacity of buff[], terminator included */
#define error_max 10    /* capacity of error[] */

/*
 * Token codes. Every value except the three marked "inferred" appears in the
 * original scanner.
 * NOTE(review): 6, 9 and 13 are inferred from the gaps in the numbering
 * (the original emitted 4 for '/', and 0 for "<=" and '!'); confirm them
 * against Table 1 of the assignment.
 */
enum {
    TOK_ID = 1,
    TOK_NUM = 2,
    TOK_PLUS = 3,
    TOK_MINUS = 4,
    TOK_STAR = 5,
    TOK_SLASH = 6,      /* inferred */
    TOK_LT = 8,
    TOK_LE = 9,         /* inferred */
    TOK_GT = 10,
    TOK_GE = 11,
    TOK_EQ = 12,
    TOK_NE = 13,        /* inferred */
    TOK_ASSIGN = 16,
    TOK_LPAREN = 17,
    TOK_RPAREN = 18,
    TOK_LBRACKET = 19,
    TOK_RBRACKET = 20,
    TOK_LBRACE = 21,
    TOK_RBRACE = 22,
    TOK_SEMICOLON = 23,
    TOK_COMMA = 24,
    TOK_KEYWORD_BASE = 25   /* keyWord[i] has code TOK_KEYWORD_BASE + i */
};

char buff[buff_size];   /* whole source text, terminated by EOS once loaded */
int lineno = 1;         /* current source line, 1-based */
int tokenval = 1;       /* value of the most recent integer constant */
char lexbuf[BASIZE];    /* spelling of the most recent identifier/keyword */
int error[error_max];   /* source lines on which a lexical error was found */

/* Keywords; the order fixes their codes (void = 25 ... return = 33). */
char keyWord[key_num][10] = {
    "void", "int", "float", "char", "if", "else",
    "while", "do", "return"
};

/* Returns True when ch is a decimal digit. */
int isDigit(char ch)
{
    return (ch >= '0' && ch <= '9') ? True : False;
}

/* Returns True when ch is an ASCII letter. */
int isAlpha(char ch)
{
    return ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) ? True : False;
}

/* Returns True when ch can start an operator (or a comment, for '/'). */
int isOperator(char ch)
{
    return (ch == '+' || ch == '-' || ch == '*' || ch == '/' ||
            ch == '>' || ch == '<' || ch == '!' || ch == '=') ? True : False;
}

/* Returns True when ch is a delimiter: ( ) [ ] { } ; , */
int isDelimater(char ch)
{
    return (ch == '(' || ch == ')' || ch == '[' || ch == ']' ||
            ch == '{' || ch == '}' || ch == ';' || ch == ',') ? True : False;
}

/* Returns the token code of a keyword, or 0 when word is not in keyWord[]. */
static int keywordCode(const char *word)
{
    for (int i = 0; i < key_num; i++) {
        if (strcmp(word, keyWord[i]) == 0)
            return TOK_KEYWORD_BASE + i;
    }
    return 0;
}

/* Returns True when word is one of the entries of keyWord[]. */
int isKeyWord(const char *word)
{
    return keywordCode(word) != 0 ? True : False;
}

/* Returns True when ch may appear inside an identifier. */
static int isIdentChar(char ch)
{
    return (isAlpha(ch) || isDigit(ch) || ch == '_') ? True : False;
}

/* Returns the token code of a delimiter character, or 0 if it is not one. */
static int delimiterCode(char ch)
{
    switch (ch) {
    case '(': return TOK_LPAREN;
    case ')': return TOK_RPAREN;
    case '[': return TOK_LBRACKET;
    case ']': return TOK_RBRACKET;
    case '{': return TOK_LBRACE;
    case '}': return TOK_RBRACE;
    case ';': return TOK_SEMICOLON;
    case ',': return TOK_COMMA;
    default:  return 0;
    }
}

/*
 * Loads the file at path into buff[] and terminates it with EOS.
 * Exits the process when the file cannot be opened or does not fit.
 * The character is read into an int so that EOF can never be confused
 * with a data byte; a NUL byte inside the file ends the scan early.
 */
static void loadSource(const char *path)
{
    FILE *fp_in = fopen(path, "r");
    int ch;
    int x = 0;

    if (fp_in == NULL) {
        printf("Error:can't find the file");
        exit(-1);
    }
    while ((ch = fgetc(fp_in)) != EOF) {
        if (x >= buff_size - 1) {   /* keep one slot for the terminator */
            printf("Error");
            fclose(fp_in);
            exit(-1);
        }
        buff[x++] = (char)ch;
    }
    buff[x] = EOS;
    fclose(fp_in);
}

/*
 * Writes the LexicalError marker to out and logs the offending line.
 * Only the first error_max lines are kept; later ones are still marked in
 * the token stream but not listed in the final summary.
 */
static void recordError(FILE *out, int *count, int line)
{
    fprintf(out, " LexicalError,");
    if (*count < error_max)
        error[(*count)++] = line;
}

/* Ends the current output row and advances the line counter. */
static void emitNewline(FILE *out)
{
    fprintf(out, "\n");
    printf("\n");
    lineno = lineno + 1;
}

/*
 * Scans an integer constant starting at buff[t]; returns the index of the
 * first character after it. Digits glued to identifier characters (e.g.
 * "3temp") are consumed as one malformed token and reported as an error.
 */
static int scanNumber(FILE *out, int t, int *error_num)
{
    int overflow = False;

    tokenval = 0;
    while (isDigit(buff[t])) {
        int digit = buff[t] - '0';
        if (tokenval > (INT_MAX - digit) / 10)
            overflow = True;    /* would overflow int: stop accumulating */
        else
            tokenval = tokenval * 10 + digit;
        t++;
    }

    if (isIdentChar(buff[t])) {
        printf("<error:%d,%c>", tokenval, buff[t]);
        while (isIdentChar(buff[t]))    /* swallow the rest of the bad token */
            t++;
        recordError(out, error_num, lineno);
    } else if (overflow) {
        printf("<error:constant too large>");
        recordError(out, error_num, lineno);
    } else {
        fprintf(out, "<%d,%d>,", TOK_NUM, tokenval);
        printf("<%d:%d>,", TOK_NUM, tokenval);
    }
    return t;
}

/*
 * Scans an identifier or keyword starting at buff[t]; returns the index of
 * the first character after it. Spellings that do not fit in lexbuf are
 * consumed completely and reported as an error instead of overflowing.
 */
static int scanWord(FILE *out, int t, int *error_num)
{
    int len = 0;
    int too_long = False;

    while (isIdentChar(buff[t])) {
        if (len < BASIZE - 1)
            lexbuf[len++] = buff[t];
        else
            too_long = True;
        t++;
    }
    lexbuf[len] = EOS;

    if (too_long) {
        printf("compiler error");
        recordError(out, error_num, lineno);
    } else if (True == isKeyWord(lexbuf)) {
        int code = keywordCode(lexbuf);
        fprintf(out, "<%d,->,", code);
        printf("<%d:%s>,", code, lexbuf);
    } else {
        fprintf(out, "<%d,%s>,", TOK_ID, lexbuf);
        printf("<%d:%s>,", TOK_ID, lexbuf);
    }
    return t;
}

/*
 * Skips a line comment whose body starts at buff[t]. The terminating
 * newline is left in place so the main loop still counts the line.
 */
static int skipLineComment(int t)
{
    while (buff[t] != EOS && buff[t] != '\n')
        t++;
    return t;
}

/*
 * Skips a block comment whose body starts at buff[t]; returns the index
 * after the closing marker. Newlines inside the comment still end an output
 * row so that output rows stay aligned with source lines. An unterminated
 * comment is reported on the line where it started.
 */
static int skipBlockComment(FILE *out, int t, int *error_num)
{
    int start_line = lineno;

    while (buff[t] != EOS) {
        if (buff[t] == '*' && buff[t + 1] == '/')
            return t + 2;
        if (buff[t] == '\n')
            emitNewline(out);
        t++;
    }
    recordError(out, error_num, start_line);
    return t;
}

/*
 * Scans an operator or a comment starting at buff[t]; returns the index of
 * the first character after it. buff[t + 1] is always readable because
 * buff[t] is not the terminator.
 */
static int scanOperator(FILE *out, int t, int *error_num)
{
    char first = buff[t];
    char next = buff[t + 1];
    int code = 0;
    int width = 1;

    switch (first) {
    case '+':
        code = TOK_PLUS;
        break;
    case '-':
        code = TOK_MINUS;
        break;
    case '*':
        code = TOK_STAR;
        break;
    case '/':
        if (next == '/')
            return skipLineComment(t + 2);
        if (next == '*')
            return skipBlockComment(out, t + 2, error_num);
        code = TOK_SLASH;
        break;
    case '<':
        if (next == '=') {
            code = TOK_LE;
            width = 2;
        } else {
            code = TOK_LT;
        }
        break;
    case '>':
        if (next == '=') {
            code = TOK_GE;
            width = 2;
        } else {
            code = TOK_GT;
        }
        break;
    case '=':
        if (next == '=') {
            code = TOK_EQ;
            width = 2;
        } else {
            code = TOK_ASSIGN;
        }
        break;
    case '!':
        if (next == '=') {
            code = TOK_NE;
            width = 2;
        }
        break;
    default:
        break;
    }

    if (code == 0) {
        /* A lone '!' has no token code in this language subset. */
        printf("<error:%c>", first);
        recordError(out, error_num, lineno);
    } else {
        fprintf(out, "<%d,->,", code);
        printf("<%d,%c>", code, buff[t + width - 1]);
    }
    return t + width;
}

/*
 * Runs the lexical analysis of "test.txt" into "output.txt".
 *
 * Blanks and tabs are dropped, every source newline ends an output row,
 * and comments produce no token. Characters that start no known token are
 * skipped silently. After the token rows, a summary
 * of the lines containing lexical errors is appended when there are any.
 *
 * Returns 0 on completion; exits the process with status -1 when a file
 * cannot be opened, the source does not fit in buff[], or the output cannot
 * be flushed.
 */
int lexan(void)
{
    int error_num = 0;
    FILE *fp_out = NULL;
    int t = 0;

    lineno = 1;
    loadSource("test.txt");

    if ((fp_out = fopen("output.txt", "w+")) == NULL) {
        printf("Error:can't create a file");
        exit(-1);
    }

    while (buff[t] != EOS) {
        char ch = buff[t];

        if (ch == ' ' || ch == '\t') {
            t++;
        } else if (ch == '\n') {
            emitNewline(fp_out);
            t++;
        } else if (isDigit(ch)) {
            t = scanNumber(fp_out, t, &error_num);
        } else if (True == isAlpha(ch) || ch == '_') {
            t = scanWord(fp_out, t, &error_num);
        } else if (isDelimater(ch)) {
            int code = delimiterCode(ch);
            fprintf(fp_out, "<%d,->,", code);
            printf("<%d,%c>,", code, ch);
            t++;
        } else if (isOperator(ch)) {
            t = scanOperator(fp_out, t, &error_num);
        } else {
            t++;
        }
    }

    if (error_num != 0) {
        fprintf(fp_out, "LexicalError(s) on line(s) ");
        for (int i = 0; i < error_num; i++) {
            if (i != error_num - 1)
                fprintf(fp_out, "%d,", error[i]);
            else
                fprintf(fp_out, "%d", error[i]);
        }
    }

    if (fclose(fp_out) != 0) {
        printf("Error:can't write the file");
        exit(-1);
    }
    return 0;
}

/* Program entry point: analyse "test.txt" once. */
int main(void)
{
    lexan();
    return 0;
}