簡單的詞法分析器的實現

實驗一 《實現C語言小子集程序的詞法分析》實驗要求

以表1的小語言爲例實現詞法分析

 

設計單詞屬性值,各種表格(表示標識符表、常量表),單詞符號及機內表示
編碼實現詞法分析程序

採用文本輸入和輸出的方式。程序從名爲「test.txt」的文件中讀入代碼,將詞法分析的結果保存到「output.txt」中。

要求實現:

(1)對正確源程序的識別;

(2)對包含有註釋//和/* */的源程序的識別;

(3)對包含錯誤標識符的源程序的識別。

 

測試樣例:test.txt

int i = 3;

int 5j = 10;

int m = max(i, j);

while(i<m) do

{

i = i+ 1;

}

 

void max(int x, int y)

{

int 3temp = 0;

if(x > y)

temp = x;

else

temp = y;

return temp;

}


輸出:output.txt


<26,->, <1,i>, <16,->, <2,3>, <23,->,

<26,->, LexicalError, <16,->, <2,10>, <23,->,

<26,->, <1,m>, <16,->, <1,max>, <17,->, <1,i>, <24,->, <1,j>, <18,->, <23,->,

<31,->, <17,->, <1,i>, <8,->, <1,m>, <18,->, <32,->,

<21,->,

<1,i>, <16,->, <1,i>, <3,->, <2,1>,<23,->,

<22,->,

<25,->, <1,max>, <17,->, <26,->, <1,x>, <24,->, <26,->, <1,y>, <18,->,

<21,->,

<26,->, LexicalError ,<16,->, <2,0>, <23,->,

<29,->, <17,->, <1,x>, <10,->, <1,y>, <18,->,

<1,temp>, <16,->, <1,x>, <23,->,

<30,->

<1,temp>, <16,->, <1,y>,

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Capacity of lexbuf, the buffer that holds one identifier/keyword lexeme. */
#define BASIZE 20
/* String terminator. */
#define EOS '\0'
/* Integer truth values returned by the character-class predicates. */
#define True 1
#define False 0

/* Number of entries in keyWord. */
#define key_num 9
/* Capacity of buff, the in-memory copy of the source file. */
#define buff_size 1024

/* Source text loaded from the input file. */
char buff[buff_size];
/* Current line number, starting at 1. */
int lineno = 1;
/* Value of the most recently scanned integer constant. */
int tokenval = 1;
/* Lexeme buffer for the identifier or keyword being scanned. */
char lexbuf[BASIZE];
/* Line numbers on which lexical errors were recorded. */
int error[10];

/* Keyword spellings recognized by the lexer. */
char keyWord[key_num][10] = {
    "void",
    "int",
    "float",
    "char",
    "if",
    "else",
    "while",
    "do",
    "return"
};

 

/* Digit test: True when ch lies in '0'..'9', False otherwise. */
int isDigit(char ch)
{
    return ('0' <= ch && ch <= '9') ? True : False;
}


/* Letter test: True when ch lies in 'a'..'z' or 'A'..'Z', False otherwise. */
int isAlpha(char ch)
{
    int lower = ('a' <= ch && ch <= 'z');
    int upper = ('A' <= ch && ch <= 'Z');

    return (lower || upper) ? True : False;
}


/* Operator test: True when ch is one of + - * / > < ! =, False otherwise. */
int isOperator(char ch)
{
    switch (ch) {
    case '+':
    case '-':
    case '*':
    case '/':
    case '>':
    case '<':
    case '!':
    case '=':
        return True;
    default:
        return False;
    }
}


/* Delimiter test: True when ch is one of ( ) [ ] { } ; , and False otherwise. */
int isDelimater(char ch)
{
    switch (ch) {
    case '(':
    case ')':
    case '[':
    case ']':
    case '{':
    case '}':
    case ';':
    case ',':
        return True;
    default:
        return False;
    }
}

 


/*
 * Lexical analyzer
 * @auth: finch
 * @time: 2017/4/1
 */

int lexan()
{
int error_num = 0;
FILE *fp_in=NULL;
FILE *fp_out=NULL;

if((fp_in=fopen("test.txt","r"))!=NULL) //讀取源碼進入緩存buff中
{
char ch=NULL;
int x =0;
while(ch!=EOF) //文件結束符
{
printf("%d\n",x);
ch=fgetc(fp_in);
buff[x]= ch;
x++;
if(x>=buff_size)
{

printf("Error");
exit(-1);
}
}
buff[x]=EOF;
fclose(fp_in);
}
else
{
printf("Error:can't find the file");
exit(-1);
}


if((fp_out=fopen("output.txt","w+"))==NULL) //詞法分析的輸出
{
printf("Error:can't create a file");
exit(-1);
}


int t=0;
while(buff[t]!=EOF) //讀讀入內存的源碼進行分析
{
if (buff[t] ==' '||buff[t]=='\t')
; //刪除空格
else if(buff[t]== '\n') //換行
{

fprintf(fp_out,"\n");
printf("\n");
lineno = lineno +1; //行數

}
else if(isDigit(buff[t])) //數字
{
tokenval = buff[t] - '0';
t++;
while(isDigit(buff[t])) //整數
{
tokenval = tokenval *10 + buff[t] -'0';
t++;
}
if(isAlpha(buff[t])) //數字+字母 :報錯
{
fprintf(fp_out," LexicalError,");
error[error_num++]=lineno; //出錯行記錄
printf("<error:%d,%c>",tokenval,buff[t]);
}
else
{
t--;
fprintf(fp_out,"<%d,->,",2);
printf("<%d:%d>,",2,tokenval);
}

}
else if(True==isAlpha(buff[t])||buff[t]=='_') //標識符或者關鍵字
{

int b = 0;
while(isAlpha(buff[t])||isDigit(buff[t])||buff[t]=='_') //字母或數字
{
lexbuf[b] = buff[t];
t++;
b = b +1;
if(b >=BASIZE)
printf("compiler error");
}
t--;
lexbuf[b] ='\0';
if(True==isKeyWord(lexbuf)) //關鍵字判斷
{
int code = 2;
if(0==strcmp(lexbuf,"void")) //查找對應代碼
code =25;
else if(0==strcmp(lexbuf,"int"))
code =26;
else if(0==strcmp(lexbuf,"float"))
code =27;
else if(!strcmp(lexbuf,"char"))
code =28;
else if(!strcmp(lexbuf,"if"))
code =29;
else if(!strcmp(lexbuf,"else"))
code =30;
else if(!strcmp(lexbuf,"while"))
code =31;
else if(!strcmp(lexbuf,"do"))
code =32;
else if(!strcmp(lexbuf,"return"))
code =33;
else if(!strcmp(lexbuf,"main"))
code =34;
else if(!strcmp(lexbuf,"printf"))
code =35;
fprintf(fp_out,"<%d,->,",code);
printf("<%d:%s>,",code,lexbuf);
}
else
{ //標識符輸出

fprintf(fp_out,"<%d,%s>,",1,lexbuf);
printf("<%d:%s>,",1,lexbuf);
}

}
else if(isDelimater(buff[t])) //分隔符
{
int code = 0;
if(buff[t]==',')
code=24;
else if(buff[t]==';')
code=23;
else if(buff[t]=='{')
code=21;
else if(buff[t]=='}')
code=22;
else if(buff[t]=='(')
code=17;
else if(buff[t]==')')
code=18;
else if(buff[t]=='[')
code=19;
else if(buff[t]==']')
code=20;
fprintf(fp_out,"<%d,->,",code);
printf("<%d,%c>,",code,buff[t]);

}
else if(isOperator(buff[t])) //運算符
{
int code = 0;
if(buff[t]=='+')
code=3;
else if(buff[t]=='-')
code=4;
else if(buff[t]=='*')
code=5;
else if(buff[t]=='/')
{
t++;
if(buff[t]=='*') //多行註釋判斷
{
while(buff[t]!='/')
{
t++; //忽略註釋
}
}
else if(buff[t]=='/') //單行註釋
{
while(buff[t]!='\n')
{

t++;
}
}else{
code = 4;
t--;
}

}

else if(buff[t]=='<')
{
t++;
if(buff[t]=='=') //==
{

}
else
{
t--;
code = 8; // <
}
}

else if(buff[t]=='>')
{
t++;
if(buff[t]=='=') // >=
{
code =11;
}
else{
code=10;
t--;
}
}

else if(buff[t]=='=')
{
t++;
if(buff[t]=='=') // ==
{
code =12;
}
else{
code=16; // =
t--;
}
}

fprintf(fp_out,"<%d,->,",code);
printf("<%d,%c>",code,buff[t]);
}
t++;
}

if(error_num!=0) //錯誤輸出
{
fprintf(fp_out,"LexicalError(s) on line(s) ");
for(int i=0;i<error_num;i++)
if(i!=error_num)
fprintf(fp_out,"%d,",error[i]);
else
fprintf(fp_out,"%d",error[i]);

}
fclose(fp_out);
return 0;
}


/* Program entry point: runs the lexical analyzer once, then exits with 0. */
int main()
{
    (void)lexan();   /* return value is not used */
    return 0;
}
相關文章
相關標籤/搜索