詞法分析器的設計與實現

時間 2019-11-08

標籤詞法分析器設計實現简体版

原文原文鏈接

詞法分析器介紹

詞法分析是從左到右掃描每行源程序的符號，拼成單詞，換成統一的機內表示形式——TOKEN字，送給語法分析程序。

TOKEN字是一個二元式：（單詞種別碼，自身值）。單詞自身值按以下規則給出：

1.標識符的自身值是它在符號表的入口位置。

2.常數的自身值是常數本身（或者其二進制數值）。

3.關鍵字和界限符的自身值爲其本身。

詞法分析器功能

1.輸入：字符串（待進行詞法分析的源程序），可從鍵盤直接輸入或從文件讀入。

輸出：由（種別碼，自身值）所組成的二元組序列。

單詞的種別碼是語法分析需要的信息，可用整數編碼表示，例如：標識符的種別碼爲1，常數爲2，保留字爲3，運算符爲4，界符爲5。

單詞的自身值是編譯其他階段需要的信息，標識符的自身值是標識符在符號表的入口，其他類型單詞的自身值是其本身。

例如：輸入：if i >= 15 then x := y ;

輸出：

                   （3，if）

                   （1，0）//i符號的入口爲0

                   （4，>=）

                   （2，15）

                   （3，then）

                   （1，1）//x符號的入口爲1

                   （4，:=）

                   （1，2）//y符號的入口爲2

                   （5，;）

2.功能：

a.濾過空格。

b.識別保留字：if then else while do 等。

c.識別標識符：<字母>(<字母>|<數字>)*。

d.識別整數：0|(1|2|3|4|5|6|7|8|9)(0|1|2|3|4|5|6|7|8|9)*。

e.識別典型的運算符和分隔符，例如：+ - * / > >= <= ( ) ;

3.具備一定的錯誤處理功能，例如：能檢查出程序語言的字符集之外的非法字符。

源碼附帶註釋

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <string>
using namespace std;

// Reports whether ch is an ASCII letter (A-Z or a-z), i.e. a character
// that may appear in an identifier or keyword.
bool isLetter(char ch){
	const bool upperCase = ('A' <= ch && ch <= 'Z');
	const bool lowerCase = ('a' <= ch && ch <= 'z');
	return upperCase || lowerCase;
}

// Reports whether ch is an ASCII decimal digit (0-9), the only characters
// that make up an integer constant.
bool isDigit(char ch){
	const bool outsideRange = (ch < '0' || ch > '9');
	return !outsideRange;
}

// Reports whether ch is one of the operator characters: + - * / = : < >
bool isOperators(char ch){
	switch (ch) {
	case '+': case '-': case '*': case '/':
	case '=': case ':': case '<': case '>':
		return true;
	default:
		return false;
	}
}
// Reports whether ch is one of the delimiter characters: , ; . ( ) [ ] { } #
bool isDelimiter(char ch){
	switch (ch) {
	case ',': case ';': case '.':
	case '(': case ')':
	case '[': case ']':
	case '{': case '}':
	case '#':
		return true;
	default:
		return false;
	}
}
// Reports whether ch is whitespace the lexer skips: a space or a horizontal tab.
bool isBlank(char ch){
	return (ch == '\t') || (ch == ' ');
}
// Reserved-word table: 34 entries, each stored in a 10-byte slot
// (the longest words are 8 characters plus the terminating NUL).
char key[34][10] = {
	"main",
	"auto",     "short",    "int",      "long",
	"float",    "double",   "char",     "struct",
	"union",    "enum",     "typedef",  "const",
	"unsigned", "signed",   "extern",   "register",
	"static",   "volatile", "void",     "if",
	"else",     "switch",   "case",     "for",
	"do",       "while",    "goto",     "continue",
	"break",    "default",  "sizeof",   "return",
	"then"
};


// Capacity of the input line buffer, including the terminating NUL.
enum { kLineCapacity = 100 };

// Splits one source line into tokens and prints each as "(category,value)":
//   1 = identifier (value is its index in this line's symbol table),
//   2 = integer constant, 3 = keyword, 4 = operator, 5 = delimiter.
// A character that belongs to none of these categories is printed as
// "(error,c)" and scanning continues with the next character.
//
// Precondition: strlen(source) < kLineCapacity. That bound is what keeps the
// fixed-size buffers below safe: no identifier can be longer than the line,
// and the line cannot contain more identifiers than it has characters.
static void analyzeLine(const char *source)
{
	char symbols[kLineCapacity][kLineCapacity];   // identifiers seen so far on this line
	int symbolCount = 0;
	const int keywordCount = (int)(sizeof(key) / sizeof(key[0]));

	int i = 0;
	while (source[i] != '\0') {
		const char ch = source[i];

		// Skip blanks.
		if (isBlank(ch)) {
			++i;
			continue;
		}

		// A letter starts a keyword or an identifier: <letter>(<letter>|<digit>)*
		if (isLetter(ch)) {
			char word[kLineCapacity];
			int length = 0;
			while (isLetter(source[i]) || isDigit(source[i])) {
				word[length++] = source[i++];
			}
			word[length] = '\0';

			bool isKeyword = false;
			for (int k = 0; k < keywordCount && !isKeyword; ++k) {
				isKeyword = (strcmp(key[k], word) == 0);
			}
			if (isKeyword) {
				printf("(3,%s)\n", word);
				continue;
			}

			// Identifier: reuse its existing table entry, or append a new one.
			int index = 0;
			while (index < symbolCount && strcmp(symbols[index], word) != 0) {
				++index;
			}
			if (index == symbolCount) {
				memcpy(symbols[symbolCount], word, (size_t)length + 1);
				++symbolCount;
			}
			printf("(1,%d)\n", index);
			continue;
		}

		// Integer constant: print the digit run straight from the source,
		// so no intermediate buffer can overflow.
		if (isDigit(ch)) {
			const int start = i;
			while (isDigit(source[i])) {
				++i;
			}
			printf("(2,%.*s)\n", i - start, source + start);
			continue;
		}

		if (isDelimiter(ch)) {
			printf("(5,%c)\n", ch);
			++i;
			continue;
		}

		if (isOperators(ch)) {
			// Only ":=", ">=" and "<=" are two-character operators. The '='
			// check must apply to all three leading characters, otherwise a
			// lone ':' or '>' swallows whatever character follows it.
			const bool mayTakeEquals = (ch == ':' || ch == '>' || ch == '<');
			if (mayTakeEquals && source[i + 1] == '=') {
				printf("(4,%c%c)\n", ch, source[i + 1]);
				i += 2;
			} else {
				printf("(4,%c)\n", ch);
				i += 1;
			}
			continue;
		}

		// Anything else is outside the language's character set.
		printf("(error,%c)\n", ch);
		++i;
	}
}

// Interactive driver: repeatedly prompts for one line of source text and
// prints its token sequence. The symbol table starts empty for every line.
// Returns 0 once standard input reaches end-of-file or a read error occurs.
int main()
{
	char Sourcecode[kLineCapacity];

	for (;;) {
		printf("請輸入程序段，標識符爲1，常數爲2，關鍵字爲3，運算符4，界符5\n");

		// fgets never writes past the buffer (gets could) and reports EOF,
		// which lets the loop terminate instead of spinning forever.
		if (fgets(Sourcecode, sizeof(Sourcecode), stdin) == NULL) {
			break;
		}

		const size_t lineEnd = strcspn(Sourcecode, "\r\n");
		if (Sourcecode[lineEnd] == '\0') {
			// No line terminator was stored: either the line did not fit in
			// the buffer or input ended without one. Drop the rest of the
			// line so it is not mistaken for the next input line.
			int discarded = 0;
			for (int c = getchar(); c != '\n' && c != EOF; c = getchar()) {
				++discarded;
			}
			if (discarded > 0) {
				fprintf(stderr, "警告：輸入行過長，超出部分已被忽略\n");
			}
		}
		Sourcecode[lineEnd] = '\0';

		analyzeLine(Sourcecode);
	}

	return 0;
}
複製代碼