【編譯原理】c++實現自下而上語法分析器

時間 2019-11-20

標籤編譯原理 c++ 實現自下而上語法分析器欄目 C&C++ 简体版

原文原文鏈接

寫在前面：本博客爲本人原創，嚴禁任何形式的轉載！本博客只允許放在博客園（.cnblogs.com），如果您在其他網站看到這篇博文，請通過下面這個唯一的合法鏈接轉到原文！

本博客全網唯一合法URL：http://www.cnblogs.com/acm-icpcer/p/9107838.html

　　基於C++語言實現的PL/0語言的算術表達式的自下而上的語法分析程序。該語言的其他語法實現思想與此一致，故不贅述。

　　運行此程序前，必須先將代碼通過：【編譯原理】c++實現詞法分析器的詞法分析，生成詞法表（詞法表是txt文件，爲了語法分析成功，務必刪除文件中最後空着的一行，即文件末尾不可以留空白行）。生成的該詞法表爲此程序的必要輸入。

　　產生式：

　　S->X(AX)*|AX(AX)*
   X->Y(MY)*
   Y->I|N|(S)
   A->+|-
   M->*|/
   C->=|#|<|<=|>|>=

　　進行自下而上的語法分析一定比自上而下要難。我們知道，做自下而上的語法分析的核心在於「尋找可歸約串」（即術語所說的「句柄」），而且要有一定的「向前展望性」，以防止在可以歸約但卻不應該歸約的地方進行歸約動作而不是繼續移進下一個終結符或者非終結符。所以編譯原理的語法分析做LR分析的核心目標就是能精確地控制計算機程序對待分析、編譯的程序代碼語句進行正確的、無二義的、符合編程者原目的的語法分析（在自下而上的語法分析中就是「歸約」）。

　　那麼知道了爲什麼要構造規範項目集族、構造確定的DFA、構造LR分析表後，對於上述產生式所代表的比較簡單的算術表達式語句分析，我在本篇博客中就不使用傳統的「構造規範項目集族、構造確定的DFA、構造LR分析表」這樣一個套路來做自下而上語法分析了。

　　在本篇博客中，我使用比較簡單易理解的模式匹配算法，結合面向對象程序設計思想中的「策略」設計模式來完成編程。

/*
this code was first initiated by TZ,COI,HZAU
contact email:xmb028@163.com
personal website:wnm1503303791.github.io
personal blogs:www.cnblogs.com/acm-icpcer/
this code has been posted on my personal blog,checking url:www.cnblogs.com/acm-icpcer/p/9107838.html
Copyright 2018/5/29 TZ.
All Rights Reserved.
*/

#include<cstdio>  
#include<cstring>  
#include<algorithm>  
#include<iostream>  
#include<string>  
#include<vector>  
#include<stack>  
#include<bitset>  
#include<cstdlib>  
#include<cmath>  
#include<set>  
#include<list>  
#include<deque>  
#include<map>  
#include<queue>  
#include<fstream>
using namespace std;

// Extracts the token-type field from one line of the lexical table.
//
// The first character of `a` is skipped; the characters that follow, up to
// (but not including) the first ',', are copied into `b` as a NUL-terminated
// string.
// NOTE(review): the skipped character is presumably an opening bracket, as in
// "(ident,foo)" -- confirm against the lexer's output format.
//
// a: NUL-terminated input line.
// b: output buffer; must have room for at least 1024 bytes.
// Returns true when a ',' terminated the field. Returns false when either
// pointer is null, when `a` is empty, or when the end of the line (or the
// capacity of `b`) was reached before a ',' was seen; in that case `b` holds
// whatever was copied so far.
bool preproccess(char *a,char *b)
{
    const int capacity=1024;//size the callers allocate for b

    if(b==NULL)
        return false;
    b[0]='\0';
    if(a==NULL||a[0]=='\0')
        return false;//nothing follows the (missing) leading character

    int i1=0,i2=1;
    //stop at the NUL as well as at ',' so a malformed line cannot run the
    //scan past the end of either buffer
    while(a[i2]!=','&&a[i2]!='\0'&&i1<capacity-1)
    {
        b[i1]=a[i2];
        ++i1,++i2;
    }
    b[i1]='\0';
    return a[i2]==',';
}

fstream f2("stack.txt", ios::out);// trace file, opened for writing; receives the stack snapshots
static int mcount=1;// sequence number printed with the next snapshot; incremented by outf
// Writes one snapshot of the parse stack to `f`, for later inspection.
//
// The snapshot consists of a header line with the running snapshot number
// (mcount) and the value of `head`, followed by the stack entries from
// data[head] down to data[0], one per line, and a blank line.
//
// head: index of the top stack entry; nothing is listed when it is negative.
// data: the stack storage.
// f:    destination stream.
// Returns true when the stream is still in a good state after writing.
bool outf(int head,char data[1024][1024],fstream &f)
{
    f<<"times("<<mcount<<"),";
    f<<"head is:"<<head<<endl;
    for(int i=head;i>=0;i--)
    {
        f<<data[i]<<endl;
    }
    mcount++;
    f<<endl;
    //the function is declared bool, so it must not flow off the end
    return f.good();
}

// Bottom-up (shift/reduce) recogniser for PL/0 arithmetic expressions.
//
// Token types are read one per line from the lexical table and shifted onto a
// fixed-size stack; after every shift the reductions below are applied to the
// top of the stack until none of them matches.
//
//     S->X(AX)*|AX(AX)*
//     X->Y(MY)*
//     Y->I|N|(S)
//     A->+|-
//     M->*|/
//
// NOTE(review): reduction is greedy and uses no look-ahead. A lone Y is
// rewritten to X as soon as it is on top of the stack, so the "Y M Y" patterns
// cannot be completed by tokens shifted afterwards, and there is no rule that
// promotes a lone X to S. Inputs that depend on those paths are rejected;
// confirm whether that is acceptable for the intended use.
class presentation
{
private:
    char data[1024][1024];//parse stack: data[0] is the bottom, data[head] the top
    fstream *infile;//lexical table, one token per line
    int head;//index of the top entry, -1 while the stack is empty
    bool input_ok;//cleared when the table is unreadable, malformed or too long

    //One reduction. `rhs` lists the symbol classes from the bottom-most to the
    //top-most stack entry (i.e. in input order); `lhs` replaces them.
    struct rule
    {
        const char *rhs;
        const char *lhs;
    };

    //Tells whether stack symbol `sym` belongs to class `cls`:
    //  'A' plus|minus   'M' times|slash   'i' ident   'n' number
    //  '(' lparen       ')' rparen
    //any other class letter stands for the one-letter non-terminal itself.
    static bool matches(const char *sym,char cls)
    {
        switch(cls)
        {
            case 'A': return !strcmp(sym,"plus")||!strcmp(sym,"minus");
            case 'M': return !strcmp(sym,"times")||!strcmp(sym,"slash");
            case 'i': return !strcmp(sym,"ident");
            case 'n': return !strcmp(sym,"number");
            case '(': return !strcmp(sym,"lparen");
            case ')': return !strcmp(sym,"rparen");
            default:  return sym[0]==cls&&sym[1]=='\0';
        }
    }

public:
    //Binds the recogniser to an already opened lexical table and starts with
    //an empty, zero-filled stack. The stream is not owned by this object.
    presentation(fstream *in_f):infile(in_f),head(-1),input_ok(true)
    {
        memset(data,0,sizeof(data));
    }

    //Shift: reads the next non-blank line of the table, echoes it together
    //with its token type to stdout and pushes the token type.
    //Returns false when nothing was pushed: at end of input, or -- with
    //input_ok cleared -- when the stream is missing/unreadable, the line has
    //no ',' after its first character, or the stack is full.
    bool push()
    {
        if(!infile)
        {
            input_ok=false;
            return false;
        }

        char line[1024];
        do
        {
            if(!infile->getline(line,sizeof(line)))
            {
                //a failed read that is not a plain end-of-file (stream never
                //opened, over-long line, ...) must not look like success
                if(!infile->eof())
                    input_ok=false;
                return false;
            }
        }
        while(line[0]=='\0');//tolerate blank lines, e.g. a trailing one

        //preproccess scans from line[1] up to the first ',', so make sure
        //there is one; also refuse to write past the end of the stack
        if(strchr(line+1,',')==NULL||head+1>=1024)
        {
            input_ok=false;
            return false;
        }

        char t[1024];//token type extracted from the line
        preproccess(line,t);
        cout<<line<<","<<t<<endl;

        head++;
        memset(data[head],0,sizeof(data[head]));
        strcpy(data[head],t);
        return true;
    }

    //Reduce: replaces the first matching pattern on top of the stack by its
    //left-hand side. Returns true when a reduction was performed.
    bool reduce()
    {
        //Longer patterns come before their own suffixes so that they get the
        //chance to match. In the sign-prefixed alternatives the sign lies
        //below X on the stack, i.e. it precedes X in the input, as in the
        //grammar's "AX".
        static const rule rules[]=
        {
            //S->X(AX)*|AX(AX)*
            {"XAXAX","S"},
            {"AXAX","S"},
            {"XAX","S"},
            {"AX","S"},
            //X->Y(MY)*
            {"YMYMY","X"},
            {"YMY","X"},
            {"Y","X"},
            //Y->I|N|(S)
            {"i","Y"},
            {"n","Y"},
            {"(S)","Y"}
        };
        const int rule_count=static_cast<int>(sizeof(rules)/sizeof(rules[0]));

        for(int r=0;r<rule_count;r++)
        {
            const int len=static_cast<int>(strlen(rules[r].rhs));
            const int base=head-len+1;//stack index of the pattern's first symbol
            if(base<0)
                continue;//fewer symbols on the stack than the pattern needs

            int k=0;
            while(k<len&&matches(data[base+k],rules[r].rhs[k]))
                k++;
            if(k<len)
                continue;

            for(k=0;k<len;k++)
                memset(data[base+k],0,sizeof(data[base+k]));
            head=base;
            strcpy(data[head],rules[r].lhs);
            return true;
        }
        return false;
    }

    //Prints the stack to stdout, top entry first. Always returns true.
    bool visit_data()
    {
        cout<<"current stack:"<<endl;
        for(int i=head;i>=0;i--)
        {
            cout<<data[i]<<endl;
        }
        return true;
    }

    //Driver: shifts every token, reducing as far as possible after each
    //shift and writing a stack snapshot to the trace file after every
    //reduction attempt. Returns true when the input was read cleanly and the
    //final stack consists only of non-terminal names and contains an S.
    bool mainf()
    {
        while(push())
        {
            bool reduced;
            do//one reduction may enable another
            {
                reduced=reduce();
                outf(head,data,f2);
            }
            while(reduced);
        }

        visit_data();

        if(!input_ok)
            return false;

        static const char *const nonterminals[]={"S","X","A","Y","M","C"};
        const int nonterminal_count=
            static_cast<int>(sizeof(nonterminals)/sizeof(nonterminals[0]));

        bool flag=false;
        for(int i=head;i>=0;i--)
        {
            bool known=false;
            for(int k=0;k<nonterminal_count&&!known;k++)
            {
                known=!strcmp(data[i],nonterminals[k]);
            }
            if(!known)
                return false;//a terminal was left unreduced
            if(!strcmp(data[i],"S"))
                flag=true;
        }
        return flag;
    }
};

int main()
{
    fstream f1;
    f1.open("lexical.txt", ios::in);//打開詞法分析表，供讀
    
    presentation* s1=new presentation(&f1);
    bool result=s1->mainf();
    
    if(result)
        cout<<"ACCEPTED!"<<endl;
    else
        cout<<"ERROR!"<<endl;
    
    f1.close();
    f2.close();
    return 0;
}

　　當然了，對於不喜歡看面向對象設計模式的同學，我還寫了另外一個面向過程的代碼，這個更易理解。而且我在程序中已經將一張特定的詞法分析表給出。

/* 
this code was first initiated by TZ,COI,HZAU
contact email:xmb028@163.com 
personal website:wnm1503303791.github.io 
personal blogs:www.cnblogs.com/acm-icpcer/ 
this code has been posted on my personal blog,checking url:www.cnblogs.com/acm-icpcer/p/9107838.html 
Copyright 2018/5/29 TZ. 
All Rights Reserved. 
*/
#include<cstdio>  
#include<cstring>  
#include<algorithm>  
#include<iostream>  
#include<string>  
#include<vector>  
#include<stack>  
#include<bitset>  
#include<cstdlib>  
#include<cmath>  
#include<set>  
#include<list>  
#include<deque>  
#include<map>  
#include<queue>  
#include<fstream>
using namespace std;

// Built-in table of token types that main() feeds to the parser in place of a
// lexical file. Only the listed entries are set; the remaining rows are
// zero-initialised, i.e. empty strings.
char mdata[1024][1024]={

"ident",
"times",
"rparen",
"number",
"plus",
"ident",
"lparen"

// Disabled alternative: the same tokens in the opposite order.
/*
"lparen",
"ident",
"plus",
"number",
"rparen",
"times",
"ident"
*/
};

// Parse stack, one symbol name per row.
// NOTE(review): with `using namespace std;` in effect, the unqualified name
// `data` may be ambiguous with std::data when this is compiled as C++17 or
// later -- confirm the language level used to build it.
char data[1024][1024];

// Stack cursor: main() stores each token at data[head] and then increments it.
int head=0;

bool reduce()
    {
        //S->X(AX)*|AX(AX)*
        if(    (!strcmp(data[head],"X"))&&
              (!strcmp(data[head-1],"plus")||!strcmp(data[head-1],"minus"))&&
              (!strcmp(data[head-2],"X"))
          )
        {
            memset(data[head],1024,'\0');
            memset(data[head-1],1024,'\0');
            memset(data[head-2],1024,'\0');
            head=head-3+1;
            
            strcpy(data[head],"S");
            head++;
            return true;
        }
        
        if(    (!strcmp(data[head],"X"))&&
              (!strcmp(data[head-1],"plus")||!strcmp(data[head-1],"minus"))&&
              (!strcmp(data[head-2],"X"))&&
              (!strcmp(data[head-1],"plus")||!strcmp(data[head-1],"minus"))&&
              (!strcmp(data[head-4],"X"))
          )
        {
            memset(data[head],1024,'\0');
            memset(data[head-1],1024,'\0');
            memset(data[head-2],1024,'\0');
            memset(data[head-3],1024,'\0');
            memset(data[head-4],1024,'\0');
            head=head-5+1;
            
            strcpy(data[head],"S");
            head++;
            return true;
        }
        
        if(    (!strcmp(data[head],"X"))&&
              (!strcmp(data[head-1],"plus")||!strcmp(data[head-1],"minus"))
          )
        {
            memset(data[head],1024,'\0');
            memset(data[head-1],1024,'\0');
            head=head-2+1;
            
            strcpy(data[head],"S");
            head++;
            return true;
        }
        
        if(    (!strcmp(data[head],"X"))&&
              (!strcmp(data[head-1],"plus")||!strcmp(data[head-1],"minus"))&&
              (!strcmp(data[head],"X"))&&
              (!strcmp(data[head-1],"plus")||!strcmp(data[head-1],"minus"))
          )
        {
            memset(data[head],1024,'\0');
            memset(data[head-1],1024,'\0');
            memset(data[head-2],1024,'\0');
            memset(data[head-3],1024,'\0');
            head=head-4+1;
            
            strcpy(data[head],"S");
            head++;
            return true;
        }
        
        //X->Y(MY)*
        if(    (!strcmp(data[head],"Y"))&&
              (!strcmp(data[head-1],"times")||!strcmp(data[head-1],"slash"))&&
              (!strcmp(data[head],"Y"))
          )
        {
            memset(data[head],1024,'\0');
            memset(data[head-1],1024,'\0');
            memset(data[head-2],1024,'\0');
            head=head-3+1;
            
            strcpy(data[head],"X");
            head++;
            return true;
        }
        
        if(    (!strcmp(data[head],"Y"))&&
              (!strcmp(data[head-1],"times")||!strcmp(data[head-1],"slash"))&&
              (!strcmp(data[head-2],"Y"))&&
              (!strcmp(data[head-1],"times")||!strcmp(data[head-1],"slash"))&&
              (!strcmp(data[head-4],"Y"))
          )
        {
            memset(data[head],1024,'\0');
            memset(data[head-1],1024,'\0');
            head=head-5+1;
            
            strcpy(data[head],"X");
            head++;
            return true;
        }
        
        //Y->I|N|(S)
        if(    (!strcmp(data[head],"ident"))
          )
        {
            memset(data[head],1024,'\0');
            head=head-1+1;
            
            strcpy(data[head],"Y");
            head++;
            return true;
        }
        
        if(    (!strcmp(data[head],"number"))
          )
        {
            memset(data[head],1024,'\0');
            head=head-1+1;
            
            strcpy(data[head],"Y");
            head++;
            return true;
        }
        
        if(    (!strcmp(data[head],"rparen"))&&
            (!strcmp(data[head-1],"S"))&&
            (!strcmp(data[head-2],"lparen"))
          )
        {
            memset(data[head],1024,'\0');
            memset(data[head-1],1024,'\0');
            memset(data[head-2],1024,'\0');
            head=head-3+1;
            
            strcpy(data[head],"Y");
            head++;
            return true;
        }
        
        return false;
    }


bool visit_data()
    {
        cout<<"current stack:"<<endl;
        for(int i=head;i>=0;i--)
        
        {
            cout<<data[i]<<endl;
        }
    }

int main()
{
    
    int i=0;
    while(i<=6)
        {
            strcpy(data[head],mdata[i]);
            head++;
            bool t=reduce();
            //每當移進結束時就檢查一下是否有可規約串 
            while(t)//防止規約嵌套 
            {
                t=reduce();
            }
            visit_data();
            i++;
        }
        visit_data();
        
        /*
    int i=0;
    while(i<=6)
    {
        strcpy(data[head],mdata[i]);
        head++;
        i++;
    }
    visit_data();
    */
}