一个简单编译器前端的实现

时间：2014-12-18 22:14:08 阅读：352 评论：0 收藏：0 [点我收藏+]

标签：des style blog http ar io color os sp

小记：

　　其实这个程序是编译原理这门课的综合实验，前段时间我申请免试又失败了，原因是有缺课，平时分不够，早上赖床现在尝到苦果我也是醉了……没办法，逼上梁山，只好攻克这个大boss以拿下免试资格。

　　选了一个最简单的文法，分析了1个多星期，终于决定开始要写的时候时间已经很紧了。

　　去实验室通宵了一晚，在宿舍熬了一晚，睡了3个小时就起来去验收了。还好是通过了，没白费劲。

　　不得不说，编译原理就是烧脑，知识点都比较抽象，如果数据结构和算法的基础打得不牢的话，实现起来会感到吃力。

　　再次感觉到了基础的重要性，这也是一个收获吧。

　　总的来说，相比较之前实现形式化的des加密算法，抽象的编译器更有难度，写完也更能让你感觉到提高。

　　ps：程序仅供参考，时间有限，没做太多测试。

功能：

　　给定一个简单语言的文法描述，本程序是该语言的编译器前端。

　　输入一个符合该文法规则的源文件，输出三地址形式的中间代码。

　　具体功能有词法分析，语法分析，分析流程显示，错误提示等。

程序运行结果：

　　源程序：

　　 bubuko.com,布布扣

　　分析过程：

　　 bubuko.com,布布扣

　　生成中间代码（三地址形式）：

　　 bubuko.com,布布扣

分析过程：

　　文法：

　　 bubuko.com,布布扣

  文法（消除左递归后）
Program->BEGIN Stmt-List END
Stmt-List->Stmt Stmt-List‘
Stmt-List‘->Stmt Stmt-List‘ | .
Stmt->Assign-stmt
Assign-stmt->ID = Expr
Expr->Term Expr‘
Expr‘->Add-Op Term Expr‘ | .
Term->Factor Term‘
Term‘->Multiple-Op Factor Term‘ | .
Factor->( Expr ) | ID | NUM
Add-Op->+ | -
Multiple-Op->* | /

　FIRST集
First(Program)={BEGIN} 
First(Stmt-List)={ID}
First(Stmt-List‘)={ ID,ε}
First(Stmt)={ ID}
First(Assign-stmt)={ ID}
First(Expr)={(,ID,NUM}
First(Expr‘)={+,-,ε}
First(Term)={(,ID,NUM}
First(Term‘)={*,/,ε}
First(Factor)={(,ID,NUM}
First(Add-Op)={+,-}
First(Multiple-Op)={*,/}

　FOLLOW集
Follow(Program)={$} 
Follow(Stmt-List)={END}
Follow(Stmt-List‘)={ END}
Follow(Stmt)={ ID,END}
Follow(Assign-stmt)={ ID,END }
Follow(Expr)={ ID,END ,)}
Follow(Expr‘)={ ID,END , )}
Follow(Term)={+,-, ID,END , )}
Follow(Term‘)={ +,-, ID,END , )}
Follow(Factor)={ *,/,+,-, ID,END , )}
Follow(Add-Op)={(,ID,NUM}
Follow(Multiple-Op)={ (,ID,NUM }

　　预测分析表：

bubuko.com,布布扣

代码：

   1 //词法分析中‘ ‘代表空白字符，包括换行符，空格，制表符
   2 //源程序格式：1、一行只能有一条语句；2、程序中可以有注释
   3 #include <iostream>
   4 #include <string.h>
   5 #include <stdio.h>
   6 #include <stack>
   7 #include <stdlib.h>
   8 using namespace std;
   9 
  10 char src[1000010];    //存储源代码
  11 char TokenList_kind[1010][100];    //词法单元流 之 类别
  12 int TokenList_value[1010];    //词法单元流 之 值
  13 char WordList[1010][100];    //符号表 -- 由token_value指向
  14 int LineList[1010];        //记录每一个token对应的行数 -- 由token_value指向
  15 int TokenListPoint;        //词法单元流指针
  16 int WordListPoint;        //符号表指针
  17 
  18 int WordsPoint;        //语法分析时的词法单元流指针
  19 
  20 int tmpcnt;        //三地址语句中寄存器的编号
  21 
  22 int ExpTable[11][8] = {    //用 表驱动法 读取表达式 ，状态转换表
  23 2,    -1,    1,    -1,    -1,    -1,    -1,    9,
  24 2,    -1,    -1,    -1,    -1,    -1,    -1,    9,
  25 2,    -1,    -1,    3,    -1,    -1,    -1,    9,
  26 4,    5,    -1,    -1,    -1,    -1,    -1,    9,
  27 4,    -1,    8,    -1,    6,    -1,    10,    9,
  28 -1,    5,    8,    -1,    6,    -1,    10,    9,
  29 4,    5,    -1,    -1,    -1,    7,    -1,    9,
  30 4,    5,    -1,    -1,    -1,    -1,    -1,    9,
  31 -1,    -1,    -1,    -1,    -1,    -1,    -1,    -1,
  32 -1,    -1,    -1,    -1,    -1,    -1,    -1,    -1,
  33 -1,    -1,    8,    -1,    6,    -1,    10,    9
  34 };
  35 
  36 /*
  37   文法
  38 1    Program->BEGIN Stmt-List END
  39 2    Stmt-List->Stmt Stmt-List‘
  40 3    Stmt-List‘->Stmt Stmt-List‘ | .
  41 4    Stmt->Assign-stmt
  42 5    Assign-stmt->ID = Expr
  43 6    Expr->Term Expr‘
  44 7    Expr‘->Add-Op Term Expr‘ | .
  45 8    Term->Factor Term‘
  46 9    Term‘->Multiple-Op Factor Term‘ | .
  47 10    Factor->( Expr ) | ID | NUM
  48 11    Add-Op->+ | -
  49 12    Multiple-Op->* | /
  50 
  51   预测分析表
  52 非终结符号    输入符号
  53             BEGIN    ‘+’    ‘-’    ‘*’    ‘/’    (    )    ‘=’    ID    NUM    END    $
  54 Program        1
  55 Stmt-List                                                            2
  56 Stmt-List‘                                                            3        .
  57 Stmt                                                                4
  58 Assign-stmt                                                            5
  59 Expr                                                6                6    6
  60 Expr‘                7        7                            .            .        .
  61 Term                                                8                8    8
  62 Term‘                .        .        9        9            .            .        .
  63 Factor                                                10_1            10_210_3
  64 Add-Op                11_1    11_2
  65 Multiple-Op                            12_1    12_2
  66 */
  67 
  68 char PreTab[12][12][100]={    //存储预测分析表
  69     {"BEGIN Stmt-List END","","","","","","","","","","",""},
  70     {"","","","","","","","","Stmt Stmt-List‘","","",""},
  71     {"","","","","","","","","Stmt Stmt-List‘","",".",""},
  72     {"","","","","","","","","Assign-stmt","","",""},
  73     {"","","","","","","","","ID = Expr","","",""},
  74     {"","","","","","Term Expr‘","","","Term Expr‘","Term Expr‘","",""},
  75     {"","Add-Op Term Expr‘","Add-Op Term Expr‘","","","",".","",".","",".",""},
  76     {"","","","","","Factor Term‘","","","Factor Term‘","Factor Term‘","",""},
  77     {"",".",".","Multiple-Op Factor Term‘","Multiple-Op Factor Term‘","",".","",".","",".",""},
  78     {"","","","","","( Expr )","","","ID","NUM","",""},
  79     {"","+","-","","","","","","","","",""},
  80     {"","","","*","/","","","","","","",""}
  81 };
  82 
  83 char Product[23][50]={    //记录产生式出现的字符串
  84     "Program",      //0
  85     "Stmt-List",    //1
  86     "Stmt-List‘",   //2
  87     "Stmt",         //3
  88     "Assign-stmt",  //4
  89     "Expr",         //5
  90     "Expr‘",        //6
  91     "Term",         //7
  92     "Term‘",        //8
  93     "Factor",       //9
  94     "Add-Op",       //10
  95     "Multiple-Op",  //11
  96 
  97     "BEGIN",        //12
  98     "+",            //13
  99     "-",            //14
 100     "*",            //15
 101     "/",            //16
 102     "(",            //17
 103     ")",            //18
 104     "=",            //19
 105     "ID",           //20
 106     "NUM",          //21
 107     "END"           //22
 108 };
 109 
 110 
 111 //语法树节点
 112 struct Node{
 113     Node(char na[]){    //构造函数
 114         strcpy(name,na);
 115         wp = 0;
 116         brother = NULL;
 117         next = NULL;
 118     }
 119     char name[50];
 120     int wp;    //终结符在token流中的位置
 121     Node* brother;  //指向下一个兄弟节点
 122     Node* next;     //指向当前节点的孩子节点
 123 }*root,*now,*p;
 124 
 125 void Init()
 126 {
 127     memset(src,0,sizeof(src));    //初始化源代码字符数组
 128     memset(TokenList_kind,0,sizeof(TokenList_kind));    //初始化词法单元流
 129     memset(TokenList_value,0,sizeof(TokenList_value));    //初始化词法单元流
 130     memset(WordList,0,sizeof(WordList));    //初始化符号表
 131     TokenListPoint = 1;
 132     WordListPoint = 1;
 133 
 134     WordsPoint = 1;
 135 
 136     //初始化指针
 137     root = NULL;
 138     now = NULL;
 139     p = NULL;
 140     tmpcnt = 1;
 141 }
 142 
 143 //--------------- 词法分析 start ---------------
 144 void AddToken(char kind[],char value[],int line)    //将<类别，值>放入token流中
 145 {
 146     strcpy(TokenList_kind[TokenListPoint],kind);
 147     TokenList_value[TokenListPoint++] = WordListPoint;
 148 
 149     strcpy(WordList[WordListPoint],value);
 150     LineList[WordListPoint++] = line;
 151 }
 152 
 153 void strsub(char str1[],int start,int len,char str2[])    //截取str1的子字符串给str2
 154 {
 155     int i=0,j;
 156     for(j=0;j<len;j++){
 157         str2[i++] = str1[start+j];
 158     }
 159     str2[i] = ‘\0‘;
 160 }
 161 
 162 void InputString()    //从文件中读取源代码，同时过滤注释，一行只能放置一条语句
 163 {
 164     //文件读入
 165     FILE* fs = fopen(".\\src.txt","rb");    //源代码
 166     if(fs==NULL){
 167         printf("源代码文件 src.txt 不存在\n");
 168         return ;
 169     }
 170 
 171     char s[10010]={0};
 172     //fscanf(fs,"%s",src);
 173     while(fgets(s,10010,fs)){
 174         if(sscanf(s,"%s",s)==-1){    //没有读到字符串，将s清空
 175             s[0]=‘\0‘;
 176             continue;
 177         }
 178         //过滤注释
 179         if(s[0]==‘/‘ && s[1]==‘/‘)
 180             continue;
 181         int i;
 182         for(i=0;s[i];i++)
 183             if(s[i]==‘/‘ && s[i+1]==‘/‘)
 184                 break;
 185         s[i]=‘\0‘;
 186 
 187         strcat(src,s);    //连接到源代码字符串后
 188         strcat(src," ");    //连接一个空白符
 189     }
 190 
 191     int len = strlen(src);
 192     len--;
 193     src[len] = ‘\0‘;
 194 }
 195 
 196 bool getBEGIN(int &i,int &line)    //读取“BEGIN”
 197 {
 198     char tmp[6];
 199     strsub(src,i,5,tmp);
 200     if(strcmp(tmp,"BEGIN")==0){
 201         i = i+5;
 202         AddToken("BEGIN","BEGIN",line);    //添加token,<类别，值>
 203         return true;
 204     }
 205     else
 206         return false;
 207 }
 208 bool getBLANK(int &i)    //读取“空白符”==>‘ ‘
 209 {
 210     if(src[i]==‘ ‘){
 211         i++;
 212         return true;
 213     }
 214     else
 215         return false;
 216 }
 217 bool getEND(int &i,int &line)        //读取“END”
 218 {
 219     char tmp[4];
 220     strsub(src,i,3,tmp);
 221     if(strcmp(tmp,"END")==0){
 222         i = i+3;
 223         AddToken("END","END",line);    //添加token,<类别，值>
 224         return true;
 225     }
 226     else
 227         return false;
 228 }
 229 bool getExp(int &i,int &line)        //读取表达式，遇到‘ ‘结束
 230 {
 231     int status=0;
 232     char tmp[10010]={0};
 233     char t[2]={0};
 234     int j=0;
 235     stack <char> ss;
 236     while(status!=8){
 237         if(status==-1)    //跳到错误的状态号，返回false
 238             return false;
 239         if(status==9)    //跳到了错误状态，返回false
 240             return false;
 241 
 242         //根据src[i]确定下一个状态，直到抵达结束状态 8
 243         if( (‘a‘<=src[i] && src[i]<=‘z‘) || (‘A‘<=src[i] && src[i]<=‘Z‘) ){
 244             //读取到字母
 245             status = ExpTable[status][0];
 246             tmp[j++] = src[i++];
 247         }
 248         else if(‘0‘<=src[i] && src[i]<=‘9‘){
 249             //读取到数字
 250             status = ExpTable[status][1];
 251             tmp[j++] = src[i++];
 252         }
 253         else{
 254             switch(src[i]){
 255             case ‘ ‘:    //空白符
 256                 status = ExpTable[status][2];
 257                 if(tmp[0]!=‘\0‘){
 258                     //如果是后来读到空白符，说明表达式该结束了
 259                     //将读取的最后一个单词放入token
 260                     if(j!=0){    //说明之前是一个单词或数字，否则是一个），）已经将最后一个单词放入token流中了，所以不需要处理它
 261                         if( (‘a‘<=tmp[0] && tmp[0]<=‘z‘) || (‘A‘<=tmp[0] && tmp[0]<=‘Z‘) )
 262                             AddToken("ID",tmp,line);    //将这个字符代表的单词放入token流中
 263                         else if( ‘0‘<=tmp[0] && tmp[0]<=‘9‘ )
 264                             AddToken("NUM",tmp,line);    //将这个字符代表的单词放入token流中
 265                         j=0;
 266                     }
 267                 }
 268                 line++;
 269                 i++;
 270                 break;
 271             case ‘=‘:    // =
 272                 status = ExpTable[status][3];
 273 
 274                 tmp[j] = ‘\0‘;
 275                 AddToken("ID",tmp,line);    //=前面一定是标识符，放入token流
 276                 j=0;
 277                 AddToken("=","=",line);    //将当前的=也放入token流
 278 
 279                 i++;
 280                 break;
 281             case ‘+‘:    //OP
 282             case ‘-‘:
 283             case ‘*‘:
 284             case ‘/‘:
 285                 status = ExpTable[status][4];
 286 
 287                 tmp[j] = ‘\0‘;
 288                 if( (‘a‘<=tmp[0] && tmp[0]<=‘z‘) || (‘A‘<=tmp[0] && tmp[0]<=‘Z‘) )    //如果运算符前是字母，则说明是标识符
 289                     AddToken("ID",tmp,line);
 290                 else if( ‘0‘<=tmp[0] && tmp[0]<=‘9‘ )
 291                     AddToken("NUM",tmp,line);
 292                 j=0;
 293                 t[0] = src[i];
 294                 t[1] = ‘\0‘;
 295                 AddToken(t,t,line);    //将运算符放入token流
 296 
 297                 i++;
 298                 break;
 299             case ‘(‘:    // (
 300                 status = ExpTable[status][5];
 301 
 302                 AddToken("(","(",line);    //将（放入token流
 303                 ss.push(‘(‘);
 304 
 305                 i++;
 306                 break;
 307             case ‘)‘:    // )
 308                 status = ExpTable[status][6];
 309 
 310                 tmp[j] = ‘\0‘;
 311                 if( (‘a‘<=tmp[0] && tmp[0]<=‘z‘) || (‘A‘<=tmp[0] && tmp[0]<=‘Z‘) )    //如果)前是字母，则说明是标识符，即ID
 312                     AddToken("ID",tmp,line);
 313                 else if( ‘0‘<=tmp[0] && tmp[0]<=‘9‘ )
 314                     AddToken("NUM",tmp,line);
 315                 j=0;
 316 
 317                 AddToken(")",")",line);    //将（放入token流
 318 
 319                 if(ss.empty())    //括号不匹配
 320                     return false;
 321                 else
 322                     ss.pop();
 323 
 324                 i++;
 325                 break;
 326             default:    //其它
 327                 status = ExpTable[status][7];
 328                 i++;
 329                 break;
 330             }
 331         }
 332     }
 333 
 334     if(status==8){    //正确跳出
 335         /*
 336         if(ss.empty())    //括号匹配
 337             return true;
 338         else
 339             return false;
 340         */
 341         return true;
 342     }
 343     else
 344         return false;
 345 }
 346 
 347 bool Lex()    //进行词法分析
 348 {
 349     printf("词法分析......\n");
 350     int i=0;
 351     int line = 1;
 352     InputString();
 353     try{
 354         getBEGIN(i,line)?1:throw 1;
 355         getBLANK(i)?line++:throw 2;
 356         while(1){    //读取表达式
 357             if(src[i]==‘\0‘)    //还没检测到END，程序就结束了
 358                 throw 3;
 359             char tmp[4];
 360             strsub(src,i,3,tmp);    //截取字符串
 361             if(strcmp(tmp,"END")==0){
 362                 //如果检测到END，跳出循环
 363                 break;
 364             }
 365 
 366             //读取表达式
 367             getExp(i,line)?1:throw 4;
 368         }
 369         getEND(i,line)?1:throw 3;
 370         if(src[i]!=‘\0‘)    //如果END结束标志之后还有输入符号，说明出错误了
 371             throw 5;
 372     }
 373     catch(int err){
 374         printf("【词法错误】\n");
 375         //计算行号
 376         int errline = 1;
 377         int j;
 378         for(j=0;j<=i;j++)
 379             if(src[j]==‘ ‘)
 380                 errline++;
 381 
 382         switch(err){
 383         case 1:
 384             printf("ERROR 1 (第%d行) : 没有读取到开始标识 BEGIN!\n",errline);
 385             printf("具体位置：\t%s%s\n",WordList[WordListPoint-2],WordList[WordListPoint-1]);
 386             printf("\t\t");
 387             for(j=0;j<strlen(WordList[WordListPoint-2]);j++)
 388                 printf(" ");
 389             printf("^\n");
 390             printf("\n");
 391             return false;
 392         case 2:
 393             printf("ERROR 2 (第%d行) : 没有读取到空白符!\n",errline);
 394             printf("具体位置：\t%s%s\n",WordList[WordListPoint-2],WordList[WordListPoint-1]);
 395             printf("\t\t");
 396             for(j=0;j<strlen(WordList[WordListPoint-2]);j++)
 397                 printf(" ");
 398             printf("^\n");
 399             printf("\n");
 400             return false;
 401         case 3:
 402             printf("ERROR 3 (第%d行) : 没有读取到结束标识 END!\n",errline);
 403             printf("具体位置：\t%s%s\n",WordList[WordListPoint-2],WordList[WordListPoint-1]);
 404             printf("\t\t");
 405             for(j=0;j<strlen(WordList[WordListPoint-2]);j++)
 406                 printf(" ");
 407             printf("^\n");
 408             printf("\n");
 409             printf("\n");
 410             return false;
 411         case 4:
 412             printf("ERROR 4 (第%d行) : 表达式错误。例如：标识符有误!\n",errline);
 413             printf("具体位置：\t%s%s\n",WordList[WordListPoint-2],WordList[WordListPoint-1]);
 414             printf("\t\t");
 415             for(j=0;j<strlen(WordList[WordListPoint-2]);j++)
 416                 printf(" ");
 417             printf("^\n");
 418             printf("\n");
 419             return false;
 420         case 5:
 421             printf("ERROR 5 (第%d行) : BEGIN...END 代码段格式错误!\n",errline);
 422             printf("具体位置：\t%s%s\n",WordList[WordListPoint-2],WordList[WordListPoint-1]);
 423             printf("\t\t");
 424             for(j=0;j<strlen(WordList[WordListPoint-2]);j++)
 425                 printf(" ");
 426             printf("^\n");
 427             printf("\n");
 428             return false;
 429         default:break;
 430         }
 431     }
 432     printf("没有词法错误\n");
 433     printf("\n");
 434     return true;
 435 }
 436 
 437 void printTokenList()    //输出token流
 438 {
 439     int i=1;
 440     printf("单词流：\n");
 441     printf("<\t类别\t,值\t>\n");
 442     for(;i<TokenListPoint;i++){
 443         printf("<\t%s\t,%s\t>\n",TokenList_kind[i],WordList[TokenList_value[i]]);
 444     }
 445     printf("\n");
 446 }
 447 //--------------- 词法分析 end ---------------
 448 
 449 
 450 //--------------- 语法分析 start ---------------
 451 //生成语法分析树
 452 
 453 void OutStack(stack <char*> z)        //输出栈中内容的前两个
 454 {
 455     char t[200]={0};
 456     int j=0;
 457     while(!z.empty()){
 458         if(j==2)
 459             break;
 460         if(j==1)
 461             strcat(t,",");
 462         strcat(t,z.top());
 463         z.pop();
 464         j++;
 465     }
 466     strcat(t,"$");
 467     printf("%23s",t);
 468 }
 469 
 470 void OutRightStr()    //输出当前token流中前两个
 471 {
 472     char t[200]={0};
 473     int i,j=0;
 474     for(i = WordsPoint;i<WordListPoint;i++){
 475         if(j==2)
 476             break;
 477         char tt[200]={0};
 478         sprintf(tt,"<%s,%s>",TokenList_kind[i],WordList[TokenList_value[i]]);
 479         strcat(t,tt);
 480         j++;
 481     }
 482     printf("%23s$",t);
 483 }
 484 
 485 void OutAction(char action[])    //输出动作
 486 {
 487     printf(" %s\n",action);
 488 }
 489 
 490 bool seltab(int row,char f[])
 491 {
 492     if(strcmp(TokenList_kind[WordsPoint],"BEGIN")==0){
 493         strcpy(f,PreTab[row][0]);
 494     }
 495     else if(strcmp(TokenList_kind[WordsPoint],"+")==0){
 496         strcpy(f,PreTab[row][1]);
 497     }
 498     else if(strcmp(TokenList_kind[WordsPoint],"-")==0){
 499         strcpy(f,PreTab[row][2]);
 500     }
 501     else if(strcmp(TokenList_kind[WordsPoint],"*")==0){
 502         strcpy(f,PreTab[row][3]);
 503     }
 504     else if(strcmp(TokenList_kind[WordsPoint],"/")==0){
 505         strcpy(f,PreTab[row][4]);
 506     }
 507     else if(strcmp(TokenList_kind[WordsPoint],"(")==0){
 508         strcpy(f,PreTab[row][5]);
 509     }
 510     else if(strcmp(TokenList_kind[WordsPoint],")")==0){
 511         strcpy(f,PreTab[row][6]);
 512     }
 513     else if(strcmp(TokenList_kind[WordsPoint],"=")==0){
 514         strcpy(f,PreTab[row][7]);
 515     }
 516     else if(strcmp(TokenList_kind[WordsPoint],"ID")==0){
 517         strcpy(f,PreTab[row][8]);
 518     }
 519     else if(strcmp(TokenList_kind[WordsPoint],"NUM")==0){
 520         strcpy(f,PreTab[row][9]);
 521     }
 522     else if(strcmp(TokenList_kind[WordsPoint],"END")==0){
 523         strcpy(f,PreTab[row][10]);
 524     }
 525     else if(strcmp(TokenList_kind[WordsPoint],"$")==0){
 526         strcpy(f,PreTab[row][11]);
 527     }
 528     else{
 529         return false;
 530     }
 531     if(f[0]==‘\0‘)    //确定的表位置为空
 532         return false;
 533     return true;
 534 }
 535 
 536 bool Parse()    //语法分析阶段。输入是token流，输出分析结果
 537 {
 538     stack <char*> z;
 539     stack <Node*> nz;
 540     z.push(*Product);
 541 
 542     root = new Node("Program");
 543     now = root;
 544 
 545     printf("词法分析......\n");
 546     printf("【分析流程】\n");
 547     printf("%22s%24s%22s\n","栈","输入","动作");
 548     char action[100]={0};
 549     try{
 550         while(!z.empty() || WordsPoint<WordListPoint){    //栈和输入串中只剩结束符号（栈空 or 指向‘\0‘）时退出循环
 551 
 552             //输出当前分析结果
 553             OutStack(z);        //输出栈中内容
 554             OutRightStr();    //输出剩余输入字符串
 555             OutAction(action);    //输出动作
 556 
 557             memset(action,0,sizeof(action));
 558 
 559             //预处理。例：匹配e。预处理：将e出栈，输入指针后移。
 560 
 561             //还没结束栈就空了
 562             if(z.empty())
 563                 throw 1;    //非法跳出
 564 
 565             if(strcmp(z.top(),TokenList_kind[WordsPoint])==0){    //栈顶元素==指针指向字符，匹配成功
 566                 //语义动作 - 构造语法树
 567                 while(!now->brother){    //回到有下一个兄弟节点的节点为止
 568                     if(nz.empty()){
 569                         if(now==root)    //如果回到了根节点
 570                             goto label;
 571                         else
 572                             throw 1;
 573                     }
 574                     now = nz.top();
 575                     nz.pop();
 576                 }
 577                 now = now->brother;
 578 
 579 label:
 580                 //准备输出字符串 action
 581                 strcat(action,"匹配");
 582                 strcat(action,z.top());
 583 
 584                 //出栈、指针后移
 585                 if(!z.empty())
 586                     z.pop();
 587                 if(WordsPoint<WordListPoint)
 588                     WordsPoint++;
 589             }
 590             else{    //不相等，输出推导
 591                 //确定推导
 592                 char f[200]={0};
 593                 if(strcmp(z.top(),"Program")==0){
 594                     if(!seltab(0,f))    //从表中确定推导串
 595                         throw 2;    //没有找到对应的推导
 596                 }
 597                 else if(strcmp(z.top(),"Stmt-List")==0){
 598                     if(!seltab(1,f))    //从表中确定推导串
 599                         throw 2;    //没有找到对应的推导
 600                 }
 601                 else if(strcmp(z.top(),"Stmt-List‘")==0){
 602                     if(!seltab(2,f))    //从表中确定推导串
 603                         throw 2;    //没有找到对应的推导
 604                 }
 605                 else if(strcmp(z.top(),"Stmt")==0){
 606                     if(!seltab(3,f))    //从表中确定推导串
 607                         throw 2;    //没有找到对应的推导
 608                 }
 609                 else if(strcmp(z.top(),"Assign-stmt")==0){
 610                     if(!seltab(4,f))    //从表中确定推导串
 611                         throw 2;    //没有找到对应的推导
 612                 }
 613                 else if(strcmp(z.top(),"Expr")==0){
 614                     if(!seltab(5,f))    //从表中确定推导串
 615                         throw 2;
 616                 }
 617                 else if(strcmp(z.top(),"Expr‘")==0){
 618                     if(!seltab(6,f))    //从表中确定推导串
 619                         throw 2;
 620                 }
 621                 else if(strcmp(z.top(),"Term")==0){
 622                     if(!seltab(7,f))    //从表中确定推导串
 623                         throw 2;
 624                 }
 625                 else if(strcmp(z.top(),"Term‘")==0){
 626                     if(!seltab(8,f))    //从表中确定推导串
 627                         throw 2;
 628                 }
 629                 else if(strcmp(z.top(),"Factor")==0){
 630                     if(!seltab(9,f))    //从表中确定推导串
 631                         throw 2;
 632                 }
 633                 else if(strcmp(z.top(),"Add-Op")==0){
 634                     if(!seltab(10,f))    //从表中确定推导串
 635                         throw 2;
 636                 }
 637                 else if(strcmp(z.top(),"Multiple-Op")==0){
 638                     if(!seltab(11,f))    //从表中确定推导串
 639                         throw 2;
 640                 }
 641                 else{
 642                     throw 2;
 643                 }
 644                 //准备输出字符串 action
 645                 strcat(action,"输出");
 646                 strcat(action,z.top());
 647                 strcat(action,"->");
 648                 strcat(action,f);
 649 
 650                 //将栈顶字符串记录下来后用
 651                 char proleft[50];
 652                 //将栈顶元素出栈并将推导入栈
 653                 if(!z.empty()){
 654                     strcpy(proleft,z.top());
 655                     z.pop();
 656                 }
 657                 else
 658                     throw 1;
 659 
 660                 char tmp[100];
 661 
 662                 //如果推导出来的是空，即f->.，不作处理
 663                 if(f[0]==‘.‘){
 664                     nz.push(now);
 665 
 666                     now->next = new Node(".");
 667                     now = now->next;    //now指向当前产生式右边的第一个字符串
 668 
 669                     while(!now->brother){    //回到有下一个兄弟节点的节点为止
 670                         if(nz.empty()){
 671                             if(now==root)    //如果回到了根节点
 672                                 goto label;
 673                             else
 674                                 throw 1;
 675                         }
 676                         now = nz.top();
 677                         nz.pop();
 678                     }
 679                     now = now->brother;
 680 
 681                     continue;
 682                 }
 683                 stack <char*> tz;    //临时的栈
 684 
 685                 //正向输入到临时栈中
 686                 while(sscanf(f,"%s",tmp)!=-1){
 687                     //语义动作 - 创建语法树
 688                     if(strcmp(proleft,now->name)==0){
 689                         nz.push(now);    //将当前节点记录下来
 690                         
 691                         now->next = new Node(tmp);
 692                         now = now->next;    //now指向当前产生式右边的第一个字符串
 693                         p = now;
 694                     }
 695                     else{
 696                         p->brother = new Node(tmp);
 697                         p = p->brother;
 698                     }
 699 
 700                     char* pos = strstr(f,tmp);
 701                     strcpy(f,pos+strlen(tmp));
 702                     if(strcmp(tmp,"Program")==0){
 703                         tz.push(*(Product));
 704                     }
 705                     else if(strcmp(tmp,"Stmt-List")==0){
 706                         tz.push(*(Product+1));
 707                     }
 708                     else if(strcmp(tmp,"Stmt-List‘")==0){
 709                         tz.push(*(Product+2));
 710                     }
 711                     else if(strcmp(tmp,"Stmt")==0){
 712                         tz.push(*(Product+3));
 713                     }
 714                     else if(strcmp(tmp,"Assign-stmt")==0){
 715                         tz.push(*(Product+4));
 716                     }
 717                     else if(strcmp(tmp,"Expr")==0){
 718                         tz.push(*(Product+5));
 719                     }
 720                     else if(strcmp(tmp,"Expr‘")==0){
 721                         tz.push(*(Product+6));
 722                     }
 723                     else if(strcmp(tmp,"Term")==0){
 724                         tz.push(*(Product+7));
 725                     }
 726                     else if(strcmp(tmp,"Term‘")==0){
 727                         tz.push(*(Product+8));
 728                     }
 729                     else if(strcmp(tmp,"Factor")==0){
 730                         tz.push(*(Product+9));
 731                     }
 732                     else if(strcmp(tmp,"Add-Op")==0){
 733                         tz.push(*(Product+10));
 734                     }
 735                     else if(strcmp(tmp,"Multiple-Op")==0){
 736                         tz.push(*(Product+11));
 737                     }
 738                     else if(strcmp(tmp,"BEGIN")==0){
 739                         tz.push(*(Product+12));
 740                     }
 741                     else if(strcmp(tmp,"+")==0){
 742                         tz.push(*(Product+13));
 743                     }
 744                     else if(strcmp(tmp,"-")==0){
 745                         tz.push(*(Product+14));
 746                     }
 747                     else if(strcmp(tmp,"*")==0){
 748                         tz.push(*(Product+15));
 749                     }
 750                     else if(strcmp(tmp,"/")==0){
 751                         tz.push(*(Product+16));
 752                     }
 753                     else if(strcmp(tmp,"(")==0){
 754                         tz.push(*(Product+17));
 755                     }
 756                     else if(strcmp(tmp,")")==0){
 757                         tz.push(*(Product+18));
 758                     }
 759                     else if(strcmp(tmp,"=")==0){
 760                         tz.push(*(Product+19));
 761                     }
 762                     else if(strcmp(tmp,"ID")==0){
 763                         tz.push(*(Product+20));
 764                     }
 765                     else if(strcmp(tmp,"NUM")==0){
 766                         tz.push(*(Product+21));
 767                     }
 768                     else if(strcmp(tmp,"END")==0){
 769                         tz.push(*(Product+22));
 770                     }
 771                     else{
 772                         throw 1;
 773                     }
 774 
 775                 }
 776 
 777                 //反向输出到真正的栈中
 778                 while(!tz.empty()){
 779                     if(strcmp(tz.top(),"Program")==0){
 780                         z.push(*(Product));
 781                     }
 782                     else if(strcmp(tz.top(),"Stmt-List")==0){
 783                         z.push(*(Product+1));
 784                     }
 785                     else if(strcmp(tz.top(),"Stmt-List‘")==0){
 786                         z.push(*(Product+2));
 787                     }
 788                     else if(strcmp(tz.top(),"Stmt")==0){
 789                         z.push(*(Product+3));
 790                     }
 791                     else if(strcmp(tz.top(),"Assign-stmt")==0){
 792                         z.push(*(Product+4));
 793                     }
 794                     else if(strcmp(tz.top(),"Expr")==0){
 795                         z.push(*(Product+5));
 796                     }
 797                     else if(strcmp(tz.top(),"Expr‘")==0){
 798                         z.push(*(Product+6));
 799                     }
 800                     else if(strcmp(tz.top(),"Term")==0){
 801                         z.push(*(Product+7));
 802                     }
 803                     else if(strcmp(tz.top(),"Term‘")==0){
 804                         z.push(*(Product+8));
 805                     }
 806                     else if(strcmp(tz.top(),"Factor")==0){
 807                         z.push(*(Product+9));
 808                     }
 809                     else if(strcmp(tz.top(),"Add-Op")==0){
 810                         z.push(*(Product+10));
 811                     }
 812                     else if(strcmp(tz.top(),"Multiple-Op")==0){
 813                         z.push(*(Product+11));
 814                     }
 815                     else if(strcmp(tz.top(),"BEGIN")==0){
 816                         z.push(*(Product+12));
 817                     }
 818                     else if(strcmp(tz.top(),"+")==0){
 819                         z.push(*(Product+13));
 820                     }
 821                     else if(strcmp(tz.top(),"-")==0){
 822                         z.push(*(Product+14));
 823                     }
 824                     else if(strcmp(tz.top(),"*")==0){
 825                         z.push(*(Product+15));
 826                     }
 827                     else if(strcmp(tz.top(),"/")==0){
 828                         z.push(*(Product+16));
 829                     }
 830                     else if(strcmp(tz.top(),"(")==0){
 831                         z.push(*(Product+17));
 832                     }
 833                     else if(strcmp(tz.top(),")")==0){
 834                         z.push(*(Product+18));
 835                     }
 836                     else if(strcmp(tz.top(),"=")==0){
 837                         z.push(*(Product+19));
 838                     }
 839                     else if(strcmp(tz.top(),"ID")==0){
 840                         z.push(*(Product+20));
 841                     }
 842                     else if(strcmp(tz.top(),"NUM")==0){
 843                         z.push(*(Product+21));
 844                     }
 845                     else if(strcmp(tz.top(),"END")==0){
 846                         z.push(*(Product+22));
 847                     }
 848                     else{
 849                         throw 1;
 850                     }
 851                     tz.pop();
 852                 }
 853             }
 854 
 855         }
 856         if(z.empty() && WordsPoint >= WordListPoint){    //正常退出循环
 857             //输出最后一行分析结果
 858             OutStack(z);        //输出栈中内容
 859             OutRightStr();    //输出剩余输入字符串
 860             OutAction(action);    //输出动作
 861         }
 862         else{    //非正常情况
 863             throw 1;
 864         }
 865     }
 866     catch(int err){
 867         printf("\n");
 868         printf("【语法错误】\n");
 869 
 870         switch(err){
 871         case 1:
 872             printf("ERROR 1 (第%d行) : 非法跳出!\n",LineList[WordsPoint]);
 873             printf("\n");
 874             return false;
 875         case 2:
 876             printf("ERROR 2 (第%d行) : 没有找到 %s 对应的推导!\n",LineList[WordsPoint],z.top());
 877             printf("\n");
 878             return false;
 879         default:
 880             break;
 881         }
 882     }
 883     printf("没有语法错误\n");
 884     printf("\n");
 885     return true;
 886 }
 887 //--------------- 语法分析 end ---------------
 888 
 889 //--------------- 生成三地址代码 start ---------------
 890 //对语法分析树进行后序遍历，执行语义规则，一遍扫描之后，树根的code即为三地址代码
 891 
 892 
 893 void setWP(Node* cur)    //设置每一个叶子节点的wp指针
 894 {
 895     if(!cur->next){
 896         if(cur->name[0]==‘.‘)
 897             return ;
 898         cur->wp = WordsPoint++;
 899         return ;
 900     }
 901     Node* p = cur->next;
 902     while(p){
 903         setWP(p);
 904         p = p->brother;
 905     }
 906 }
 907 
 908 char* getNext(Node* cur,FILE* fo)
 909 {
 910     //递归出口
 911     if(!cur->next){
 912         if(cur->name[0]==‘.‘ 
 913             || cur->name[0]==‘(‘ 
 914             || cur->name[0]==‘)‘){    //忽略空. 和 （ 和 ）
 915             char* t = new char[2];
 916             t[0]=‘\0‘;
 917             return t;
 918         }
 919         return WordList[TokenList_value[cur->wp]];
 920     }
 921     if(strcmp(cur->name,"Program")==0){
 922         fprintf(fo,"%s\r\n",getNext(cur->next,fo));
 923         getNext(cur->next->brother,fo);
 924         fprintf(fo,"%s\r\n",getNext(cur->next->brother->brother,fo));
 925         char* tmp = new char[2];
 926         tmp[0] = ‘\0‘;
 927         return tmp;
 928     }
 929     else if(strcmp(cur->name,"Assign-stmt")==0){
 930         char* tmp = new char[2];
 931         tmp[0] = ‘\0‘;
 932         fprintf(fo,"%s=%s\r\n",getNext(cur->next,fo),getNext(cur->next->brother->brother,fo));
 933         return tmp;
 934     }
 935     else if(strcmp(cur->name,"Term")==0){
 936         char* tmp = new char[150];
 937         tmp = getNext(cur->next->brother,fo);
 938         if(tmp[0]==‘*‘ || tmp[0]==‘/‘){
 939             fprintf(fo,"t%d=%s%s\r\n",tmpcnt,getNext(cur->next,fo),tmp);
 940             sprintf(tmp,"t%d",tmpcnt++);
 941             return tmp; 
 942         }
 943     }
 944     else if(strcmp(cur->name,"Expr")==0){
 945         char* tmp = new char[150];
 946         tmp = getNext(cur->next->brother,fo);
 947         if(tmp[0]==‘+‘ || tmp[0]==‘-‘){
 948             fprintf(fo,"t%d=%s%s\r\n",tmpcnt,getNext(cur->next,fo),tmp);
 949             sprintf(tmp,"t%d",tmpcnt++);
 950             return tmp; 
 951         }
 952     }
 953     else if(strcmp(cur->name,"Term‘")==0){
 954         char* tmp = new char[150];
 955         if(cur->next->brother!=NULL){
 956             p = cur->next->brother->brother;
 957             if(p->next->brother!=NULL){
 958                 tmp = getNext(cur->next->brother->brother,fo);
 959                 if(tmp[0]==‘*‘ || tmp[0]==‘/‘){
 960                     if(*getNext(cur->next,fo)==‘/‘){    //如果最前面的是‘-’，后面的运算符应该是反的
 961                         if(tmp[0]==‘/‘)
 962                             tmp[0]=‘*‘;
 963                         else
 964                             tmp[0]=‘/‘;
 965                     }
 966                     fprintf(fo,"t%d=%s%s\r\n",tmpcnt,getNext(cur->next->brother,fo),tmp);
 967                     sprintf(tmp,"%st%d",getNext(cur->next,fo),tmpcnt++);
 968                     return tmp; 
 969                 }
 970             }
 971         }
 972     }
 973     else if(strcmp(cur->name,"Expr‘")==0){
 974         char* tmp = new char[150];
 975         if(cur->next->brother!=NULL){
 976             p = cur->next->brother->brother;
 977             if(p->next->brother!=NULL){
 978                 tmp = getNext(cur->next->brother->brother,fo);
 979                 if(tmp[0]==‘+‘ || tmp[0]==‘-‘){
 980                     if(*getNext(cur->next,fo)==‘-‘){    //如果最前面的是‘-’，后面的运算符应该是反的
 981                         if(tmp[0]==‘-‘)
 982                             tmp[0]=‘+‘;
 983                         else
 984                             tmp[0]=‘-‘;
 985                     }
 986                     fprintf(fo,"t%d=%s%s\r\n",tmpcnt,getNext(cur->next->brother,fo),tmp);
 987                     sprintf(tmp,"%st%d",getNext(cur->next,fo),tmpcnt++);
 988                     return tmp; 
 989                 }
 990             }
 991         }
 992     }
 993 
 994     char * s = new char[150];    //new一个字符串，存储这个节点的结果
 995     memset(s,0,sizeof(s));
 996 
 997     Node* p = cur->next;
 998     while(p){
 999         strcat(s,getNext(p,fo));
1000         p = p->brother;
1001     }
1002     return s;
1003 }
1004 
1005 void get3AddrCode()
1006 {
1007     WordsPoint = 1;
1008     FILE* fo = fopen(".\\out.txt","wb");
1009     if(fo==NULL){
1010         printf("三地址代码生成文件 out.txt 打开失败！\n");
1011         return ;
1012     }
1013 
1014     setWP(root);
1015     printf("生成三地址语句......\n");
1016     getNext(root,fo);
1017 }
1018 //--------------- 生成三地址代码 end ---------------
1019 
1020 int main()
1021 {
1022     bool f=true;
1023     Init();
1024     if(!Lex())    //词法分析
1025         f=false;
1026     //printTokenList();
1027     if(!Parse())    //语法分析
1028         f=false;
1029     if(f)    //都通过了才能生成三地址代码
1030         get3AddrCode();    //生成三地址代码
1031     return 0;
1032 }

Freecode : www.cnblogs.com/yym2013

一个简单编译器前端的实现

标签：des style blog http ar io color os sp

原文地址：http://www.cnblogs.com/yym2013/p/4172697.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行