花了一天写出的程序没有顾及很多层面,但对于理解基本的实验道理和交上实验还是有点帮助的。代码实现了基于有限自动机的词法分析,采用递归下降分析法和EBNF文法实现语法分析并生成中间代码。
lexAnalysis.h
/* * lexAnalysis.h * * Created on: 2014-12-2 * Author: liuqiushan */ #ifndef LEXANALYSIS_H_ #define LEXANALYSIS_H_ #include <stdio.h> #include <stdlib.h> #include <string.h> typedef enum _SYM { _CONST, _VAR, _procedure, _begin, _end, _if, _then, _ID, // such as the 'a' in 'var a;' _INT, // such as the '10' in 'const a = 10;' _ASSIGN,// '=' _PLUS,// '+' _SUB,// '-' _STAR,// '*' _DIV,// '/' _LESS,// '<' _MORE,// '>' _LESSEQ,// '<=' _MOREEQ,// '>=' _DH,// ',' _MD,// ':=' _LEFT,// '(' _RIGHT,// ')' _JH,// '#' _FH// ‘;’ }SYM; SYM getSYM(char* str); void appendStr(char* str, char* c, int p_str); int isABC(char* c); int isNumber(char* c); #endif /* LEXANALYSIS_H_ */
LexAnalysis.c
/* * LexAnalysis.c * * Created on: 2014-12-2 * Author: liuqiushan */ #include "lexAnalysis.h" SYM getSYM(char* _str) { SYM temp = _ID; // default identifier if(!strcasecmp(_str, "CONST")) temp = _CONST; else if(!strcasecmp(_str, "VAR")) temp = _VAR; else if(!strcasecmp(_str, "procedure")) temp = _procedure; else if(!strcasecmp(_str, "begin")) temp = _begin; else if(!strcasecmp(_str, "end")) temp = _end; else if(!strcasecmp(_str, "if")) temp = _if; else if(!strcasecmp(_str, "then")) temp = _then; return temp; } void appendStr(char str[], char* c, int p_str ) { str[p_str] = *c; } int isABC(char* c) { char ch = *c; if(ch=='q'||ch=='w'||ch=='e'||ch=='r'||ch=='t'||ch=='y'||ch=='u'||ch=='i'||ch=='o'||ch=='p'|| ch=='a'||ch=='s'||ch=='d'||ch=='f'||ch=='g'||ch=='h'||ch=='j'||ch=='k'||ch=='l'|| ch=='z'||ch=='x'||ch=='c'||ch=='v'||ch=='b'||ch=='n'||ch=='m'||ch=='Q'||ch=='W'|| ch=='E'||ch=='R'||ch=='T'||ch=='Y'||ch=='U'||ch=='I'||ch=='O'||ch=='P'||ch=='A'|| ch=='S'||ch=='D'||ch=='F'||ch=='G'||ch=='H'||ch=='J'||ch=='K'||ch=='L'||ch=='Z'|| ch=='X'||ch=='C'||ch=='V'||ch=='B'||ch=='N'||ch=='M') return 1; else return 0; } int isNumber(char* c) { char ch = *c; if(ch=='1'||ch=='2'||ch=='3'||ch=='4'||ch=='5'||ch=='6'||ch=='7'||ch=='8'||ch=='9'||ch=='0') return 1; else return 0; }
synAnalysis.h
/* * synAnalysis.h * * Created on: 2014-12-2 * Author: liuqiushan */ #ifndef SYNANALYSIS_H_ #define SYNANALYSIS_H_ typedef enum _KIND { PRO,//procedure CON,//constant VAR //variable }CVPKind; /* * PL/0 Object Instruction * * There are three domains: * * f is the function code * l is the difference between the declared level and the invoked level of the variable * * ----------------------------- * | | | | * | f | l | a | * | | | | * ----------------------------- */ typedef enum _FunctionCode { LIT,//① LIT: put the constant on the top of the stack, a is the constant LOD,//② LOD: put the variable on the top of the stack, a is the address STO,//③ STO: put the content on the top of the stack into a variable. such as c := b + 10 CAL,//④ CAL INT,//⑤ INT: create the data area, and a is the number of the area JMP,//⑥ JMP JPC,//⑦ JPC OPR//⑧ OPR: operation, and the type is delivered by a. And the operation objects are the top of the stack and the next top of the stack }FunctionCode; typedef enum _OPR { ADD = 1, //+ SUB, //- MUL, //* DIV, // / GT, LT, GE, LE, UE, EQ, WRITE, READ, MINUS }OPRTYPE; typedef struct _TableItem { char* name; CVPKind kind; int level; char* address; }CVPTableItem; typedef struct _Instruct { FunctionCode f; int l; int a; }ObjectInstruct; #endif /* SYNANALYSIS_H_ */
SynAnalysis.c
/* ============================================================================ Name : SynAnalysis.c Author : liuqiushan Date : Dec. 2, 2014 18:16 Version : 1.0 Copyright : Shandong University Description : Syntax Analysis ============================================================================ */ /* * Expression EBNF *-------------------------------------------- *| Expression -> Term { Addop Term } | *| Addop -> "+" | "-" | *| Term -> Factor { Mulop Factor } | *| Mulop -> "*" | "/" | *| Factor -> ID | NUM | "(" Expression ")" | *-------------------------------------------- */ /* * Pl/0 Statements to be analyzed *-------------------------------------------- *| const a=10 | *| var b,c | *| procedure p | *| begin | *| c:=b+a | *| end | *-------------------------------------------- */ #include "lexAnalysis.h" #include "synAnalysis.h" // Data Structure typedef struct Statement { char* _statement; int _length; // the length of the statement int _locate_pointer; // default point to the first character }P_Statement; // Methods void putsStatements(void); void initStatements(void); void binding(P_Statement* p_s, char* statement); void lexical_Analysis(char* a, int len_a); void printLexResults(void); SYM getNextSYMToken(); char* getNextID(); char* getNextNumber(); void levelProcess(int level); void processConstant(int level); int processVariable(int level); void processProcedure(int level); void strcpyCVPTable(CVPTableItem* cvp, char* name, char* address); void printSynResults(void); int str2int(char *str); void GEN(FunctionCode f, int l, int a); void Sentence(int level); void EBNFExpression(int level); int getIndexCVP(char* id); void Term(int level); void Factor(int level); /* Global Variables */ // Information about the statements set #define statementNum 6 char* statements[statementNum] = { "const a=10", "var b,c", "procedure p", "begin", "c:=b+a", "end" }; // Contain those statements to be analyzed P_Statement statementSet[statementNum]; // For lexical analysis #define MAX 100 SYM array_SYM[MAX]; // SYM array for lexical analysis int p_a_SYM = 0; // SYM array location pointer char array_Id[MAX][MAX]; // identifier array for lexical analysis int p_a_Id = 0; // identifier array location pointer char array_Num[MAX][MAX]; // number array for lexical analysis int p_a_Num = 0; // number array location pointer // Token acquired during lexical analysis char str[MAX] = ""; // current token to be analyzed int p_str = 0; // token inner location pointer /* For Syntax Analysis */ // Token acquired during syntax analysis SYM _currentSYMToken; int currentPointer = 0; char* _currentID; int currentPointerID = 0; char* _currentNumber; int currentPointerNumber = 0; int level = 0; //It indicates which level is under processing, and also the level is just the main level. CVPTableItem cvptable[MAX]; // used to record every element, and we need the table to show which elements (including constant, variable and procedure) are in the program int p_table = 0; // the appending pointer of cvptable int addrVar = 3; // the initial address for the first variable is 3 // the following two are used for the address of procedure. In fact up to now, I do not their meaning. int indexW = 0; int offset = 1; ObjectInstruct CODE[MAX]; // recording the generated intermediate code int p_code = 0; // the appending pointer of CODE OPRTYPE type_opr; // Expression operation type // Main int main(void) { puts("====================Initial====================="); putsStatements(); // print which statements need to be analyzed initStatements(); // initial all the statements to be analyzed with the struct P_Statement // start lexical analysis puts("================Lexical Analysis================"); int i; for (i = 0; i < statementNum; i++) { lexical_Analysis(statementSet[i]._statement, statementSet[i]._length); } array_SYM[p_a_SYM] = -1;// end with -1 // print the results of lexical analysis printLexResults(); // Now let us start Syntax Analysis, just enjoy it puts("================Syntax Analysis================"); _currentSYMToken = getNextSYMToken(); levelProcess(level); //print the results of syntax analysis printSynResults(); return EXIT_SUCCESS; } // Implementation void putsStatements(void) { puts("Analyzing the following statements: "); puts(""); int i; for (i = 0; i < statementNum; i++) { puts(statements[i]); } } void initStatements(void) { int i; for (i = 0; i < statementNum; i++) binding(&statementSet[i], statements[i]); } void binding(P_Statement* p_s, char* statement) { // calculate the length of statement; int len = strlen(statement); p_s->_statement = malloc(len*sizeof(char)); strcpy(p_s->_statement, statement); p_s->_length = len; p_s->_locate_pointer = 0; // testing // printf("%s\n", p_s->_statement); // printf("%d\n", p_s->_length); } // Lexical Analysis Main Program void lexical_Analysis(char* a, int len_a) { int i; for (i = 0; i < len_a; i++) { // Only get one character each time if (a[i]==' ') continue; // Finite Automation Method if (isABC(&a[i])) // If the first character is letter { while (isABC(&a[i])||isNumber(&a[i])) { appendStr(str, &a[i], p_str); p_str++; i++; if (i == len_a) break; } i--; SYM code = getSYM(str); //either identifier or keyword if (code == _ID) { array_SYM[p_a_SYM] = _ID; p_a_SYM++; strcpy(array_Id[p_a_Id], str); p_a_Id++; } else { // keyword or reserved word of Pl/0 array_SYM[p_a_SYM] = code; p_a_SYM++; } // before next one, clean them memset(str, 0, sizeof(str)); p_str = 0; } else if (isNumber(&a[i])) // If the first character is number { while (isNumber(&a[i])) { appendStr(str, &a[i], p_str); p_str++; i++; if (i == len_a) break; } i--; array_SYM[p_a_SYM] = _INT; p_a_SYM++; strcpy(array_Num[p_a_Num], str); p_a_Num++; // before next one, clean them memset(str, 0, sizeof(str)); p_str = 0; } else if (a[i] == '=') { array_SYM[p_a_SYM] = _ASSIGN; p_a_SYM++; } else if (a[i] == ',') { array_SYM[p_a_SYM] = _DH; p_a_SYM++; } else if (a[i] == ':') { i++; if (a[i] == '=') { array_SYM[p_a_SYM] = _MD; p_a_SYM++; } } else if (a[i] == '+') { array_SYM[p_a_SYM] = _PLUS; p_a_SYM++; } } // print the results of lexical analysis //printLexResults(); } void printLexResults(void) { int i; puts("============Lexical Analysis Results================"); printf("SYM: [ "); for (i = 0; i < p_a_SYM; i++) { switch(array_SYM[i]) { case 0: printf("_CONST "); break; case 1: printf("_VAR "); break; case 2: printf("_procedure "); break; case 3: printf("_begin "); break; case 4: printf("_end "); break; case 5: printf("_if "); break; case 6: printf("_then "); break; case 7: printf("_ID "); break; case 8: printf("_INT "); break; case 9: printf("_ASSIGN "); break; case 10: printf("_PLUS "); break; case 11: printf("_SUB "); break; case 12: printf("_STAR "); break; case 13: printf("_DIV "); break; case 14: printf("_LESS "); break; case 15: printf("_MORE "); break; case 16: printf("_LESSEQ "); break; case 17: printf("_MOREEQ "); break; case 18: printf("_DH "); break; case 19: printf("_MD "); break; case 20: printf("_LEFT "); break; case 21: printf("_RIGHT "); break; case 22: printf("_JH "); break; case 23: printf("_FH "); break; } } printf("]\n"); printf("ID: [ "); for (i = 0; i < p_a_Id; i++) printf("%s ", array_Id[i]); printf("]\n"); printf("NUM: [ "); for (i = 0; i < p_a_Num; i++) printf("%s ", array_Num[i]); printf("]\n"); } SYM getNextSYMToken() { return array_SYM[currentPointer++]; } char* getNextID() { return array_Id[currentPointerID++]; } char* getNextNumber() { return array_Num[currentPointerNumber++]; } void levelProcess(int level) { int countVar = 0; // record the number of variables in one level if (_currentSYMToken != -1) { if (_currentSYMToken == _CONST) // Constant { processConstant(level); if ((_currentSYMToken=getNextSYMToken())==-1) return; } if (_currentSYMToken == _VAR) // variables follow constant in our example { countVar = processVariable(level); } if (_currentSYMToken == _procedure) // procedure follows variables in our example { _currentSYMToken = getNextSYMToken(); // get the identifier of the procedure processProcedure(level); } } GEN(INT, 0, 3+countVar); // Create the specified number area. Because we designed addrVar 3, so we plus 3 Sentence(level); GEN(OPR, 0, 0); // quit the data area } void processConstant(int level) { if ((_currentSYMToken=getNextSYMToken())!=-1&&_currentSYMToken==_ID) // a if ((_currentSYMToken=getNextSYMToken())!=-1&&_currentSYMToken==_ASSIGN) // = if ((_currentSYMToken=getNextSYMToken())!=-1&&_currentSYMToken==_INT) // 10 if ((_currentID=getNextID())!=NULL) // get the identifier if ((_currentNumber=getNextNumber())!=NULL) // get the number { strcpyCVPTable(&cvptable[p_table], _currentID, _currentNumber); cvptable[p_table].kind = CON; cvptable[p_table].level = level; // Do not forget to add p_table p_table++; } } int processVariable(int level) { int count = 0; // record the number of variables if ((_currentSYMToken=getNextSYMToken())!=-1&&_currentSYMToken==_ID) if ((_currentID=getNextID())!=NULL) // get the identifier { count++; char str[MAX]; sprintf(str, "%d", addrVar); strcpyCVPTable(&cvptable[p_table], _currentID, str); cvptable[p_table].kind = VAR; cvptable[p_table].level = level; addrVar++; // Do not forget to add p_table p_table++; } // Maybe we could meet such var a,b while((_currentSYMToken=getNextSYMToken())!=-1&&_currentSYMToken==_DH) { if ((_currentSYMToken=getNextSYMToken())!=-1&&_currentSYMToken==_ID) if ((_currentID=getNextID())!=NULL) // get the identifier { count++; char str[MAX]; sprintf(str, "%d", addrVar); strcpyCVPTable(&cvptable[p_table], _currentID, str); cvptable[p_table].kind = VAR; cvptable[p_table].level = level; addrVar++; // Do not forget to add p_table p_table++; } } return count; } void processProcedure(int level) { if (_currentSYMToken!=-1&&_currentSYMToken==_ID) if ((_currentID=getNextID())!=NULL) // get the identifier of the procedure { char str[MAX]; int temp = indexW + offset; sprintf(str, "%d", temp); strcpyCVPTable(&cvptable[p_table], _currentID, str); cvptable[p_table].kind = PRO; cvptable[p_table].level = level; // Do not forget to add p_table p_table++; } // IMPORTANT PHASE: SUBLEVEL _currentSYMToken = getNextSYMToken(); // in our example it should be begin levelProcess(level + 1); // Recursive-descent Parsing } void strcpyCVPTable(CVPTableItem* cvp, char* name, char* address) { int len_N = strlen(name); int len_A = strlen(address); cvp->name = (char*)malloc(len_N*sizeof(char)); cvp->address = (char*)malloc(len_A*sizeof(char)); strcpy(cvp->name, name); strcpy(cvp->address, address); } void printSynResults(void) { int i; puts("============Syntax Analysis Results================"); puts("The Table of Constant, Variable and Procedure"); puts("Name\tKind\tLevel\tAddress\t"); for (i = 0; i < p_table; i++) { //printf("%s\t%d\t%d\t%s\n", cvptable[i].name, cvptable[i].kind, cvptable[i].level, cvptable[i].address); printf("%s\t", cvptable[i].name); switch(cvptable[i].kind) { case CON: printf("CON\t"); break; case VAR: printf("VAR\t"); break; case PRO: printf("PRO\t"); break; } printf("%d\t%s\n", cvptable[i].level, cvptable[i].address); } puts(""); puts("Intermediate Code"); puts("No.\tFunction Code\tLevel Difference\tDisplacement"); for (i = 0; i < p_code; i++) { printf("%d\t", i); switch(CODE[i].f) { case LIT: printf("LIT\t"); break; case INT: printf("INT\t"); break; case LOD: printf("LOD\t"); break; case STO: printf("STO\t"); break; case CAL: printf("CAL\t"); break; case JMP: printf("JMP\t"); break; case JPC: printf("JPC\t"); break; case OPR: printf("OPR\t"); break; } printf("\t%d\t\t\t%d\n", CODE[i].l, CODE[i].a); } } int str2int(char *str) { int v = 0; do { v = 10*v+*str-'0'; str++; } while((*str>='0')&&(*str<='9')); return v; } // To generate the object instruct void GEN(FunctionCode f, int l, int a) { CODE[p_code].f = f; CODE[p_code].l = l; CODE[p_code].a = a; p_code++; } void Sentence(int level) { // Here I just implement begin and Assignment i.e. _MD(:=) if (_currentSYMToken != -1) { if (_currentSYMToken ==_begin) { _currentSYMToken = getNextSYMToken(); Sentence(level);// recurse with the sub-sentence of the sentence (i.e. nest), but they are in the same level while(_currentSYMToken != -1) { if (_currentSYMToken == _end) { _currentSYMToken = getNextSYMToken(); // in our example, this is the end of SYM table. it should be -1 return; } } } else if (_currentSYMToken == _ID) // Assignment { _currentID = getNextID(); // I omit the variables valid checking, but we sill need to find it in cvptable int cvpIndex = getIndexCVP(_currentID); // Just find its index in cvptable if((_currentSYMToken=getNextSYMToken())!=-1&&_currentSYMToken==_MD) // assignment symbol { _currentSYMToken = getNextSYMToken(); EBNFExpression(level); GEN(STO, level-cvptable[cvpIndex].level, str2int(cvptable[cvpIndex].address)); return; // assignment over } } } } void EBNFExpression(int level) { Term(level); // the level + and - // Here we implement + and - while((_currentSYMToken=getNextSYMToken())!=-1) { if (_currentSYMToken == _SUB || _currentSYMToken == _PLUS) { if (_currentSYMToken == _SUB) type_opr = SUB; else type_opr = ADD; _currentSYMToken = getNextSYMToken(); // next term Term(level); GEN(OPR, 0, type_opr); } } } int getIndexCVP(char* id) { int i; for (i = 0; i < p_table; i++) { if(!strcmp(cvptable[i].name, id)) break; } return i; } void Term(int level) { Factor(level); // the level of * and / // we omit * and / } void Factor(int level) { // In fact, we omit '(' and ')', but it is easy to implement it by recursing with EBNFExpression if(_currentSYMToken!=-1&&_currentSYMToken==_ID) { _currentID = getNextID(); int cvpIndex = getIndexCVP(_currentID); // Just find its index in cvptable if(cvptable[cvpIndex].kind == VAR) // type checking { GEN(LOD, level-cvptable[cvpIndex].level, str2int(cvptable[cvpIndex].address)); } else if (cvptable[cvpIndex].kind == CON) { GEN(LIT, 0, str2int(cvptable[cvpIndex].address)); } } }
实验结果:
(山东大学编译原理实验)
原文地址:http://blog.csdn.net/bluecloudmatrix/article/details/41701233