码迷,mamicode.com
首页 > 编程语言 > 详细

一个简单的C语言语法检查器的实现

时间:2016-09-12 18:59:46      阅读:666      评论:0      收藏:0      [点我收藏+]

标签:

  我自己的实现方法的核心过程:首先用一个非终结符代表所有要检查的程序代码,然后根据文法将这个整体的符号不断展开,以拼凑成按检查的程序的顺序排列的终结符序列,能成功说明语法正确,否则有错误。

关键词:分词;First集;Select集;

  目前还存在的问题:

    1.因为同一个非终结符->终结符的转化可能有多种解释,所以目前我的非终结符展开这里是用递归写的,因此不能定位具体错在哪里。

    2.int a={1,2};int a[2]={1,'b'};这种该出错的地方不会出错。这个比较致命,但目前还没想好怎么解决。

  代码部分借鉴了这里,我直接用了他的分词的scanner函数和宏定义的部分。他的程序总共2000+行,我改了一下,总共只用1000行就实现了他的功能,其中500行是抄的他的分词......并且修改了他的程序里的一些错误。

Grammar.txt:文法的定义

技术分享
<程序闭包> -> <声明语句闭包> #
<程序闭包> -> <函数定义> #
<程序闭包> -> $ #
<函数定义> -> <修饰词闭包> <类型> <变量> ( <参数声明> ) { <函数块> } #
<修饰词闭包> -> <修饰词> <修饰词闭包> #
<修饰词闭包> -> $ #
<修饰词> -> describe #
<类型> -> type <取地址> #
<取地址> -> <星号闭包> #
<星号闭包> -> <星号> <星号闭包> #
<星号闭包> -> $ #
<星号> -> * #
<变量> -> <标志符> <数组下标> #
<标志符> -> id #
<数组下标> -> [ <因式> ] #
<数组下标> -> $ #
<因式> -> ( <表达式> ) #
<因式> -> <变量> #
<因式> -> <数字> #
<数字> -> digit #
<数字> -> string #
<表达式> -> <因子> <项> #
<因子> -> <因式> <因式递归> #
<因式递归> -> * <因式> <因式递归> #
<因式递归> -> / <因式> <因式递归> #
<因式递归> -> $ #
<项> -> + <因子> <项> #
<项> -> - <因子> <项> #
<项> -> $ #
<参数声明> -> <声明> <声明闭包> #
<参数声明> -> $ #
<声明> -> <修饰词闭包> <类型> <变量> <赋初值> #
<赋初值> -> = <右值> #
<赋初值> -> $ #
<右值> -> <表达式> #
<右值> -> { <多个数据> } #
<多个数据> -> <数字> <数字闭包> #
<数字闭包> -> , <数字> <数字闭包> #
<数字闭包> -> $ #
<声明闭包> -> , <声明> <声明闭包> #
<声明闭包> -> $ #
<函数块> -> <声明语句闭包> <函数块闭包> #
<声明语句闭包> -> <声明语句> <声明语句闭包> #
<声明语句闭包> -> $ #
<声明语句> -> <声明> ; #
<函数块闭包> -> <赋值函数> <函数块闭包> #
<函数块闭包> -> <for循环> <函数块闭包> #
<函数块闭包> -> <条件语句> <函数块闭包> #
<函数块闭包> -> <函数返回> <函数块闭包> #
<函数块闭包> -> $ #
<赋值函数> -> <变量> <赋值或函数调用> #
<赋值或函数调用> -> = <右值> ; #
<赋值或函数调用> -> ( <参数列表> ) ; # 
<参数列表> -> <参数> <参数闭包> #
<参数闭包> -> , <参数> <参数闭包> #
<参数闭包> -> $ #
<参数> -> <标志符> #
<参数> -> <数字> #
<参数> -> <字符串> #
<字符串> -> string #
<for循环> -> for ( <赋值函数> <逻辑表达式> ; <后缀表达式> ) { <函数块> } #
<逻辑表达式> -> <表达式> <逻辑运算符> <表达式> #
<逻辑运算符> -> < #
<逻辑运算符> -> > #
<逻辑运算符> -> == #
<逻辑运算符> -> != #
<后缀表达式> -> <变量> <后缀运算符> #
<后缀运算符> -> ++ #
<后缀运算符> -> -- #
<条件语句> -> if ( <逻辑表达式> ) { <函数块> } <否则语句> #
<否则语句> -> else { <函数块> } #
<否则语句> -> $ #
<函数返回> -> return <因式> ; #
View Code

Grammar.cpp:主程序

#include "initialize.h"
// Program entry point: builds the lookup maps and grammar tables, tokenizes
// the input file, then runs the syntax check over the resulting tokens.
int main() {
    Init();     // keyword/operator/delimiter maps plus grammar-derived tables
    Scan();     // lexical analysis of "input.txt" into tokenList / errorList
    Analyse();  // syntax analysis of the collected tokens
    getchar();  // blocks until a character is read from stdin before exiting
    return 0;
}

initialize.h:初始化

技术分享
#ifndef __INIT__
#define __INIT__
#include "WA.h"
#include "SA.h"
#include "Macro_Struct.h"
// Rebuilds keyMap from scratch: every reserved word recognised by the lexer
// is paired with its token code, in a fixed order. The last three entries
// ("describe", "type", "digit") are names of grammar terminals rather than
// C keywords.
void InitKeyMap() {
    static const struct {
        const char * text;
        int code;
    } kKeywords[] = {
        {"auto", AUTO},
        {"break", BREAK},
        {"case", CASE},
        {"char", CHAR},
        {"const", CONST},
        {"continue", CONTINUE},
        {"default", DEFAULT},
        {"do", DO},
        {"double", DOUBLE},
        {"else", ELSE},
        {"enum", ENUM},
        {"extern", EXTERN},
        {"float", FLOAT},
        {"for", FOR},
        {"goto", GOTO},
        {"if", IF},
        {"int", INT},
        {"long", LONG},
        {"register", REGISTER},
        {"return", RETURN},
        {"short", SHORT},
        {"signed", SIGNED},
        {"sizeof", SIZEOF},
        {"static", STATIC},
        {"struct", STRUCT},
        {"switch", SWITCH},
        {"typedef", TYPEDEF},
        {"union", UNION},
        {"unsigned", UNSIGNED},
        {"void", VOID},
        {"volatile", VOLATILE},
        {"while", WHILE},
        {"describe", DESCRIBE},
        {"type", TYPE},
        // "string" (STRING) is deliberately not registered as a keyword.
        {"digit", DIGIT},
    };
    const int total = sizeof(kKeywords) / sizeof(kKeywords[0]);

    keyMap.clear();
    for (int idx = 0; idx < total; idx++) {
        keyMap.push_back(make_pair(kKeywords[idx].text, kKeywords[idx].code));
    }
}
// Rebuilds operMap from scratch: each operator spelling is paired with its
// token code, in a fixed order (single-character operators first, then the
// two-character ones, with "|" last).
void InitOperMap() {
    static const struct {
        const char * text;
        int code;
    } kOperators[] = {
        {"!", NOT},
        {"&", BYTE_AND},
        {"~", COMPLEMENT},
        {"^", BYTE_XOR},
        {"*", MUL},
        {"/", DIV},
        {"%", MOD},
        {"+", ADD},
        {"-", SUB},
        {"<", LES_THAN},
        {">", GRT_THAN},
        {"=", ASG},
        {"->", ARROW},
        {"++", SELF_ADD},
        {"--", SELF_SUB},
        {"<<", LEFT_MOVE},
        {">>", RIGHT_MOVE},
        {"<=", LES_EQUAL},
        {">=", GRT_EQUAL},
        {"==", EQUAL},
        {"!=", NOT_EQUAL},
        {"&&", AND},
        {"||", OR},
        {"+=", COMPLETE_ADD},
        {"-=", COMPLETE_SUB},
        {"*=", COMPLETE_MUL},
        {"/=", COMPLETE_DIV},
        {"^=", COMPLETE_BYTE_XOR},
        {"&=", COMPLETE_BYTE_AND},
        {"~=", COMPLETE_COMPLEMENT},
        {"%=", COMPLETE_MOD},
        {"|", BYTE_OR},
    };
    const int total = sizeof(kOperators) / sizeof(kOperators[0]);

    operMap.clear();
    for (int idx = 0; idx < total; idx++) {
        operMap.push_back(make_pair(kOperators[idx].text, kOperators[idx].code));
    }
}
// Rebuilds limitMap from scratch: each delimiter spelling is paired with its
// token code.
// Fix: the SIN_QUE entry was keyed by an empty string because its quote
// character had been lost; it is keyed by a single quote again, mirroring the
// DOU_QUE entry below it.
void InitLimitMap() {
    limitMap.clear();
    limitMap.push_back(make_pair("(", LEFT_BRA));
    limitMap.push_back(make_pair(")", RIGHT_BRA));
    limitMap.push_back(make_pair("[", LEFT_INDEX));
    limitMap.push_back(make_pair("]", RIGHT_INDEX));
    limitMap.push_back(make_pair("{", L_BOUNDER));
    limitMap.push_back(make_pair("}", R_BOUNDER));
    limitMap.push_back(make_pair(".", POINTER));
    limitMap.push_back(make_pair("#", JING));
    limitMap.push_back(make_pair("_", UNDER_LINE));
    limitMap.push_back(make_pair(",", COMMA));
    limitMap.push_back(make_pair(";", SEMI));
    limitMap.push_back(make_pair("'", SIN_QUE));
    limitMap.push_back(make_pair("\"", DOU_QUE));
}
// Debug helper: prints every loaded production as "LHS -> sym sym ...".
// In each Expr row, column 0 is the left-hand symbol, column 1 is the index
// of the last right-hand symbol, and the right-hand symbols start at column 2.
void ShowExprList() {
    for (int rule = 0; rule < ExprNum; rule++) {
        const int * row = Expr[rule];
        printf("%s -> ", Signature[row[0]].first);
        int pos = 2;
        while (pos <= row[1]) {
            printf("%s ", Signature[row[pos]].first);
            pos++;
        }
        printf("\n");
    }
}
// Depth-first computation of canBeNull[x]: symbol x is marked nullable as soon
// as one of its productions has a right-hand side made only of nullable
// symbols. visited[] ensures each symbol is expanded at most once; a symbol
// that is still being expanded is seen with its current canBeNull value.
void DFS_NullPossibility(int x) {
    if (!visited[x]) {
        visited[x] = true;
        for (int rule = 0; rule < ExprNum; rule++) {
            if (Expr[rule][0] == x) {
                bool allNullable = true;
                // Every right-hand symbol is explored, even after one of them
                // turned out not to be nullable.
                for (int pos = 2; pos <= Expr[rule][1]; pos++) {
                    const int sym = Expr[rule][pos];
                    DFS_NullPossibility(sym);
                    allNullable = allNullable && canBeNull[sym];
                }
                if (allNullable) {
                    canBeNull[x] = true;
                    return;
                }
            }
        }
    }
}
// Loads the grammar from "Grammar.txt" into Signature / Expr and then computes
// canBeNull for every symbol.
//
// File format: whitespace-separated tokens; each production is
//     LHS -> sym sym ... #
// where "#" terminates the production. A token written as <...> is recorded
// as a non-terminal (Scalable); anything else is a terminal (unScalsble).
// For production r: Expr[r][0] is the LHS id, Expr[r][1] is the index of the
// last right-hand symbol, and Expr[r][2..] are the right-hand symbol ids.
//
// Fix: the character literals '<', '>' and '\0' had lost their quotes; they
// are restored, and the two identical symbol-creation branches are merged.
//
// NOTE: there are no bounds checks. The code assumes fewer than 50 tokens per
// production, tokens shorter than 50 bytes, and no more productions than Expr
// has rows.
void ReadExpr() {
    Signature.clear();
    ifstream fin("Grammar.txt");
    char str[50][50];
    int Length = 0;
    while (fin >> str[Length]) {
        if (strcmp(str[Length], "#") != 0) {
            // Still inside the current production: keep collecting tokens.
            Length++;
            continue;
        }
        for (int i = 0; i < Length; i++) {
            if (strcmp(str[i], "->") == 0) {
                // The arrow slot stores the index of the last RHS symbol.
                Expr[ExprNum][i] = Length - 1;
                continue;
            }
            int signID = FindSignature(str[i]);
            if (signID == -1) {
                // First occurrence of this symbol: register a private copy of
                // its name together with its terminal/non-terminal flag.
                int tempLen = strlen(str[i]);
                pair<char *, bool> tempPair;
                tempPair.first = new char [tempLen + 1];
                memcpy(tempPair.first, str[i], tempLen);
                tempPair.first[tempLen] = '\0';
                if (str[i][0] == '<' && str[i][tempLen - 1] == '>') {
                    tempPair.second = Scalable;
                } else {
                    tempPair.second = unScalsble;
                }
                Signature.push_back(tempPair);
                signID = Signature.size() - 1;
            }
            Expr[ExprNum][i] = signID;
        }
        Length = 0;
        ExprNum++;
    }
    fin.close();
    // "$" denotes the empty string and is the only symbol nullable by itself.
    memset(canBeNull, false, sizeof(canBeNull));
    for (int i = 0; i < Signature.size(); i++) {
        if (strcmp(Signature[i].first, "$") == 0) {
            canBeNull[i] = true;
        }
    }
    // Propagate nullability to the non-terminals.
    memset(visited, false, sizeof(visited));
    for (int i = 0; i < Signature.size(); i++) {
        if (Signature[i].second == Scalable) {
            DFS_NullPossibility(i);
        }
    }
    //ShowExprList();
}
// Depth-first computation of First[x] for non-terminal x. For each production
// of x the right-hand side is walked left to right: a terminal is added
// directly, a non-terminal contributes its own First set, and the walk stops
// at the first symbol that cannot derive the empty string.
void DFS_FirstSet(int x) {
    if (Signature[x].second == unScalsble || visited[x]) {
        return;
    }
    visited[x] = true;
    for (int rule = 0; rule < ExprNum; rule++) {
        if (Expr[rule][0] != x) {
            continue;
        }
        for (int pos = 2; pos <= Expr[rule][1]; pos++) {
            const int sym = Expr[rule][pos];
            if (Signature[sym].second != unScalsble) {
                DFS_FirstSet(sym);
                // Merge the nested non-terminal's First set into ours.
                for (int k = 0; k < Signature.size(); k++) {
                    if (First[sym][k]) {
                        First[x][k] = true;
                    }
                }
            } else {
                First[x][sym] = true;
            }
            if (!canBeNull[sym]) {
                break;
            }
        }
    }
}
// Clears First[] and visited[], then computes the First set of every
// non-terminal in Signature.
void GetFirstSet() {
    memset(visited, false, sizeof(visited));
    memset(First, false, sizeof(First));
    for (int sym = 0; sym < Signature.size(); sym++) {
        if (Signature[sym].second != unScalsble) {
            DFS_FirstSet(sym);
        }
    }
}
// Placeholder: the body is empty, so no Follow sets are computed here. It is
// still called from Init() between GetFirstSet() and GetSelectSet().
void GetFollowSet() {}
// Computes Select[i] for every production i from the First information of its
// right-hand side: symbols are scanned left to right, each contributing its
// terminal (or its First set), and scanning stops at the first symbol that
// cannot derive the empty string. Follow sets are not consulted.
// If the production's left-hand symbol is not nullable, "$" is removed from
// its Select set.
//
// Fix: the id of "$" is looked up once, and the removal is skipped when the
// grammar has no "$" symbol (FindSignature returns -1), which previously
// wrote to Select[i][-1]. The name is passed from a writable buffer because
// FindSignature takes a non-const char *.
void GetSelectSet() {
    memset(Select, false, sizeof(Select));
    char emptyName[] = "$";
    const int emptyId = FindSignature(emptyName);
    for (int i = 0; i < ExprNum; i++) {
        for (int j = 2; j <= Expr[i][1]; j++) {
            if (Signature[Expr[i][j]].second == unScalsble) {
                Select[i][Expr[i][j]] = true;
            } else {
                for (int k = 0; k < Signature.size(); k++) {
                    if (First[Expr[i][j]][k]) {
                        Select[i][k] = true;
                    }
                }
            }
            if (!canBeNull[Expr[i][j]]) {
                break;
            }
        }
        if (emptyId != -1 && !canBeNull[Expr[i][0]]) {
            Select[i][emptyId] = false;
        }
    }
}
// Debug helper: for every (symbol, lookahead) pair that has more than one
// candidate expansion, prints the candidate count followed by each candidate's
// symbol sequence. Pairs where both indices are equal are skipped.
void ShowShiftList() {
    for (int from = 0; from < Signature.size(); from++) {
        for (int on = 0; on < Signature.size(); on++) {
            const int alternatives = ShiftList[from][on][0][0];
            if (from != on && alternatives > 1) {
                printf("%3d %30s -> %30s\n", alternatives, Signature[from].first, Signature[on].first);
                for (int alt = 1; alt <= alternatives; alt++) {
                    const int * slot = ShiftList[from][on][alt];
                    printf("---------->");
                    // slot[0] is the symbol count, slot[1..] the symbols.
                    for (int pos = 1; pos <= slot[0]; pos++) {
                        printf("%s ", Signature[slot[pos]].first);
                    }
                    printf("\n");
                }
            }
        }
    }
}
// Builds the expansion table from the Select sets. For left-hand symbol A and
// lookahead symbol t:
//   ShiftList[A][t][0][0]   number of productions of A selectable on t
//   ShiftList[A][t][n][0]   number of right-hand symbols of the n-th candidate
//   ShiftList[A][t][n][1..] those right-hand symbols, in order
void GetShiftList() {
    memset(ShiftList, 0, sizeof(ShiftList));
    for (int rule = 0; rule < ExprNum; rule++) {
        const int lhs = Expr[rule][0];
        const int last = Expr[rule][1];
        for (int look = 0; look < Signature.size(); look++) {
            if (Select[rule][look]) {
                int (&cell)[10][100] = ShiftList[lhs][look];
                const int slot = ++cell[0][0];
                // Expr stores the index of the last RHS symbol; the RHS starts
                // at column 2, so the symbol count is one less than that index.
                cell[slot][0] = last - 1;
                for (int k = 2; k <= last; k++) {
                    cell[slot][k - 1] = Expr[rule][k];
                }
            }
        }
    }
    //ShowShiftList();
}
// One-time setup: empties the token and error lists, fills the lexer's lookup
// maps, then loads the grammar and derives the parser tables from it
// (First sets, the Follow stub, Select sets, expansion table) in that order.
void Init() {
    // Lexer output containers.
    tokenList.clear();
    errorList.clear();

    // Lexer lookup maps.
    InitKeyMap();
    InitOperMap();
    InitLimitMap();

    // Grammar-derived tables; each step relies on the previous one.
    ReadExpr();
    GetFirstSet();
    GetFollowSet();
    GetSelectSet();
    GetShiftList();
}
#endif
View Code

WA.h:词法分析

技术分享
#ifndef __WA__
#define __WA__
#include "stdafx.h"
#include "Macro_Struct.h"

// Keyword spelling -> token code; filled by InitKeyMap(), searched by SeekKey().
vector<pair<const char *, int>> keyMap;
// Operator spelling -> token code; filled by InitOperMap().
vector<pair<const char *, int>> operMap;
// Delimiter spelling -> token code; filled by InitLimitMap().
vector<pair<const char *, int>> limitMap;
// Tokens produced by the scanner, in source order (appended by InsertToken()).
vector<Token> tokenList;
// Lexical errors recorded by the scanner (appended by InsertError()).
vector<Error> errorList;

// Looks `word` up in keyMap and returns the token code of the first entry with
// exactly that spelling, or IDENTIFER when the word is not a registered keyword.
int SeekKey(char * word) {
    int kind = IDENTIFER;
    for (const auto & entry : keyMap) {
        if (strcmp(word, entry.first) == 0) {
            kind = entry.second;
            break;
        }
    }
    return kind;
}
void InsertToken(char * content, char * describe, int type, int line) {
    Token tempToken;
    strcpy_s(tempToken.content, content);
    strcpy_s(tempToken.describe, describe);
    tempToken.type = type;
    tempToken.line = line;
    tokenList.push_back(tempToken);
}
void InsertError(char * content, char * describe, int type, int line) {
    Error tempError;
    strcpy_s(tempError.content, content);
    strcpy_s(tempError.describe, describe);
    tempError.type = type;
    tempError.line = line;
    errorList.push_back(tempError);
    printf("Line %d:%s\n", line, describe);
}
// Validates one preprocessor line. A line is accepted when the whole of it
// matches either the `#include <name>` pattern or the `#define NAME VALUE`
// pattern below; anything else is recorded as a preprocessing error.
void preProcess(char * word, int line) {
    regex DEFINE_REGEX("#define\\s+\\w+\\s+\\w+\\s*");
    regex INCLUDE_REGEX("#include\\s*<[\\w\\.]+>\\s*");
    const bool recognised = regex_match(word, INCLUDE_REGEX) || regex_match(word, DEFINE_REGEX);
    if (!recognised) {
        InsertError(word, PRE_PROCESS_ERROR, PRE_PROCESS_ERROR_NUM, line);
    }
}

void Scan() {
    char ch;
    char array[30];//单词长度上限是30
    char * word;
    int i;
    int line = 1;//行数
    FILE * infile;
    errno_t err = fopen_s(&infile, "input.txt", "r");
    ch = fgetc(infile);
    while (ch != EOF) {
        i = 0;
        //以字母或者下划线开头,处理关键字或者标识符
        if ((ch >= A && ch <= Z) || (ch >= a && ch <= z) || ch == _) {
            while ((ch >= A && ch <= Z) || (ch >= a && ch <= z) || (ch >= 0 && ch <= 9) || ch == _) {
                array[i++] = ch;
                ch = fgetc(infile);
            }
            word = new char[i + 1];
            memcpy(word, array, i);
            word[i] = \0;
            int seekTemp = SeekKey(word);
            if (seekTemp != IDENTIFER) {
                InsertToken(word, KEY_DESC, seekTemp, line);
            } else {
                InsertToken(word, IDENTIFER_DESC, seekTemp, line);
            }
            fseek(infile, -1L, SEEK_CUR); //向后回退一位
        }
        //以数字开头,处理数字
        else if (ch >= 0 && ch <= 9) {
            int flag = 0;
            int flag2 = 0;
            //处理整数
            while (ch >= 0 && ch <= 9) {
                array[i++] = ch;
                ch = fgetc(infile);
            }
            //处理float
            if (ch == .) {
                flag2 = 1;
                array[i++] = ch;
                ch = fgetc(infile);
                if (ch >= 0 && ch <= 9) {
                    while (ch >= 0 && ch <= 9) {
                        array[i++] = ch;
                        ch = fgetc(infile);
                    }
                } else {
                    flag = 1;
                }
                //处理Double
                if (ch == E || ch == e) {
                    array[i++] = ch;
                    ch = fgetc(infile);
                    if (ch == + || ch == -) {
                        array[i++] = ch;
                        ch = fgetc(infile);
                    }
                    if (ch >= 0 && ch <= 9) {
                        array[i++] = ch;
                        ch = fgetc(infile);
                    } else {
                        flag = 2;
                    }
                }
            }
            word = new char[i + 1];
            memcpy(word, array, i);
            word[i] = \0;
            if (flag == 1) {
                InsertError(word, FLOAT_ERROR, FLOAT_ERROR_NUM, line);
            } else if (flag == 2) {
                InsertError(word, DOUBLE_ERROR, DOUBLE_ERROR_NUM, line);
            } else {
                if (flag2 == 0) {
                    InsertToken(word, CONSTANT_DESC, INT_VAL, line);
                } else {
                    InsertToken(word, CONSTANT_DESC, FLOAT_VAL, line);
                }
            }
            fseek(infile, -1L, SEEK_CUR); //向后回退一位
        }
        //以"/"开头
        else if (ch == /) {
            ch = fgetc(infile);
            //处理运算符"/="
            if (ch == =) {
                InsertToken("/=", OPE_DESC, COMPLETE_DIV, line);
            }
            //处理"/**/"型注释
            else if (ch == *) {
                ch =  fgetc(infile);
                while (1) {
                    while (ch != *) {
                        if (ch == \n) {
                            line++;
                        }
                        ch = fgetc(infile);
                        if (ch == EOF) {
                            InsertError(_NULL, NOTE_ERROR, NOTE_ERROR_NUM, line);
                            return;
                        }
                    }
                    ch = fgetc(infile);
                    if (ch == /) {
                        break;
                    }
                    if (ch == EOF) {
                        InsertError(_NULL, NOTE_ERROR, NOTE_ERROR_NUM, line);
                        return;
                    }
                }
                InsertToken(_NULL, NOTE_DESC, NOTE1, line);
            }
            //处理"//"型注释
            else if (ch == /) {
                while (ch != \n) {
                    ch = fgetc(infile);
                    if (ch == EOF) {
                        InsertToken(_NULL, NOTE_DESC, NOTE2, line);
                        return;
                    }
                }
                line++;
                InsertToken(_NULL, NOTE_DESC, NOTE2, line);
                if (ch == EOF) {
                    return;
                }
            }
            //处理除号
            else {
                InsertToken("/", OPE_DESC, DIV, line);
            }
        }
        //处理常量字符串
        else if (ch == ") {
            InsertToken("\"", CLE_OPE_DESC, DOU_QUE, line);
            ch = fgetc(infile);
            i = 0;
            while (ch != ") {
                array[i++] = ch;
                if (ch == \n) {
                    line++;
                }
                ch = fgetc(infile);
                if (ch == EOF) {
                    InsertError(_NULL, STRING_ERROR, STRING_ERROR_NUM, line);
                    return;
                }
            }
            word = new char[i + 1];
            memcpy(word, array, i);
            word[i] = \0;
            InsertToken(word, CONSTANT_DESC, STRING_VAL, line);
            InsertToken("\"", CLE_OPE_DESC, DOU_QUE, line);
        }
        //处理常量字符
        else if (ch == \‘) {
            InsertToken("\‘", CLE_OPE_DESC, SIN_QUE, line);
            ch = fgetc(infile);
            i = 0;
            while (ch != \‘) {
                array[i++] = ch;
                if (ch == \n) {
                    line++;
                }
                ch = fgetc(infile);
                if (ch == EOF) {
                    InsertError(_NULL, CHARCONST_ERROR, CHARCONST_ERROR_NUM, line);
                    return;
                }
            }
            word = new char[i + 1];
            memcpy(word, array, i);
            word[i] = \0;
            InsertToken(word, CONSTANT_DESC, CHAR_VAL, line);
            InsertToken("\‘", CLE_OPE_DESC, SIN_QUE, line);
        } else if (ch ==   || ch == \t || ch == \r || ch == \n) {
            if (ch == \n) {
                line++;
            }
        } else {
            if (ch == EOF) {
                return;
            }
            //处理头文件和宏常量(预处理)
            else if (ch == #) {
                while (ch != \n && ch != EOF) {
                    array[i++] = ch;
                    ch = fgetc(infile);
                }
                word = new char[i + 1];
                memcpy(word, array, i);
                word[i] = \0;
                preProcess(word, line);
                fseek(infile, -1L, SEEK_CUR); //向后回退一位
            }
            //处理-开头的运算符
            else if (ch == -) {
                array[i++] = ch;
                ch = fgetc(infile);
                if (ch >= 0 && ch <= 9) {
                    int flag = 0;
                    int flag2 = 0;
                    //处理整数
                    while (ch >= 0 && ch <= 9) {
                        array[i++] = ch;
                        ch = fgetc(infile);
                    }
                    //处理float
                    if (ch == .) {
                        flag2 = 1;
                        array[i++] = ch;
                        ch = fgetc(infile);
                        if (ch >= 0 && ch <= 9) {
                            while (ch >= 0 && ch <= 9) {
                                array[i++] = ch;
                                ch = fgetc(infile);
                            }
                        } else {
                            flag = 1;
                        }
                        //处理Double
                        if (ch == E || ch == e) {
                            array[i++] = ch;
                            ch = fgetc(infile);
                            if (ch == + || ch == -) {
                                array[i++] = ch;
                                ch = fgetc(infile);
                            }
                            if (ch >= 0 && ch <= 9) {
                                array[i++] = ch;
                                ch = fgetc(infile);
                            } else {
                                flag = 2;
                            }
                        }
                    }
                    word = new char[i + 1];
                    memcpy(word, array, i);
                    word[i] = \0;
                    if (flag == 1) {
                        InsertError(word, FLOAT_ERROR, FLOAT_ERROR_NUM, line);
                    } else if (flag == 2) {
                        InsertError(word, DOUBLE_ERROR, DOUBLE_ERROR_NUM, line);
                    } else {
                        if (flag2 == 0) {
                            InsertToken(word, CONSTANT_DESC, INT_VAL, line);
                        } else {
                            InsertToken(word, CONSTANT_DESC, FLOAT_VAL, line);
                        }
                    }
                    fseek(infile, -1L, SEEK_CUR); //向后回退一位
                } else if (ch == >) {
                    InsertToken("->", OPE_DESC, ARROW, line);
                } else if (ch == -) {
                    InsertToken("--", OPE_DESC, SELF_SUB, line);
                } else if (ch == =) {
                    InsertToken("--", OPE_DESC, SELF_SUB, line);
                } else {
                    InsertToken("-", OPE_DESC, SUB, line);
                    fseek(infile, -1L, SEEK_CUR);
                }
            }
            //处理+开头的运算符
            else if (ch == +) {
                ch = fgetc(infile);
                if (ch == +) {
                    InsertToken("++", OPE_DESC, SELF_ADD, line);
                } else if (ch == =) {
                    InsertToken("+=", OPE_DESC, COMPLETE_ADD, line);
                } else {
                    InsertToken("+", OPE_DESC, ADD, line);
                    fseek(infile, -1L, SEEK_CUR);
                }
            }
            //处理*开头的运算符
            else if (ch == *) {
                ch = fgetc(infile);
                if (ch == =) {
                    InsertToken("*=", OPE_DESC, COMPLETE_MUL, line);
                } else {
                    InsertToken("*", OPE_DESC, MUL, line);
                    fseek(infile, -1L, SEEK_CUR);
                }
            }
            //处理按^开头的运算符
            else if (ch == ^) {
                ch = fgetc(infile);
                if (ch == =) {
                    InsertToken("^=", OPE_DESC, COMPLETE_BYTE_XOR, line);
                } else {
                    InsertToken("^", OPE_DESC, BYTE_XOR, line);
                    fseek(infile, -1L, SEEK_CUR);
                }
            }
            //处理%开头的运算符
            else if (ch == %) {
                ch = fgetc(infile);
                if (ch == =) {
                    InsertToken("%=", OPE_DESC, COMPLETE_MOD, line);
                } else {
                    InsertToken("%", OPE_DESC, MOD, line);
                    fseek(infile, -1L, SEEK_CUR);
                }
            }
            //处理&开头的运算符
            else if (ch == &) {
                ch = fgetc(infile);
                if (ch == =) {
                    InsertToken("&=", OPE_DESC, COMPLETE_BYTE_AND, line);
                } else if (ch == &) {
                    InsertToken("&&", OPE_DESC, AND, line);
                } else {
                    InsertToken("&", OPE_DESC, BYTE_AND, line);
                    fseek(infile, -1L, SEEK_CUR);
                }
            }
            //处理~开头的运算符
            else if (ch == ~) {
                ch = fgetc(infile);
                if (ch == =) {
                    InsertToken("~=", OPE_DESC, COMPLETE_COMPLEMENT, line);
                } else {
                    InsertToken("~", OPE_DESC, COMPLEMENT, line);
                    fseek(infile, -1L, SEEK_CUR);
                }
            }
            //处理!开头的运算符
            else if (ch == !) {
                ch = fgetc(infile);
                if (ch == =) {
                    InsertToken("!=", OPE_DESC, NOT_EQUAL, line);
                } else {
                    InsertToken("!", OPE_DESC, NOT, line);
                    fseek(infile, -1L, SEEK_CUR);
                }
            }
            //处理<开头的运算符
            else if (ch == <) {
                ch = fgetc(infile);
                if (ch == <) {
                    InsertToken("<<", OPE_DESC, LEFT_MOVE, line);
                } else if (ch == =) {
                    InsertToken("<=", OPE_DESC, LES_EQUAL, line);
                } else {
                    InsertToken("<", OPE_DESC, LES_THAN, line);
                    fseek(infile, -1L, SEEK_CUR);
                }
            }
            //处理>开头的运算符
            else if (ch == >) {
                ch = fgetc(infile);
                if (ch == >) {
                    InsertToken(">>", OPE_DESC, RIGHT_MOVE, line);
                } else if (ch == =) {
                    InsertToken(">=", OPE_DESC, GRT_EQUAL, line);
                } else {
                    InsertToken(">", OPE_DESC, GRT_THAN, line);
                    fseek(infile, -1L, SEEK_CUR);
                }
            }
            //处理|开头的运算符
            else if (ch == |) {
                ch = fgetc(infile);
                if (ch == |) {
                    InsertToken("||", OPE_DESC, OR, line);
                } else {
                    InsertToken("|", OPE_DESC, BYTE_OR, line);
                    fseek(infile, -1L, SEEK_CUR);
                }
            } else if (ch == =) {
                ch = fgetc(infile);
                if (ch == =) {
                    InsertToken("==", OPE_DESC, EQUAL, line);
                } else {
                    InsertToken("=", OPE_DESC, ASG, line);
                    fseek(infile, -1L, SEEK_CUR);
                }
            } else if (ch == () {
                InsertToken("(", CLE_OPE_DESC, LEFT_BRA, line);
            } else if (ch == )) {
                InsertToken(")", CLE_OPE_DESC, RIGHT_BRA, line);
            } else if (ch == [) {
                InsertToken("[", CLE_OPE_DESC, LEFT_INDEX, line);
            } else if (ch == ]) {
                InsertToken("]", CLE_OPE_DESC, RIGHT_INDEX, line);
            } else if (ch == {) {
                InsertToken("{", CLE_OPE_DESC, L_BOUNDER, line);
            } else if (ch == }) {
                InsertToken("}", CLE_OPE_DESC, R_BOUNDER, line);
            } else if (ch == .) {
                InsertToken(".", CLE_OPE_DESC, POINTER, line);
            } else if (ch == ,) {
                InsertToken(",", CLE_OPE_DESC, COMMA, line);
            } else if (ch == ;) {
                InsertToken(";", CLE_OPE_DESC, SEMI, line);
            } else {
                char temp[2];
                temp[0] = ch;
                temp[1] = \0;
                InsertError(temp, CHAR_ERROR, CHAR_ERROR_NUM, line);
            }
        }
        ch = fgetc(infile);
    }
    fclose(infile);
}

#endif
View Code

SA.h:语法分析 

技术分享
#ifndef __SA__
#define __SA__
#include "stdafx.h"
#include "Macro_Struct.h"
#include "initialize.h"
// Symbol kinds stored in Signature[i].second: terminals cannot be expanded,
// non-terminals (written <...> in the grammar file) can.
#define unScalsble false
#define Scalable true
// Symbol table: name of each grammar symbol plus its terminal/non-terminal flag.
// A symbol's index in this vector is its id everywhere below.
vector<pair<char *, bool>> Signature;
// CodeStack: symbol ids of the input tokens, filled by Analyse() while walking
// tokenList from its last element to its first. tokenId[k] is the tokenList
// index behind CodeStack[k]. TryStack is the working symbol stack of Try().
int CodeStack[5000], TryStack[5000], tokenId[5000];
// Number of used entries in CodeStack and TryStack respectively.
int Csize = 0, Tsize = 0;
// Productions: Expr[r][0] = left-hand symbol, Expr[r][1] = index of the last
// right-hand symbol, Expr[r][2..] = right-hand symbols.
int Expr[100][100];
// Number of productions loaded into Expr.
int ExprNum = 0;
// First[A][t] is true when terminal t is in the First set of symbol A.
bool First[100][100];
// Scratch "already expanded" marks for the depth-first passes.
bool visited[100];
// Follow sets; GetFollowSet() is an empty stub, so nothing shown here fills them.
bool Follow[100][100];
// Select[r][t] is true when production r may be chosen on lookahead t.
bool Select[100][100];
// Expansion table: ShiftList[A][t][0][0] = number of candidates for expanding A
// on lookahead t; ShiftList[A][t][n][0] = symbol count of candidate n and
// ShiftList[A][t][n][1..] its symbols.
int ShiftList[100][100][10][100];
// canBeNull[s] is true when symbol s can derive the empty string ("$").
bool canBeNull[100];
// NOTE(review): not referenced by any code visible in this section - confirm use.
int ArrayFlag;
// Returns the index in Signature of the first symbol named exactly `str`, or
// -1 when no such symbol has been registered.
int FindSignature(char * str) {
    int found = -1;
    for (int idx = 0; found == -1 && idx < Signature.size(); idx++) {
        if (strcmp(str, Signature[idx].first) == 0) {
            found = idx;
        }
    }
    return found;
}
// Recursive backtracking step of the syntax check.
//
// Replaces the symbol at TryStack[top] by candidate expansion number `sn`
// (1-based) from ShiftList for the current lookahead CodeStack[ptr], then keeps
// matching or expanding until the stack has shrunk back to `top`.
//
//   top  index in TryStack of the symbol to expand
//   sn   which candidate in ShiftList[symbol][lookahead] to use
//   ptr  index in CodeStack of the next unread token; tokens are consumed by
//        decrementing it, so a value below 0 means the input is exhausted
//
// Returns the updated ptr on success, or the sentinel -10 on failure. A
// diagnostic line is printed on most failure paths, including ones the caller
// later recovers from by trying another candidate.
//
// Fix: the escaped apostrophe in the four diagnostic format strings had been
// mis-encoded as a typographic quote; it is a plain \' again. The control flow
// is unchanged.
int Try(int top, int sn, int ptr) {
    if (ptr < 0 || Tsize <= top) {
        return -10;
    }
    int Expl = TryStack[top];
    // Grow the stack by (symbol count - 1) and write the candidate's symbols so
    // that its first symbol ends up on top of the stack.
    Tsize += (ShiftList[Expl][CodeStack[ptr]][sn][0] - 1);
    for (int i = 1; i <= ShiftList[Expl][CodeStack[ptr]][sn][0]; i++) {
        TryStack[Tsize - i] = ShiftList[Expl][CodeStack[ptr]][sn][i];
    }
    while (Tsize > top) {
        if (ptr < 0) {
            // Input exhausted: the remaining symbols must all be nullable.
            if (canBeNull[TryStack[Tsize - 1]]) {
                Tsize--;
                continue;
            } else {
                printf("Line%2d:%s can\' t explain as %s \n", tokenList[tokenId[0]].line, tokenList[tokenId[0]].content, Signature[Expl].first);
                return -10;
            }
        }
        // Top of stack equals the current token: consume both.
        if (TryStack[Tsize - 1] == CodeStack[ptr]) {
            Tsize--;
            ptr--;
            continue;
        }
        // A terminal that does not match the current token cannot be expanded.
        if (Signature[TryStack[Tsize - 1]].second == unScalsble) {
            printf("Line%2d:%s can\' t explain as %s \n", tokenList[tokenId[ptr]].line, tokenList[tokenId[ptr]].content, Signature[Expl].first);
            return -10;
        }
        // No candidate for this lookahead: drop the symbol if it is nullable.
        if (ShiftList[TryStack[Tsize - 1]][CodeStack[ptr]][0][0] == 0) {
            if (canBeNull[TryStack[Tsize - 1]]) {
                Tsize--;
                continue;
            } else {
                printf("Line%2d:%s can\' t explain as %s \n", tokenList[tokenId[ptr]].line, tokenList[tokenId[ptr]].content, Signature[Expl].first);
                return -10;
            }
        }
        // Try each candidate in turn, restoring the stack top after a failure.
        bool Match = false;
        for (int i = 1; i <= ShiftList[TryStack[Tsize - 1]][CodeStack[ptr]][0][0]; i++) {
            int tempTs = Tsize;
            int tempTi = TryStack[Tsize - 1];
            int ret = Try(Tsize - 1, i, ptr);
            if (ret != -10) {
                Match = true;
                ptr = ret;
                break;
            } else {
                Tsize = tempTs;
                TryStack[Tsize - 1] = tempTi;
            }
        }
        if (!Match) {
            printf("Line%2d:%s can\' t explain as %s \n", tokenList[tokenId[ptr]].line, tokenList[tokenId[ptr]].content, Signature[Expl].first);
            return -10;
        }
    }
    return ptr;
}
void Analyse() {
    for (int i = tokenList.size() - 1; i >= 0; i--) {
        if (tokenList[i].type == AUTO ||
                tokenList[i].type == CONST ||
                tokenList[i].type == UNSIGNED ||
                tokenList[i].type == SIGNED ||
                tokenList[i].type == STATIC ||
                tokenList[i].type == VOLATILE) {
            tokenId[Csize] = i;
            CodeStack[Csize++] = FindSignature("describe");
        } else if (tokenList[i].type == INT_VAL) {
            tokenId[Csize] = i;
            CodeStack[Csize++] = FindSignature("digit");
        } else if (tokenList[i].type == CHAR ||
                   tokenList[i].type == DOUBLE ||
                   tokenList[i].type == FLOAT ||
                   tokenList[i].type == INT ||
                   tokenList[i].type == LONG ||
                   tokenList[i].type == SHORT ||
                   tokenList[i].type == VOID) {
            tokenId[Csize] = i;
            CodeStack[Csize++] = FindSignature("type");
        } else if (tokenList[i].type == STRING_VAL) {
            tokenId[Csize] = i;
            CodeStack[Csize++] = FindSignature("string");
        } else if (tokenList[i].type == DOU_QUE ||
                   tokenList[i].type == SIN_QUE ||
                   tokenList[i].type == NOTE1 ||
                   tokenList[i].type == NOTE2) {
        } else if (tokenList[i].type == IDENTIFER) {
            tokenId[Csize] = i;
            CodeStack[Csize++] = FindSignature("id");
        } else if (tokenList[i].type == FOR) {
            tokenId[Csize] = i;
            CodeStack[Csize++] = FindSignature("for");
        } else if (tokenList[i].type == IF) {
            tokenId[Csize] = i;
            CodeStack[Csize++] = FindSignature("if");
        } else if (tokenList[i].type == ELSE) {
            tokenId[Csize] = i;
            CodeStack[Csize++] = FindSignature("else");
        } else if (tokenList[i].type == RETURN) {
            tokenId[Csize] = i;
            CodeStack[Csize++] = FindSignature("return");
        } else {
            tokenId[Csize] = i;
            CodeStack[Csize++] = FindSignature(tokenList[i].content);
        }
        if (tokenList[i].type != DOU_QUE && tokenList[i].type != SIN_QUE) {
            printf("%15s  %15s  %15d\n", Signature[CodeStack[Csize - 1]].first, tokenList[i].content, tokenList[i].type);
        }
    }
    ArrayFlag = -1;
    while (Csize) {
        if (ShiftList[0][CodeStack[Csize - 1]][0][0] == 0) {
            printf("Error:Line%2d %s\n", tokenList[tokenId[Csize - 1]].line, tokenList[tokenId[Csize - 1]].content);
            break;
        }
        bool Match = false;
        for (int i = 1; i <= ShiftList[0][CodeStack[Csize - 1]][0][0]; i++) {
            Tsize = 1;
            TryStack[0] = 0;
            int ret = Try(0, i, Csize - 1);
            if (ret != -10) {
                Match = true;
                Csize = ret + 1;
                break;
            }
        }
        if (!Match) {
            printf("Error:Line%2d %s\n", tokenList[tokenId[Csize - 1]].line, tokenList[tokenId[Csize - 1]].content);
            break;
        }
    }
    if (Csize == 0) {
        printf("Successful\n");
    }
}
#endif
View Code

 Macro_Struct.h:宏定义以及结构体定义

技术分享
#ifndef __MS__
#define __MS__

// Token type codes for C keywords (values stored in Token::type).
#define AUTO 1
#define BREAK 2
#define CASE 3
#define CHAR 4
#define CONST 5
#define CONTINUE 6
#define DEFAULT 7
#define DO 8
#define DOUBLE 9
#define ELSE 10
#define ENUM 11
#define EXTERN 12
#define FLOAT 13
#define FOR 14
#define GOTO 15
#define IF 16
#define INT 17
#define LONG 18
#define REGISTER 19
#define RETURN 20
#define SHORT 21
#define SIGNED 22
#define SIZEOF 23
#define STATIC 24
#define STRUCT 25
#define SWITCH 26
#define TYPEDEF 27
#define UNION 28
#define UNSIGNED 29
#define VOID 30
#define VOLATILE 31
#define WHILE 32
#define KEY_DESC "关键字" // category label text ("keyword")

// Identifier
#define IDENTIFER 40
#define IDENTIFER_DESC "标志符" // category label text ("identifier")

// Constants (literals)
#define INT_VAL 51 // integer constant
#define CHAR_VAL 52 // character constant
#define FLOAT_VAL 53 // floating-point constant
#define STRING_VAL 54 // string constant (the earlier comment wrongly said "double-precision floating-point constant")
#define MACRO_VAL 55 // macro constant
#define CONSTANT_DESC "常量" // category label text ("constant")

// Operators; the trailing comment on each line shows the operator text.
#define NOT 61   // !
#define BYTE_AND 62 // &
#define COMPLEMENT 63 // ~
#define BYTE_XOR  64 // ^
#define MUL 65 // *
#define DIV 66// /
#define MOD 67 // %
#define ADD 68 // +
#define SUB 69 // -
#define LES_THAN 70 // <
#define GRT_THAN 71 // >
#define ASG 72 // =
#define ARROW 73 // ->
#define SELF_ADD 74 // ++
#define SELF_SUB 75 // --
#define LEFT_MOVE 76 // <<
#define RIGHT_MOVE 77 // >>
#define LES_EQUAL 78 // <=
#define GRT_EQUAL 79 // >=
#define EQUAL 80 // ==
#define NOT_EQUAL 81 // !=
#define AND 82 // &&
#define OR 83 // ||
#define COMPLETE_ADD 84 // +=
#define COMPLETE_SUB 85 // -=
#define COMPLETE_MUL 86 // *=
#define COMPLETE_DIV 87 // /=
#define COMPLETE_BYTE_XOR 88 // ^=
#define COMPLETE_BYTE_AND 89 // &=
#define COMPLETE_COMPLEMENT 90 // ~=  NOTE(review): "~=" is not a C operator - confirm whether the scanner ever emits this code
#define COMPLETE_MOD 91 // %=
#define BYTE_OR 92 // |
#define OPE_DESC "运算符" // category label text ("operator")

// Delimiters; the trailing comment on each line shows the delimiter text.
#define LEFT_BRA 100 // (
#define RIGHT_BRA 101 // )
#define LEFT_INDEX 102 // [
#define RIGHT_INDEX 103 // ]
#define L_BOUNDER 104 //  {
#define R_BOUNDER 105 // }
#define POINTER 106 // .
#define JING 107 // #
#define UNDER_LINE 108 // _
#define COMMA 109 // ,
#define SEMI 110 // ;
#define SIN_QUE 111 // ' (single quote; the character was missing from the original comment)
#define DOU_QUE 112 // "

#define CLE_OPE_DESC "限界符" // category label text ("delimiter")

// Comment tokens
#define NOTE1 120 // "/* */" block comment
#define NOTE2 121 // "//" line comment
#define NOTE_DESC "注释" // category label text ("comment")


// Header-file token
#define HEADER 130 // header file
#define HEADER_DESC "头文件" // category label text ("header file")

// Error kinds: each kind is a message-text macro paired with a numeric
// code macro (*_NUM). English glosses of the message texts are given in the
// trailing comments; the string literals themselves are left untouched.
#define FLOAT_ERROR "float表示错误" // malformed float
#define FLOAT_ERROR_NUM 1
#define DOUBLE_ERROR "double表示错误" // malformed double
#define DOUBLE_ERROR_NUM 2
#define NOTE_ERROR "注释没有结束符" // unterminated comment
#define NOTE_ERROR_NUM 3
#define STRING_ERROR "字符串常量没有结束符" // unterminated string constant
#define STRING_ERROR_NUM 4
#define CHARCONST_ERROR "字符常量没有结束符" // unterminated character constant
#define CHARCONST_ERROR_NUM 5
#define CHAR_ERROR "非法字符" // illegal character
#define CHAR_ERROR_NUM 6
#define LEFT_BRA_ERROR "‘(‘没有对应项" // unmatched (
#define LEFT_BRA_ERROR_NUM 7
#define RIGHT_BRA_ERROR "‘)‘没有对应项" // unmatched )
#define RIGHT_BRA_ERROR_NUM 8
#define LEFT_INDEX_ERROR "‘[‘没有对应项" // unmatched [
#define LEFT_INDEX_ERROR_NUM 9
#define RIGHT_INDEX_ERROR "‘]‘没有对应项" // unmatched ]
#define RIGHT_INDEX_ERROR_NUM 10
#define L_BOUNDER_ERROR "‘{‘没有对应项" // unmatched {
#define L_BOUNDER_ERROR_NUM 11
#define R_BOUNDER_ERROR "‘}‘没有对应项" // unmatched }
#define R_BOUNDER_ERROR_NUM 12
#define PRE_PROCESS_ERROR "预处理错误" // preprocessing error: bad header include or macro definition
#define PRE_PROCESS_ERROR_NUM  13

#define _NULL "无" // placeholder text ("none")

// Codes named after the grammar terminals describe / type / string / digit.
// NOTE(review): these are not referenced in the visible part of the parser,
// which looks terminals up by name via FindSignature - confirm they are used.
#define DESCRIBE 4000
#define TYPE 4001
#define STRING 4002
#define DIGIT 4003

// One lexical token of the program being checked.
struct Token
{
    char content[30];// token text
    char describe[30];// description text
    int type;// token type code (compared against the macros above)
    int line;// source line the token is on
};

// One recorded error.
struct Error {
    char content[30];// offending text
    char describe[30];// error description
    int type;// error code (presumably one of the *_ERROR_NUM values - confirm)
    int line;// source line the error is on
};

#endif
View Code

stdafx.h:头文件包含

#ifndef __STDAFX__
#define __STDAFX__

#include <iostream>
#include <fstream>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <vector>
#include <iomanip>
#include <stack>
#include <regex>
using namespace std;

#endif

input.txt:要检查的程序

一个简单的C语言语法检查器的实现

标签:

原文地址:http://www.cnblogs.com/dramstadt/p/5865624.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!