最近对解释型程序(类似python或者是linux里的bc计算器)非常感兴趣,就开始学习一下编译原理。今天自己实现了TINY语言的词法扫描程序。大部分参考《编译原理及实践》一书。但是我做了一些小小的改进。
先说一下TINY语言:
1、注释:放在一对大括号内。书上的注释不能嵌套,我做了一点改进,允许嵌套。
2、关键字:read write if end repeat until else
3、类型:只支持整型和布尔型。
4、计算:+ - * / ( ) < = :=,其中:=为赋值运算,=为判断。没有〈和<= >=
一个示例的TINY语言程序:
test.tine: (选自《编译原理及实践》)
{ Sample program
in TINY language -
computes factorial
}
read x; { input an integer }
if 0 < x then { don't compute if x <= 0 }
fact := 1;
repeat
fact := fact * x;
x := x - 1;
until x = 0;
write fact { output factorial of x }
end
#ifndef GLOBALS_H
#define GLOBALS_H
#include <stdio.h>
typedef enum
{
ENDFILE, ERROR,
IF, THEN, ELSE, END, REPEAT, UNTIL, READ, WRITE,
ID, NUM,
ASSIGN, EQ, LT, PLUS, MINUS, TIMES, OVER, LPAREN, RPAREN, SEMI
} TokenType;
extern lineno;
/* The max size of identifier of reserved word */
#define MAXTOKENLEN 50
#endif
tiny.l
%{
#include <stdio.h>
#include <string.h>
#include "globals.h"
#include "util.h"
char tokenString[MAXTOKENLEN + 1];
%}
digit [0-9]
number {digit}+
letter [a-zA-Z]
identifier {letter}[a-zA-Z0-9]*
newline \n
whitespace [ \t]
%%
"if" {return IF;}
"then" {return THEN;}
"else" {return ELSE;}
"end" {return END;}
"repeat" {return REPEAT;}
"until" {return UNTIL;}
"read" {return READ;}
"write" {return WRITE;}
":=" {return ASSIGN;}
"=" {return EQ;}
"<" {return LT;}
"+" {return PLUS;}
"-" {return MINUS;}
"*" {return TIMES;}
"/" {return OVER;}
"(" {return LPAREN;}
")" {return RPAREN;}
";" {return SEMI;}
{number} {return NUM;}
{identifier} <span style="white-space:pre"> </span>{return ID;}
{newline} {lineno++;}
{whitespace} <span style="white-space:pre"> </span>{ /* Do nothing */ }
"{" { char c;
int count = 1;
do
{
c = input();
if (c == EOF) break;
else if (c == '\n') lineno++;
else if (c == '{') count++;
else if (c == '}') count--;
} while (count != 0);
}
. {return ERROR;}
%%
TokenType getToken(void)
{
TokenType currentToken;
currentToken = yylex();
strncpy(tokenString, yytext, MAXTOKENLEN);
printf("%d: ", lineno);
printToken(currentToken, tokenString);
return currentToken;
}
util.h:
#ifndef UTIL_H #define UTIL_H #include "globals.h" void printToken(TokenType token, char* tokenString); TokenType getToken(void); #endif
#include "util.h"
#include <stdio.h>
#include "globals.h"
void printToken(TokenType token, char* tokenString)
{
switch(token)
{
case IF:
case THEN:
case ELSE:
case END:
case REPEAT:
case UNTIL:
case READ:
case WRITE:
printf("\treversed word: %s\n", tokenString);
break;
case ID:
printf("\tidentifier: %s\n", tokenString);
break;
case NUM:
printf("\tnumber: %s\n", tokenString);
break;
case ASSIGN:
case EQ:
case LT:
case PLUS:
case MINUS:
case TIMES:
case OVER:
case LPAREN:
case RPAREN:
case SEMI:
printf("\toperator: %s\n", tokenString);
}
}
scanner.exe: main.o lex.yy.o util.o gcc main.o lex.yy.o util.o -o scanner.exe -lfl main.o: main.c globals.h util.h gcc main.c -c util.o: util.c util.h globals.h gcc util.c -c lex.yy.o: tiny.l flex tiny.l gcc lex.yy.c -c
由于使用的是默认的输入,所以这个程序直接支持从键盘输入,运行效果如下:
当然,也可以使用重定向操作,使用效果如下:
原文地址:http://blog.csdn.net/pdcxs007/article/details/39999487