码迷,mamicode.com
首页 > 其他好文 > 详细

【词法分析器 】 编译原理作业

时间:2014-10-09 22:46:21      阅读:285      评论:0      收藏:0      [点我收藏+]

标签:style   blog   color   io   os   使用   ar   for   文件   

自己写的版本:

问题:

1:考虑到由于是嵌套接收,浮点型感觉稍麻烦,因为还要判断是否有两个小数点等,故没有实现

2:对于一些特殊符号,如三元运算符,格式控制符%d等分析不到位

3:类别应该分的更细,否则用唯一的symbol(sym)标识的话无法进行后续的语法分析

4:没有用文件指针,数据输入要在控制台,不利于交互

 

#include <iostream>
#include <string.h>
#include <cstdio>
using namespace std;
// Scanner state shared between main() and function().
//   index     - read position in s
//   sub_index - write position in str
//   num       - value of the most recent numeric token (out-of-range values are reported as Error)
//   sym       - category code of the most recent token:
//               -1 error, 0 end marker, 1 keyword, 2 number, 3 identifier, 4 operator, 5 delimiter
int index,sub_index,num,sym; // see the field list above
char ch;
char s[50000+10];   // characters read from the console with cin.get(ch); input ends at '#' (newlines and spaces do not end it)
char str[15];       // text of the most recent non-numeric token (numbers are reported through num)
char keyword[27][10]={
    "auto","break","case","char","int","long","double","float","const",
    "continue","default","do","else","enum","extern","for","goto","return",
    "sizeof","static","struct","switch","typedef","union","unsigned","void","while"
};// 27 basic C keywords for now; the list is probably still incomplete


// Returns nonzero when c may start an identifier (ASCII letter or underscore).
static int is_ident_start(char c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_';
}

// Returns nonzero when c is an ASCII decimal digit.
static int is_digit_char(char c)
{
    return c >= '0' && c <= '9';
}

// Returns nonzero for the whitespace characters the scanner skips between tokens.
static int is_blank_char(char c)
{
    return c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\f';
}

// For an operator character, returns the set of characters that may follow it
// to form a two-character operator; returns NULL when op does not start one.
static const char *operator_followers(char op)
{
    switch (op) {
    case '<': return "=<";    // <=  <<
    case '>': return "=>";    // >=  >>
    case '+': return "=+";    // +=  ++
    case '-': return "=->";   // -=  --  ->
    case '&': return "=&";    // &=  &&
    case '|': return "=|";    // |=  ||
    case '=':                 // ==
    case '!':                 // !=
    case '*':                 // *=
    case '/':                 // /=
    case '^':                 // ^=
    case '%':                 // %=
        return "=";
    default:
        return NULL;
    }
}

// Scans the next token from s starting at index.
//
// On return:
//   sym  - category: -1 number out of range, 0 end marker, 1 keyword,
//          2 number, 3 identifier, 4 operator, 5 delimiter
//   num  - value of the token when sym == 2
//   str  - NUL-terminated token text for every other category
//   index points at the first character after the token.
//
// Whitespace and "//" line comments are skipped before the token.
// Identifiers longer than str can hold are truncated (the excess characters
// are still consumed). Numbers above 32767 are reported as sym == -1.
void function()
{
    const int capacity = (int)sizeof(str) - 1;   // keep room for the terminator
    const int keyword_count = (int)(sizeof(keyword) / sizeof(keyword[0]));
    const char *followers;

    memset(str, 0, sizeof(str));
    sub_index = 0;

    // Skip blanks and line comments iteratively (no recursion per comment line).
    for (;;) {
        ch = s[index++];
        if (is_blank_char(ch))
            continue;
        if (ch == '/' && s[index] == '/') {
            do {
                ch = s[index++];
            } while (ch != '\n' && ch != '#' && ch != '\0');
            // An unterminated comment must not swallow the end marker.
            if (ch != '\n')
                index--;
            continue;
        }
        break;
    }

    if (is_ident_start(ch)) {
        // Identifier or keyword.
        while (is_ident_start(ch) || is_digit_char(ch)) {
            if (sub_index < capacity)
                str[sub_index++] = ch;
            ch = s[index++];
        }
        index--;   // un-read the character that ended the identifier
        sym = 3;
        for (int i = 0; i < keyword_count; i++) {
            if (strcmp(str, keyword[i]) == 0) {
                sym = 1;
                break;
            }
        }
    }
    else if (is_digit_char(ch)) {
        // Unsigned decimal integer; stop accumulating once the limit is
        // exceeded so that num itself can never overflow.
        int out_of_range = 0;
        num = 0;
        while (is_digit_char(ch)) {
            if (!out_of_range) {
                num = num * 10 + (ch - '0');
                if (num > 32767)
                    out_of_range = 1;
            }
            ch = s[index++];
        }
        index--;
        sym = out_of_range ? -1 : 2;
    }
    else if ((followers = operator_followers(ch)) != NULL) {
        // One- or two-character operator.
        str[sub_index++] = ch;
        ch = s[index++];
        if (ch != '\0' && strchr(followers, ch) != NULL)
            str[sub_index++] = ch;
        else
            index--;
        sym = 4;
    }
    else if (ch == '#' || ch == '\0') {
        // End marker. A NUL means the buffer ran out without '#'; stay on it
        // so repeated calls keep reporting the end instead of walking on.
        if (ch == '\0')
            index--;
        sym = 0;
        str[0] = ch;
    }
    else if (strchr("();[],{}", ch) != NULL) {
        // Delimiter.
        sym = 5;
        str[0] = ch;
    }
    else {
        // Any other character is reported as a single-character operator.
        sym = 4;
        str[0] = ch;
    }
}

// Reads source text from standard input up to and including the first '#',
// then prints one line per token: "[sym,text]" for ordinary tokens,
// "[2,value]" for numbers and "Error" for out-of-range numbers.
// To read from a file instead, redirect stdin (for example with
// freopen("in.txt","r",stdin)) before the read loop.
int main()
{
    const int capacity = (int)sizeof(s);

    cout << "Please input the program code:(over with‘#‘) "<< endl;

    // Stop on '#', on end of input, or when the buffer is nearly full; two
    // bytes are kept in reserve for a forced '#' and the NUL terminator.
    index=0;
    while(index < capacity-2 && cin.get(ch)){
        s[index++]=ch;
        if(ch=='#')break;
    }
    // Guarantee the end marker the scanner relies on.
    if(index==0 || s[index-1]!='#')s[index++]='#';
    s[index]='\0';

    index=0;
    do{
        function();
        if(sym==-1)cout << "Error" <<endl;
        else if(sym==2)cout<<"["<<sym<<","<<num<<"]"<<endl;
        else cout <<"["<<sym<<","<<str<<"]"<<endl;
    }while(sym!=0);
    return 0;
}
 

 

 

看到一个别人实现的不错版本:

1 调用ctype虽然简易,但在一些类型符的判断上会出现一些小错误

2 txt文件光标不能直接放在文件末(当然通过改主函数也可以避免问题~)

3 所有标识符用二维数组处理很好,整齐简便,又有文件读写操作,健壮性强,值得学习。

 

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <malloc.h>
#include <conio.h>
// NULL is normally supplied by <stdio.h>; define it only when it is missing
// so the macro is never redefined with a different value.
#ifndef NULL
#define NULL 0
#endif
#define true 1
#define false 0
FILE *fp;   // source file being scanned; opened and closed by main
char ch;
// Token tables used by search(); each array is indexed by search's wordtype.
char *keyword[34]={"auto","break","case","char","const","continue","default","do","double",
"else","enum","extern","float","for","goto","if","int","long","register",
"return","short","signed","sizeof","static","struct","switch","typedef", "printf",
"union","unsigned","void","volatile","while","main"};// keywords (wordtype 1)
char *operatornum[6]={"+","-","*","/","++","--"};// arithmetic operators (wordtype 2)
char *comparison[8]={"<","<=","=",">",">=","<>","==","!="};// comparison operators (wordtype 3)
char *interpunction[8]={",",";",":=",".","(",")","{","}"};// separators (wordtype 4)
char *biaoshifu[6]={"%","$","^","&","_","#"};// special symbols (wordtype 5)
// Comment markers (wordtype 6): line comment plus block-comment open and close.
char *zhushifu[3]={"//","/*","*/"};
char *luoji[3]={"&&","||","!"};// logical operators (wordtype 7)
//////////////////////////////////////////////////////////////////////////////////////////
// Looks searchstr up in the token table selected by wordtype.
//
//   wordtype 1: keyword        2: operatornum   3: comparison
//            4: interpunction  5: biaoshifu     6: zhushifu     7: luoji
//
// Returns true when searchstr equals an entry of that table, false otherwise
// (including for an unknown wordtype). Each wordtype consults only its own
// table, and empty (NULL) table slots are skipped.
char search(char searchstr[],int wordtype)
{
    char **table;
    int count;
    int i;

    switch (wordtype)
    {
    case 1: table = keyword;       count = (int)(sizeof keyword / sizeof keyword[0]);             break;
    case 2: table = operatornum;   count = (int)(sizeof operatornum / sizeof operatornum[0]);     break;
    case 3: table = comparison;    count = (int)(sizeof comparison / sizeof comparison[0]);       break;
    case 4: table = interpunction; count = (int)(sizeof interpunction / sizeof interpunction[0]); break;
    case 5: table = biaoshifu;     count = (int)(sizeof biaoshifu / sizeof biaoshifu[0]);         break;
    case 6: table = zhushifu;      count = (int)(sizeof zhushifu / sizeof zhushifu[0]);           break;
    case 7: table = luoji;         count = (int)(sizeof luoji / sizeof luoji[0]);                 break;
    default:
        return(false);
    }

    for (i = 0; i < count; i++)
    {
        // A partially initialised table leaves trailing NULL slots; strcmp on
        // a null pointer is undefined, so skip them.
        if (table[i] != NULL && strcmp(table[i], searchstr) == 0)
            return(true);
    }
    return(false);
}
////////////////////////
// Scans a word that starts with ch: collects the run of alphanumeric
// characters from fp, then prints it as a keyword (code 1) or as a
// user-defined identifier (code 2). "main" is in the keyword table but is
// reported as an identifier.
//
// Words longer than the local buffer are truncated; the excess characters are
// still consumed. Returns the first character after the word, or (char)EOF at
// end of file.
char letterprocess (char ch)
{
    char letter[20];
    size_t len = 0;
    int c = (unsigned char)ch;   // <ctype.h> needs an unsigned char value or EOF

    while (c != EOF && isalnum(c) != 0)
    {
        if (len < sizeof letter - 1)
            letter[len++] = (char)c;
        c = fgetc(fp);
    }
    letter[len] = '\0';

    if (search(letter,1) && strcmp(letter,"main") != 0)
        printf("<1,关键字,%s>\n",letter);        // keyword, code 1
    else
        printf("<2,自定义标识符,%s>\n",letter);  // user-defined identifier, code 2

    return((char)c);
}
/////////////////////////
// Scans a number that starts with ch: collects the run of decimal digits from
// fp and prints it with code 3. If a letter follows the digits directly, the
// rest of the run up to the next whitespace (or end of file) is collected too
// and the whole text is reported as an illegal identifier.
//
// Text longer than the local buffer is truncated; the excess characters are
// still consumed. Returns the first character after the token, or (char)EOF
// at end of file.
char numberprocess(char ch)
{
    char num[20];
    size_t len = 0;
    int c = (unsigned char)ch;   // <ctype.h> needs an unsigned char value or EOF

    while (c != EOF && isdigit(c) != 0)
    {
        if (len < sizeof num - 1)
            num[len++] = (char)c;
        c = fgetc(fp);
    }

    if (c != EOF && isalpha(c) != 0)
    {
        // Digits followed by a letter: swallow the malformed token. The EOF
        // test stops the loop when the file ends without trailing whitespace.
        while (c != EOF && isspace(c) == 0)
        {
            if (len < sizeof num - 1)
                num[len++] = (char)c;
            c = fgetc(fp);
        }
        num[len] = '\0';
        printf("错误!非法标识符:%s\n",num);
        return((char)c);
    }

    num[len] = '\0';
    printf("<3,数字,%s>\n",num);   // number, code 3
    return((char)c);
}
/////////////////////////
// Handles a character that is neither a letter nor a digit.
//
// Whitespace is skipped: the next character of fp is returned unprocessed.
// Otherwise the run of characters that are neither whitespace nor
// alphanumeric is collected as one token, classified with search() and
// printed. The run also ends at end of file.
//
// NOTE(review): adjacent punctuation such as ");" forms a single token and is
// therefore reported as illegal; splitting it would need longest-match
// lookup against the tables.
//
// Runs longer than the local buffer are truncated; the excess characters are
// still consumed. Returns the first character after the token, or (char)EOF
// at end of file.
char otherprocess(char ch)
{
    char other[20];
    size_t len = 0;
    int c = (unsigned char)ch;   // <ctype.h> needs an unsigned char value or EOF

    if (isspace(c) != 0)
        return ((char)fgetc(fp));

    while (c != EOF && isspace(c) == 0 && isalnum(c) == 0)
    {
        if (len < sizeof other - 1)
            other[len++] = (char)c;
        c = fgetc(fp);
    }
    other[len] = '\0';

    if (search(other,2))
        printf("<4,运算符,%s>\n",other);       // arithmetic operator, code 4
    else if (search(other,3))
        printf("<4,运算符,%s>\n",other);       // comparison operator, code 4
    else if (search(other,4))
        printf("<5,分隔符号,%s>\n",other);     // separator, code 5
    else if (search(other,5))
        printf("<%s,特殊标识符号>\n",other);   // special symbol
    else if (search(other,6))
        printf("<%s,注释符号>\n",other);       // comment marker
    else if (search(other,7))
        printf("<%s,逻辑运算符号>\n",other);   // logical operator
    else
        printf("错误!非法字符:%s\n",other);

    return ((char)c);
}
///////////////////////////
// Entry point: opens the source file, dispatches each lookahead character to
// letterprocess / numberprocess / otherprocess until the file is exhausted,
// then waits for a key press.
//
// The file to scan is argv[1] when given; otherwise the original hard-coded
// path is used. Returns 0 on success and 1 when the file cannot be opened.
int main (int argc, char *argv[])
{
    const char *path = (argc > 1) ? argv[1] : "F:\\写的代码\\tt\\源程序.txt";
    int status = 0;
    char str;

    printf("**********************************词法分析器************************************\n");
    if((fp=fopen(path,"r"))==NULL)
    {
        printf("源程序无法打开!\n");
        status = 1;
    }
    else
    {
        str =fgetc(fp);
        // Test the stream state instead of comparing a char with EOF: that
        // comparison never matches where char is unsigned and mistakes byte
        // 0xFF for end of file where it is signed.
        while (!feof(fp) && !ferror(fp))
        {
            // <ctype.h> functions require an unsigned char value.
            unsigned char u = (unsigned char)str;

            if (isalpha(u)!=0)
                str=letterprocess(str);
            else if (isdigit(u)!=0)
                str=numberprocess(str);
            else
                str=otherprocess(str);
        }
        fclose(fp);
        fp=NULL;
        printf("词法分析结束,谢谢使用!\n");
        printf("点任意键退出!\n");
    }
    getch();   // keep the console window open until a key is pressed
    return status;
}

 

【词法分析器 】 编译原理作业

标签:style   blog   color   io   os   使用   ar   for   文件   

原文地址:http://www.cnblogs.com/balfish/p/4014527.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!