您的位置:首页 > 编程语言 > C语言/C++

C++ 简单词法分析器的实现

2012-03-12 12:56 489 查看
现在在学编译原理,做了个简单的词法分析器。可以自定义关键字、界符以及运算符。

效果如下:



有一点不满意的地方是必须输入两次 Ctrl+Z 才能结束输入,原本打算的是按下回车就结束输入。

还有一个想法是不再手动输入这些设置,而是读取一个配置文件,从中识别关键字、界符以及运算符。

代码如下:

#include <cctype>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
#include <sstream>
#include <string>
#include <vector>

using namespace std;

// Keywords of the language being analysed; main() appends to this list and
// isKeyWord() searches it.
vector <string> keywords;

// Operator table. main() stores each input pair as (first token, second token);
// isSymble() matches on .first and hands back .second.
vector <pair<string,string> > symble;

// Delimiter table, filled by main() as (first token, second token) pairs;
// isBound() matches on .first and hands back .second.
vector <pair<string,string> > bound;

// File-scope scratch character.
// NOTE(review): appears intended to hold the scanner's current input
// character - confirm before relying on it elsewhere.
char ch;

int isKeyWord(const string& s,int& n)

{

if(keywords.empty())

{

return 0;

}

for(vector<string>::iterator it = keywords.begin();

it != keywords.end();it++,n++)

{

if(s == *it)

{

return 1;

}

}

return -1;

}

int isSymble(const string& s, string& result)

{

for(vector<pair<string,string> >::iterator it = symble.begin();

it != symble.end();it++)

{

if(s == (*it).first)

{

result = (*it).second;

return 1;

}

}

return 0;

}

int isBound(const string& s, string& result)

{

for(vector<pair<string,string> >::iterator it = bound.begin();

it != bound.end();it++)

{

if(s == (*it).first)

{

result = (*it).second;

return 1;

}

}

return 0;

}

void analyse(FILE *fp)

{

string temp = "";

string str = "";

string result = "";

int id = 0;

while((ch = fgetc(fp)) != EOF)

{

temp = "";

str = ch;

id = 0;

if(ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r')

{

while(ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r')

{

ch = fgetc(fp);

}

fseek(fp,-1L,SEEK_CUR);

}

else if(isalpha(ch))

{

while(isalpha(ch) || isdigit(ch))

{

temp = temp + ch;

ch = fgetc(fp);

}

fseek(fp,-1L,SEEK_CUR);

if(isKeyWord(temp,id) == 1)

{

cout << temp << "\t$关键字 , " << id << endl;

}

else

{

cout << temp << "\t$标识符" << endl;

}

}

else if(isdigit(ch))

{

while(isdigit(ch))

{

temp = temp + ch;

ch = fgetc(fp);

}

fseek(fp,-1L,SEEK_CUR);

cout << temp << "\t$整型" << endl;

}

else if(isSymble(str,result))

{

cout << ch << "\t$" << "运算符" << result << endl;

/*case '+':cout << ch << "\t$ADD" << endl;break;

case '-':cout << ch << "\t$SUBTRACT" << endl;break;

case '*':cout << ch << "\t$MULTIPLY" << endl;break;

case '/' :cout << ch << "\t$DIVIDE" << endl;break;

case '=' :cout << ch << "\t$ASSIGN" << endl;break;

case '(' :cout << ch << "\t$LPAR" << endl;break;

case ')' :cout << ch << "\t$RPAR" << endl;break;

case '[' :cout << ch << "\t$LSB" << endl;break;

case ']' :cout << ch << "\t$RSB" << endl;break;

case ';' :cout << ch << "\t$SEMICOLON" << endl;break;

case '.' :cout << ch << "\t$DOT" << endl;break;

case ',' :cout << ch << "\t$COMMA" << endl;break;

case '{' :cout << ch << "\t$LBRACE" << endl; break;

case '}' :cout << ch << "\t$RBRACE" << endl;break;

default :cout << ch << "\t$UnKnow" << endl;*/

}

else if(isBound(str,result))

{

cout << ch << "\t$" << "界符" << result << endl;

}

else

{

cout << ch << "\t$未知" << endl;

}

}

}

int main()

{

string line, symbelLine,boundLine,word,filename,symbleName,symbleId,boundName,boundId;

cout << "请输入要解析的文件名" << endl;

cin >> filename;

cout << "请输入该编程语言的关键字" << endl;

while(getline(cin,line))

{

istringstream stream(line);

while(stream >> word)

{

keywords.push_back(word);

}

}

cin.clear();

cout << "请输入该编程语言的运算符,格式为 符号名称 符号" << endl;

while(getline(cin,symbelLine))

{

istringstream stream(symbelLine);

while(stream >> symbleName >> symbleId)

{

symble.push_back(pair<string,string>(symbleName,symbleId));

}

}

cin.clear();

cout << "请输入该编程语言的界符,格式为 符号名称 符号" << endl;

while(getline(cin,boundLine))

{

istringstream stream(boundLine);

while(stream >> boundName >> boundId)

{

bound.push_back(pair<string,string>(boundName,boundId));

}

}

FILE *fp;

fp = fopen(filename.c_str(),"r");

if(!fp)

{

cout << "文件操作错误,请检查后重试" << endl;

return -1;

}

analyse(fp);

fclose(fp);

cout << "按任意键退出" << endl;

getchar();

return 0;

}

实现起来很简单,就不注释了。呵呵。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: