
[C++] Implementing LL(1) Syntax Analysis in C++

Compiler principles: syntax analysis

Parsing method: LL(1) predictive analysis

Files required by the parser:

1. The predictive parsing table.

2. The grammar, with left recursion eliminated and left factors extracted.

3. The source file to be analyzed. (A sketch of the table-driven LL(1) loop follows this list, then the full program.)
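Before the full listing, here is a minimal sketch of the standard table-driven LL(1) loop that the program below implements. The names used here (Production, parseLL1, table, tokens) are illustrative only and are not the identifiers in the actual code; the sketch assumes "#" marks both the bottom of the stack and the end of input, and that an empty right-hand side stands for a "null" production.

#include <map>
#include <stack>
#include <string>
#include <vector>

struct Production { std::string lhs; std::vector<std::string> rhs; };

// Table-driven LL(1) driver: table[nonterminal][terminal] holds a production
// index; a missing or negative entry means a syntax error.
bool parseLL1(const std::vector<Production>& prods,
              const std::map<std::string, std::map<std::string, int>>& table,
              const std::vector<std::string>& tokens)
{
    std::stack<std::string> stk;
    stk.push("#");
    stk.push(prods[0].lhs);                            // start symbol on top
    size_t ip = 0;
    while (!stk.empty() && ip < tokens.size()) {
        const std::string top = stk.top();
        const std::string& cur = tokens[ip];
        if (top == "#" && cur == "#") return true;     // accept
        if (top == cur) { stk.pop(); ++ip; continue; } // terminal matched
        auto row = table.find(top);
        if (row == table.end()) return false;          // terminal mismatch
        auto cell = row->second.find(cur);
        if (cell == row->second.end() || cell->second < 0) return false;
        stk.pop();                                     // expand the nonterminal
        const Production& p = prods[cell->second];
        for (auto it = p.rhs.rbegin(); it != p.rhs.rend(); ++it)
            stk.push(*it);                             // push the rhs in reverse
    }
    return false;
}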

#include <cstdlib>
#include <cstdio>
#include <cstring>
#include <cctype>
#include <string>
#include <iostream>
#include <fstream>
#include <stack>
#include <sstream>
using namespace std;
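// Portability note (an addition, not part of the original code): _stricmp,
// used below for case-insensitive keyword matching, is MSVC-specific. When
// building with g++/clang on POSIX systems, this shim maps it to the
// equivalent strcasecmp.
#if !defined(_MSC_VER)
#include <strings.h>
#define _stricmp strcasecmp
#endif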
// Grammar production: left-hand side, right-hand side, and the number of right-hand symbols
struct  type
{
string leftContent;
string rightContent;
int length;
};
// Return type of the lexical analyzer: the token text and its code in key[]
struct lexicalType
{
string strToken;
int pos;
};
stack<string> stkGrammer;      // analysis stack
type grammerlist[17];          // the 17 grammar productions
const int x = 10;              // number of nonterminals (table rows)
const int y = 13;              // number of terminals (table columns)
string terminalSymbol[y];
string nonterminalSymbol[x];
int analyzeTable[x][y];        // predictive parsing table; -1 means error
// Read the grammar productions from the grammar file
bool readGrammer(fstream* f)
{
string line;
getline(*f, line);
int pos = 0;
do
{
istringstream is(line);
is >> grammerlist[pos].leftContent>> grammerlist[pos].length;
is >> grammerlist[pos].rightContent;
while(!is.eof())
{
string temp;
is >> temp;
grammerlist[pos].rightContent.append(" " + temp);
}
cout << grammerlist[pos].leftContent << " "<< grammerlist[pos].length << " " << grammerlist[pos].rightContent << endl;
++pos;
getline(*f, line);
}while(!f->eof());
return true;
}
// Read the predictive parsing table from the analysis-table file
bool readGrammerAnalyze(fstream* f)
{
// Line 1: the terminal symbols (table columns)
string line;
getline(*f, line);
istringstream is(line);
int pos = 0;
while(!is.eof())
{
is >> terminalSymbol[pos];
pos++;
};
//for (int i = 0; i < 13; i++)
//	cout << ::terminalSymbol[i] << endl;
// Line 2: the nonterminal symbols (table rows)
getline(*f, line);
istringstream iss(line);
pos = 0;
while(!iss.eof())
{
iss >> nonterminalSymbol[pos];
pos++;
};
int i = 0;
while(!f->eof() && i < x)
{
int j = 0;
getline(*f, line);
istringstream iss(line);
for(; j < y; j++)
{
iss >> ::analyzeTable[i][j];
}
++i;
}
return true;
}
//------------------------------------------------------
const int KEYLENGTH = 33;
char enter[] = "\r\n";
const char* key[KEYLENGTH] = {
"#", "begin", "if", "then", "while", "do", "end", "", "", "",      // 0-9
"letter", "digit", "", "+", "-", "*", "/", ":", ":=", "", "<",     // 10-20
"<>", "<=", ">", ">=", "=", ";", "(", ")", "string", "[",          // 21-30
"]", ","                                                           // 31-32
};

// Return the index of symbol t in key[] (case-insensitive), or -1 if absent
int getSymbolPosOfKey(string t)
{
int pos = -1;
for (int i = 0; i < KEYLENGTH; i++)
{
if (!_stricmp(key[i], t.c_str()))
{
pos = i;
break;
}
}
return pos;
}
// Return the column index of terminal t in the parsing table, or -1
int getTerminalSymbolPos(string t)
{
int pos = -1;
for(int i = 0; i < y; i++)
{
string s = terminalSymbol[i];
if (s == t)
{
pos = i;
break;
}
}
return pos;
}
// Return the row index of nonterminal nt in the parsing table, or -1
int getNonTerminalSymbolPos(string nt)
{
int pos = -1;
for(int i = 0; i < x; i++)
{
if (nonterminalSymbol[i] == nt)
{
pos = i;
break;
}
}
return pos;
}
// Read the next character
char GetChar(ifstream& infileStream)
{
char cRet;
infileStream.get(cRet);
return cRet;
}
// Skip blanks and return the next non-blank character
char GetBC(ifstream& infileStream)
{
char cRet;
infileStream.get(cRet);
while (cRet == ' ')
infileStream.get(cRet);
return cRet;
}
// Append character c to the token string str
void Concat(char *str, char c)
{
size_t n = strlen(str);
str[n++] = c;
str[n] = '\0';
}
// Return the index of str in the keyword table, or -1 if it is not a reserved word
int Reserve(const char* str)
{
int bRet = -1;
for (int i = 0; i < KEYLENGTH; i++)
{
if (_stricmp(key[i], str) == 0)
{
bRet = i;
break;
}
}
return bRet;
}
// Push the character just read back onto the stream (retract by one)
char Retract(ifstream& infileStream)
{
infileStream.seekg(-1, ios::cur);
return '\0';
}
// Lexical analyzer: return the next token (its text and its code in key[])
lexicalType lexical(ifstream& infileStream)
{
char ch;
char strToken[1024] = "";
ch = GetChar(infileStream);
int pos = -1;
// Identifiers and reserved words
if (isalpha(ch))
{
while (isalpha(ch) || isdigit(ch) || ch == '_')
{
Concat(strToken, ch);
ch = GetChar(infileStream);
}
ch = Retract(infileStream);
if ((pos = Reserve(strToken)) != -1)
{
cout << '(' << pos << ", " << strToken << ')' << enter;
lexicalType a;
a.strToken.append(strToken, strlen(strToken));
a.pos = pos;
return a;
}
else
{
cout << '(' << 10 << ", '" << strToken << "')" << enter;
lexicalType a;
a.strToken.append(strToken, strlen(strToken));
a.pos = 10;
return a;
}
}
// Numeric constants
else if (isdigit(ch))
{
while (isdigit(ch))
{
Concat(strToken, ch);
ch = GetChar(infileStream);
}
Retract(infileStream);
cout << '(' << 11 << ", '" << strToken << "')" << enter;
lexicalType a;
a.strToken.append(strToken, strlen(strToken));
a.pos = 11;
return a;
}
// String constants delimited by single quotes
else if (ch == '\'')
{
Concat(strToken, ch);
ch = GetChar(infileStream);
while (ch != '\'')
{
Concat(strToken, ch);
ch = GetChar(infileStream);
}
if (ch != '\'')
cerr << "String is too long - more than 1024 bytes!" << endl;
else
{
Concat(strToken, ch);
cout << '(' << 29 << ", '" << strToken << "')" << enter;
lexicalType a;
a.strToken.append(strToken, strlen(strToken));
a.pos = 29;
return a;
}
}
// Unambiguous single-character symbols
else if (ch == '+')
{
cout << '(' << 13 << ", '" << '+' << "')" << enter;
lexicalType a;
a.strToken.append(1, '+');
a.pos = 13;
return a;
}
else if (ch == '-')
{
cout << '(' << 14 << ", '" << '-' << "')" << enter;
lexicalType a;
a.strToken.append(1, '-');
a.pos = 14;
return a;
}
else if (ch == '*')
{
cout << '(' << 15 << ", '" << '*' << "')" << enter;
lexicalType a;
a.strToken.append(1, '*');
a.pos = 15;
return a;
}
else if (ch == '/')
{
cout << '(' << 16 << ", '" << '/' << "')" << enter;
lexicalType a;
a.strToken.append(1, '/');
a.pos = 16;
return a;
}
else if (ch == '=')
{
cout << '(' << 25 << ", '" << '=' << "')" << enter;
lexicalType a;
a.strToken.append(1, '=');
a.pos = 25;
return a;
}
else if (ch == '[')
{
cout << '(' << 30 << ", '" << '[' << "')" << enter;
lexicalType a;
a.strToken.append(1, '[');
a.pos = 30;
return a;
}
else if (ch == ']')
{
cout << '(' << 31 << ", '" << ']' << "')" << enter;
lexicalType a;
a.strToken.append(1, ']');
a.pos = 31;
return a;
}
else if (ch == ',')
{
cout << '(' << 32 << ", '" << ',' << "')" << enter;
lexicalType a;
a.strToken.append(1, ',');
a.pos = 32;
return a;
}
else if (ch == ';')
{
cout << '(' << 26 << ", '" << ';' << "')" << enter;
lexicalType a;
a.strToken.append(1, ';');
a.pos = 26;
return a;
}
else if (ch == '(')
{
cout << '(' << 27 << ", '" << '(' << "')" << enter;
lexicalType a;
a.strToken.append(1, '(');
a.pos = 27;
return a;
}
else if (ch == ')')
{
cout << '(' << 28 << ", '" << ')' << "')" << enter;
lexicalType a;
a.strToken.append(1, ')');
a.pos = 28;
return a;
}
// Distinguish <, <> and <=
else if (ch == '<')
{
ch = GetChar(infileStream);
if (ch == '>')
{
cout << '(' << 21 << ", '" << "<>" << "')" << enter;
lexicalType a;
a.strToken = "<>";
a.pos = 21;
return a;
}
else if (ch == '=')
{
cout << '(' << 22 << ", '" << "<=" << "')" << enter;
lexicalType a;
a.strToken = "<=";
a.pos = 22;
return a;
}
else
{
cout << '(' << 20 << ", '" << '<' << "')" << enter;
Retract(infileStream);
lexicalType a;
a.strToken.append(1, '<');
a.pos = 20;
return a;

}
}
// Distinguish > and >=
else if (ch == '>')
{
ch = GetChar(infileStream);
if (ch == '=')
{
cout << '(' << 24 << ", '" << ">=" << "')" << enter;
lexicalType a;
a.strToken = ">=";
a.pos = 24;
return a;
}
else
{
cout << '(' << 23 << ", '" << '>' << "')" << enter;
Retract(infileStream);
lexicalType a;
a.strToken.append(1, '>');
a.pos = 23;
return a;
}
}
// Distinguish : and :=
else if (ch == ':')
{
ch = GetChar(infileStream);
if (ch == '=')
{
cout << '(' << 18 << ", '" << ":=" << "')" << enter;
lexicalType a;
a.strToken = ":=";
a.pos = 18;
return a;
}
else
{
cout << '(' << 17 << ", '" << ':' << "')" << enter;
Retract(infileStream);
lexicalType a;
a.strToken.append(1, ':');
a.pos = 17;
return a;
}
}
else if (ch == '#')
{
cout << '(' << 0 << ", '" << '#' << "')" << enter;
lexicalType a;
a.strToken.append(1, '#');
a.pos = 0;
return a;
}
// Anything else (blanks, line breaks, etc.) is skipped: read the next token
else
{
lexicalType a = lexical(infileStream);
return a;
}
}
int main()
{
stkGrammer.push("#");
stkGrammer.push("Proc");
string fileName;
cout << "Please input the Grammer file name (grammer.txt): ";
cin >> fileName;
fstream G_fileStream;
G_fileStream.open(fileName.c_str(), ios_base::in);
if (G_fileStream.fail())
{
cout << "open file error/n" ;
return;
}
readGrammer(&G_fileStream);
G_fileStream.close();

// Load the predictive parsing table
cout << "Loading analyze.txt..." << endl;
fileName = "analyze.txt";
fstream G_A_fileStream;
G_A_fileStream.open(fileName.c_str(), ios_base::in);
if(G_A_fileStream.fail())
{
cout << "open the analyze file error!\n";
return 1;
}
readGrammerAnalyze(&G_A_fileStream);
G_A_fileStream.close();
cout << "load anylyze.txt done/n";
// 读源文件
cout << "Please input Source file name(sentence.txt) : ";
cin >> fileName;
ifstream infileStream(fileName.c_str());
if(infileStream.fail())
{
cout << "open source file error\n";
return 1;
}
lexicalType lexicaltype;
while(!infileStream.eof())
{
lexicaltype = lexical(infileStream);
while(1)
{
int i = getSymbolPosOfKey(stkGrammer.top());
if (lexicaltype.pos == 0)
{
cout << "Okay, the sentence is good checked by LL(1) syntax" << endl;
exit(0);
}
if ( i != -1 )
{
if (i == lexicaltype.pos)
{
// The terminal matches: pop the top of the stack and read the next token
stkGrammer.pop();
break;
}
else
{
cout << "Error when analyze terminal symbol :" << lexicaltype.strToken << endl;
cout << "The top of the stack is: " << stkGrammer.top() << endl;
return;
}
}
else
{
int pos_x = getNonTerminalSymbolPos(stkGrammer.top());
if (pos_x == -1)
{
cout << "Find a undefine nonTerminalSymbol : " << stkGrammer.top() << endl;
return;
}
string sTer= key[lexicaltype.pos];
int pos_y = getTerminalSymbolPos(sTer);
if (pos_y == -1)
{
cout << "Find a undefine TerminalSymbol : " << lexicaltype.strToken << endl;
return;
}
int pos_result = analyzeTable[pos_x][pos_y];

if (pos_result == -1)
{
cout << "Error when analyze /n " << "Stack: " << stkGrammer.top() << "  Sentence: " << lexicaltype.strToken << endl;
return;
}
type syntax = grammerlist[pos_result];
// Push the right-hand side of the selected production onto stkGrammer in reverse order; a "null" production just pops the nonterminal
if (syntax.rightContent != "null"){
stkGrammer.pop();
stack<string> s;
istringstream is(syntax.rightContent);
for (int i = 0; i < syntax.length; i++)
{
string st;
is >> st;
s.push(st);
}
while(s.size() != 0)
{
stkGrammer.push(s.top());
s.pop();
}
}
else{
stkGrammer.pop();
}

}
}
}

infileStream.close();
return 0;
}


// grammer.txt

Proc 3 begin Stmt end
Stmt 2 Sent Stmt'
Stmt' 3 ; Sent Stmt'
Stmt' 1 null
Sent 1 Eval
Eval 3 letter := Expr
Expr 2 Item Expr'
Expr' 3 + Item Expr'
Expr' 3 - Item Expr'
Expr' 1 null
Item 2 Fact Item'
Item' 3 * Fact Item'
Item' 3 / Fact Item'
Item' 1 null
Fact 1 letter
Fact 3 ( Expr )
Fact 1 digit
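Each line of grammer.txt is decomposed by readGrammer() into the left-hand side, the number of right-hand symbols, and the right-hand side itself. The following small standalone sketch (not part of the program above, shown only to illustrate the file format) applies the same decomposition to one production line:

#include <iostream>
#include <sstream>
#include <string>
using namespace std;

int main()
{
    // The same decomposition readGrammer() applies to each line of grammer.txt
    istringstream is("Stmt' 3 ; Sent Stmt'");
    string lhs, sym, rhs;
    int length = 0;
    is >> lhs >> length;              // lhs == "Stmt'", length == 3
    while (is >> sym) {               // collect the right-hand-side symbols
        if (!rhs.empty()) rhs += " ";
        rhs += sym;
    }
    cout << lhs << " -> " << rhs << "  (" << length << " symbols)" << endl;
    return 0;
}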

The parsing table (analyze.txt) is built by hand: write -1 in every cell that has no production, and in every other cell write the index of the production to apply (productions are numbered from 0 in the order they appear in grammer.txt). A worked lookup is shown after the table.

// analyze.txt

begin end letter digit ( ) + - * / := ; #
Proc Stmt Stmt' Sent Eval Expr Expr' Item Item' Fact
0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
-1 -1 1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
-1 3 -1 -1 -1 -1 -1 -1 -1 -1 -1 2 -1
-1 -1 4 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
-1 -1 5 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
-1 -1 6 6 6 -1 -1 -1 -1 -1 -1 -1 -1
-1 9 -1 -1 -1 9 7 8 -1 -1 -1 9 -1
-1 -1 10 10 10 -1 -1 -1 -1 -1 -1 -1 -1
-1 13 -1 -1 -1 13 13 13 11 12 -1 13 -1
-1 -1 14 16 15 -1 -1 -1 -1 -1 -1 -1 -1
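As a worked lookup: the row for Expr' and the column for + hold 7, so production 7 (Expr' -> + Item Expr') is applied when Expr' is on top of the stack and the lookahead token is +. The snippet below is not part of the original program; it assumes grammer.txt and analyze.txt have already been loaded (for example, it could be placed right after readGrammerAnalyze() in main()) and prints that production:

int r = getNonTerminalSymbolPos("Expr'");   // row 6
int c = getTerminalSymbolPos("+");          // column 6
int p = analyzeTable[r][c];                 // p == 7
cout << grammerlist[p].leftContent << " -> "
     << grammerlist[p].rightContent << endl;    // prints: Expr' -> + Item Expr'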

// sentence.txt

begin x:=9;y:=2*x+1/3+1-1end#