您的位置:首页 > 编程语言

给定正规式转化为等价的NFA代码实现

2016-12-29 20:31 344 查看

词法分析程序题

题目

给定正规式,转化为等价的NFA

算法

结构体介绍

state结构体

// A single NFA state.
struct state
{
         // Input symbols attached to this state.
         string input;
         // Successor states, intended to correspond one-to-one with `input`.
         vector<state*>* next;
};

input[i]和(*next)[i]一一对应,若next->size()为0(next是指向vector的指针)则表示到了终点

unit结构体

struct unit//正规文法单元,记录当前单元的开始状态地址和结束状态地址,最后不断壮大的单元即为所需的NFA

{

         char id;//0表示最小单元,()|*表示辅助字母,1表示曾通过()将状态绑定在一起以便更好区分(ab)*和ab*的处理

         state *begin;//开始状态地址

         state *end;//结束状态地址

};

该结构体采用正规文法的思想,使用正规文法到正规式转化的逆思想,将正规式不断单元化,最后再合并。

算法介绍

遍历正规式,对正规式每一个输入进行相应处理。

输入’(‘:构造unit单元,id设为’(‘,起始状态和终止状态都为NULL,压入unit栈

输入’|’: 构造unit单元,id设为’|‘,起始状态和终止状态都为NULL,压入unit栈

输入’)’:判断’()’内是否含有’|’,若有,则分’|’右边和左边,右边按出栈顺序依次联结成一个unit单元,左边也按出栈顺序依次联结成一个unit单元,然后构造空串,如下图

图1



若无’|’,则直接按出栈顺序依次联结’()’内单元

输入’*’:判断栈顶单元标识,若id==’1’,表示该单元内内容通过’()’进行过绑定,直接通过添加空串状态指向begin状态和begin状态指向空串,再次联结成一个新单元

若id==’0’,表示该单元内内容未通过’()’进行过绑定,直接通过添加空串状态指向end状态和end状态指向空串

输入其他字符:默认为输入字符,判断该字符是否为首字符(此时栈为空)或紧跟在’(’或’|’之后(即栈顶单元为’(’或’|’标记),若是则创建新状态并构建新单元压入栈,否则创建新状态联结进栈顶单元

输入完毕:联结栈内所有单元,输出该单元

源程序

#include<iostream>
#include<string>
#include"d_stack.h"//栈类头文件
#include<vector>
using namespace std;
// A single NFA state.
struct state
{
	// Input symbols attached to this state ('@' is used for the empty string).
	string input;
	// Successor states, intended to correspond one-to-one with `input`.
	// The element type <state*> had been lost from the listing and is restored.
	vector<state*> *next;
};
// A fragment of the NFA under construction: it records the first and last
// state of the fragment. Fragments are merged until one remains, which is
// the finished NFA.
struct unit
{
	// '0': plain fragment; '(' or '|': helper marker pushed for that
	// operator; '1': fragment that was grouped by parentheses, so that
	// (ab)* and ab* can be told apart.
	char id;
	// First state of the fragment.
	state *begin;
	// Last state of the fragment.
	state *end;
};
class NFA
{
public:
unit* REtoNFA(string RE)//regular expression to NFA
{
miniStack *s=new miniStack();
for(int i=0;iid='(';//标识为'(',压入栈
u1->begin=NULL;
u1->end=NULL;
s->push(u1);
break;
}
case')':
{
unit* u1;
unit* u2;
unit* u3;
u1=NULL;
u3=NULL;
bool exist=false;
while(s->top()->id!='(')//联结'|"右边的单元
{
if(s->top()->id=='0'&&u1==NULL||s->top()->id=='1'&&u1==NULL)//当u1未初始化时
{
u1=s->top();
s->pop();
}
else
if(s->top()->id!='|')//未遇到'|'前联结所有单元
{
unit* tempu1=s->top();
tempu1->end->next->push_back(u1->begin);
tempu1->end=u1->end;
u1=tempu1;
s->pop();
}
else//否则设置exist为true,表示'|'辅助字母存在
{
u2=s->top();
s->pop();
exist=true;
break;
}

}
while(exist&&s->top()->id!='(')//'|'存在,联结'|"左边的单元
{
if(u3==NULL)
{
u3=s->top();

b952
s->pop();
}
else
{
unit* tempu1=s->top();
tempu1->end->next->push_back(u3->begin);
tempu1->end=u3->end;
u3=tempu1;
s->pop();
}
}
s->pop();
if(u2!=NULL)//当'|'存在时,需要构造空串
{
state* s1=new state();
s1->input="@@";//s1->input="εε";
s1->next=new vector();
s1->next->push_back(u1->begin);
s1->next->push_back(u3->begin);
state* s2=new state();
s2->input="@";//s2->input="ε";
s2->next=new vector();
u1->end->next->push_back(s2);
u3->end->next->push_back(s2);
unit* u4=new unit();
u4->id='1';
u4->begin=s1;
u4->end=s2;
s->push(u4);
}
else//否则直接压入栈
{
u1->id='1';
s->push(u1);
}
break;
}
case'|':
{
unit* u1=new unit();
u1->id='|';
u1->begin=NULL;
u1->end=NULL;
s->push(u1);
break;
}
case'*':
{
unit* u1=s->top();
if(u1->id=='1')//id=='1'表示*前的单元使用过()绑定例如(ab)*,因此需要以整个括号内的内容为最小单元,并构造空串
{
state* s1=new state();
s1->input="@@";//s1->input="εε";
s1->next=new vector();
u1->end->next->push_back(s1);
s1->next->push_back(u1->begin);
state* s2=new state();
s2->input="@";//s2->input="ε";
s2->next=new vector();
s1->next->push_back(s2);
u1->begin->input+="@";//u1->begin->input+="ε";
u1->begin->next->push_back(s2);
u1->end=s2;
}
else//id=='0'表示*前的单元未使用过()绑定例如ab*,因此只需以前一个状态为最小单元,并构造空串
{
state* s1=new state();
s1->input="@@";//s1->input="εε";
s1->next=new vector();
u1->end->next->push_back(s1);
s1->next->push_back(u1->end);
state* s2=new state();
s2->input="@";//s2->input="ε";
s2->next=new vector();
s1->next->push_back(s2);
u1->end->input+="@";//u1->end->input+="ε";
u1->end->next->push_back(s2);
u1->end=s2;
}
break;
}
default:
{
if(s->empty()||s->top()->id=='('||s->top()->id=='|')//当前栈内无单元或该输入符号处在'('右边或'|'右边,需要新构造一个最小单元
{
unit* u1=new unit();
u1->id='0';
state* s1=new state();
s1->input=RE[i];
s1->next=new vector();
u1->begin=s1;
u1->end=s1;
s->push(u1);
}
else//否则新构造一个新状态并与栈顶最小单元联结
{
unit* u2=s->top();
state* s1=new state();
s1->input=RE[i];
s1->next=new vector();
u2->end->next->push_back(s1);
u2->end=s1;
}
break;
}
}
}
if(s->size()==1)//如果栈内最小单元唯一
return s->top();
else//否则联结所有最小单元
{
int n=s->size();
unit* u1=NULL;
while(n--)
{
if(u1==NULL)
{
u1=s->top();
s->pop();
}
else
{
unit* u2=s->top();
s->pop();
u2->end->next->push_back(u1->begin);
u2->end=u1->end;
u1=u2;
}
}
return u1;
}
}
void display(unit* u)//显示NFA的一条路,*无限循环(最好不测试),(a|b)模式选择b路
{
vector *showedState=new vector();
state* temp=u->begin;
while(temp->next->size()!=0)
{
cout<input[0];
temp=(*temp->next)[0];
}
cout<input[0];
cout<

#ifndef VECTOR_BASED_STACK_CLASS
#define VECTOR_BASED_STACK_CLASS

#ifdef _MSC_VER
// disable warning messages that identifier was truncated
// to 'number' characters in the debug information
#pragma warning(disable:4786)
#endif	// _MSC_VER

#include <vector>			// vector class used by object composition

#include "d_except.h"	// for underflowError exception

using namespace std;

template
class miniStack
{
public:
miniStack();
// constructor. create an empty stack

void push(const T& item);
// push (insert) item onto the stack.
// Postcondition: the stack has a new topmost element and
// the stack size increases by 1

void pop();
// remove the item from the top of the stack.
// Precondition: the stack is not empty.
// if the stack is empty, the function throws
// the underflowError exception

T& top();
// return a reference to the element on the top
// of the stack.
// Precondition: the stack is not empty.
// if the stack is empty, the function throws
// the underflowError exception
const T& top() const;
// constant version of top()

bool empty() const;
// determine whether the stack is empty

int size() const;
// return the number of elements in the stack

private:
vector stackVector;
// a vector object maintains the stack items and size
};

// the constructor has nothing to do. the default
// constructor for the vector class initializes
// stackVector to be empty
template
miniStack::miniStack()
{}

// push item on the stack by inserting it at
// the rear of the vector
template
void miniStack::push(const T& item)
{
stackVector.push_back(item);
}

// pop the stack by removing the item at
// the rear of the vector
template
void miniStack::pop()
{
// check for an empty stack
if (empty())
throw underflowError("miniStack pop(): stack empty");

// pop the stack
stackVector.pop_back();
}

// the top of the stack is at the rear of the vector
template
T& miniStack::top()
{
// check for an empty stack
if (empty())
throw underflowError("miniStack top(): stack empty");

// return the element at the rear of the vector
return stackVector.back();
}

// constant version of top()
template
const T& miniStack::top() const
{
// check for an empty stack
if (empty())
throw underflowError("miniStack top(): stack empty");

// return the element at the rear of the vector
return stackVector.back();
}

template
bool miniStack::empty() const
{
return stackVector.size() == 0;
}

template
int miniStack::size() const
{
return stackVector.size();
}

#endif	// VECTOR_BASED_STACK_CLASS

#ifndef EXCEPTION_CLASSES
#define EXCEPTION_CLASSES

#include <strstream>
#include <string>

using namespace std;

// Root of the exception hierarchy: stores a message that what() returns.
class baseException
{
	public:
		// Keep str as the message; an empty str is replaced by the text
		// "Unspecified exception".
		baseException(const string& str = ""):
			msgString(str.empty() ? string("Unspecified exception") : str)
		{
		}

		// Return a copy of the stored message.
		string what() const
		{
			return msgString;
		}

	// protected so that derived classes can build their own message text
	// and assign it to msgString directly.
	protected:
		string msgString;
};

// Reports a failed memory allocation (new() returning NULL).
class memoryAllocationError : public baseException
{
	public:
		// msg is handed unchanged to the baseException constructor.
		memoryAllocationError(const string& msg = "")
			: baseException(msg)
		{
		}
};

// Reports a function argument that lies outside its proper range.
class rangeError : public baseException
{
	public:
		// msg is handed unchanged to the baseException constructor.
		rangeError(const string& msg = "")
			: baseException(msg)
		{
		}
};

// Reports an index that is out of range. The message has the form
// "<msg>  index <i>  size = <size>".
class indexRangeError: public baseException
{
	public:
		// msg:  caller-supplied description
		// i:    the offending index
		// size: the size the index was checked against
		indexRangeError(const string& msg, int i, int size):
			baseException()
		{
			// A dynamically growing stream is used instead of a fixed
			// 80-byte buffer: with a long msg the fixed buffer filled up,
			// the terminating `ends` was dropped, and the unterminated array
			// was then read as a C string.
			ostrstream indexErr;

			indexErr << msg << "  index " << i << "  size = " << size << ends;
			// indexRangeError can modify msgString, since it is in
			// the protected section of baseException
			msgString = indexErr.str();
			// str() froze the buffer; unfreeze it so the stream's
			// destructor releases the memory
			indexErr.freeze(false);
		}
};

// Reports an attempt to erase from an empty container.
class underflowError : public baseException
{
	public:
		// msg is handed unchanged to the baseException constructor.
		underflowError(const string& msg = "")
			: baseException(msg)
		{
		}
};

// Reports an attempt to insert into a full container.
class overflowError : public baseException
{
	public:
		// msg is handed unchanged to the baseException constructor.
		overflowError(const string& msg = "")
			: baseException(msg)
		{
		}
};

// Reports an error found while evaluating an expression.
class expressionError : public baseException
{
	public:
		// msg is handed unchanged to the baseException constructor.
		expressionError(const string& msg = "")
			: baseException(msg)
		{
		}
};

// Reports a bad object reference.
class referenceError : public baseException
{
	public:
		// msg is handed unchanged to the baseException constructor.
		referenceError(const string& msg = "")
			: baseException(msg)
		{
		}
};

// Reports use of a feature that is not implemented.
class notImplementedError : public baseException
{
	public:
		// msg is handed unchanged to the baseException constructor.
		notImplementedError(const string& msg = "")
			: baseException(msg)
		{
		}
};

// Reports a date error. The message has the form "<first> <v> <last>".
class dateError: public baseException
{
	public:
		// first: text placed before the value
		// v:     the integer value being reported
		// last:  text placed after the value
		dateError(const string& first, int v, const string& last):
			baseException()
		{
			// A dynamically growing stream is used instead of a fixed
			// 80-byte buffer: with long text the fixed buffer filled up,
			// the terminating `ends` was dropped, and the unterminated array
			// was then read as a C string.
			ostrstream dateErr;

			dateErr << first << ' ' << v << ' ' << last << ends;
			// dateError can modify msgString, since it is in
			// the protected section of baseException
			msgString = dateErr.str();
			// str() froze the buffer; unfreeze it so the stream's
			// destructor releases the memory
			dateErr.freeze(false);
		}
};

// Reports an error in the graph class.
class graphError : public baseException
{
	public:
		// msg is handed unchanged to the baseException constructor.
		graphError(const string& msg = "")
			: baseException(msg)
		{
		}
};

// Reports a file that could not be opened. The message has the form
// Cannot open "<fname>".
class fileOpenError: public baseException
{
	public:
		// fname: name of the file that failed to open
		fileOpenError(const string& fname):
			baseException()
		{
			// A dynamically growing stream is used instead of a fixed
			// 80-byte buffer: a long path filled the fixed buffer, the
			// terminating `ends` was dropped, and the unterminated array
			// was then read as a C string.
			ostrstream fileErr;

			fileErr << "Cannot open \"" << fname << "\"" << ends;
			// fileOpenError can modify msgString, since it is in
			// the protected section of baseException
			msgString = fileErr.str();
			// str() froze the buffer; unfreeze it so the stream's
			// destructor releases the memory
			fileErr.freeze(false);
		}
};

// Reports a file-related error.
// NOTE(review): presumably for general file handling; no use is visible here.
class fileError : public baseException
{
	public:
		// msg is handed unchanged to the baseException constructor.
		fileError(const string& msg = "")
			: baseException(msg)
		{
		}
};

#endif	// EXCEPTION_CLASSES

#include"NFA.h"
using namespace std;
// Demo driver: builds the NFA for the regular expression 1(0|1(a|b)) and
// prints one path through it. Standard C++ requires main to return int.
int main()
{
	NFA test;
	unit* result=test.REtoNFA("1(0|1(a|b))");
	test.display(result);
	return 0;
}


测试结果

输入正则式:1(0|1(a|b))

输出一条路径:@表示空串,’|’运算在display()函数中默认选右边,’*’运算将导致无限循环



添加’*’测试:

输入正则式: 1(0|1(a|b))*

输出一条路径:



参考资料

编译原理(第2版)
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: