您的位置:首页 > 其它

最初步的正则表达式引擎:将显式的连接符改为隐式的连接符

2013-06-22 12:52 225 查看
由于连接符现在改为隐式(不再在输入中显式写出),所以在某些位置需要判断当前输入指针所指之处是否缺少连接符;如果缺少,则将连接符入栈。

#include <stdio.h>
#include <malloc.h>
#include <string.h>
// The concatenation operator is implicit: it is never written in the input.
// Operator priority, highest first: parentheses, closure '*', concatenation '.', alternation '|'.
// An explicit concatenation operator may be missing after a parenthesis or after a closure,
// so those places have to consider adding one. Alternation has lower priority than
// concatenation, so handling '|' never needs to check for a missing concatenation operator.

// Operand stack: each entry is the name number of a sub-expression already printed.
int token[100];
// Number of entries currently on the operand stack (index of the next free slot).
int token_pointer;
// Operator stack holding '(', '.' and '|' characters.
char reg_operator[100];
// Number of entries currently on the operator stack (index of the next free slot).
int reg_operator_pointer;
// Counter used to number every sub-expression; incremented before each new "nameN" line.
int name_number;
// Index of the character of reg_input currently being processed.
int input_pointer;
// The regex text read from standard input (NUL-terminated).
char reg_input[20];
// Tell whether a character is one of the operators this parser knows:
// '(', ')', '*' or '|'.
// Returns 1 for an operator and 0 for every other character, including '\0'.
int is_operator(char for_in)
{
    return for_in == '(' || for_in == ')' || for_in == '*' || for_in == '|';
}
// Handle the alternation operator '|' found at reg_input[input_pointer].
//
// '|' has the lowest priority. If the operator stack is empty or has '(' on
// top, the '|' is simply pushed. Otherwise the operator on top ('.' or '|')
// is reduced first: the two topmost operands are combined into a newly
// numbered name, and the slot of the reduced operator is reused for this '|'.
// In every case the input pointer is advanced past the '|'.
//
// Malformed input such as "a||b" leaves fewer than two operands for the
// pending operator; that is reported on stderr instead of indexing the
// operand stack below its first element.
void tackle_or(void)
{
    char pending;

    // Nothing to reduce: empty stack, or an open parenthesis shields what is below.
    if (reg_operator_pointer == 0 || reg_operator[reg_operator_pointer - 1] == '(') {
        reg_operator[reg_operator_pointer++] = '|';
        input_pointer++;
        return;
    }

    pending = reg_operator[reg_operator_pointer - 1];
    if (token_pointer < 2) {
        // The pending operator never received both operands; drop it rather
        // than read and write token[-1].
        fprintf(stderr, "error: operator '%c' before position %d is missing an operand\n",
                pending, input_pointer);
    } else {
        name_number++;
        if (pending == '.') {
            // Higher-priority concatenation is emitted before the alternation.
            printf("name%d is concat of name%d and name%d\n",
                   name_number, token[token_pointer - 2], token[token_pointer - 1]);
        } else {
            // Same priority: emit the earlier alternation first.
            printf("name%d is  name%d or name%d\n",
                   name_number, token[token_pointer - 2], token[token_pointer - 1]);
        }
        token[token_pointer - 2] = name_number;
        token_pointer--;
    }

    // The current '|' takes the place of the operator that was just handled.
    reg_operator[reg_operator_pointer - 1] = '|';
    input_pointer++;
}
void tackle_cat()//处理连接符,事实上跟前面的or操作符讨论的差不多,差异就在优先级那
{
if(reg_operator_pointer!=0)//如果操作符栈不为空
{
if(reg_operator[reg_operator_pointer-1]=='(')//如果前面有括号,则直接入栈
{
reg_operator[reg_operator_pointer++]='.';
}
else//对于前面不是括号的情况下
{
if(reg_operator[reg_operator_pointer-1]=='.')//优先级相同则输出前面的那个
{
name_number++;
printf("name%d is the concat of name%d and name%d\n",name_number,token[token_pointer-2],token[token_pointer-1]);
token[token_pointer-1]=0;
token[token_pointer-2]=name_number;
token_pointer--;
}
else//否则的话,前面的优先级比当前优先级低,操作符入栈
{
reg_operator[reg_operator_pointer++]='.';
}
}
}
else//如果操作符栈为空,则入栈
{
reg_operator[reg_operator_pointer++]='.';
}
}
void tackle_parenthesis(void)//处理闭括号模式,这里有点复杂
{
if(reg_operator[reg_operator_pointer-1]=='(')//如果前面那个操作符为开括号,则匹配输出
{
name_number++;
printf("name%d is (name%d)\n",name_number,token[token_pointer-1]);
token[token_pointer-1]=name_number;
input_pointer++;
reg_operator[--reg_operator_pointer]='\0';
//这时候需要考虑后面的是否少了显示的连接符,如果判断缺少连接符,则需要加上去
if(!is_operator(reg_input[input_pointer]))
{
if(reg_input[input_pointer]!='\0')
{
tackle_cat();
}
}
else
{
if(reg_input[input_pointer]=='(')
{
tackle_cat();
}
}
}
else//如果闭括号前面还有运算符,那么根据他们的优先级输出,这个时候输入指针是不变的,注意
{
name_number++;
if(reg_operator[reg_operator_pointer-1]=='.')
{
printf("name%d is the concat of name%d and name%d\n",name_number,token[token_pointer-2],token[token_pointer-1]);
}
else
{
printf("name%d is  name%d or name%d\n",name_number,token[token_pointer-2],token[token_pointer-1]);
}
token[token_pointer-1]=0;
token[token_pointer-2]=name_number;
token_pointer--;
reg_operator_pointer--;
}
}
// Return nonzero when `next` begins a new operand, i.e. it is '(' or an
// ordinary (non-operator, non-NUL) character. After an operand, a closure or
// a closing parenthesis this means a concatenation operator is implied.
static int starts_operand(char next)
{
    return next == '(' || (next != '\0' && !is_operator(next));
}

// Read one short regex from standard input and print, step by step, how it is
// built from numbered sub-expressions ("nameN is ...").
//
// Ordinary characters become operands; '(' is pushed on the operator stack;
// ')' and '|' are delegated to tackle_parenthesis() and tackle_or(); '*' binds
// to the operand on top of the operand stack immediately. Once the input is
// exhausted, the operators still on the stack are emitted from the top down.
//
// Returns 0 normally and 1 when no input could be read.
int main(void)
{
    // Must match the declared sizes of token[] and reg_operator[].
    enum { STACK_SLOTS = 100 };
    int slot;
    char current;

    reg_operator_pointer = name_number = token_pointer = 0;
    for (slot = 0; slot < STACK_SLOTS; slot++) {
        reg_operator[slot] = '\0';
        token[slot] = 0;
    }
    input_pointer = 0;

    printf("please  type in you regex short phrase\n");
    // reg_input holds 20 bytes, so accept at most 19 characters plus the NUL.
    if (scanf("%19s", reg_input) != 1) {
        fprintf(stderr, "error: no regex was read\n");
        return 1;
    }

    while ((current = reg_input[input_pointer]) != '\0') {
        switch (current) {
        case '(':
            // An open parenthesis is always pushed as-is.
            reg_operator[reg_operator_pointer++] = '(';
            input_pointer++;
            break;
        case ')':
            tackle_parenthesis();
            break;
        case '*':
            // Closure has the highest priority after parentheses, so it is
            // emitted right away against the topmost operand.
            input_pointer++;
            if (token_pointer < 1) {
                fprintf(stderr, "error: '*' before position %d has nothing to repeat\n",
                        input_pointer);
                break;
            }
            name_number++;
            printf("name%d is multiple of name%d\n", name_number, token[token_pointer - 1]);
            token[token_pointer - 1] = name_number;
            if (starts_operand(reg_input[input_pointer])) {
                tackle_cat();
            }
            break;
        case '|':
            tackle_or();
            break;
        default:
            // Ordinary character: it becomes a new operand.
            name_number++;
            token[token_pointer++] = name_number;
            printf("name%d is %c\n", name_number, current);
            input_pointer++;
            if (starts_operand(reg_input[input_pointer])) {
                tackle_cat();
            }
            break;
        }
    }

    // Input exhausted: emit whatever operators are still pending.
    while (reg_operator_pointer >= 1) {
        char pending = reg_operator[reg_operator_pointer - 1];

        reg_operator_pointer--;
        if (pending == '(') {
            fprintf(stderr, "error: unmatched '('\n");
            continue;
        }
        if (token_pointer < 2) {
            fprintf(stderr, "error: operator '%c' is missing an operand\n", pending);
            continue;
        }
        name_number++;
        if (pending == '.') {
            printf("name%d is concat of name%d and name%d\n",
                   name_number, token[token_pointer - 2], token[token_pointer - 1]);
        } else {
            printf("name%d is name%d or name%d\n",
                   name_number, token[token_pointer - 2], token[token_pointer - 1]);
        }
        token[token_pointer - 2] = name_number;
        token_pointer--;
    }

    return 0;
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: