您的位置:首页 > 其它

词法分析器设计

2016-02-25 07:50 295 查看
词法分析是编制一个读单词的程序的过程:从输入的源程序中识别出各个具有独立意义的单词,即基本保留字、标识符、常数、运算符、分隔符五大类。程序语言的单词符号一般分为五种:关键字(保留字/基本字):if、while、begin…;标识符:常量名、变量名…;常数:34、56.78、true、'a'…;运算符:+、-、*、/、<、and、or…;界限符:, ; ( ) { } /* …。

方法:

词法分析器的设计方法有如下四个步骤:

1.写出该语言的词法规则。

2.把词法规则转换为相应的状态转换图。

3.把各转换图的初态连在一起,构成识别该语言的自动机。

4.设计扫描器;把扫描器作为语法分析的一个过程,当语法分析需要一个单词时,就调用扫描器。扫描器从初态出发,当识别一个单词后便进入终态,送出二元式。

针对该程序设计的DFA 图大致如下:




核心代码(这段是分析代码。其他的定义代码,识别字母代码,识别数字代码,等等就不一一列举了,源程序中有):



/**
 * Opens the source file and runs the scanner over it until end of file.
 *
 * <p>The reader is stored in {@code sourceFile} for use by the character-level
 * routines and is always closed when scanning finishes, whether normally or
 * because of an exception.
 *
 * @param originalFile path of the source file to scan
 * @throws Exception if the file cannot be opened or read, or if scanning fails
 */
private void scanning(String originalFile) throws Exception {
    BufferedReader reader = new BufferedReader(new FileReader(originalFile));
    this.sourceFile = reader;
    try {
        this.initial();
        // getToken() sets isEOF (via getNextChar) once the input is exhausted.
        while (!isEOF) {
            getToken();
        }
    } finally {
        // Release the file handle even when scanning aborts with an exception.
        reader.close();
    }
    System.out.println("========================> end scanning ...");
}

/**
 * Returns the next character of the input, reading (and echoing) a new line
 * from {@code sourceFile} whenever the current line buffer is exhausted.
 *
 * <p>{@code BufferedReader.readLine()} strips the line terminator, so a
 * {@code '\n'} is appended to every buffered line. This keeps tokens from
 * running together across line boundaries, lets the scanner see the end of a
 * line comment, and makes empty lines safe to read.
 *
 * @return the next character, or {@code '\0'} once end of file is reached
 *         (in which case {@code isEOF} is set to {@code true})
 * @throws Exception if reading from the source file fails
 */
private char getNextChar() throws Exception {
    // Fast path: characters are still available in the current line buffer.
    if (charPos < bufSize) {
        return lineBuf[charPos++];
    }

    eachLine = sourceFile.readLine();
    if (eachLine == null) {
        isEOF = true;
        return '\0';
    }

    lineNum++;
    System.out.println(lineNum + ": " + eachLine);
    // Restore the terminator that readLine() removed; this also guarantees the
    // buffer holds at least one character, so an empty line cannot overrun it.
    lineBuf = (eachLine + "\n").toCharArray();
    bufSize = lineBuf.length;
    charPos = 0;
    return lineBuf[charPos++];
}

/**
 * Pushes back previously read characters by moving the read position
 * {@code step} places to the left. Does nothing once end of file has been
 * reached.
 *
 * @param step number of characters to push back
 */
private void unGetNextChar(int step) {
    if (isEOF) {
        return;
    }
    charPos = charPos - step;
}

/**
* 获取一个Token
* @return
* @throws Exception
*/
private String getToken() throws Exception {
String tokenStr = "";
String currentToken = "";
int currentState = Start;
boolean isSave;

// 不同时为EOF和Done状态
while(currentState != Done && !isEOF) {
char c = getNextChar();
isSave = true;

switch(currentState) {
case Start:
if(isDigit(c)) {
currentState = Num;
} else if(isLetter(c) || c == '.') { //点号是为了处理头文件iostream.h的格式
currentState = ID;
} else if(c == ' ' || c == '\t' || c == '\n') {
isSave = false;
} else if(c == '!') {
currentState = NE;
} else if(c == '=') {
currentState = EQ;
} else if(c == '<') {
currentState = NM;
} else if(c == '>') {
currentState = NL;
} else if(c == '/') {
currentState = Coms;
isSave = false;
} else if(c == '"') {
currentState = Str;
} else {
currentState = Done;
//                      if(isSingle(c)) {
//                          currentToken = "" + c;
//                          currentState = Done;
//                          isSave = false;
//                      }
}
break;
case Num:
if(!isDigit(c)) {
currentState = Done;
unGetNextChar(1);
isSave = false;
}
break;
case ID:
if(!isLetter(c) && !isDigit(c)) {
currentState = Done;
unGetNextChar(1);
isSave = false;
}
break;
case NE:
if(c != '=') {
currentState = Special;
unGetNextChar(2);
isSave = false;
} else {
currentState = Done;
}
break;
case NM:
if(c != '=' && c != '<') {
currentState = Special;
unGetNextChar(2);
isSave = false;
} else {
currentState = Done;
}
break;
case NL:
if(c != '=' && c != '>') {
currentState = Special;
unGetNextChar(2);
isSave = false;
} else {
currentState = Done;
}
break;
case EQ:
if(c != '=') {
currentState = Special;
unGetNextChar(2);
isSave = false;
} else {
currentState = Done;
}
break;
case Str:
if(c == '"') {
currentState = Done;
}
break;
case Coms:
isSave = false;
if(c == '/') {
currentState = LineCom;
} else if(c == '*') {
currentState = MulCom1;
} else {
currentState = Special;
unGetNextChar(1);
}
break;
case LineCom:
isSave = false;
if(c == '\n') {
currentState = Done;
}
break;
case MulCom2:
isSave = false;
if(c == '*') {
currentState = MulCom2;
} else if(c == '/') {
currentState = Done;
} else {
currentState = MulCom1;
}
break;
case Special:

983c
if(c == '!' || c == '=' || c == '<' || c == '>') {
//                  if(isSpecialSingle(c)) {
currentToken = "" + c;
currentState = Done;
isSave = false;
} else {
currentToken = "Error";
currentState = Done;
}
break;
default:
System.out.println(lineNum + " >> Scanner Bug : state = " + currentState);
currentState = Done;
currentToken = "Error";
break;
}
if(isSave) {
tokenStr += c;
}
if(currentState == Done) {
currentToken = tokenStr;
printToken(currentToken);
}
}
return currentToken;
}


运行结果如下:

待翻译代码:

#include"iostream.h"

main()

{

int i;

cin>>i;

i=i+1;

if(i>=3) cout<<"chenggong";

elsecout<<"shibai";

}

显示结果:



需要源码的朋友
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: