您的位置:首页 > 其它

编译原理-词法分析器

2016-05-31 22:33 357 查看
编译原理的词法分析器是我们编译原理课程的实验内容,语法分析器和语义分析等做完了再贴出来。

程序用 C++ 编写,比较粗糙,报错部分还需要改善。欢迎大家批评指正,有疑问的请留言,大家一起进步!

头文件 morpholopy.h

// State codes for the hand-written scanners in wordAnalyze.cpp.
// The scanner loops run while the state is greater than zero, so the two
// non-positive values below (DONE and ERROR) are the terminating states.
#ifndef morpholopy_h
#define morpholopy_h

//NUM part
// Terminal state: the token was recognized successfully.
#define STATE_DONE 0
// Terminal state: the input does not form a valid token.
#define STATE_ERROR -1
// Initial state of both the number scanner and the identifier scanner.
#define STATE_BEGIN 12
// Number scanner: reading the integer part.
#define STATE_INTEGER 13
// Number scanner: a '.' was just read; a digit must follow.
#define STATE_DECIMAL_BEGIN 14
// Number scanner: reading the digits after the decimal point.
#define STATE_DECIMAL 15
// Number scanner: an 'E' was just read; a sign or a digit must follow.
#define STATE_E 16
// Number scanner: the exponent sign was just read; a digit must follow.
#define STATE_SYMBOL 17
// Number scanner: reading the exponent digits.
#define STATE_POWER 18
// Identifier scanner: reading the characters after the first one.
#define STATE_CHECK 19

#endif /* morpholopy_h */


wordAnalyze.cpp

#pragma warning(disable : 4996)
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <fstream>
#include <iostream>
#include <iterator>
#include <string>

#include "morpholopy.h"
using namespace std;
// Number of tokens stored so far; every scanner writes its token at index
// wcount of the result array and then increments it.
static int wcount = 0;

// One token produced by the lexer: a category name and the matched text,
// both stored as NUL-terminated C strings in fixed 50-byte buffers.
struct Word {
    char type[50];   // token category, e.g. "id", "key", "num", "op", "relop", "limit"
    char value[50];  // token text
};

// Forward declarations of the scanners. Each scanner receives a pointer to the
// current input character and the token array, appends its token(s) at index
// wcount, and returns the position at which scanning should continue.

// Relational operators: "<", "<=", "<>", ">", ">=", "==". Returns the input
// pointer unchanged (and stores nothing) for a single '='.
char* checkRelops(char* ch, Word result[]);
// Identifiers and keywords.
char* checkID(char* ch, Word result[]);
// Numeric literals with optional fraction and 'E' exponent.
char* checkNum(char* p, Word words[]);
// Arithmetic / logical operators.
char* checkOP(char* ch, Word word[]);
// A single '=' (assignment).
char* checkEqual(char* ch, Word result[]);
// Stores the text of a one- or two-character operator into word[wcount].value.
void setResult_OP(Word word[], char op1, char op2);
// Delimiters and double-quoted string literals.
char* checkLimit(char* ch, Word result[]);
// Block (/* ... */) and line (// ...) comments.
char* checkAnnotate(char* ch, Word word[]);
// Prints every stored token as "<type,value>".
void display(Word word[]);

// Keyword table used to classify identifiers: 32 C keywords, each stored in a
// fixed 10-byte row (up to 9 characters plus the terminating NUL).

char key[][10] = { "auto", "double", "int", "struct", "break", "else", "long", "switch", "case", "enum", "register", "typedef", "char", "extern", "return", "union", "const", "float", "short", "unsigned", "continue", "for", "signed", "void", "default", "goto",
"sizeof", "volatile", "do", "if", "while", "static" };

int main(int argc, const char * argv[]) {

char word[1024];
Word result[1024];

ifstream ifs;
ifs.open("text.txt");
if (!ifs.is_open())
{
cout << "Error opening file";
exit(1);
}
while (!ifs.eof())
{
ifs.read(word, 256);
int readCount=ifs.gcount();
word[readCount] = '\0';
char* start = &word[0];
char* end = start;
while (*(start) != '\0') {
if (*start == ' ')
{
end = end + 1;
start = end;
}
else if ((*start == '/' && *(start + 1) == '*') || (*start == '/' && *(start + 1) == '/'))
{
end = checkAnnotate(start, result);
start = end;
}
else if (*start == '_' || isalpha(*start))
{
end = checkID(start, result);
start = end;
}
else if (isdigit(*start))
{
end = checkNum(start, result);
start = end;
}
else if (*start == '<' || *start == '>' || *start == '=')
{
end = checkRelops(start, result);
if (end == start) {
end = checkEqual(start, result);
}
start = end;
}
else if (*start == '+' || *start == '-' || *start == '*' || *start == '/' || *start == '%' || *start == '&' || *start == '|' || *start == '^' || *start == '~' || *start == '.' || *start == ':' || *start == '?')
{
end = checkOP(start, result);
start = end;
}
else if (*start == '{' || *start == '}' || *start == '<' || *start == '>' || *start == '[' || *start == ']' || *start == '(' || *start == ')' || *start == '@' || *start == '#' || *start == ',' || *start == ';' || *start == '"')
{
end = checkLimit(start, result);
start = end;
}
else
{
end = end + 1;
start = end;
}
}
}
display(result);
/*for (int i = 0; i < wcount; i++) {
cout << "<" << result[i].type << "," << result[i].value << ">" << endl;
}*/
int a = 0;
cin >> a;
cout << a << endl;
return 0;
}

char* checkLimit(char* ch, Word result[])
{
char* temp = ch;
char* afterFirstRef;
char* beforeLastRef;
if (*ch == '"'){
afterFirstRef = ch;
ch++;
int forword=9;
while (forword!=0){
if (*ch == '"'){
beforeLastRef = ch;
forword = 0;
//保存第一个引号
char* write_ch = &result[wcount].value[0];
strcpy(&result[wcount].type[0], "limit");
*write_ch = *ch;
write_ch++;
*write_ch = '\0';
wcount++;

//保存中间的字符串常量
//char* write_ch2 = &result[wcount].value[0];
strcpy(&result[wcount].type[0], "const-ref");
//*write_ch2 = *ch;

afterFirstRef++;
int refSize = beforeLastRef - afterFirstRef;
for (int i = 0; i < refSize ; i++){
result[wcount].value[i] = *afterFirstRef;
afterFirstRef++;
}
result[wcount].value[refSize] = '\0';
//write_ch2++;
//*write_ch2 = '\0';
wcount++;

//保存第二个引号
char* write_ch3 = &result[wcount].value[0];
strcpy(&result[wcount].type[0], "limit");
*write_ch3 = *ch;
write_ch3++;
*write_ch3 = '\0';
wcount++;

ch++;

}
else{
if (*ch == '\0')//把“\0”当作结束符
return temp;
//保存引号中间的常量
ch++;
}

}
}
else{
char* write_ch = &result[wcount].value[0];
strcpy(&result[wcount].type[0], "limit");
*write_ch = *ch;
write_ch++;
*write_ch = '\0';
wcount++;
ch++;
}
return ch;
}

char* checkEqual(char* ch, Word result[])
{
strcpy(&result[wcount].type[0], "=");
strcpy(&result[wcount].value[0], " ");
wcount++;
;
ch++;
return ch;
}

/**
 * Scans a relational operator: "<", "<=", "<>", ">", ">=" or "==".
 *
 * On a match a "relop" token holding the operator text is appended at
 * result[wcount]. A single '=' (not followed by '=') and any other leading
 * character store nothing.
 *
 * @param ch     position of the candidate operator
 * @param result token array
 * @return position after the operator, or `ch` itself when nothing matched
 */
char* checkRelops(char* ch, Word result[])
{
    const char* lexeme;

    switch (*ch) {
    case '<':
        if (ch[1] == '=')
            lexeme = "<=";
        else if (ch[1] == '>')
            lexeme = "<>";   // not equal
        else
            lexeme = "<";
        break;

    case '=':
        if (ch[1] != '=')
            return ch;       // plain '=', not a relational operator
        lexeme = "==";
        break;

    case '>':
        lexeme = (ch[1] == '=') ? ">=" : ">";
        break;

    default:
        return ch;
    }

    strcpy(result[wcount].type, "relop");
    strcpy(result[wcount].value, lexeme);
    wcount++;

    // Every operator here is one or two characters long.
    return ch + (lexeme[1] != '\0' ? 2 : 1);
}

/**
 * Scans a numeric literal of the form  digits [ '.' digits ] [ 'E' [ '+' | '-' ] digits ].
 *
 * The scanned text is stored at words[wcount] with type "num". If the input
 * stops matching in a state that still requires a digit (after '.', after 'E'
 * or after the exponent sign), the text read so far is stored with type
 * "NUM_ERROR" instead. The character that ended the scan is never consumed.
 *
 * Text longer than the value buffer is still consumed but only the part that
 * fits is stored, so a very long literal can no longer overrun the buffer.
 *
 * @param p     position of the first character of the literal
 * @param words token array; the token is appended at index wcount
 * @return position of the first character that is not part of the literal
 */
char* checkNum(char* p, Word words[]) {
    char* const value = words[wcount].value;
    const size_t capacity = sizeof(words[wcount].value) - 1;
    size_t length = 0;

    int state = STATE_BEGIN;
    while (state > 0) {
        const bool digit = (*p >= '0' && *p <= '9');

        // Compute the state reached by consuming *p.
        switch (state) {
        case STATE_BEGIN:
            state = digit ? STATE_INTEGER : STATE_ERROR;
            break;

        case STATE_INTEGER:
            if (digit)
                state = STATE_INTEGER;
            else if (*p == '.')
                state = STATE_DECIMAL_BEGIN;
            else if (*p == 'E')
                state = STATE_E;
            else
                state = STATE_DONE;
            break;

        case STATE_DECIMAL_BEGIN:
            // At least one digit must follow the decimal point.
            state = digit ? STATE_DECIMAL : STATE_ERROR;
            break;

        case STATE_DECIMAL:
            if (digit)
                state = STATE_DECIMAL;
            else if (*p == 'E')
                state = STATE_E;
            else
                state = STATE_DONE;
            break;

        case STATE_E:
            if (digit)
                state = STATE_POWER;
            else if (*p == '+' || *p == '-')
                state = STATE_SYMBOL;
            else
                state = STATE_ERROR;
            break;

        case STATE_SYMBOL:
            // At least one digit must follow the exponent sign.
            state = digit ? STATE_POWER : STATE_ERROR;
            break;

        case STATE_POWER:
            state = digit ? STATE_POWER : STATE_DONE;
            break;

        default:
            state = STATE_ERROR;
            break;
        }

        if (state <= 0)
            break;  // terminal state: *p is not part of the literal

        // The current character belongs to the literal.
        if (length < capacity)
            value[length++] = *p;
        ++p;
    }

    value[length] = '\0';
    strcpy(words[wcount].type, state == STATE_DONE ? "num" : "NUM_ERROR");

    // Move on to the next slot of the token array.
    wcount++;
    return p;
}

/**
 * Scans an identifier: a letter or '_' followed by letters, digits or '_'.
 *
 * The text is stored at result[wcount] with type "key" when it equals an
 * entry of the keyword table `key`, otherwise with type "id". Text longer
 * than the value buffer is consumed but stored truncated.
 *
 * If `*ch` cannot start an identifier nothing is stored and `ch` is returned
 * unchanged.
 *
 * @param ch     position of the first character of the identifier
 * @param result token array; the token is appended at index wcount
 * @return position of the first character after the identifier
 */
char* checkID(char* ch, Word result[])
{
    // <ctype.h> classifiers require unsigned char values; plain char may be negative.
    const unsigned char first = static_cast<unsigned char>(*ch);
    if (first != '_' && !isalpha(first))
        return ch;

    char* const value = result[wcount].value;
    const size_t capacity = sizeof(result[wcount].value) - 1;
    size_t length = 0;

    while (*ch == '_' || isalnum(static_cast<unsigned char>(*ch))) {
        if (length < capacity)
            value[length++] = *ch;
        ++ch;
    }
    value[length] = '\0';

    // sizeof(key) is the table size in bytes; divide by the row size to get
    // the number of keywords, otherwise the loop reads far past the table.
    const size_t keyCount = sizeof(key) / sizeof(key[0]);
    const char* type = "id";
    for (size_t i = 0; i < keyCount; ++i) {
        if (strcmp(value, key[i]) == 0) {
            type = "key";
            break;
        }
    }
    strcpy(result[wcount].type, type);

    wcount++;
    return ch;
}

/**
 * Scans an operator and stores it at word[wcount] with type "op".
 *
 * A character immediately repeated forms a two-character operator, except
 * when it is one of  + - * / % ~ ^ . ? :  which are always stored as a
 * single character.
 *
 * @param ch   position of the operator character
 * @param word token array; the token is appended at index wcount
 * @return position of the first character after the operator
 */
char* checkOP(char* ch, Word word[]){
    const char first = ch[0];
    const char second = ch[1];

    // Characters that never combine with themselves into a two-character operator.
    bool mayDouble = true;
    switch (second) {
    case '+': case '-': case '*': case '/': case '%':
    case '~': case '^': case '.': case '?': case ':':
        mayDouble = false;
        break;
    default:
        break;
    }

    char* next;
    if (second == first && mayDouble) {
        setResult_OP(word, second, second);
        next = ch + 2;
    }
    else {
        setResult_OP(word, first, '\0');
        next = ch + 1;
    }

    strcpy(word[wcount].type, "op");
    wcount++;
    return next;
}

char* checkAnnotate(char* ch, Word word[]){
if (*ch == '/' && *(ch + 1) == '*'){
char* temp = ch;
ch = ch + 2;
int state = 9;
while (state != 0){
if (*ch == '*' && *(ch + 1) == '/'){
ch = ch + 2;
state = 0;
}
else{
if (*ch == '\0')//这里写文件结束符
{
strcpy(&word[wcount].type[0], "ERROR");
strcpy(&word[wcount].value[0], "the annotate is not finished...");
wcount++;
return temp+2;
}
ch++;
}
}
return ch;
}
else if (*ch == '/' && *(ch + 1) == '/'){
ch = ch + 2;
int state = 9;
while (state != 0){
if (*ch == '\n' || *ch == '\0'){
ch = ch + 1;
state = 0;
}
else{
/*if (*ch == '\n')
{
strcpy(&word[wcount].type[0], "AN_ERROR");
return ++ch;
}*/
ch++;
}
}
return ch;
}
else{
return ch;
}

}

/**
 * Writes operator text into word[wcount].value.
 *
 * With a non-NUL `op2` the value becomes the two-character string
 * {op1, op2}. With `op2` equal to '\0' the value becomes the one-character
 * string {op1}, and `op2` itself serves as the terminator.
 *
 * @param word token array; only word[wcount].value is modified
 * @param op1  first operator character
 * @param op2  second operator character, or '\0' for a single-character operator
 */
void setResult_OP(Word word[], char op1, char op2){
    char* const text = word[wcount].value;
    text[0] = op1;
    text[1] = op2;
    if (op2 != '\0')
        text[2] = '\0';
}

/**
 * Prints the first wcount tokens of `word` to standard output, one per line,
 * in the form "<type,value>".
 *
 * @param word token array to print
 */
void display(Word word[]){
    int index = 0;
    while (index < wcount) {
        const Word& token = word[index];
        cout << "<" << token.type << "," << token.value << ">" << endl;
        ++index;
    }
}


测试文件 text.txt(需要和 .cpp 文件放在同一目录下),内容如下:

void setResult_OP(Word word[], char op1, char op2){
if (op2 != '\0'){
word[wcount].value[0] = op1;
}
/* this is an annotation ...*/
int a=10;
a=a+80;
float s=2E.2;
//单行注释
if(a==s){
printf("a equal s");
}
}


输出结果:
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: