您的位置:首页 > 其它

谈谈编译原理和其在WEB开发中的应用1

2009-01-22 14:41 441 查看
计算机相关专业的差不多都有学过编译原理吧?今天我班门弄刀,也谈谈我自己对编译原理的认识和理解。当然啦,我主要要谈的是编译器的前端的实现,后端的代码生成我目前还没有研究过。

实现一个编译器有两大步骤:一是词法分析,二是语法分析。应对这两块有很多的工具是可以帮助我们进行这样的工作的(例如:flex、yacc等),但我要说的是怎么完全手工去实现它。语法分析的主要目的是把一个个的字符和字母之类的东西给识别为token,而语法分析的主要作用是去构建分析后的对象或是构建抽象语法树。下面我结合早一段时间开发的JSON的反序列化来聊聊整个的过程。

词法分析

private IList<Token> ReadTokens(char[] chars)

{

IList<Token> tokens = new List<Token>();

StringBuilder sb = new StringBuilder();

CharReader src = new CharReader(chars);

char c = src.Read();

readLoop:

while (c != '\0')

{

switch (c)

{

#region 空白字符

case '\t':

{

c = src.Read();

while (c == '\t') { c = src.Read(); }

break;

}

case ' ':

{

c = src.Read();

while (c == ' ') { c = src.Read(); }

break;

}

case '\r':

{

c = src.Read();

while (c == '\r') { c = src.Read(); }

break;

}

case '\n':

{

c = src.Read();

while (c == '\n') { c = src.Read(); }

break;

}

#endregion

#region 运算符

case '[':

{

tokens.Add(new Token(TokenId.LBracket));

c = src.Read();

break;

}

case ']':

{

tokens.Add(new Token(TokenId.RBracket));

c = src.Read();

break;

}

case '{':

{

tokens.Add(new Token(TokenId.LCurly));

c = src.Read();

break;

}

case '}':

{

tokens.Add(new Token(TokenId.RCurly));

c = src.Read();

break;

}

case ':':

{

tokens.Add(new Token(TokenId.Colon));

c = src.Read();

break;

}

case ',':

{

tokens.Add(new Token(TokenId.Comma));

c = src.Read();

break;

}

#endregion

#region 字符串

case '@':

case '\'':

case '"':

{

bool isVerbatim = false;

if (c == '@')

{

isVerbatim = true;

c = src.Read(); // skip to follow quote

}

sb.Length = 0;

char quote = c;

bool isSingleQuote = (c == '\'');

c = src.Read();

while (!src.Eof)

{

if (c == '\\' && !isVerbatim) // normal escaped chars

{

c = src.Read();

switch (c)

{

case '\0':

{

if (src.Eof)

{

goto readLoop;

}

else

{

sb.Append('\0');

c = src.Read();

break;

}

}

case 'a':

{

sb.Append('\a');

c = src.Read();

break;

}

case 'b':

{

sb.Append('\b');

c = src.Read();

break;

}

case 'f':

{

sb.Append('\f');

c = src.Read();

break;

}

case 'n':

{

sb.Append('\n');

c = src.Read();

break;

}

case 'r':

{

sb.Append('\r');

c = src.Read();

break;

}

case 't':

{

sb.Append('\t');

c = src.Read();

break;

}

case 'v':

{

sb.Append('\v');

c = src.Read();

break;

}

case '\\':

{

sb.Append('\\');

c = src.Read();

break;

}

case '\'':

{

sb.Append('\'');

c = src.Read();

break;

}

case '\"':

{

// strings are always stored as verbatim for now, so the double quote is needed

sb.Append("\"\"");

c = src.Read();

break;

}

default:

{

sb.Append(c);

break;

}

}

}

else if (c == '\"')

{

c = src.Read();

// two double quotes are escapes for quotes in verbatim mode

if (c == '\"' && isVerbatim)// verbatim escape

{

sb.Append("\"\"");

c = src.Read();

}

else if (isSingleQuote)

{

sb.Append('\"');

}

else

{

break;

}

}

else // non escaped

{

if (c == quote)

{

break;

}

sb.Append(c);

c = src.Read();

}

}

if (c != '\0')

{

if (c == quote)

{

c = src.Read(); // skip last quote

}

tokens.Add(new Token(TokenId.String, sb.ToString()));

}

break;

}

#endregion

#region 数字

case '0':

case '1':

case '2':

case '3':

case '4':

case '5':

case '6':

case '7':

case '8':

case '9':

case '.':

case '+':

case '-':

{

sb.Length = 0;

TokenId numKind = TokenId.Int;

bool isDouble = false;

if (c == '.')

{

c = src.Read();

if (c < '0' || c > '9')

{

break;

}

else

{

sb.Append('.');

numKind = TokenId.Double;

isDouble = true;

}

}

else if (c == '+')

{

c = src.Read();

}

else if (c == '-')

{

sb.Append('-');

c = src.Read();

}

bool isNum = true;

while (isNum)

{

switch (c)

{

case '0':

case '1':

case '2':

case '3':

case '4':

case '5':

case '6':

case '7':

case '8':

case '9':

{

sb.Append(c);

break;

}

case '.':

{

if (isDouble)

{

numKind = TokenId.Double;

tokens.Add(new Token(numKind, sb.ToString()));

goto readLoop;

}

c = src.Read();

if (c < '0' || c > '9')

{

tokens.Add(new Token(numKind, sb.ToString()));

goto readLoop;

}

else

{

sb.Append('.');

sb.Append(c);

numKind = TokenId.Double;

isDouble = true;

}

break;

}

default:

{

isNum = false;

break;

}

}

if (isNum) c = src.Read();

}

tokens.Add(new Token(numKind, sb.ToString()));

isNum = false;

break;

}

#endregion

#region 关键字和变量

default:

{

switch (c)

{

case 'a':

case 'b':

case 'c':

case 'd':

case 'e':

case 'f':

case 'g':

case 'h':

case 'i':

case 'j':

case 'k':

case 'l':

case 'm':

case 'n':

case 'o':

case 'p':

case 'q':

case 'r':

case 's':

case 't':

case 'u':

case 'v':

case 'w':

case 'x':

case 'y':

case 'z':

case 'A':

case 'B':

case 'C':

case 'D':

case 'E':

case 'F':

case 'G':

case 'H':

case 'I':

case 'J':

case 'K':

case 'L':

case 'M':

case 'N':

case 'O':

case 'P':

case 'Q':

case 'R':

case 'S':

case 'T':

case 'U':

case 'V':

case 'W':

case 'X':

case 'Y':

case 'Z':

case '_':

{

sb.Length = 0;

sb.Append(c);

c = src.Read();

bool endIdent = false;

bool possibleKeyword = true;

while (c != '\0' && !endIdent)

{

switch (c)

{

case 'a':

case 'b':

case 'c':

case 'd':

case 'e':

case 'f':

case 'g':

case 'h':

case 'i':

case 'j':

case 'k':

case 'l':

case 'm':

case 'n':

case 'o':

case 'p':

case 'q':

case 'r':

case 's':

case 't':

case 'u':

case 'v':

case 'w':

case 'x':

case 'y':

case 'z':

{

sb.Append(c);

c = src.Read();

break;

}

case 'A':

case 'B':

case 'C':

case 'D':

case 'E':

case 'F':

case 'G':

case 'H':

case 'I':

case 'J':

case 'K':

case 'L':

case 'M':

case 'N':

case 'O':

case 'P':

case 'Q':

case 'R':

case 'S':

case 'T':

case 'U':

case 'V':

case 'W':

case 'X':

case 'Y':

case 'Z':

case '_':

case '0':

case '1':

case '2':

case '3':

case '4':

case '5':

case '6':

case '7':

case '8':

case '9':

{

possibleKeyword = false;

sb.Append(c);

c = src.Read();

break;

}

default:

{

endIdent = true;

break;

}

}

}

string identText = sb.ToString();

sb.Length = 0;

bool isKeyword = possibleKeyword ? _keyWords.ContainsKey(identText) : false;

if (isKeyword)

{

tokens.Add(new Token(_keyWords[identText]));

}

else

{

tokens.Add(new Token(TokenId.Ident, identText));

}

break;

}

default:

{

tokens.Add(new Token(TokenId.Invalid));

c = src.Read();

break;

}

}

break;

}

#endregion

}

}

return tokens;

}

关于JSON的定义请参看http://www.json.org/,要实现JSON的分析,我们要确定其基本的定义,而它的定义相对比较简单,它只有 Object、Array、String、Number和Value五种形式。而根据这几种对象的表示的边界条件,我们知道我们要分析的字符主要有:'{', '}', '[', ']', ':', ',', '"'等,对这些的分析的实现代码如上。

  完整代码如下:/Files/afxcn/MiniJSON.rar

下一篇:

  谈谈编译原理和其在WEB开发中的应用2
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: