在网上,看到还是有部分程序爱好者希望能编出自己的编译器.当然,这的确是件难事,许多人都说要先去学编译原理、精通汇编语言,结果让这些爱好者望而却步.但是,当我们亲手去做之后,会发现要做一个简单的程序解释器(就像Java和Basic那样),还是挺容易的.你根本不用去看那些东西,只要你懂C语言,看了本文后,就可以完成那样的解释器.
在网上,有许多大型C语言,Perl语言的编译器源代码.但当你下载后看看,竟发现一点都看不懂.其实那些东西还是不看为妙.看了本文后,我相信你宁愿自己动手编,也不愿意去领会那些庞大的源代码了.
少说废话了,我们开始讲解.
这是一份Basic解释器的代码,十分经典,而且十分简单.
get_token()负责词汇提取.譬如对于 PRINT A+B,调用一次get_token(),字符串token里就装上PRINT;再调用一次get_token(),token里就装上A;再调用一次get_token(),token里就装上+;再调用一次get_token(),token里就装上B.很简单吧!putback()则是把prog指针向回移动刚取出的token的长度,相当于把这个token退回输入流.
其中包含了词法分析和十分关键的代数式求值get_exp(int *result).代数式求值用到了一组递归函数: void get_exp(),level2(),level3(),level4(),level5(); void level6(),primitive(),arith(),unary();确实难看懂,不过你尽管拿来用就是了.
话不多说,你看源代码就是了.最后,我将给你看看C++中完整的源代码
/* recursive descent parser for integer expression which may include variables*/
#include <ctype.h>
#include <math.h>
#include <setjmp.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* token types stored in token_type by get_token() */
#define DELIMITER 1
#define VARIABLE  2
#define NUMBER    3
#define COMMAND   4
#define STRING    5
#define QUOTE     6

/* values stored in tok by get_token() for end of line / end of input */
#define EOL       9
#define FINISHED  10

extern char *prog;        /* holds expression to be analyzed */
extern jmp_buf e_buf;     /* hold environment for longjmp() in serror() */
extern int variables[26]; /* variables, indexed by letter (see find_var) */
extern struct commands {
    char command[20];     /* keyword text, compared by look_up() */
    char tok;             /* internal representation returned by look_up() */
} table[];

extern char token[80];    /* holds string representation of token */
extern char token_type;   /* contains type of token */
extern char tok;          /* holds the internal representation of token */

/*
 * Full prototypes for every function in this file.  Empty parameter lists
 * are not enough here: arith() and unary() take a char parameter, which is
 * incompatible with an old-style "()" declaration, and the tokenizer helpers
 * are called before they are defined.
 */
void get_exp(int *result);
void level2(int *result);
void level3(int *result);
void level4(int *result);
void level5(int *result);
void level6(int *result);
void primitive(int *result);
void arith(char o, int *r, int *h);
void unary(char o, int *r);
void serror(int error);
void putback(void);
int get_token(void);
int find_var(char *s);
int look_up(char *s);
int isdelim(char c);
int iswhite(char c);
/*
 * Entry point into the parser.
 *
 * Reads the first token, evaluates the expression that starts there into
 * *result, then hands the last token read back to the input stream.
 * Reports error 2 through serror() when no token text is present.
 */
void get_exp(int *result)
{
    get_token();

    if (token[0] != '\0') {
        level2(result);
        putback(); /* return last token read to input stream */
    } else {
        serror(2);
    }
}
/*
 * Add or subtract two terms.
 *
 * Evaluates a term into *result, then for as long as the current token
 * starts with '+' or '-' evaluates the next term and folds it in with
 * arith().
 */
void level2(int *result)
{
    int rhs;
    char sign;

    level3(result);

    for (;;) {
        sign = *token;
        if (sign != '+' && sign != '-')
            break;

        get_token();
        level3(&rhs);
        arith(sign, result, &rhs);
    }
}
/*
 * Multiply or divide two factors (also handles '%').
 *
 * Evaluates a factor into *result, then for as long as the current token
 * starts with '*', '/' or '%' evaluates the next factor and folds it in
 * with arith().
 *
 * The right-hand operand is parsed with level4(), not level3(): recursing
 * into level3() would group the operators from the right, so that 8/2/2
 * evaluated as 8/(2/2) instead of (8/2)/2.
 */
void level3(int *result)
{
    char op;
    int hold;

    level4(result);
    while ((op = *token) == '*' || op == '/' || op == '%') {
        get_token();
        level4(&hold);
        arith(op, result, &hold);
    }
}
/*
 * Process integer exponent.
 *
 * Evaluates the base into *result; if the current token starts with '^',
 * evaluates the exponent (recursively, so a chain of '^' groups from the
 * right) and applies it with arith().
 *
 * The operator is passed to arith() as the literal '^'; there is no local
 * operator variable to read, because nothing here ever stored one.
 */
void level4(int *result)
{
    int hold;

    level5(result);
    if (*token == '^') {
        get_token();
        level4(&hold);
        arith('^', result, &hold);
    }
}
/*
 * Is a unary + or -.
 *
 * If the current token is a delimiter starting with '+' or '-', remembers
 * it and moves on to the next token; the operand is then evaluated by
 * level6() and the remembered sign is applied with unary().
 *
 * The two sign tests are parenthesized together so that the DELIMITER
 * check guards both of them; without the parentheses && binds tighter than
 * || and any token beginning with '-' would be accepted regardless of type.
 */
void level5(int *result)
{
    char op = 0;

    if (token_type == DELIMITER && (*token == '+' || *token == '-')) {
        op = *token;
        get_token();
    }
    level6(result);
    if (op)
        unary(op, result);
}
/*
 * Process parenthesized expression.
 *
 * Anything other than a '(' delimiter is handed to primitive().  For a
 * '(' the enclosed expression is evaluated with level2(), error 1 is
 * reported through serror() when the token that follows does not start
 * with ')', and the next token is then fetched.
 */
void level6(int *result)
{
    if (token_type != DELIMITER || *token != '(') {
        primitive(result);
        return;
    }

    get_token();
    level2(result);
    if (*token != ')')
        serror(1);
    get_token();
}
/*
 * Find value of number or variable.
 *
 * A VARIABLE token is resolved with find_var(), a NUMBER token is
 * converted with atoi(); in both cases the next token is fetched
 * afterwards.  Any other token type is reported as error 0 via serror().
 */
void primitive(int *result)
{
    if (token_type == VARIABLE) {
        *result = find_var(token);
        get_token();
    } else if (token_type == NUMBER) {
        *result = atoi(token);
        get_token();
    } else {
        serror(0);
    }
}
/*
 * Perform the specified arithmetic.
 *
 * o selects the operation ('-', '+', '*', '/', '%' or '^'); the left
 * operand is read from *r, the right operand from *h, and the result is
 * written back to *r.  An unrecognized operator leaves *r untouched.
 *
 * For '^' an exponent of 0 yields 1; otherwise *r is multiplied by the
 * original base (*h - 1) times, so an exponent below 1 leaves *r as is.
 */
void arith(char o, int *r, int *h)
{
    int base;
    int remaining;

    if (o == '-') {
        *r -= *h;
    } else if (o == '+') {
        *r += *h;
    } else if (o == '*') {
        *r = (*r) * (*h);
    } else if (o == '/') {
        *r = (*r) / (*h);
    } else if (o == '%') {
        *r = (*r) % (*h);
    } else if (o == '^') {
        base = *r;
        if (*h == 0) {
            *r = 1;
            return;
        }
        remaining = *h - 1;
        while (remaining > 0) {
            *r = (*r) * base;
            --remaining;
        }
    }
}
/*
 * Reverse the sign.
 *
 * Negates *r when o is '-'; any other operator leaves *r unchanged.
 */
void unary(char o, int *r)
{
    if (o != '-')
        return;

    *r = -*r;
}
/*
 * Find the value of a variable.
 *
 * s points at the variable name; only its first character is used.  The
 * character is upper-cased and its offset from 'A' indexes variables[].
 * Reports error 4 ("not a variable") through serror() and returns 0 when
 * the first character is not a letter or does not map into the 26 slots.
 *
 * The lookup uses the argument s, the same string that was validated,
 * rather than the global token buffer.
 */
int find_var(char *s)
{
    /* <ctype.h> functions need a value representable as unsigned char */
    const unsigned char first = (unsigned char)*s;
    int slot;

    if (!isalpha(first)) {
        serror(4); /* not a variable */
        return 0;
    }

    slot = toupper(first) - 'A';
    if (slot < 0 || slot >= 26) {
        /* a locale-specific letter outside A-Z has no slot in variables[] */
        serror(4);
        return 0;
    }
    return variables[slot];
}
/*
 * Display an error message.
 *
 * Prints the message selected by error followed by a newline, then jumps
 * back to the save point recorded in e_buf; this function does not return
 * to its caller.  An error code outside the message table is reported as
 * message 0 instead of indexing past the end of the table.
 */
void serror(int error)
{
    static const char *const e[] = {
        "syntax error",
        "unbalanced parentheses",
        "no expression present",
        "equal sign expected",
        "not a variable",
        "label table full",
        "duplicate label",
        "undefined label",
        "THEN expected",
        "TO expected",
        "too many nested FOR loops",
        "NEXT without FOR",
        "too many nested GOSUB",
        "RETURN without GOSUB"
    };
    const int count = (int)(sizeof e / sizeof e[0]);

    if (error < 0 || error >= count)
        error = 0;

    printf("%s\n", e[error]);
    longjmp(e_buf, 1); /* return to save point */
}
/* get a token */get_token(){ register char *temp;
token_type = 0;tok = 0; temp = token; if (*prog == '/0') { /* end of file */ *token = 0; tok = FINISHED; return (token_type = DELIMITER); } while (iswhite(*prog)) ++prog; /* skip over white space */
if (*prog == '/r') { /* CR LF */ ++prog;++prog; tok = EOL;*token = '/r'; token[1] = '/n';token[2] = 0; return (token_type = DELIMITER); } if (strchr("+-*^/%=;(),><",*prog)) { /* delimiter */ *temp = *prog; prog++; /* advance to next position */ temp++; *temp=0; return (token_type = DELIMITER); } if (*prog == '"') { /* quote string */ prog++; while (*prog!='"'&&*prog!='/r') *temp++=*prog++; if (*prog=='/r') serror(1); prog++;*temp=0; return (token_type = QUOTE); } if (isdigit(*prog)) { /* number */ while (!isdelim(*prog)) *temp++=*prog++; *temp = '/0'; return (token_type = NUMBER); }
if (isalpha(*prog)) { /* var or command */ while (!isdelim(*prog)) *temp++=*prog++; token_type = STRING; }
*temp = '/0';
/* see if a string is a command or a variable */ if (token_type == STRING) { tok = look_up(token); /* convert to internal rep */ if (!tok) token_type = VARIABLE; else token_type = COMMAND; /* is a command */ } return token_type;}
/*
 * Return a token to input stream.
 *
 * Moves prog back by one position for every character held in token.
 */
void putback()
{
    const char *scan = token;

    while (*scan != '\0') {
        --prog;
        ++scan;
    }
}
/*
 * Look up a token's internal representation in the command table.
 *
 * Converts s to lowercase IN PLACE, then compares it against each
 * table[] entry until the entry with an empty command string is reached.
 * Returns the matching entry's tok value, or 0 for an unknown command.
 */
int look_up(char *s)
{
    char *p;
    int i;

    /* convert to lowercase; <ctype.h> needs an unsigned char value */
    for (p = s; *p; p++)
        *p = (char)tolower((unsigned char)*p);

    /* see if token is in table */
    for (i = 0; *table[i].command; i++) {
        if (!strcmp(table[i].command, s))
            return table[i].tok;
    }
    return 0; /* unknown command */
}
/*
 * Return true if c is a delimiter.
 *
 * Delimiters are the characters ; , + - < > / * % ^ = ( ) and space, plus
 * tab, carriage return and the terminating NUL.  Returns 1 for a
 * delimiter, 0 otherwise.
 */
int isdelim(char c)
{
    /* NUL is tested first: strchr() would also match the terminator */
    if (c == '\0' || c == '\t' || c == '\r')
        return 1;
    return strchr(";,+-<>/*%^=() ", c) != NULL;
}
/*
 * Return 1 if c is a space or a tab, 0 otherwise.
 */
int iswhite(char c)
{
    return c == ' ' || c == '\t';
}