========================
Outlook: Simple Compiler
========================

What we've programmed in `getToken` is called a lexer. It groups individual
characters into so-called tokens. In doing so, we've implemented a so-called
finite automaton.

The next step could now be to use the lexer to implement a simple calculator
that can evaluate arithmetic expressions (with `+`, `*`, and parentheses).
Alternatively, the calculator could generate assembly code that describes how
to evaluate the expression. If you add the code shown below (see Code Snippet)
to our program together with the function `getToken` (see Complete Code),
you'll achieve just that:

---- CODE(type=txt) ------------------------------------------------------------
MCL:session1 lehn$ ./a.out
3 * (4 + 5);
    load 3, %1
    load 4, %2
    load 5, %3
    add %3, %2, %4
    mul %4, %1, %5
--------------------------------------------------------------------------------

Such a program could already be called a compiler. A good compiler would, of
course, evaluate the constant expression itself and generate code like this:

---- CODE(type=txt) ------------------------------------------------------------
MCL:session1 lehn$ ./a.out
3 * (4 + 5);
    load 27, %1
--------------------------------------------------------------------------------

Code Snippet
============

---- CODE(type=abc, , linenumbers=51) ------------------------------------------
// Number of the most recently allocated virtual register. It starts at 0 and
// is pre-incremented on every allocation, so the first register used is %1.
global reg: int;

// Forward declaration: parseInt() and parseSum() are mutually recursive.
fn parseSum(): int;

// Parses an integer literal or a parenthesized sum.
// Returns the register that holds the value, or -1 if neither was found.
fn parseInt(): int
{
    if (token == INTEGER) {
        // Save the literal before advancing: getToken() overwrites `val`
        // if the next token happens to be another integer.
        local literal: int = val;
        getToken();
        printf("\tload %d, %%%d\n", literal, ++reg);
        return reg;
    } else if (token == LPAREN) {
        getToken();
        local sum: int = parseSum();
        if (sum < 0) {
            printf("sum expected\n");
            return -1;
        }
        if (token != RPAREN) {
            printf("expected ')'\n");
            return -1;
        }
        getToken();
        return sum;
    }
    return -1;
}

// Parses one or more factors separated by '*'.
// Returns the register that holds the product, or -1 on a parse error.
fn parseProd(): int
{
    local prod: int = parseInt();
    if (prod < 0) {
        return -1;
    }
    while (token == ASTERISK) {
        getToken();
        local factor: int = parseInt();
        if (factor < 0) {
            printf("expected factor\n");
            return -1;
        }
        printf("\tmul %%%d, %%%d, %%%d\n", factor, prod, ++reg);
        // The running product now lives in the freshly allocated register;
        // without this update "a * b * c" would multiply c with a again.
        prod = reg;
    }
    return prod;
}

// Parses one or more products separated by '+'.
// Returns the register that holds the sum, or -1 on a parse error.
fn parseSum(): int
{
    local sum: int = parseProd();
    if (sum < 0) {
        return -1;
    }
    while (token == PLUS) {
        getToken();
        local summand: int = parseProd();
        if (summand < 0) {
            printf("expected summand\n");
            return -1;
        }
        printf("\tadd %%%d, %%%d, %%%d\n", summand, sum, ++reg);
        // Keep accumulating into the register that holds the latest result.
        sum = reg;
    }
    return sum;
}

// Reads the first token, then translates sums until parseSum() fails
// (parse error or end of input). A ';' after a sum is skipped.
fn main()
{
    getToken();
    for (local sum: int = -1; (sum = parseSum()) >= 0; ) {
        if (token == SEMICOLON) {
            getToken();
        }
    }
}
--------------------------------------------------------------------------------

Complete Code
=============

---- CODE(file=session01/page09/ex1.abc, type=abc, linenumbers) ----------------
@ <stdio.hdr>

// Kinds of tokens the lexer can deliver.
enum TokenKind
{
    EOI, // = 0 (end of input)
    BAD, // declared but never produced by getToken() below
    INTEGER,
    PLUS,
    ASTERISK,
    SEMICOLON,
    LPAREN,
    RPAREN,
};

// Lexer state shared with the parser.
global token: int = 0;  // kind of the current token (a TokenKind value)
global ch: int = 0;     // current input character, not yet consumed
global val: int = 0;    // value of the most recent INTEGER token

// Reads the next token from standard input into `token` (and `val` for
// integers). Characters that do not start a token are skipped.
fn getToken()
{
    if (ch >= '0' && ch <= '9') {
        // Accumulate the decimal digits of an integer literal.
        val = 0;
        while (ch >= '0' && ch <= '9') {
            val = val * 10;
            val = val + ch - '0';
            ch = getchar();
        }
        token = INTEGER;
    } else if (ch == '+') {
        ch = getchar();
        token = PLUS;
    } else if (ch == '*') {
        ch = getchar();
        token = ASTERISK;
    } else if (ch == ';') {
        ch = getchar();
        token = SEMICOLON;
    } else if (ch == '(') {
        ch = getchar();
        token = LPAREN;
    } else if (ch == ')') {
        ch = getchar();
        token = RPAREN;
    } else if (ch == EOF) {
        token = EOI;
    } else {
        // Any other character (including the initial ch == 0 and white
        // space) is dropped; retry with the next character.
        ch = getchar();
        getToken();
    }
}

// Number of the most recently allocated virtual register. It starts at 0 and
// is pre-incremented on every allocation, so the first register used is %1.
global reg: int;

// Forward declaration: parseInt() and parseSum() are mutually recursive.
fn parseSum(): int;

// Parses an integer literal or a parenthesized sum.
// Returns the register that holds the value, or -1 if neither was found.
fn parseInt(): int
{
    if (token == INTEGER) {
        // Save the literal before advancing: getToken() overwrites `val`
        // if the next token happens to be another integer.
        local literal: int = val;
        getToken();
        printf("\tload %d, %%%d\n", literal, ++reg);
        return reg;
    } else if (token == LPAREN) {
        getToken();
        local sum: int = parseSum();
        if (sum < 0) {
            printf("sum expected\n");
            return -1;
        }
        if (token != RPAREN) {
            printf("expected ')'\n");
            return -1;
        }
        getToken();
        return sum;
    }
    return -1;
}

// Parses one or more factors separated by '*'.
// Returns the register that holds the product, or -1 on a parse error.
fn parseProd(): int
{
    local prod: int = parseInt();
    if (prod < 0) {
        return -1;
    }
    while (token == ASTERISK) {
        getToken();
        local factor: int = parseInt();
        if (factor < 0) {
            printf("expected factor\n");
            return -1;
        }
        printf("\tmul %%%d, %%%d, %%%d\n", factor, prod, ++reg);
        // The running product now lives in the freshly allocated register;
        // without this update "a * b * c" would multiply c with a again.
        prod = reg;
    }
    return prod;
}

// Parses one or more products separated by '+'.
// Returns the register that holds the sum, or -1 on a parse error.
fn parseSum(): int
{
    local sum: int = parseProd();
    if (sum < 0) {
        return -1;
    }
    while (token == PLUS) {
        getToken();
        local summand: int = parseProd();
        if (summand < 0) {
            printf("expected summand\n");
            return -1;
        }
        printf("\tadd %%%d, %%%d, %%%d\n", summand, sum, ++reg);
        // Keep accumulating into the register that holds the latest result.
        sum = reg;
    }
    return sum;
}

// Reads the first token, then translates sums until parseSum() fails
// (parse error or end of input). A ';' after a sum is skipped.
fn main()
{
    getToken();
    for (local sum: int = -1; (sum = parseSum()) >= 0; ) {
        if (token == SEMICOLON) {
            getToken();
        }
    }
}
--------------------------------------------------------------------------------