Outlook: Simple Compiler

What we've programmed in getToken is called a lexer. This groups individual characters into so-called tokens. In the implementation, we've implemented a so-called finite automaton.

The next step could now be to use the lexer to implement a simple calculator that can evaluate arithmetic expressions (with +, *, and parentheses).

Alternatively, the calculator could generate assembly code that describes how to evaluate the expression. If you use the code shown below (see Code Snippet) together with function getToken into our program (sse Complete Code), you'll achieve just that:

1
2
3
4
5
6
7
MCL:session1 lehn$ ./a.out
3 * (4 + 5);
        load 3, %1
        load 4, %2
        load 5, %3
        add %3, %2, %4
        mul %4, %1, %5

Such a program could already be called a compiler. A good compiler would, of course, actually calculate the expression and generate code like this:

1
2
3
MCL:session1 lehn$ ./a.out
3 * (4 + 5);
        load 27, %1

Code Snippet

 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
global reg: int;

fn parseSum(): int;

fn parseInt(): int
{
    if (token == INTEGER) {
        getToken();
        printf("\tload %d, %%%d\n", val, reg);
        return reg++;
    } if (token == LPAREN) {
        getToken();
        local sum: int = parseSum();
        if (sum < 0) {
            printf("sum expected\n");
            return -1;
        }
        if (token != RPAREN) {
            printf("expected ')'\n");
            return -1;
        }
        getToken();
        return sum;
    } else {
        return -1;
    }
}

fn parseProd(): int
{
    local prod: int = parseInt();
    if (prod < 0) {
        return -1;
    }
    while (token == ASTERISK) {
        getToken();
        local factor: int = parseInt();
        if (factor < 0) {
            printf("expected factor\n");
            return -1;
        }
        printf("\tmul %%%d, %%%d, %%%d\n", factor, prod, reg++);
    }
    return reg;
}

fn parseSum(): int
{
    local sum: int = parseProd();
    if (sum < 0) {
        return -1;
    }
    while (token == PLUS) {
        getToken();
        local summand: int = parseProd();
        if (summand < 0) {
            printf("expected summand\n");
            return -1;
        }
        printf("\tadd %%%d, %%%d, %%%d\n", summand, sum, reg++);
    }
    return reg;
}

fn main()
{
    getToken();
    for (local sum: int = -1; (sum = parseSum()) >= 0; ) {
        if (token == SEMICOLON) {
            getToken();
        }
    }
}

Complete Code

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
@ <stdio.hdr>

enum TokenKind {
    EOI, // = 0 (end of input)
    BAD,
    INTEGER,
    PLUS,
    ASTERISK,
    SEMICOLON,
    LPAREN,
    RPAREN,
};

global token: int = 0;
global ch: int = 0;
global val: int = 0;

fn getToken()
{
    if (ch >= '0' && ch <= '9') {
        val = 0;
        while (ch >= '0' && ch <= '9') {
            val = val * 10;
            val = val + ch - '0';
            ch = getchar();
        }
        token = INTEGER;
    } else if (ch == '+') {
        ch = getchar();
        token = PLUS;
    } else if (ch == '*') {
        ch = getchar();
        token = ASTERISK;
    } else if (ch == ';') {
        ch = getchar();
        token = SEMICOLON;
    } else if (ch == '(') {
        ch = getchar();
        token = LPAREN;
    } else if (ch == ')') {
        ch = getchar();
        token = RPAREN;
    } else if (ch == EOF) {
        token = EOI;
    } else {
        ch = getchar();
        getToken();
    }
}

global reg: int;

fn parseSum(): int;

fn parseInt(): int
{
    if (token == INTEGER) {
        getToken();
        printf("\tload %d, %%%d\n", val, ++reg);
        return reg;
    } if (token == LPAREN) {
        getToken();
        local sum: int = parseSum();
        if (sum < 0) {
            printf("sum expected\n");
            return -1;
        }
        if (token != RPAREN) {
            printf("expected ')'\n");
            return -1;
        }
        getToken();
        return sum;
    } else {
        return -1;
    }
}

fn parseProd(): int
{
    local prod: int = parseInt();
    if (prod < 0) {
        return -1;
    }
    while (token == ASTERISK) {
        getToken();
        local factor: int = parseInt();
        if (factor < 0) {
            printf("expected factor\n");
            return -1;
        }
        printf("\tmul %%%d, %%%d, %%%d\n", factor, prod, ++reg);
    }
    return reg;
}

fn parseSum(): int
{
    local sum: int = parseProd();
    if (sum < 0) {
        return -1;
    }
    while (token == PLUS) {
        getToken();
        local summand: int = parseProd();
        if (summand < 0) {
            printf("expected summand\n");
            return -1;
        }
        printf("\tadd %%%d, %%%d, %%%d\n", summand, sum, ++reg);
    }
    return reg;
}

fn main()
{
    getToken();
    for (local sum: int = -1; (sum = parseSum()) >= 0; ) {
        if (token == SEMICOLON) {
            getToken();
        }
    }
}