Parser and Code Generator

Initial code, the used ISA, and the code created in this video are provided below.

Initial Code

In this session, the lexer from session 11 is used as a building block. It should recognize at least decimal literals and the tokens +, *, (, and ). You can use your code (eventually extended to support these tokens) or the code provided here:

session13/initial_code/lexer.hdr

enum TokenKind
{
    BAD_TOKEN,
    EOI,
    DECIMAL_LITERAL,
    IDENTIFIER,
    PLUS,
    ASTERISK,
    LPAREN,
    RPAREN,
};

type TokenVal: array[12] of char;

struct Token
{
    kind:   TokenKind;
    pos:    struct Pos {
                row, col: unsigned;
            };
    val:    TokenVal;
};

extern token: Token;

extern fn getToken(): TokenKind;

extern fn tokenKindStr(token: TokenKind): -> char;

session13/initial_code/lexer.abc

@ <stdio.hdr>
@ "lexer.hdr"

// -- internal: only available within this translation unit

global ch: int;
global currentPos: Pos = {1, 0};
global lengthVal: size_t;

fn nextCh(updateVal: bool)
{
    if (updateVal) {
        assert(lengthVal < sizeof(token.val) / sizeof(token.val[0]));
        token.val[lengthVal++] = ch;
        token.val[lengthVal] = 0;
    } else {
        lengthVal = 0;
    }
    ch = getchar();
    if (ch == '\n') {
        ++currentPos.row;
        currentPos.col = 0;
    } else {
        ++currentPos.col;
    }
}

fn isDigit(ch: int): bool
{
    return ch >= '0' && ch <= '9';
}

fn isLetter(ch: int): bool
{
    return ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'A';
}

// -- exported: available for other translation units

global token: Token;

fn getToken(): TokenKind
{
    lengthVal = 0;
    while (ch == 0 || ch == ' ' || ch == '\n' || ch == '\t') {
        nextCh(false);
    }
    token.pos = currentPos;
    if (isDigit(ch)) {
        while (isDigit(ch)) {
            nextCh(true);
        }
        return token.kind = DECIMAL_LITERAL;
    } else if (isLetter(ch)) {
        while (isLetter(ch) || isDigit(ch)) {
            nextCh(true);
        }
        return token.kind = IDENTIFIER;
    } else if (ch == '+') {
        nextCh(true);
        return token.kind = PLUS;
    } else if (ch == '*') {
        nextCh(true);
        return token.kind = ASTERISK;
    } else if (ch == '(') {
        nextCh(true);
        return token.kind = LPAREN;
    } else if (ch == ')') {
        nextCh(true);
        return token.kind = RPAREN;
    } else if (ch == EOF) {
        return token.kind = EOI;
    } else {
        nextCh(true);
        return token.kind = BAD_TOKEN;
    }
}

fn tokenKindStr(token: TokenKind): -> char
{
    switch (token) {
        case BAD_TOKEN: return "BAD_TOKEN";
        case EOI: return "EOI";
        case DECIMAL_LITERAL: return "DECIMAL_LITERAL";
        case IDENTIFIER: return "IDENTIFIER";
        case PLUS: return "PLUS";
        case ASTERISK: return "ASTERISK";
        case LPAREN: return "LPAREN";
        case RPAREN: return "RPAREN";
        default: return "??";
    }
}

session13/initial_code/xtest_lexer.abc

@ <stdio.hdr>
@ "lexer.hdr"

fn main()
{
    while (getToken() != EOI) {
        printf("%u.%u: %s (%s) \n", token.pos.row, token.pos.col,
               tokenKindStr(token.kind), token.val);
    }
}

ISA Used by the Compiler

session13/initial_code/simple.isa

S20_R       (OP u 8) (dest u 4) (imm s 20)
R_R_R       (OP u 8) (z u 4) (y u 4) (x u 4)

0x02 R_R_R
:   halt %x
    ulm_halt(ulm_regVal(x));

0x11 S20_R
#   Load sign extended immediate value into destination register.
:   load imm, %dest
    ulm_setReg(imm, dest);

0x13 R_R_R
#   Adds register \\texttt{%x} to register and stores the result in
#   register \\texttt{%z}.
:   add %x, %y, %z
    ulm_add64(ulm_regVal(x), ulm_regVal(y), z);

0x17 R_R_R
:   mul %x, %y, %z
    ulm_mul64(ulm_regVal(x), ulm_regVal(y), z);

Code Implemented in the Video

session13/final_code/xtest_parser.abc

@ <stdio.hdr>
@ "lexer.hdr"

fn getReg(): int
{
    static reg: int = 1;
    return reg++;
}

// ---

fn parseTerm(): int;
fn parseFactor(): int;

fn parseExpr(): int
{
    local left: int = parseTerm();
    if (left < 0) {
        return -1;
    }
    while (token.kind == PLUS) {
        getToken();
        local right: int = parseTerm();
        if (right < 0) {
            printf("%u.%u expected term\n",
                   token.pos.row, token.pos.col);
            assert(0);
        }
        local dest: int = getReg();
        printf("\tadd %%%d, %%%d, %%%d\n", left, right, dest);
        left = dest;
    }
    return left;
}

fn parseTerm(): int
{
    local left: int = parseFactor();
    if (left < 0) {
        return -1;
    }
    while (token.kind == ASTERISK) {
        getToken();
        local right: int = parseFactor();
        if (right < 0) {
            printf("%u.%u expected term\n",
                   token.pos.row, token.pos.col);
            assert(0);
        }
        local dest: int = getReg();
        printf("\tmul %%%d, %%%d, %%%d\n", left, right, dest);
        left = dest;
    }
    return left;
}

fn parseFactor(): int
{
    if (token.kind == DECIMAL_LITERAL) {
        local reg: int = getReg();
        printf("\tload %s, %%%d\n", token.val, reg);
        getToken();
        return reg;
    } else if (token.kind == LPAREN) {
        getToken();
        local reg: int = parseExpr();
        if (reg < 0) {
            printf("%u.%u expected expression\n",
                   token.pos.row, token.pos.col);
            assert(0);
        }
        if (token.kind != RPAREN) {
            printf("%u.%u expected ')'\n",
                   token.pos.row, token.pos.col);
            assert(0);
        }
        getToken();
        return reg;
    } else {
        return -1;
    }
}

fn main()
{
    getToken();
    local reg: int = parseExpr();
    if (reg < 0) {
        printf("syntax error\n");
        assert(0);
    }
    printf("\thalt %%%d\n", reg);
}

Some Old Version

By mistake this outdated version (parseExpr is called here parseSum etc.) was uploaded before. I keep it online for historical reasons ;-)

session13/final_code_old/xtest_parser.abc

@ <stdio.hdr>
@ "lexer.hdr"

// -------

fn getReg(): int
{
    static reg: int = 1;
    return reg++;
}

// -------

fn parseSum(): int;

fn parseFactor(): int
{
    if (token.kind == DECIMAL_LITERAL) {
        local reg: int = getReg();
        printf("\tload %s, %%%d\n", token.val, reg);
        getToken();
        return reg;
    } else if (token.kind == LPAREN) {
        getToken();
        local reg: int = parseSum();
        if (reg < 0) {
            printf("%u.%u: sum expected\n", token.pos.row, token.pos.col);
            return -1;
        }
        if (token.kind != RPAREN) {
            printf("%u.%u: ')' expected\n", token.pos.row, token.pos.col);
            return -1;
        }
        getToken();
        return reg;
    } else {
        return -1;
    }
}

fn parseProd(): int
{
    local left: int = parseFactor();
    if (left < 0) {
        return -1;
    }
    while (token.kind == ASTERISK) {
        getToken();
        local right: int = parseFactor();
        if (right < 0) {
            printf("%u.%u: factor expected\n", token.pos.row, token.pos.col);
            return -1;
        }
        local dest: int = getReg();
        printf("\tmul %%%d, %%%d, %%%d\n", left, right, dest);
        left = dest;
    }
    return left;
}

fn parseSum(): int
{
    local left: int = parseProd();
    if (left < 0) {
        return -1;
    }
    while (token.kind == PLUS) {
        getToken();
        local right: int = parseProd();
        if (right < 0) {
            printf("%u.%u: product expected\n", token.pos.row, token.pos.col);
            return -1;
        }
        local dest: int = getReg();
        printf("\tadd %%%d, %%%d, %%%d\n", left, right, dest);
        left = dest;
    }
    return left;
}

fn main()
{
    getToken();
    local reg: int = parseSum();
    if (parseSum()) {
        printf("\thalt %%%d\n", reg);
    } else {
        printf("syntax error\n");
    }
}