CPW Part 15 & 16: Conditional Jumps and Simple Logical Expressions

Code with Solution for Quiz 26

You can use the code below for comparison.

#include <assert.h>
#include <inttypes.h>
#include <math.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#include "expr.h"
#include "finalize.h"
#include "memregion.h"
#include "sym.h"

struct Expr
{
    enum ExprKind kind;
    union
    {
        struct
        {
            const struct Expr *left, *right;
        } binary;
        const struct Expr *unary;
        struct
        {
            struct
            {
                uint64_t uint;
            } literal;
            const struct UStr *identifier;
        } primary;
    };
};

// constructors
static struct Expr *
newExpr(void)
{
    struct Expr *expr = allocFromMemRegion(sizeof(*expr));
    return expr;
}

struct Expr *
newUnsignedLiteralExpr(uint64_t uint)
{
    struct Expr *expr = newExpr();
    expr->kind = EK_UNSIGNED_LITERAL;
    expr->primary.literal.uint = uint;
    return expr;
}

struct Expr *
newIdentifierExpr(const struct UStr *identifier)
{
    assert(identifier);
    struct Expr *expr = newExpr();
    expr->kind = EK_IDENTIFIER;
    expr->primary.identifier = identifier;
    return expr;
}

struct Expr *
newUnaryExpr(enum ExprKind kind, const struct Expr *unary)
{
    assert(kind >= EK_UNARY && kind < EK_UNARY_END);
    assert(unary);

    struct Expr *expr = newExpr();
    expr->kind = kind;
    expr->unary = unary;
    return expr;
}

struct Expr *
newBinaryExpr(enum ExprKind kind, const struct Expr *left,
              const struct Expr *right)
{
    assert(kind >= EK_BINARY && kind < EK_BINARY_END);
    assert(left);
    assert(right);

    struct Expr *expr = newExpr();
    expr->kind = kind;
    expr->binary.left = left;
    expr->binary.right = right;
    return expr;
}

// destructor
void
deleteAllExpr(void)
{
    releaseMemRegion();
}

// methods

bool
isLValueExpr(const struct Expr *expr)
{
    return expr->kind == EK_IDENTIFIER;
}

bool
isConstExpr(const struct Expr *expr)
{
    return expr->kind == EK_UNSIGNED_LITERAL;
}

void
loadExprAddr(const struct Expr *expr, GenReg dest)
{
    assert(isLValueExpr(expr));
    if (expr->kind == EK_IDENTIFIER) {
        genLoadLabel(expr->primary.identifier->cstr, dest);
        return;
    }
    fprintf(stderr, "loadExprAddr: kind = %d", expr->kind);
    assert(0);
}

static enum GenOp
makeOp3r(enum ExprKind kind)
{
    switch (kind) {
        case EK_ADD:
            return GEN_ADD_R;
        case EK_SUB:
            return GEN_SUB_R;
        case EK_MUL:
            return GEN_IMUL_R;
        case EK_DIV:
            return GEN_DIV_R;
        case EK_MOD:
            return GEN_MOD_R;
        default:
            fprintf(stderr, "makeOp3r: kind = %d", kind);
            finalizeExit(1);
            return 0; // never reached
    }
}

static enum GenOp
makeOp3i(enum ExprKind kind)
{
    switch (kind) {
        case EK_ADD:
            return GEN_ADD_I;
        case EK_SUB:
            return GEN_SUB_I;
        case EK_MUL:
            return GEN_IMUL_I;
        case EK_DIV:
            return GEN_DIV_I;
        case EK_MOD:
            return GEN_MOD_I;
        default:
            fprintf(stderr, "makeOp3i: kind = %d", kind);
            finalizeExit(1);
            return 0; // never reached
    }
}

void
loadExpr(const struct Expr *expr, GenReg dest)
{
    assert(expr);
    assert(expr->kind >= EK_BINARY && expr->kind < EK_PRIMARY_END);

    if (expr->kind >= EK_BINARY && expr->kind < EK_BINARY_END) {
        const struct Expr *left = expr->binary.left;
        const struct Expr *right = expr->binary.right;
        switch (expr->kind) {
            case EK_ADD:
            case EK_SUB:
            case EK_MUL:
            case EK_DIV:
            case EK_MOD:
                {
                    loadExpr(left, dest);
                    if (isConstExpr(right)) {
                        uint64_t rVal = right->primary.literal.uint;
                        genOp3i(makeOp3i(expr->kind), rVal, dest, dest);
                    } else {
                        GenReg tmp = genGetReg();
                        loadExpr(right, tmp);
                        genOp3r(makeOp3r(expr->kind), tmp, dest, dest);
                        genUngetReg(tmp);
                    }
                }
                return;
            case EK_ASSIGN:
                {
                    assert(isLValueExpr(left));
                    loadExpr(right, dest);
                    GenReg addr = genGetReg();
                    genLoadLabel(left->primary.identifier->cstr, addr);
                    genStore(dest, addr);
                    genUngetReg(addr);
                }
                return;
            default:
                ;
        }
    } else if (expr->kind >= EK_UNARY && expr->kind < EK_UNARY_END) {
        if (expr->kind == EK_UNARY_MINUS) {
            loadExpr(expr->unary, dest);
            genOp2r(GEN_UNARYMINUS_R, dest, dest);
            return;
        } else if (expr->kind == EK_UNARY_PLUS) {
            loadExpr(expr->unary, dest);
            return;
        }
    } else if (expr->kind == EK_IDENTIFIER) {
        genLoadLabel(expr->primary.identifier->cstr, dest);
        genFetch(dest, dest);
        return;
    } else if (expr->kind == EK_UNSIGNED_LITERAL) {
        genLoadUInt(expr->primary.literal.uint, dest);
        return;
    }
    fprintf(stderr, "loadExpr: internal error. kind = %d\n", expr->kind);
    finalizeExit(1);
}

static void
printIndent(size_t indent, FILE *out)
{
    for (size_t i = 0; i < indent * 4; ++i) {
        fprintf(out, " ");
    }
}

static void
printExprNode(const struct Expr *expr, size_t indent, FILE *out)
{
    assert(expr);

    printIndent(indent, out);
    if (expr->kind >= EK_BINARY && expr->kind < EK_UNARY_END) {
        switch (expr->kind) {
            case EK_ADD:
                fprintf(out, "[ +\n");
                return;
            case EK_ASSIGN:
                fprintf(out, "[ {=}\n");
                return;
            case EK_SUB:
                fprintf(out, "[ -\n");
                return;
            case EK_MUL:
                fprintf(out, "[ *\n");
                return;
            case EK_DIV:
                fprintf(out, "[ /\n");
                return;
            case EK_MOD:
                fprintf(out, "[ $\\bmod$\n");
                return;
            case EK_UNARY_MINUS:
                fprintf(out, "[ -\n");
                return;
            case EK_UNARY_PLUS:
                fprintf(out, "[ +\n");
                return;
            default:;
        }
    } else if (expr->kind == EK_UNSIGNED_LITERAL) {
        fprintf(out, "[ %" PRIu64 "]\n", expr->primary.literal.uint);
        return;
    } else if (expr->kind == EK_IDENTIFIER) {
        fprintf(out, "[ %s ]\n", expr->primary.identifier->cstr);
        return;
    }
    fprintf(stderr, "printExprNode: internal error. kind = %d\n", expr->kind);
    finalizeExit(1);
}

static void
printExprTree_(const struct Expr *expr, size_t indent, FILE *out)
{
    assert(expr);
    assert(expr->kind >= EK_BINARY && expr->kind < EK_PRIMARY_END);

    if (expr->kind >= EK_BINARY && expr->kind < EK_BINARY_END) {
        printExprNode(expr, indent, out);
        printExprTree_(expr->binary.left, indent + 1, out);
        printExprTree_(expr->binary.right, indent + 1, out);
        printIndent(indent, out);
        fprintf(out, "]\n");
    } else if (expr->kind >= EK_UNARY && expr->kind < EK_UNARY_END) {
        printExprNode(expr, indent, out);
        printExprTree_(expr->unary, indent + 1, out);
        printIndent(indent, out);
        fprintf(out, "]\n");
    } else {
        printExprNode(expr, indent, out);
    }
}

void
printExprTree(const struct Expr *expr, FILE *out)
{
    fprintf(out, "\\begin{center}\n");
    fprintf(out, "\\begin{forest}\n");
    fprintf(out, "for tree={draw,circle,calign=fixed edge angles}\n");
    printExprTree_(expr, 0, out);
    fprintf(out, "\\end{forest}\n");
    fprintf(out, "\\end{center}\n");
}

//------------------------------------------------------------------------------
// stuff for constant folding

static const struct Expr *
constFoldBinary(const struct Expr *expr)
{
    assert(expr);
    assert(expr->kind >= EK_BINARY && expr->kind < EK_BINARY_END);

    // const fold child nodes
    const struct Expr *left = constFoldExpr(expr->binary.left);
    const struct Expr *right = constFoldExpr(expr->binary.right);

    if (isConstExpr(left) && !isConstExpr(right)) {
        // swap operands if possible 
        switch (expr->kind) {
            case EK_ADD:
            case EK_MUL:
                {
                    expr = newBinaryExpr(expr->kind, right, left);
                    left = expr->binary.left;
                    right = expr->binary.right;
                }
                break;
            default:
                ;
        }
    }
    if (isConstExpr(right)) {
        // if right child node is constant, handle some special cases
        if (expr->kind == EK_ADD && right->primary.literal.uint == 0) {
            return left;
        }
        if (expr->kind == EK_MUL && right->primary.literal.uint == 1) {
            return left;
        }
        if (expr->kind == EK_MUL && right->primary.literal.uint == 0) {
            return newUnsignedLiteralExpr(0);
        }
        if (expr->kind == EK_DIV && right->primary.literal.uint == 1) {
            return left;
        }
        if (expr->kind == EK_MOD && right->primary.literal.uint == 1) {
            return newUnsignedLiteralExpr(0);
        }
    }
    if (!isConstExpr(left) || !isConstExpr(right)) {
        // nothing more can be done
        if (left == expr->binary.left && right == expr->binary.right) {
            return expr;
        }
        return newBinaryExpr(expr->kind, left, right);
    }
    // handle cases where node can be completely folded
    // NOTE: here we exploit that in our case constants are always unsigned
    uint64_t leftVal = left->primary.literal.uint;
    uint64_t rightVal = right->primary.literal.uint;
    switch (expr->kind) {
        case EK_ADD:
            return newUnsignedLiteralExpr(leftVal + rightVal);
        case EK_SUB:
            return newUnsignedLiteralExpr(leftVal - rightVal);
        case EK_MUL:
            return newUnsignedLiteralExpr(leftVal * rightVal);
        case EK_DIV:
            return newUnsignedLiteralExpr(leftVal / rightVal);
        case EK_MOD:
            return newUnsignedLiteralExpr(leftVal % rightVal);
        case EK_ASSIGN:
            assert(0); // internal error
            return 0; // prevent warning (never reached in debug mode)
        default:
            // special case not handled
            fprintf(stderr, "warning: constFoldBinary currently does not "
                    " handle expression kind %d\n", expr->kind);
            return newBinaryExpr(expr->kind, left, right);
    }
}

static const struct Expr *
constFoldUnary(const struct Expr *expr)
{
    assert(expr);
    assert(expr->kind >= EK_UNARY && expr->kind < EK_UNARY_END);

    // const fold child node
    const struct Expr *unary = constFoldExpr(expr->unary);

    // handle all cases where folding is not possible
    if (!isConstExpr(unary)) {
        if (unary == expr->unary) {
            return expr;
        }
        return newUnaryExpr(expr->kind, unary);
    }
    // otherwise return folded expression node
    // NOTE: here we exploit that in our case constants are always unsigned
    switch (expr->kind) {
        case EK_UNARY_PLUS:
            return unary;
        case EK_UNARY_MINUS:
            return newUnsignedLiteralExpr(-unary->primary.literal.uint);
        default:
            // special case not handled
            fprintf(stderr, "warning: constFoldUnary currently does not "
                    " handle expression kind %d\n", expr->kind);
            return newUnaryExpr(expr->kind, unary);
    }
}

const struct Expr *
constFoldExpr(const struct Expr *expr)
{
    assert(expr);
    assert(expr->kind >= EK_BINARY && expr->kind < EK_PRIMARY_END);

    if (expr->kind >= EK_BINARY && expr->kind < EK_BINARY_END) {
        return constFoldBinary(expr);
    } else if (expr->kind >= EK_UNARY && expr->kind < EK_UNARY_END) {
        return constFoldUnary(expr);
    }
    return expr;
}
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#ifndef ABC_EXPR_H
#define ABC_EXPR_H

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#include "gen.h"
#include "ustr.h"

enum ExprKind
{
    EK_BINARY,
    // binary expression
    EK_ADD = EK_BINARY,
    EK_ASSIGN,
    EK_SUB,
    EK_MUL,
    EK_DIV,
    EK_MOD,
    EK_BINARY_END,

    EK_UNARY = EK_BINARY_END,
    // unary expression
    EK_UNARY_MINUS = EK_UNARY,
    EK_UNARY_PLUS,
    EK_UNARY_END,

    EK_PRIMARY = EK_UNARY_END,
    // primary expression
    EK_UNSIGNED_LITERAL = EK_PRIMARY,
    EK_IDENTIFIER,
    EK_PRIMARY_END,
};

struct Expr;

// constructors
struct Expr *newUnsignedLiteralExpr(uint64_t uint);
struct Expr *newIdentifierExpr(const struct UStr *identifier);
struct Expr *newUnaryExpr(enum ExprKind kind, const struct Expr *unary);
struct Expr *newBinaryExpr(enum ExprKind kind, const struct Expr *left,
                           const struct Expr *right);

// destructor
void deleteAllExpr(void);

// methods
bool isLValueExpr(const struct Expr *expr);
bool isConstExpr(const struct Expr *expr);
void loadExprAddr(const struct Expr *expr, GenReg dest);
void loadExpr(const struct Expr *expr, GenReg dest);
void printExprTree(const struct Expr *expr, FILE *out);
const struct Expr *constFoldExpr(const struct Expr *expr);

#endif // ABC_EXPR_H
#include <stdlib.h>
#include <stdio.h>

#include "finalize.h"

struct Node
{
    struct Node *next;
    void (*callback)(void);
};

static struct Node *list;

void
finalizeRegister(void (*callback)(void))
{
    struct Node *n = malloc(sizeof(*n));
    if (!n) {
        fprintf(stderr, "finalizeRegister: out of memory\n");
        finalizeExit(1);
    }

    // initialize list node and prepend to list
    n->next = list;
    n->callback = callback;
    list = n;
}

void
finalize(void)
{
    for (struct Node *n = list, *next; n; n = next) {
        // keep copy of pointer to next node
        next = n->next;

        // callback
        n->callback();

        // free node itself
        free(n);
    }
}

void
finalizeExit(int code)
{
    finalize();
    exit(code);
}
#ifndef ABC_FINALIZE_H
#define ABC_FINALIZE_H

void finalizeRegister(void (*callback)(void));
void finalize(void);
void finalizeExit(int code);

#endif // ABC_FINALIZE_H
#include <assert.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#include "finalize.h"
#include "gen.h"
#include "ustr.h"

enum {
    ZERO = 0,
    FP = 1,
    SP = 2,
    RET_ADDR = 3,
    FUNC_ADDR = 4,
};


// output needs to be set before using the rest of the interface
static FILE *out;

void
genSetOutput(FILE *out_)
{
    out = out_;
}

// acquire / release register
static bool usedReg[256];

static void
check(void)
{
    GenReg reg = 0;
    do {
        if (usedReg[reg]) {
            fprintf(stderr, "warning: register %" PRIu8 " not released\n", reg);
        }
        ++reg;
    } while (reg != 255);
}

GenReg
genGetReg(void)
{
    static bool first = true;
    if (first) {
        first = false;
        finalizeRegister(check);
    }
    GenReg reg = 5;
    while (usedReg[++reg]) {
    }
    if (reg < 6) {
        fprintf(stderr, "genGetReg: out of registers\n");
        finalizeExit(1);
    }
    usedReg[reg] = true;
    return reg;
}

void
genUngetReg(GenReg reg)
{
    assert(usedReg[reg]);
    usedReg[reg] = false;
}

// internal print stuff
static void
printInstr(const char *instr)
{
    assert(out);
    fprintf(out, "\t%s ", instr);
}

static void
printUInt_(uint64_t val, const char *prefix, const char *suffix)
{
    assert(out);
    if (prefix) {
        fprintf(out, "%s", prefix);
    }
    fprintf(out, "0x%" PRIx64, val);
    if (suffix) {
        fprintf(out, "%s", suffix);
    }
}

static void
printUInt(uint64_t val)
{
    printUInt_(val, 0, 0);
}

static void
printReg(GenReg reg)
{
    assert(out);
    fprintf(out, "%%%" PRIu8, reg);
}

static void
printMem(GenReg reg)
{
    assert(out);
    fprintf(out, "(%%%" PRIu8 ")", reg);
}

static void
printDisplMem(int64_t displ, GenReg reg)
{
    assert(out);
    fprintf(out, "%" PRId64 "(%%%" PRIu8 ")", displ, reg);
}

static void
printLabel_(const char *label, const char *prefix, const char *suffix)
{
    assert(out);
    if (prefix) {
        fprintf(out, "%s", prefix);
    }
    fprintf(out, "%s", label);
    if (suffix) {
        fprintf(out, "%s", suffix);
    }
}

static void
printLabelDef(const char *label)
{
    assert(out);
    fprintf(out, "%s:\n", label);
}

static void
printComma(void)
{
    assert(out);
    fprintf(out, ", ");
}

static void
printNl(void)
{
    assert(out);
    fprintf(out, "\n");
}

// header / footer
void
genHeader(void)
{
    printInstr("ldzwq");
    printUInt(0);
    printComma();
    printReg(SP);
    printNl();
}

void
genFooter(void)
{
    printInstr("halt");
    printReg(0);
    printNl();
}

// set active segment
void
genText(void)
{
    printInstr(".text");
    printNl();
}

void
genData(void)
{
    printInstr(".data");
    printNl();
}

void
genBSS(void)
{
    printInstr(".bss");
    printNl();
}

// generate data
void
genLabeledUInt(const char *label, uint64_t val)
{
    printInstr(".align");
    printUInt(8);
    printNl();
    printLabelDef(label);
    printInstr(".quad");
    printUInt(val);
    printNl();
}

// load literal into register
void
genLoadUInt(uint64_t val, GenReg reg)
{
    if (val <= 0xFFFFu) {
        printInstr("ldzwq");
        printUInt(val);
        printComma();
        printReg(reg);
        printNl();
    } else if (val <= 0xFFFFFFFFu) {
        printInstr("ldzwq");
        printUInt_(val, "@w1(", ")");
        printComma();
        printReg(reg);
        printNl();
        printInstr("shldwq");
        printUInt_(val, "@w0(", ")");
        printComma();
        printReg(reg);
        printNl();
    } else {
        printInstr("ldzwq");
        printUInt_(val, "@w3(", ")");
        printComma();
        printReg(reg);
        printNl();
        printInstr("shldwq");
        printUInt_(val, "@w2(", ")");
        printComma();
        printReg(reg);
        printNl();
        printInstr("shldwq");
        printUInt_(val, "@w1(", ")");
        printComma();
        printReg(reg);
        printNl();
        printInstr("shldwq");
        printUInt_(val, "@w0(", ")");
        printComma();
        printReg(reg);
        printNl();
    }
}

void
genLoadLabel(const char *label, GenReg reg)
{
    printInstr("ldzwq");
    printLabel_(label, "@w3(", ")");
    printComma();
    printReg(reg);
    printNl();
    printInstr("shldwq");
    printLabel_(label, "@w2(", ")");
    printComma();
    printReg(reg);
    printNl();
    printInstr("shldwq");
    printLabel_(label, "@w1(", ")");
    printComma();
    printReg(reg);
    printNl();
    printInstr("shldwq");
    printLabel_(label, "@w0(", ")");
    printComma();
    printReg(reg);
    printNl();
}

// load / fetch quad word (8 bytes)
void
genFetch(GenReg addr, GenReg dest)
{
    printInstr("movq");
    printMem(addr);
    printComma();
    printReg(dest);
    printNl();
}

void
genFetchDispl(int64_t displ, GenReg addr, GenReg dest)
{
    if (displ >= -128 && displ < 128) {
        printInstr("movq");
        printDisplMem(displ, addr);
        printComma();
        printReg(dest);
        printNl();
    } else {
        // acquire new register and use it to store displ + addr
        GenReg displAddr = genGetReg();
        genLoadUInt(displ, displAddr); // ok, we use two's complenent for signed
        genOp3r(GEN_ADD_R, displAddr, addr, displAddr);
        genFetch(displAddr, dest);
        genUngetReg(displAddr);
    }
}


void
genStore(GenReg src, GenReg addr)
{
    printInstr("movq");
    printReg(src);
    printComma();
    printMem(addr);
    printNl();
}

void
genStoreDispl(GenReg src, int64_t displ, GenReg addr)
{
    if (displ >= -128 && displ < 128) {
        printInstr("movq");
        printReg(src);
        printComma();
        printDisplMem(displ, addr);
        printNl();
    } else {
        // acquire new register and use it to store displ + addr
        GenReg displAddr = genGetReg();
        genLoadUInt(displ, displAddr); // ok, we use two's complenent for signed
        genOp3r(GEN_ADD_R, displAddr, addr, displAddr);
        genStore(src, displAddr);
        genUngetReg(displAddr);
    }
}

// 2 address instructions
void
genOp2r(enum GenOp op, GenReg reg0, GenReg reg1)
{
    assert(op >= GEN_OP2R_BEGIN && op < GEN_OP2R_END);

    if (op == GEN_UNARYMINUS_R) {
        printInstr("subq");
        printReg(reg0);
        printComma();
        printReg(ZERO);
        printComma();
        printReg(reg1);
        printNl();
        return;
    } else {
        assert(0);
    }
}

// 3 address instructions
void
genOp3r(enum GenOp op, GenReg reg0, GenReg reg1, GenReg reg2)
{
    assert(op >= GEN_OP3R_BEGIN && op < GEN_OP3R_END);

    GenReg reg2_ = reg2;

    if (op == GEN_ADD_R) {
        printInstr("addq");
    } else if (op == GEN_SUB_R) {
        printInstr("subq");
    } else if (op == GEN_IMUL_R) {
        printInstr("imulq");
    } else if (op == GEN_DIV_R || op == GEN_MOD_R) {
        printInstr("divq");
        reg2 = 4;
    } else {
        assert(0);
    }
    printReg(reg0);
    printComma();
    printReg(reg1);
    printComma();
    printReg(reg2);
    printNl();

    if (op == GEN_DIV_R || op == GEN_MOD_R) {
        printInstr("movq");
        printReg(op == GEN_DIV_R ? 4 : 5);
        printComma();
        printReg(reg2_);
        printNl();
    }
}

void
genOp3i(enum GenOp op, uint64_t val, GenReg reg1, GenReg reg2)
{
    assert(op >= GEN_OP3I_BEGIN && op < GEN_OP3I_END);

    if (val > 255) {
        GenReg tmp = genGetReg();
        genLoadUInt(val, tmp);
        genOp3r(op - GEN_OP3I_BEGIN + GEN_OP3R_BEGIN, tmp, reg1, reg2);
        genUngetReg(tmp);
        return;
    }

    GenReg reg2_ = reg2;

    if (op == GEN_ADD_I) {
        printInstr("addq");
    } else if (op == GEN_SUB_I) {
        printInstr("subq");
    } else if (op == GEN_IMUL_I) {
        printInstr("imulq");
    } else if (op == GEN_DIV_I || op == GEN_MOD_I) {
        printInstr("divq");
        reg2 = 4;
    } else {
        assert(0);
    }
    printUInt(val);
    printComma();
    printReg(reg1);
    printComma();
    printReg(reg2);
    printNl();

    if (op == GEN_DIV_I || op == GEN_MOD_I) {
        printInstr("movq");
        printReg(op == GEN_DIV_I ? 4 : 5);
        printComma();
        printReg(reg2_);
        printNl();
    }
}

// IO hack

void
genOutHack(GenReg src)
{
    printInstr("subq");
    printUInt(8 * (3 + 1));
    printComma();
    printReg(SP);
    printComma();
    printReg(SP);
    printNl();

    // store first function parameter
    genStoreDispl(src, 24, SP);

    // call function
    genLoadLabel("print_uint64", FUNC_ADDR);
    printInstr("call");
    printReg(FUNC_ADDR);
    printComma();
    printReg(RET_ADDR);
    printNl();

    printInstr("addq");
    printUInt(8 * (3 + 1));
    printComma();
    printReg(SP);
    printComma();
    printReg(SP);
    printNl();

    // print an additional newline
    printInstr("putc");
    printUInt('\n');
    printNl();
}

void
genInHack(GenReg dest)
{
    printInstr("subq");
    printUInt(8 * (3 + 0));
    printComma();
    printReg(SP);
    printComma();
    printReg(SP);
    printNl();

    genLoadLabel("get_uint64", FUNC_ADDR);

    printInstr("call");
    printReg(FUNC_ADDR);
    printComma();
    printReg(RET_ADDR);
    printNl();

    // fetch return value
    genFetchDispl(16, SP, dest);

    printInstr("addq");
    printUInt(8 * (3 + 0));
    printComma();
    printReg(SP);
    printComma();
    printReg(SP);
    printNl();
}
#ifndef ABC_GEN_H
#define ABC_GEN_H

#include <stdint.h>
#include <stdio.h>

// register type
typedef uint8_t GenReg;

// output needs to be set before using the rest of the interface
void genSetOutput(FILE *out);

// acquire / release register
GenReg genGetReg(void);
void genUngetReg(GenReg reg);

// header / footer
void genHeader(void);
void genFooter(void);

// set active segment
void genText(void);
void genData(void);
void genBSS(void);

// generate data
void genLabeledUInt(const char *label, uint64_t val);

// load literal into register
void genLoadUInt(uint64_t val, GenReg reg);
void genLoadLabel(const char *label, GenReg reg);

// fetch / store quad word (8 bytes)
void genFetch(GenReg addr, GenReg dest);
void genFetchDispl(int64_t displ, GenReg addr, GenReg dest);
void genStore(GenReg src, GenReg addr);
void genStoreDispl(GenReg src, int64_t displ, GenReg addr);

// supported instruction
enum GenOp
{
    GEN_OP2R_BEGIN,
    GEN_UNARYMINUS_R,
    GEN_OP2R_END,

    GEN_OP3R_BEGIN = GEN_OP2R_END,
    GEN_ADD_R = GEN_OP3R_BEGIN,   // addition
    GEN_SUB_R,                    // subtraction
    GEN_IMUL_R,                   // multiplication
    GEN_DIV_R,                    // division
    GEN_MOD_R,                    // modulo
    GEN_OP3R_END,

    GEN_OP3I_BEGIN = GEN_OP3R_END,
    GEN_ADD_I = GEN_OP3I_BEGIN,
    GEN_SUB_I,
    GEN_IMUL_I,
    GEN_DIV_I,
    GEN_MOD_I,
    GEN_OP3I_END,
};

// 2 address instructions
void genOp2r(enum GenOp op, GenReg reg0, GenReg reg1);

// 3 address instructions
void genOp3r(enum GenOp op, GenReg reg0, GenReg reg1, GenReg reg2);
void genOp3i(enum GenOp op, uint64_t val, GenReg reg1, GenReg reg2);

// IO hack
void genOutHack(GenReg src);
void genInHack(GenReg dest);

#endif // ABC_GEN_H
#include <stdbool.h>
#include <stdio.h>

#include "finalize.h"
#include "lexer.h"

struct Token token;

static void
cleanup(void)
{
    releaseStr(&token.val);
}

//------------------------------------------------------------------------------

// position of current character ch
static struct TokenPos curr = {
    1,
    0,
};

static int ch;

static int
nextCh(void)
{
    ++curr.col;
    ch = getchar();
    if (ch == '\n') {
        ++curr.line;
        curr.col = 0;
    }
    return ch;
}

static bool
isWhiteSpace(int ch)
{
    return ch == ' ' || ch == '\t';
}

static bool
isDecDigit(int ch)
{
    return ch >= '0' && ch <= '9';
}

static bool
isOctDigit(int ch)
{
    return ch >= '0' && ch <= '7';
}

static bool
isHexDigit(int ch)
{
    return isDecDigit(ch) || (ch >= 'a' && ch <= 'f') ||
           (ch >= 'A' && ch <= 'F');
}

static bool
isLetter(int ch)
{
    return ((ch >= 'a') && (ch <= 'z')) || ((ch >= 'A' && ch <= 'Z')) ||
           ch == '_';
}

enum TokenKind
getToken(void)
{
    static bool first = true;
    if (first) {
        first = false;
        finalizeRegister(cleanup);
    }

    // init ch, skip white spaces and newlines
    while (ch == 0 || isWhiteSpace(ch) || ch == '\n') {
        nextCh();
    }

    token.pos.line = curr.line;
    token.pos.col = curr.col;

    clearStr(&token.val);

    if (ch == EOF) {
        return token.kind = EOI;
    } else if (isDecDigit(ch)) {
        // parse literal
        if (ch == '0') {
            appendCharToStr(&token.val, ch);
            nextCh();
            if (ch == 'x') {
                appendCharToStr(&token.val, ch);
                nextCh();
                if (isHexDigit(ch)) {
                    while (isHexDigit(ch)) {
                        appendCharToStr(&token.val, ch);
                        nextCh();
                    }
                    return token.kind = HEX_LITERAL;
                }
                return token.kind = BAD_TOKEN;
            }
            while (isOctDigit(ch)) {
                appendCharToStr(&token.val, ch);
                nextCh();
            }
            return token.kind = OCT_LITERAL;
        } else if (isDecDigit(ch)) {
            while (isDecDigit(ch)) {
                appendCharToStr(&token.val, ch);
                nextCh();
            }
            return token.kind = DEC_LITERAL;
        }
    } else if (ch == '&') {
        appendCharToStr(&token.val, ch);
        nextCh();
        if (ch == '&') {
            appendCharToStr(&token.val, ch);
            nextCh();
            return token.kind = AMPERSAND2;
        }
        return token.kind = AMPERSAND;
    } else if (ch == '*') {
        appendCharToStr(&token.val, ch);
        nextCh();
        return token.kind = ASTERISK;
    } else if (ch == '^') {
        appendCharToStr(&token.val, ch);
        nextCh();
        return token.kind = CARET;
    } else if (ch == '$') {
        appendCharToStr(&token.val, ch);
        nextCh();
        return token.kind = DOLLAR;
    } else if (ch == '=') {
        appendCharToStr(&token.val, ch);
        nextCh();
        if (ch == '=') {
            appendCharToStr(&token.val, ch);
            nextCh();
            return token.kind = EQUAL2;
        }
        return token.kind = EQUAL;
    } else if (ch == '!') {
        appendCharToStr(&token.val, ch);
        nextCh();
        if (ch == '=') {
            appendCharToStr(&token.val, ch);
            nextCh();
            return token.kind = NOT_EQUAL;
        }
        return token.kind = NOT;
    } else if (ch == '>') {
        appendCharToStr(&token.val, ch);
        nextCh();
        if (ch == '=') {
            appendCharToStr(&token.val, ch);
            nextCh();
            return token.kind = GREATER_EQUAL;
        }
        return token.kind = GREATER;
    } else if (ch == '<') {
        appendCharToStr(&token.val, ch);
        nextCh();
        if (ch == '=') {
            appendCharToStr(&token.val, ch);
            nextCh();
            return token.kind = LESS_EQUAL;
        }
        return token.kind = LESS;
    } else if (ch == '(') {
        appendCharToStr(&token.val, ch);
        nextCh();
        return token.kind = LPAREN;
    } else if (ch == '-') {
        appendCharToStr(&token.val, ch);
        nextCh();
        return token.kind = MINUS;
    } else if (ch == '%') {
        appendCharToStr(&token.val, ch);
        nextCh();
        return token.kind = PERCENT;
    } else if (ch == '+') {
        appendCharToStr(&token.val, ch);
        nextCh();
        return token.kind = PLUS;
    } else if (ch == ')') {
        appendCharToStr(&token.val, ch);
        nextCh();
        return token.kind = RPAREN;
    } else if (ch == ';') {
        appendCharToStr(&token.val, ch);
        nextCh();
        return token.kind = SEMICOLON;
    } else if (ch == '/') {
        appendCharToStr(&token.val, ch);
        nextCh();
        return token.kind = SLASH;
    } else if (ch == '~') {
        appendCharToStr(&token.val, ch);
        nextCh();
        return token.kind = TILDE;
    } else if (ch == '|') {
        appendCharToStr(&token.val, ch);
        nextCh();
        if (ch == '|') {
            appendCharToStr(&token.val, ch);
            nextCh();
            return token.kind = VBAR2;
        }
        return token.kind = VBAR;
    } else if (isLetter(ch)) {
        do {
            appendCharToStr(&token.val, ch);
            nextCh();
        } while (isLetter(ch) || isDecDigit(ch));
        return token.kind = IDENTIFIER;
    }

    nextCh();
    return token.kind = BAD_TOKEN;
}
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#ifndef ABC_LEXER_H
#define ABC_LEXER_H

#include <stddef.h>

#include "str.h"
#include "tokenkind.h"

enum TokenKind getToken(void);

struct Token
{
    enum TokenKind kind;
    struct TokenPos
    {
        size_t line, col;
    } pos;
    struct Str val;
};

extern struct Token token;

#endif // ABC_LEXER_H
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
CPPFLAGS += -Wall -Wcast-qual
LDFLAGS += -lm
#
# patch: If user has not defined CC and default value does not exist use gcc
#
ifeq ($(origin CC),default)
    cc_check := $(shell $(CC) -v > /dev/null 2>&1 && echo "sane")
    ifneq ($(strip $(cc_check)),sane)
        CC := gcc
    endif
endif

#
# List of files that need to be generated before compilation and rules to
# generate them
#

generated_files := gen_tokenkind.h gen_strtokenkind.c 

gen_tokenkind.h gen_strtokenkind.c : tokenkind.txt | xgen_tokenkind
        ./xgen_tokenkind $^ gen_tokenkind.h gen_strtokenkind.c
#
# Define list of source files, object files, targets, etc
#

# all source files
src :=\
    $(filter-out gen_%,\
        $(wildcard *.c))

# all object files
obj :=\
    $(patsubst %.c,%.o,\
        $(src))

# all targets (test programs)
target :=\
    $(filter xtest%,\
        $(patsubst %.c,%,\
            $(src)))

# all generators for source files
generator :=\
    $(filter xgen%,\
        $(patsubst %.c,%,\
            $(src)))

# objects that are required by the targets
lib.o :=\
    $(filter-out xtest% xgen%,\
        $(obj))

# dependency file that will be generated by compiler
deps :=\
    $(patsubst %,%.d,\
        $(src))

# dependency file leftovers of gone source files
obsolete.deps:=\
    $(filter-out $(deps),\
        $(wildcard *.c.d))


#
# Build rules
#
.PHONY: all
.DEFAULT_GOAL := all
all: $(target) $(obj) $(generator)

# rule for removing obsolete dependency files
.PHONY: $(obsolete.deps)
$(obsolete.deps) :
        $(RM) $(obsolete.deps)

# delete implicit rule for building an executable directly from its source file
% : %.c

# rule for source file generators
xgen% : xgen%.c
        $(CC) -o $@ $^ $(LDFLAGS)

# our rule: to build target link its object file against library object files
%: %.o $(lib.o) | $(obsolete.deps)
        $(CC) -o $@ $^ $(LDFLAGS)

# our rule to build objects: also generate a dependency file
%.o: %.c | $(obsolete.deps) $(generated_files)
        $(CC) -c $(CPPFLAGS) $(CFLAGS) -MT $@ -MMD -MP -MF $<.d $<

.PHONY: clean
clean:
        $(RM) $(target) $(generator) $(obj) $(deps) $(obsolete.deps)
        $(RM) $(generated_files)

#
# Include dependencies (if already generated)
#
-include $(deps)
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
#include <stdalign.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#include "finalize.h"
#include "memregion.h"

enum {
    EXTRA_CAPACITY = 64,
};

union BlockHeader {
    struct Block
    {
        struct Block *next;
        char *free, *end;
    } block;
    max_align_t align;
};

static struct Block head, *tail = &head, *released;

static void
cleanup(void)
{
    for (struct Block *b = head.next, *next; b; b = next) {
        next = b->next;
        free(b);
    }
}

static void *
allocBlock(size_t capacity)
{
    static bool first = true;
    if (first) {
        first = false;
        finalizeRegister(cleanup);
    }

    size_t size = sizeof(union BlockHeader) + capacity;
    struct Block *b = malloc(size);
    if (!b) {
        fprintf(stderr, "allocBlock: out of memory\n");
        finalizeExit(1);
    }
    b->end = (char *)b + size;
    return b;
}

static size_t
roundUp(size_t a, size_t b)
{
    // will here be optimized to: (a + b - 1) & ~(b - 1);
    return (a + b - 1) / b * b;
}

void *
allocFromMemRegion(size_t numBytes)
{
    numBytes = roundUp(numBytes, alignof(max_align_t));

    while (tail->free + numBytes > tail->end) {
        if ((tail->next = released) != 0) {
            released = released->next;
        } else {
            tail->next = allocBlock(numBytes + EXTRA_CAPACITY);
        }
        tail = tail->next;
        tail->free = (char *)tail + sizeof(union BlockHeader);
        tail->next = 0;
    }
    tail->free += numBytes;
    return tail->free - numBytes;
}

void
releaseMemRegion(void)
{
    tail->next = released;
    released = head.next;
    head.next = 0;
    tail = &head;
}

void
printInfoMemRegion(void)
{
    printf("MemRegion:\n");
    printf("used blocks:\n");
    for (const struct Block *b = head.next; b; b = b->next) {
        size_t cap = b->end - (const char *)b - sizeof(union BlockHeader);
        size_t avail = b->end - b->free;
        printf("  block at address %p, cap = %zu, avail = %zu\n",
               (const void *)b, cap, avail);
    }
    printf("released blocks:\n");
    for (const struct Block *b = released; b; b = b->next) {
        size_t cap = b->end - (const char *)b - sizeof(union BlockHeader);
        size_t avail = b->end - b->free;
        printf("  block at address %p, cap = %zu, avail = %zu\n",
               (const void *)b, cap, avail);
    }
    printf("\n");
}
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
#ifndef ABC_MEMREGION_H
#define ABC_MEMREGION_H

#include <stddef.h>

void *allocFromMemRegion(size_t numBytes);
void releaseMemRegion(void);
void printInfoMemRegion(void);

#endif // ABC_MEMREGION_H
#include <assert.h>
#include <inttypes.h>
#include <math.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#include "expr.h"
#include "finalize.h"
#include "gen.h"
#include "lexer.h"
#include "parser.h"
#include "sym.h"
#include "tokenkind.h"

// set output for code generation

static FILE *out;

void
setParserOut(FILE *out_)
{
    out = out_;
    genSetOutput(out);
}

// for log support

static FILE *logOut;

void
setParserLog(FILE *logOut_)
{
    logOut = logOut_;
}

// error handling
void
expectedError(const char *expectedStr)
{
    fprintf(stderr, "%zu.%zu: error expected '%s' got '%s'\n", token.pos.line,
            token.pos.col, expectedStr, strTokenKind(token.kind));
    finalizeExit(1);
}

void
errorAtPos(struct TokenPos pos, const char *msg)
{
    fprintf(stderr, "%zu.%zu: %s\n", pos.line, pos.col, msg);
    finalizeExit(1);
}

void
expected(enum TokenKind tokenKind)
{
    if (tokenKind != token.kind) {
        expectedError(strTokenKind(tokenKind));
    }
}

// parse functions
void parseExprStatement(void);
const struct Expr *parseAssignmentExpr(void);
const struct Expr *parseExpr(void);
const struct Expr *parseTerm(void);
const struct Expr *parseUnaryExpr(void);
const struct Expr *parseFactor(void);

void
parse(void)
{
    while (token.kind != EOI) {
        if (token.kind == DOLLAR) {
            getToken();
            if (token.kind == GREATER) {
                getToken();
                // read unsigned integer
                GenReg dest = genGetReg(), val = genGetReg();
                struct TokenPos pos = token.pos;
                const struct Expr *expr = parseExpr();
                if (!isLValueExpr(expr)) {
                    errorAtPos(pos, "L-value expected");
                }
                genInHack(val);
                loadExprAddr(expr, dest);
                genStore(val, dest);
                genUngetReg(dest);
                genUngetReg(val);
            } else if (token.kind == LESS) {
                getToken();
                // print unsigned integer
                GenReg src = genGetReg();
                const struct Expr *expr = parseAssignmentExpr();
                loadExpr(expr, src);
                genOutHack(src);
                genUngetReg(src);
            } else {
                expectedError("'>' or '<'");
            }
            expected(SEMICOLON);
            getToken();
        } else {
            parseExprStatement();
        }
    }
}

void
parseExprStatement(void)
{
    GenReg dest = genGetReg();
    const struct Expr *expr = parseAssignmentExpr();
    const struct Expr *folded = constFoldExpr(expr);
    if (logOut) {
        fprintf(logOut, "\\section{}\n");
        printExprTree(expr, logOut);
        if (folded != expr) {
            fprintf(logOut, "after constant folding:\n");
            printExprTree(folded, logOut);
        }
        genSetOutput(logOut);
        fprintf(logOut, "\\begin{lstlisting}\n");
        loadExpr(folded, dest);
        fprintf(logOut, "\\end{lstlisting}\n");
        genSetOutput(out);
    }
    loadExpr(folded, dest);
    deleteAllExpr();
    genUngetReg(dest);
    expected(SEMICOLON);
    getToken();
}

const struct Expr *
parseAssignmentExpr(void)
{
    struct TokenPos pos = token.pos;
    const struct Expr *expr = parseExpr();
    while (token.kind == EQUAL) {
        if (!isLValueExpr(expr)) {
            // instead of many error functions we need a one error handling
            // function that is more flexible to use -> CBE about ellipse
            errorAtPos(pos, "L-value expected");
        }
        getToken();
        const struct Expr *exprRight = parseAssignmentExpr();
        expr = newBinaryExpr(EK_ASSIGN, expr, exprRight);
    }
    return expr;
}

const struct Expr *
parseExpr(void)
{
    const struct Expr *expr = parseTerm();
    while (token.kind == PLUS || token.kind == MINUS) {
        enum TokenKind op = token.kind;
        getToken();
        const struct Expr *exprRight = parseTerm();
        if (op == PLUS) {
            expr = newBinaryExpr(EK_ADD, expr, exprRight);
        } else {
            expr = newBinaryExpr(EK_SUB, expr, exprRight);
        }
    }
    return expr;
}

const struct Expr *
parseTerm(void)
{
    const struct Expr *expr = parseUnaryExpr();
    while (token.kind == ASTERISK || token.kind == SLASH ||
           token.kind == PERCENT) {
        enum TokenKind op = token.kind;
        getToken();
        const struct Expr *exprRight = parseUnaryExpr();
        if (op == ASTERISK) {
            expr = newBinaryExpr(EK_MUL, expr, exprRight);
        } else if (op == SLASH) {
            expr = newBinaryExpr(EK_DIV, expr, exprRight);
        } else if (op == PERCENT) {
            expr = newBinaryExpr(EK_MOD, expr, exprRight);
        }
    }
    return expr;
}

const struct Expr *
parseUnaryExpr(void)
{
    if (token.kind == PLUS || token.kind == MINUS) {
        enum TokenKind op = token.kind;
        getToken();
        const struct Expr *expr = parseUnaryExpr();
        if (op == MINUS) {
            return newUnaryExpr(EK_UNARY_MINUS, expr);
        }
        return newUnaryExpr(EK_UNARY_PLUS, expr);
    }
    return parseFactor();
}

const struct Expr *
parseFactor(void)
{
    if (token.kind == IDENTIFIER) {
        const struct UStr *identifier = UStrAdd(token.val.cstr);
        struct Sym *sym = SymFind(identifier);
        if (!sym) {
            SymAdd(identifier);
        }
        getToken();
        return newIdentifierExpr(identifier);
    } else if (token.kind == DEC_LITERAL) {
        uint64_t uint = strtoull(token.val.cstr, 0, 10);
        getToken();
        return newUnsignedLiteralExpr(uint);
    } else if (token.kind == HEX_LITERAL) {
        uint64_t uint = strtoull(token.val.cstr, 0, 10);
        getToken();
        return newUnsignedLiteralExpr(uint);
    } else if (token.kind == OCT_LITERAL) {
        uint64_t uint = strtoull(token.val.cstr, 0, 10);
        getToken();
        return newUnsignedLiteralExpr(uint);
    } else if (token.kind == LPAREN) {
        getToken();
        const struct Expr *expr = parseAssignmentExpr();
        expected(RPAREN);
        getToken();
        return expr;
    } else {
        expectedError("factor");
        return 0; // never reached
    }
}
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
#ifndef ABC_PARSER_H
#define ABC_PARSER_H

#include <stdio.h>

void setParserOut(FILE *out);
void setParserLog(FILE *out);
void parse(void);

#endif // ABC_PARSER_H
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include "finalize.h"
#include "str.h"

enum
{
    MIN_CAPACITY = 8
};

void
releaseStr(const struct Str *str)
{
    free((char *)(uintptr_t)str->cstr);
}

void
clearStr(struct Str *str)
{
    if (str->capacity == 0) {
        str->end = str->cstr = malloc(MIN_CAPACITY);
        if (!str->cstr) {
            fprintf(stderr, "clearStr: out of memory\n");
            finalizeExit(1);
        }
        str->capacity = MIN_CAPACITY;
    }
    *(str->end = str->cstr) = 0;
}

void
appendCharToStr(struct Str *str, char c)
{
    size_t len = str->end - str->cstr; // length without terminating 0

    // check if another character and 0 byte fits into string
    if (len + 2 > str->capacity) {
        str->capacity = len + 2;
        if (str->capacity < MIN_CAPACITY) {
            str->capacity = MIN_CAPACITY;
        } else {
            str->capacity *= 2;
        }
        str->cstr = realloc(str->cstr, str->capacity);
        if (!str->cstr) {
            fprintf(stderr, "appendCharToStr: out of memory\n");
            finalizeExit(1);
        }
        str->end = str->cstr + len;
    }

    *str->end++ = c;
    *str->end = 0;
}
#ifndef ABC_STR_H
#define ABC_STR_H

#include <stddef.h>

struct Str
{
    char *cstr, *end;
    size_t capacity;
};

// destructor
void releaseStr(const struct Str *str);

// set str->cstr to empty string
void clearStr(struct Str *str);

// append character to str->cstr
void appendCharToStr(struct Str *str, char c);

#endif // ABC_STR_H
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include "finalize.h"
#include "gen.h"
#include "sym.h"

struct SymTabNode
{
    struct SymTabNode *next;
    struct Sym sym;
};

static struct SymTabNode *symTab;

static void
cleanup(void)
{
    for (struct SymTabNode *n = symTab, *next; n; n = next) {
        next = n->next;
        free(n);
    }
}

struct Sym *
SymAdd(const struct UStr *identifier)
{
    assert(identifier);
    static bool first = true;
    if (first) {
        first = false;
        finalizeRegister(cleanup);
    }

    struct Sym *found = SymFind(identifier);
    if (found) {
        return found;
    }

    struct SymTabNode *n = malloc(sizeof(*n));
    if (!n) {
        fprintf(stderr, "SymAdd: out of memory\n");
        finalizeExit(1);
    }

    // initialize list element
    *(const struct UStr **)(uintptr_t)&n->sym.identifier = identifier;
    n->sym.value = 0;

    // prepend to list
    n->next = symTab;
    symTab = n;

    return &n->sym;
}

struct Sym *
SymFind(const struct UStr *identifier)
{
    assert(identifier);
    for (struct SymTabNode *n = symTab; n; n = n->next) {
        if (n->sym.identifier == identifier) {
            return &n->sym;
        }
    }
    return 0;
}

void
printSymtab(void)
{
    genData();
    for (const struct SymTabNode *n = symTab; n; n = n->next) {
        if (n->sym.value != 0) {
            genLabeledUInt(n->sym.identifier->cstr, n->sym.value);
        }
    }
    genBSS();
    for (const struct SymTabNode *n = symTab; n; n = n->next) {
        if (n->sym.value == 0) {
            genLabeledUInt(n->sym.identifier->cstr, n->sym.value);
        }
    }
}
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
#ifndef ABC_SYM_H
#define ABC_SYM_H

#include "ustr.h"

struct Sym
{
    const struct UStr * const identifier;
    double value;
};

struct Sym *SymAdd(const struct UStr *identifier);
struct Sym *SymFind(const struct UStr *identifier);
void printSymtab(void);

#endif // ABC_SYM_H
#include <stdlib.h>
#include <stdio.h>

#include "finalize.h"
#include "tokenkind.h"

#include "gen_strtokenkind.c"
1
2
3
4
5
6
7
8
#ifndef TOKENKIND_H
#define TOKENKIND_H

#include "gen_tokenkind.h"

const char *strTokenKind(enum TokenKind tokenKind);

#endif // TOKENKIND_H
EOI
BAD_TOKEN
DEC_LITERAL
HEX_LITERAL
OCT_LITERAL
AMPERSAND
AMPERSAND2
ASTERISK
CARET
DOLLAR
EQUAL
EQUAL2
NOT
NOT_EQUAL
GREATER
GREATER_EQUAL
LESS
LESS_EQUAL
LPAREN
MINUS
PERCENT
PLUS
RPAREN
SEMICOLON
SLASH
TILDE
VBAR
VBAR2
IDENTIFIER
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
#include <stdlib.h>
#include <stdio.h>

#include "finalize.h"
#include "gen.h"
#include "lexer.h"
#include "parser.h"
#include "sym.h"

void
printHeader(FILE *out) {
    fprintf(out, "\\documentclass[preview, margin=1cm]{standalone}\n");
    fprintf(out, "\\usepackage{amsmath}\n");
    fprintf(out, "\\usepackage{forest}\n");
    fprintf(out, "\\usepackage{listings}\n");
    fprintf(out, "\\lstdefinelanguage\n");
    fprintf(out, "    [ulm]{Assembler} %% add \"ulm\" dialect of Assembler\n");
    fprintf(out, "    [x86masm]{Assembler} %% based on \"x86masm\" dialect\n");
    fprintf(out, "    %% with these extra keywords:\n");
    fprintf(out, "    {morekeywords={halt, jmp, subq, jnz, jne, jz, je, %%\n");
    fprintf(out, "                   ldzwq, addq, imulq, ja, jb, getc, %%\n");
    fprintf(out, "                   divq, putc, movb, movzbq, movq, %%\n");
    fprintf(out, "                   shldwq, ldpa, ldfp,}} %%\n");
    fprintf(out, "\\definecolor{codegreen}{rgb}{0,0.6,0}\n");
    fprintf(out, "\\definecolor{codegray}{rgb}{0.5,0.5,0.5}\n");
    fprintf(out, "\\definecolor{codepurple}{rgb}{0.58,0,0.82}\n");
    fprintf(out, "\\definecolor{backcolour}{rgb}{0.95,0.95,0.92}\n");
    fprintf(out, "\\lstset{\n");
    fprintf(out, "    commentstyle=\\color{codegreen},\n");
    fprintf(out, "    keywordstyle=\\color{magenta}\\bfseries,\n");
    fprintf(out, "    numberstyle=\\small\\color{codegray},\n");
    fprintf(out, "    stringstyle=\\color{codepurple},\n");
    fprintf(out, "    basicstyle=\\ttfamily,\n");
    fprintf(out, "    breakatwhitespace=false,\n");
    fprintf(out, "    breaklines=true,\n");
    fprintf(out, "    captionpos=b,\n");
    fprintf(out, "    keepspaces=true,\n");
    fprintf(out, "    numbers=left,\n");
    fprintf(out, "    numbersep=5pt,\n");
    fprintf(out, "    showspaces=false,\n");
    fprintf(out, "    showstringspaces=true,\n");
    fprintf(out, "    showtabs=false,\n");
    fprintf(out, "    tabsize=2,\n");
    fprintf(out, "    language={[ulm]Assembler},\n");
    fprintf(out, "}\n");
    fprintf(out, "\\begin{document}\n");
}

void
printFooter(FILE *out)
{
    fprintf(out, "\\end{document}\n");
}

void
usage(const char *prg)
{
    fprintf(stderr, "usage: %s [out [log]]\n", prg);
    finalizeExit(1);
}

int
main(int argc, char *argv[])
{
    FILE *out = stdout, *logOut = 0;

    if (argc > 3) {
        usage(argv[0]);
    } else if (argc >= 2) {
        out = fopen(argv[1], "w");
        if (!out) {
            fprintf(stderr, "can not open output file %s\n", argv[1]);
            finalizeExit(1);
        }
        if (argc == 3) {
            logOut = fopen(argv[2], "w");
            if (!logOut) {
                fprintf(stderr, "can not open output file %s\n", argv[2]);
                finalizeExit(1);
            }
        }
    }

    if (out) {
        setParserOut(out);
    }
    if (logOut) {
        setParserLog(logOut);
        printHeader(logOut);
    }

    genHeader();
    getToken();
    parse();
    genFooter();
    printSymtab();

    if (out) {
        fclose(out);
    }
    if (logOut) {
        printFooter(logOut);
        fclose(logOut);
    }
    finalize();
}