Overview Home Back Next 1 2 3 4 5 Content of this session Browse Files

CPW Pt.6 Expression Trees

This is a longer workout and split into three steps:

Step (A): Some Design Patterns
Step (B): Some Implementation Patterns
Step (C): Integrating the Expr Class into the calculator
And there is an optional quiz: Quiz 20 with 20 bonus points

For the quiz you simply have to submit the class for expression trees and the new implementation for the calculator. In the videos you can see the complete implementation. Of course you can apply further improvements or bug fixes (for both bugs in code and typos in comments).

As code base you can use either your own code or the codes used in the video:

session18/git/abc/lexer.c

#include <stdbool.h>
#include <stdio.h>

#include "lexer.h"

struct Token token;

//------------------------------------------------------------------------------

// position of current character ch
static struct TokenPos curr = {
    1,
    0,
};

static int ch;

static int
nextCh(void)
{
    ++curr.col;
    ch = getchar();
    if (ch == '\n') {
        ++curr.line;
        curr.col = 0;
    }
    return ch;
}

static bool
isWhiteSpace(int ch)
{
    return ch == ' ' || ch == '\t';
}

static bool
isDecDigit(int ch)
{
    return ch >= '0' && ch <= '9';
}

static bool
isOctDigit(int ch)
{
    return ch >= '0' && ch <= '7';
}

static bool
isHexDigit(int ch)
{
    return isDecDigit(ch) || (ch >= 'a' && ch <= 'f') ||
           (ch >= 'A' && ch <= 'F');
}

static bool
isLetter(int ch)
{
    return ((ch >= 'a') && (ch <= 'z')) || ((ch >= 'A' && ch <= 'Z')) ||
           ch == '_';
}

enum TokenKind
getToken(void)
{
    unsigned long long val = 0;

    // init ch, skip white spaces and newlines
    while (ch == 0 || isWhiteSpace(ch) || ch == '\n') {
        nextCh();
    }

    token.pos.line = curr.line;
    token.pos.col = curr.col;

    clearStr(&token.val);

    if (ch == EOF) {
        return token.kind = EOI;
    } else if (isDecDigit(ch)) {
        // parse literal
        if (ch == '0') {
            appendCharToStr(&token.val, ch);
            nextCh();
            if (ch == 'x') {
                appendCharToStr(&token.val, ch);
                nextCh();
                if (isHexDigit(ch)) {
                    while (isHexDigit(ch)) {
                        appendCharToStr(&token.val, ch);
                        nextCh();
                    }
                    return token.kind = HEX_LITERAL;
                }
                return token.kind = BAD_TOKEN;
            }
            while (isOctDigit(ch)) {
                appendCharToStr(&token.val, ch);
                nextCh();
            }
            return token.kind = OCT_LITERAL;
        } else if (isDecDigit(ch)) {
            while (isDecDigit(ch)) {
                appendCharToStr(&token.val, ch);
                nextCh();
            }
            return token.kind = DEC_LITERAL;
        }
    } else if (ch == '+') {
        appendCharToStr(&token.val, ch);
        nextCh();
        return token.kind = PLUS;
    } else if (ch == '-') {
        appendCharToStr(&token.val, ch);
        nextCh();
        return token.kind = MINUS;
    } else if (ch == '*') {
        appendCharToStr(&token.val, ch);
        nextCh();
        return token.kind = ASTERISK;
    } else if (ch == '/') {
        appendCharToStr(&token.val, ch);
        nextCh();
        return token.kind = SLASH;
    } else if (ch == '%') {
        appendCharToStr(&token.val, ch);
        nextCh();
        return token.kind = PERCENT;
    } else if (ch == '=') {
        appendCharToStr(&token.val, ch);
        nextCh();
        return token.kind = EQUAL;
    } else if (ch == '(') {
        appendCharToStr(&token.val, ch);
        nextCh();
        return token.kind = LPAREN;
    } else if (ch == ')') {
        appendCharToStr(&token.val, ch);
        nextCh();
        return token.kind = RPAREN;
    } else if (ch == ';') {
        appendCharToStr(&token.val, ch);
        nextCh();
        return token.kind = SEMICOLON;
    } else if (ch == '^') {
        appendCharToStr(&token.val, ch);
        nextCh();
        return token.kind = CARET;
    } else if (isLetter(ch)) {
        do {
            appendCharToStr(&token.val, ch);
            nextCh();
        } while (isLetter(ch) || isDecDigit(ch));
        return token.kind = IDENTIFIER;
    }

    nextCh();
    return token.kind = BAD_TOKEN;
}

session18/git/abc/lexer.h

#ifndef ABC_LEXER_H
#define ABC_LEXER_H

#include <stddef.h>

#include "str.h"
#include "tokenkind.h"

enum TokenKind getToken(void);

struct Token
{
    enum TokenKind kind;
    struct TokenPos
    {
        size_t line, col;
    } pos;
    struct Str val;
};

extern struct Token token;

#endif // ABC_LEXER_H

session18/git/abc/tokenkind.c

#include <stdio.h>
#include <stdlib.h>

#include "tokenkind.h"

const char *
strTokenKind(enum TokenKind tokenKind)
{
    switch (tokenKind) {
        case EOI:
            return "EOI";
        case BAD_TOKEN:
            return "BAD_TOKEN";
        case HEX_LITERAL:
            return "HEX_LITERAL";
        case OCT_LITERAL:
            return "OCT_LITERAL";
        case DEC_LITERAL:
            return "DEC_LITERAL";
        case PLUS:
            return "PLUS";
        case MINUS:
            return "MINUS";
        case ASTERISK:
            return "ASTERISK";
        case SLASH:
            return "SLASH";
        case PERCENT:
            return "PERCENT";
        case EQUAL:
            return "EQUAL";
        case LPAREN:
            return "LPAREN";
        case RPAREN:
            return "RPAREN";
        case SEMICOLON:
            return "SEMICOLON";
        case CARET:
            return "CARET";
        case IDENTIFIER:
            return "IDENTIFIER";
        default:
            fprintf(stderr, "internal error in strTokenKind: tokenKind = %d\n",
                    tokenKind);
            exit(1);
            return "";
    }
}

session18/git/abc/tokenkind.h

#ifndef ABC_TOKENKIND_H
#define ABC_TOKENKIND_H

enum TokenKind
{
    EOI,            // end of input
    BAD_TOKEN,
    HEX_LITERAL,
    OCT_LITERAL,
    DEC_LITERAL,
    PLUS,           // '+'
    MINUS,          // '-'
    ASTERISK,       // '*'
    SLASH,          // '/'
    PERCENT,        // '%'
    EQUAL,          // '='
    LPAREN,         // '('
    RPAREN,         // ')'
    SEMICOLON,      // ';'
    CARET,          // '^'
    IDENTIFIER,
};

const char *strTokenKind(enum TokenKind tokenKind);

#endif // ABC_TOKENKIND_H

session18/git/abc/xtest_calc.c

#include <assert.h>
#include <stddef.h>
#include <inttypes.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include "lexer.h"
#include "tokenkind.h"

// error handling

void
expectedError(const char *expectedStr)
{
    fprintf(stderr, "%zu.%zu: error expected '%s' got '%s'\n", token.pos.line,
            token.pos.col, expectedStr, strTokenKind(token.kind));
    exit(1);
}

void
expected(enum TokenKind tokenKind)
{
    if (tokenKind != token.kind) {
        expectedError(strTokenKind(tokenKind));
    }
}

// for debugging
void
printWithIndent(size_t indent, const char *str)
{
#ifndef NDEBUG
    for (size_t i = 0; i < indent * 4; ++i) {
        printf(" ");
    }
    printf("%s\n", str);
#endif // NDEBUG
}

// parse functions
void parseInputSequence(size_t);
void parseExprStatement(size_t);
double parseExpr(size_t);
double parseTerm(size_t);
double parsePowerExpr(size_t);
double parseUnaryExpr(size_t);
double parseFactor(size_t);

void
parseInputSequence(size_t indent)
{
    while (token.kind != EOI) {
        parseExprStatement(indent + 1);
    }
}

void
parseExprStatement(size_t indent)
{
    printWithIndent(indent, "parseExprStatement");
    printf("> %lf\n", parseExpr(indent + 1));
    expected(SEMICOLON);
    getToken();
}

double
parseExpr(size_t indent)
{
    printWithIndent(indent, "parseExpr");
    double val = parseTerm(indent + 1);
    while (token.kind == PLUS || token.kind == MINUS) {
        printWithIndent(indent, strTokenKind(token.kind));
        enum TokenKind op = token.kind;
        getToken();
        double valRight = parseTerm(indent + 1);
        if (op == PLUS) {
            val += valRight;
        } else {
            val -= valRight;
        }
    }
    return val;
}

double
parseTerm(size_t indent)
{
    printWithIndent(indent, "parseTerm");
    double val = parsePowerExpr(indent + 1);
    while (token.kind == ASTERISK || token.kind == SLASH) {
        printWithIndent(indent, strTokenKind(token.kind));
        enum TokenKind op = token.kind;
        getToken();
        double valRight = parsePowerExpr(indent + 1);
        if (op == ASTERISK) {
            val *= valRight;
        } else {
            val /= valRight;
        }
    }
    return val;
}

double
parsePowerExpr(size_t indent)
{
    printWithIndent(indent, "parsePowerExpr");
    double val = parseUnaryExpr(indent + 1);
    while (token.kind == CARET) {
        getToken();
        double valRight = parsePowerExpr(indent + 1);
        val = pow(val, valRight);
    }
    return val;
}

double
parseUnaryExpr(size_t indent)
{
    printWithIndent(indent, "parseUnaryExpr");
    if (token.kind == PLUS || token.kind == MINUS) {
        enum TokenKind op = token.kind;
        getToken();
        double val = parseUnaryExpr(indent + 1);
        if (op == MINUS) {
            val = -val;
        }
        return val;
    }
    return parseFactor(indent + 1);
}

double
parseFactor(size_t indent)
{
    printWithIndent(indent, "parseFactor");
    double val = 0;
    if (token.kind == DEC_LITERAL) {
        printWithIndent(indent, token.val.cstr);
        val = strtoull(token.val.cstr, 0, 10);
        getToken();
        return val;
    } else if (token.kind == HEX_LITERAL) {
        printWithIndent(indent, token.val.cstr);
        val = strtoull(token.val.cstr, 0, 16);
        getToken();
        return val;
    } else if (token.kind == OCT_LITERAL) {
        printWithIndent(indent, token.val.cstr);
        val = strtoull(token.val.cstr, 0, 8);
        getToken();
        return val;
    } else if (token.kind == LPAREN) {
        printWithIndent(indent, strTokenKind(token.kind));
        getToken();
        val = parseExpr(indent + 1);
        expected(RPAREN);
        printWithIndent(indent, strTokenKind(token.kind));
        getToken();
        return val;
    } else {
        expectedError("factor");
        return val; // never reached
    }
}


// test for the parse

int
main(void)
{
    // we need a current token before we begin parsing
    getToken();
    parseInputSequence(0);
}