Overview Home Back Next 1 2 3 4 5 Content of this session

CBE Pt. 15: Unions in C

Access to theseus

We only allow IPs that belong to our network to log in on theseus. To access the server from home, first log in with ssh on theon. From there you can log in on theseus with ssh theseus.

Provided Material

Below you find a checkout of my repository with which I started in the video. Of course you can reuse your own code. This is just provided for comparison.

session18/git/abc/lexer.h

#ifndef ABC_LEXER_H
#define ABC_LEXER_H

#include <stddef.h>

#include "str.h"
#include "tokenkind.h"

enum TokenKind getToken(void);

// A token as delivered by the lexer: its kind, its position and its text.
struct Token
{
    enum TokenKind kind;
    // line and column associated with the token
    struct TokenPos
    {
        size_t line, col;
    } pos;
    // string value of the token (struct Str is declared in str.h)
    struct Str val;
};

extern struct Token token;

#endif // ABC_LEXER_H

session18/git/abc/tokenkind.c

#include <stdio.h>
#include <stdlib.h>

#include "tokenkind.h"

// Map a token kind to its name as a string literal.
//
// A value that is not one of the known enumerators is an internal error:
// a message is printed to stderr and the program terminates with exit
// code 1.
const char *
strTokenKind(enum TokenKind tokenKind)
{
    const char *name = 0; // stays null for an unknown token kind

    switch (tokenKind) {
        case EOI:         name = "EOI";         break;
        case BAD_TOKEN:   name = "BAD_TOKEN";   break;
        case HEX_LITERAL: name = "HEX_LITERAL"; break;
        case OCT_LITERAL: name = "OCT_LITERAL"; break;
        case DEC_LITERAL: name = "DEC_LITERAL"; break;
        case PLUS:        name = "PLUS";        break;
        case MINUS:       name = "MINUS";       break;
        case ASTERISK:    name = "ASTERISK";    break;
        case SLASH:       name = "SLASH";       break;
        case PERCENT:     name = "PERCENT";     break;
        case EQUAL:       name = "EQUAL";       break;
        case LPAREN:      name = "LPAREN";      break;
        case RPAREN:      name = "RPAREN";      break;
        case SEMICOLON:   name = "SEMICOLON";   break;
        case CARET:       name = "CARET";       break;
        case IDENTIFIER:  name = "IDENTIFIER";  break;
        default:
            break;
    }

    if (!name) {
        fprintf(stderr, "internal error in strTokenKind: tokenKind = %d\n",
                tokenKind);
        exit(1);
    }
    return name;
}

session18/git/abc/tokenkind.h

#ifndef ABC_TOKENKIND_H
#define ABC_TOKENKIND_H

// Kinds of tokens reported by the lexer.
enum TokenKind
{
    EOI,            // end of input
    BAD_TOKEN,      // input that does not form a valid token
    HEX_LITERAL,    // "0x" followed by hexadecimal digits
    OCT_LITERAL,    // '0' followed by octal digits
    DEC_LITERAL,    // decimal digits, not beginning with '0'
    PLUS,           // '+'
    MINUS,          // '-'
    ASTERISK,       // '*'
    SLASH,          // '/'
    PERCENT,        // '%'
    EQUAL,          // '='
    LPAREN,         // '('
    RPAREN,         // ')'
    SEMICOLON,      // ';'
    CARET,          // '^'
    IDENTIFIER,     // letter or '_' followed by letters, '_' or digits
};

const char *strTokenKind(enum TokenKind tokenKind);

#endif // ABC_TOKENKIND_H

session18/git/abc/xtest_calc.c

#include <assert.h>
#include <stddef.h>
#include <inttypes.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include "lexer.h"
#include "tokenkind.h"

// error handling

// Report a syntax error at the position of the current token and terminate
// the program with exit code 1.
//
// expectedStr: description of what the parser expected; the kind of the
//              current token is reported as what was found instead.
void
expectedError(const char *expectedStr)
{
    const char *gotStr = strTokenKind(token.kind);

    fprintf(stderr, "%zu.%zu: error expected '%s' got '%s'\n", token.pos.line,
            token.pos.col, expectedStr, gotStr);
    exit(1);
}

// Check that the current token has the kind tokenKind. If it does not, an
// error is reported through expectedError(), which terminates the program.
void
expected(enum TokenKind tokenKind)
{
    if (token.kind == tokenKind) {
        return;
    }
    expectedError(strTokenKind(tokenKind));
}

// Debugging aid: print str on its own line, preceded by four blanks per
// indentation level. Prints nothing if NDEBUG is defined.
void
printWithIndent(size_t indent, const char *str)
{
#ifndef NDEBUG
    size_t numBlanks = indent * 4;

    while (numBlanks-- > 0) {
        putchar(' ');
    }
    printf("%s\n", str);
#endif // NDEBUG
}

// parse functions
void parseInputSequence(size_t);
void parseExprStatement(size_t);
double parseExpr(size_t);
double parseTerm(size_t);
double parsePowerExpr(size_t);
double parseUnaryExpr(size_t);
double parseFactor(size_t);

// Parse expression statements until the end of input is reached.
// indent: indentation level used for the debug trace.
void
parseInputSequence(size_t indent)
{
    for (;;) {
        if (token.kind == EOI) {
            break;
        }
        parseExprStatement(indent + 1);
    }
}

// Parse one statement of the form "expression ;" and print the value of the
// expression. A missing semicolon is reported as an error.
void
parseExprStatement(size_t indent)
{
    printWithIndent(indent, "parseExprStatement");

    double val = parseExpr(indent + 1);
    printf("> %lf\n", val);

    expected(SEMICOLON);
    getToken(); // consume the ';'
}

// Parse a sequence of terms combined with '+' and '-' (evaluated from left
// to right) and return its value.
double
parseExpr(size_t indent)
{
    printWithIndent(indent, "parseExpr");

    double sum = parseTerm(indent + 1);
    for (;;) {
        enum TokenKind op = token.kind;
        if (op != PLUS && op != MINUS) {
            break;
        }
        printWithIndent(indent, strTokenKind(op));
        getToken(); // consume the operator
        double rhs = parseTerm(indent + 1);
        sum = (op == PLUS) ? sum + rhs : sum - rhs;
    }
    return sum;
}

// Parse a sequence of power expressions combined with '*' and '/'
// (evaluated from left to right) and return its value.
double
parseTerm(size_t indent)
{
    printWithIndent(indent, "parseTerm");

    double product = parsePowerExpr(indent + 1);
    for (;;) {
        enum TokenKind op = token.kind;
        if (op != ASTERISK && op != SLASH) {
            break;
        }
        printWithIndent(indent, strTokenKind(op));
        getToken(); // consume the operator
        double rhs = parsePowerExpr(indent + 1);
        product = (op == ASTERISK) ? product * rhs : product / rhs;
    }
    return product;
}

// Parse a unary expression optionally followed by '^' and an exponent and
// return its value. The exponent is parsed by a recursive call, which makes
// '^' right associative.
double
parsePowerExpr(size_t indent)
{
    printWithIndent(indent, "parsePowerExpr");

    double base = parseUnaryExpr(indent + 1);
    while (token.kind == CARET) {
        getToken(); // consume the '^'
        double exponent = parsePowerExpr(indent + 1);
        base = pow(base, exponent);
    }
    return base;
}

double
parseUnaryExpr(size_t indent)
{
    printWithIndent(indent, "parseUnaryExpr");
    if (token.kind == PLUS || token.kind == MINUS) {
        enum TokenKind op = token.kind;
        getToken();
        double val = parseUnaryExpr(indent + 1);
        if (op == MINUS) {
            val = -val;
        }
        return val;
    }
    return parseFactor(indent + 1);
}

// Parse a factor, i.e. an integer literal (decimal, hexadecimal or octal) or
// an expression in parentheses, and return its value. Any other token is
// reported as error through expectedError().
double
parseFactor(size_t indent)
{
    printWithIndent(indent, "parseFactor");

    switch (token.kind) {
        case DEC_LITERAL:
        case HEX_LITERAL:
        case OCT_LITERAL:
            {
                // the kind of the literal determines the conversion base
                int base = 8;
                if (token.kind == DEC_LITERAL) {
                    base = 10;
                } else if (token.kind == HEX_LITERAL) {
                    base = 16;
                }
                printWithIndent(indent, token.val.cstr);
                double literal = strtoull(token.val.cstr, 0, base);
                getToken();
                return literal;
            }
        case LPAREN:
            {
                printWithIndent(indent, strTokenKind(token.kind));
                getToken(); // consume the '('
                double inner = parseExpr(indent + 1);
                expected(RPAREN);
                printWithIndent(indent, strTokenKind(token.kind));
                getToken(); // consume the ')'
                return inner;
            }
        default:
            expectedError("factor");
            return 0; // never reached
    }
}


// test for the parse

// Read expression statements from standard input and evaluate them.
int
main(void)
{
    // the parser always inspects the current token, so fetch the first one
    getToken();
    parseInputSequence(0);
    return 0;
}

session19/git/abc/Makefile

CPPFLAGS += -Wall -Wcast-qual
LDFLAGS += -lm
#
# patch: if the user has not defined CC and the default compiler does not exist, use gcc
#
ifeq ($(origin CC),default)
    cc_check := $(shell $(CC) -v > /dev/null 2>&1 && echo "sane")
    ifneq ($(strip $(cc_check)),sane)
        CC := gcc
    endif
endif

#
# Define list of source files, object files, targets, etc
#

# all source files
src :=\
    $(wildcard *.c)

# all object files
obj :=\
    $(patsubst %.c,%.o,\
        $(src))

# all targets
target :=\
    $(filter xtest%,\
        $(patsubst %.c,%,\
            $(src)))

# objects that are required by the targets
lib.o :=\
    $(filter-out xtest%,\
        $(obj))

# dependency file that will be generated by compiler
deps :=\
    $(patsubst %,%.d,\
        $(src))

# leftover dependency files of source files that no longer exist
obsolete.deps:=\
    $(filter-out $(deps),\
        $(wildcard *.c.d))

#
# Build rules
#
.PHONY: all
all: $(target) $(obj)

# rule for removing obsolete dependency files
.PHONY: $(obsolete.deps)
$(obsolete.deps) :
        $(RM) $(obsolete.deps)

# delete implicit rule for building an executable directly from its source file
% : %.c

# our rule: to build target link its object file against library object files
%: %.o $(lib.o) | $(obsolete.deps)
        $(CC) -o $@ $^ $(LDFLAGS)

# our rule to build objects: also generate a dependency file
%.o: %.c | $(obsolete.deps)
        $(CC) -c $(CPPFLAGS) $(CFLAGS) -MT $@ -MMD -MP -MF $<.d $<

.PHONY: clean
clean:
        $(RM) $(target) $(obj) $(deps) $(obsolete.deps)

#
# Include dependencies (if already generated)
#
-include $(deps)

session19/git/abc/README.md

ABC: ABC is A Bloody Compiler
=============================

This is our compiler project

## How to use

Use `make` to ...

session19/git/abc/lexer.c

#include <stdbool.h>
#include <stdio.h>

#include "lexer.h"

struct Token token;

//------------------------------------------------------------------------------

// position of current character ch
// (line starts at 1; col starts at 0 and is updated by nextCh())
static struct TokenPos curr = {
    1,
    0,
};

// most recently read character or EOF; it is 0 as long as nothing was read
static int ch;

// Read the next character from standard input into ch, update the position
// curr accordingly and return the character (EOF at end of input).
static int
nextCh(void)
{
    ch = getchar();
    if (ch == '\n') {
        // a newline begins a new line; the column count restarts
        ++curr.line;
        curr.col = 0;
    } else {
        ++curr.col;
    }
    return ch;
}

// Return true if ch is a blank or a tab (a newline does not count here).
static bool
isWhiteSpace(int ch)
{
    switch (ch) {
        case ' ':
        case '\t':
            return true;
        default:
            return false;
    }
}

// Return true if ch is one of the decimal digits '0' to '9'.
static bool
isDecDigit(int ch)
{
    if (ch < '0') {
        return false;
    }
    return ch <= '9';
}

// Return true if ch is one of the octal digits '0' to '7'.
static bool
isOctDigit(int ch)
{
    if (ch < '0') {
        return false;
    }
    return ch <= '7';
}

// Return true if ch is a hexadecimal digit: a decimal digit or a letter
// 'a' to 'f' in lower or upper case.
static bool
isHexDigit(int ch)
{
    if (isDecDigit(ch)) {
        return true;
    }
    if ('a' <= ch && ch <= 'f') {
        return true;
    }
    return 'A' <= ch && ch <= 'F';
}

// Return true if ch can begin an identifier: a letter 'a' to 'z' or
// 'A' to 'Z', or an underscore.
static bool
isLetter(int ch)
{
    if (ch == '_') {
        return true;
    }
    if ('a' <= ch && ch <= 'z') {
        return true;
    }
    return 'A' <= ch && ch <= 'Z';
}

enum TokenKind
getToken(void)
{
    // init ch, skip white spaces and newlines
    while (ch == 0 || isWhiteSpace(ch) || ch == '\n') {
        nextCh();
    }

    token.pos.line = curr.line;
    token.pos.col = curr.col;

    clearStr(&token.val);

    if (ch == EOF) {
        return token.kind = EOI;
    } else if (isDecDigit(ch)) {
        // parse literal
        if (ch == '0') {
            appendCharToStr(&token.val, ch);
            nextCh();
            if (ch == 'x') {
                appendCharToStr(&token.val, ch);
                nextCh();
                if (isHexDigit(ch)) {
                    while (isHexDigit(ch)) {
                        appendCharToStr(&token.val, ch);
                        nextCh();
                    }
                    return token.kind = HEX_LITERAL;
                }
                return token.kind = BAD_TOKEN;
            }
            while (isOctDigit(ch)) {
                appendCharToStr(&token.val, ch);
                nextCh();
            }
            return token.kind = OCT_LITERAL;
        } else if (isDecDigit(ch)) {
            while (isDecDigit(ch)) {
                appendCharToStr(&token.val, ch);
                nextCh();
            }
            return token.kind = DEC_LITERAL;
        }
    } else if (ch == '+') {
        appendCharToStr(&token.val, ch);
        nextCh();
        return token.kind = PLUS;
    } else if (ch == '-') {
        appendCharToStr(&token.val, ch);
        nextCh();
        return token.kind = MINUS;
    } else if (ch == '*') {
        appendCharToStr(&token.val, ch);
        nextCh();
        return token.kind = ASTERISK;
    } else if (ch == '/') {
        appendCharToStr(&token.val, ch);
        nextCh();
        return token.kind = SLASH;
    } else if (ch == '%') {
        appendCharToStr(&token.val, ch);
        nextCh();
        return token.kind = PERCENT;
    } else if (ch == '=') {
        appendCharToStr(&token.val, ch);
        nextCh();
        return token.kind = EQUAL;
    } else if (ch == '(') {
        appendCharToStr(&token.val, ch);
        nextCh();
        return token.kind = LPAREN;
    } else if (ch == ')') {
        appendCharToStr(&token.val, ch);
        nextCh();
        return token.kind = RPAREN;
    } else if (ch == ';') {
        appendCharToStr(&token.val, ch);
        nextCh();
        return token.kind = SEMICOLON;
    } else if (ch == '^') {
        appendCharToStr(&token.val, ch);
        nextCh();
        return token.kind = CARET;
    } else if (isLetter(ch)) {
        do {
            appendCharToStr(&token.val, ch);
            nextCh();
        } while (isLetter(ch) || isDecDigit(ch));
        return token.kind = IDENTIFIER;
    }

    nextCh();
    return token.kind = BAD_TOKEN;
}

session19/git/abc/str.c

#include <stdio.h>
#include <stdlib.h>

#include "str.h"

// size in bytes of the buffer allocated by clearStr() for a string without
// buffer; also the smallest capacity appendCharToStr() grows a buffer to
enum
{
    MIN_CAPACITY = 8
};

// Make str the empty string. If str has no buffer yet (capacity is 0) a
// buffer of MIN_CAPACITY bytes is allocated first; if that fails an error
// message is printed and the program terminates with exit code 1.
void
clearStr(struct Str *str)
{
    if (str->capacity == 0) {
        char *buf = malloc(MIN_CAPACITY);
        if (!buf) {
            fprintf(stderr, "clearStr: out of memory\n");
            exit(1);
        }
        str->cstr = buf;
        str->capacity = MIN_CAPACITY;
    }
    // an existing buffer is kept; only the end is moved back to the begin
    str->end = str->cstr;
    *str->end = 0;
}

// Append the character c to str and keep the string zero terminated. The
// buffer is enlarged if c and the terminating 0 do not fit; if that fails
// an error message is printed and the program terminates with exit code 1.
void
appendCharToStr(struct Str *str, char c)
{
    size_t len = str->end - str->cstr; // length without terminating 0
    size_t needed = len + 2;           // one more character plus the 0 byte

    if (needed > str->capacity) {
        // grow to twice the needed size, but never below MIN_CAPACITY
        size_t newCapacity = needed < MIN_CAPACITY ? MIN_CAPACITY : needed * 2;
        char *buf = realloc(str->cstr, newCapacity);
        if (!buf) {
            fprintf(stderr, "appendCharToStr: out of memory\n");
            exit(1);
        }
        str->cstr = buf;
        str->capacity = newCapacity;
        str->end = buf + len; // the buffer may have moved
    }

    str->end[0] = c;
    str->end[1] = 0;
    ++str->end;
}

session19/git/abc/str.h

#ifndef ABC_STR_H
#define ABC_STR_H

#include <stddef.h>

// Zero terminated string in a buffer that grows on demand.
struct Str
{
    char *cstr, *end;   // begin of the string and its terminating 0 byte
    size_t capacity;    // size of the buffer in bytes (0: no buffer yet)
};

// set str->cstr to empty string
void clearStr(struct Str *str);

// append character to str->cstr
void appendCharToStr(struct Str *str, char c);

#endif // ABC_STR_H

session19/git/abc/test_calc.in

2 + 5 / 3 * 2;
(2 + 5) / 3;
((10 - 213) * 25) + 27;
(7 - 3) * 4^3;
(1 + 1) * 1 - 2;
--1;
1--1^2;
1--1^3;
1++2^2;
4 + 1 * 3;
-1^2;
1-1^2;
1+-1^2;

session19/git/abc/test_lexer.in

a = 5;
b = 42;
c = (a + b) *2;
123 0123 0xaB12 abc +-/*%^()

session19/git/abc/tokenkind.c

#include <stdio.h>
#include <stdlib.h>

#include "tokenkind.h"

// Map a token kind to its name as a string literal.
//
// A value that is not one of the known enumerators is an internal error:
// a message is printed to stderr and the program terminates with exit
// code 1.
const char *
strTokenKind(enum TokenKind tokenKind)
{
    const char *name = 0; // stays null for an unknown token kind

    switch (tokenKind) {
        case EOI:         name = "EOI";         break;
        case BAD_TOKEN:   name = "BAD_TOKEN";   break;
        case HEX_LITERAL: name = "HEX_LITERAL"; break;
        case OCT_LITERAL: name = "OCT_LITERAL"; break;
        case DEC_LITERAL: name = "DEC_LITERAL"; break;
        case PLUS:        name = "PLUS";        break;
        case MINUS:       name = "MINUS";       break;
        case ASTERISK:    name = "ASTERISK";    break;
        case SLASH:       name = "SLASH";       break;
        case PERCENT:     name = "PERCENT";     break;
        case EQUAL:       name = "EQUAL";       break;
        case LPAREN:      name = "LPAREN";      break;
        case RPAREN:      name = "RPAREN";      break;
        case SEMICOLON:   name = "SEMICOLON";   break;
        case CARET:       name = "CARET";       break;
        case IDENTIFIER:  name = "IDENTIFIER";  break;
        default:
            break;
    }

    if (!name) {
        fprintf(stderr, "internal error in strTokenKind: tokenKind = %d\n",
                tokenKind);
        exit(1);
    }
    return name;
}

session19/git/abc/tokenkind.h

#ifndef ABC_TOKENKIND_H
#define ABC_TOKENKIND_H

// Kinds of tokens reported by the lexer.
enum TokenKind
{
    EOI,            // end of input
    BAD_TOKEN,      // input that does not form a valid token
    HEX_LITERAL,    // "0x" followed by hexadecimal digits
    OCT_LITERAL,    // '0' followed by octal digits
    DEC_LITERAL,    // decimal digits, not beginning with '0'
    PLUS,           // '+'
    MINUS,          // '-'
    ASTERISK,       // '*'
    SLASH,          // '/'
    PERCENT,        // '%'
    EQUAL,          // '='
    LPAREN,         // '('
    RPAREN,         // ')'
    SEMICOLON,      // ';'
    CARET,          // '^'
    IDENTIFIER,     // letter or '_' followed by letters, '_' or digits
};

const char *strTokenKind(enum TokenKind tokenKind);

#endif // ABC_TOKENKIND_H

session19/git/abc/ustr.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "ustr.h"

// Node of the singly linked list that holds the pool of unique strings.
struct Node
{
    struct Node *next;  // next node of the list (null in the last node)
    struct UStr ustr;   // the string stored in this node
};

static struct Node *node;

const struct UStr *
UStrAdd_(const char *s, bool *added)
{
    size_t len = strlen(s);
    if (added) {
        *added = true;
    }
    for (struct Node *n = node; n; n = n->next) {
        if (len == n->ustr.len && !strcmp(s, n->ustr.cstr)) {
            if (added) {
                *added = false;
            }
            return &n->ustr;
        }
    }

    struct Node *n = malloc(len + 1 + sizeof(size_t) + sizeof(struct Node *));
    if (!n) {
        fprintf(stderr, "makeUStr: out of memory\n");
        abort();
    }

    n->next = node;
    n->ustr.len = len;
    strcpy(n->ustr.cstr, s);
    node = n;
    return &node->ustr;
}

// Return the unique string that equals s; same as UStrAdd_() for callers
// that do not need to know whether the string was newly added.
const struct UStr *
UStrAdd(const char *s)
{
    const struct UStr *ustr = UStrAdd_(s, NULL);

    return ustr;
}

// Print all strings of the pool to standard output, one string per line,
// beginning with the string that was added last.
void
UStrPrintPool(void)
{
    const struct Node *n = node;

    while (n) {
        puts(n->ustr.cstr);
        n = n->next;
    }
}

session19/git/abc/ustr.h

#ifndef UTILS_USTR_H
#define UTILS_USTR_H

#include <stdbool.h>
#include <stddef.h>

// Unique string: its length and its characters (flexible array member).
struct UStr
{
    size_t len;     // length of cstr without the terminating 0
    char cstr[];    // the zero terminated characters
};

const struct UStr *UStrAdd_(const char *s, bool *added);
const struct UStr *UStrAdd(const char *s);
void UStrPrintPool(void);

#endif // UTILS_USTR_H

session19/git/abc/xtest_calc.c

#include <assert.h>
#include <stddef.h>
#include <inttypes.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include "expr.h"
#include "lexer.h"
#include "tokenkind.h"

// error handling

// Report a syntax error at the position of the current token and terminate
// the program with exit code 1.
//
// expectedStr: description of what the parser expected; the kind of the
//              current token is reported as what was found instead.
void
expectedError(const char *expectedStr)
{
    const char *gotStr = strTokenKind(token.kind);

    fprintf(stderr, "%zu.%zu: error expected '%s' got '%s'\n", token.pos.line,
            token.pos.col, expectedStr, gotStr);
    exit(1);
}

// Check that the current token has the kind tokenKind. If it does not, an
// error is reported through expectedError(), which terminates the program.
void
expected(enum TokenKind tokenKind)
{
    if (token.kind == tokenKind) {
        return;
    }
    expectedError(strTokenKind(tokenKind));
}

// parse functions
void parseInputSequence(void);
void parseExprStatement(void);
const struct Expr *parseExpr(void);
const struct Expr *parseTerm(void);
const struct Expr *parsePowerExpr(void);
const struct Expr *parseUnaryExpr(void);
const struct Expr *parseFactor(void);

// Parse expression statements until the end of input is reached.
void
parseInputSequence(void)
{
    for (;;) {
        if (token.kind == EOI) {
            break;
        }
        parseExprStatement();
    }
}

// Parse one statement of the form "expression ;", print the expression tree
// and release it again. A missing semicolon is reported as an error.
void
parseExprStatement(void)
{
    const struct Expr *tree = parseExpr();

    // alternative output: printf("> %lf\n", evalExpr(tree));
    printExprTree(tree);
    deleteExpr(tree);

    expected(SEMICOLON);
    getToken(); // consume the ';'
}

// Parse a sequence of terms combined with '+' and '-' and return the
// expression tree. The operators are left associative: the tree built so far
// becomes the left operand of the next operator.
const struct Expr *
parseExpr(void)
{
    const struct Expr *left = parseTerm();

    for (;;) {
        enum TokenKind op = token.kind;
        if (op != PLUS && op != MINUS) {
            break;
        }
        getToken(); // consume the operator
        const struct Expr *right = parseTerm();
        left = newBinaryExpr(op == PLUS ? EK_ADD : EK_SUB, left, right);
    }
    return left;
}

// Parse a sequence of power expressions combined with '*' and '/' and
// return the expression tree. The operators are left associative: the tree
// built so far becomes the left operand of the next operator.
const struct Expr *
parseTerm(void)
{
    const struct Expr *left = parsePowerExpr();

    for (;;) {
        enum TokenKind op = token.kind;
        if (op != ASTERISK && op != SLASH) {
            break;
        }
        getToken(); // consume the operator
        const struct Expr *right = parsePowerExpr();
        left = newBinaryExpr(op == ASTERISK ? EK_MUL : EK_DIV, left, right);
    }
    return left;
}

// Parse a unary expression optionally followed by '^' and an exponent and
// return the expression tree. The exponent is parsed by a recursive call,
// which makes '^' right associative.
const struct Expr *
parsePowerExpr(void)
{
    const struct Expr *base = parseUnaryExpr();

    while (token.kind == CARET) {
        getToken(); // consume the '^'
        const struct Expr *exponent = parsePowerExpr();
        base = newBinaryExpr(EK_POW, base, exponent);
    }
    return base;
}

const struct Expr *
parseUnaryExpr(void)
{
    if (token.kind == PLUS || token.kind == MINUS) {
        enum TokenKind op = token.kind;
        getToken();
        const struct Expr *expr = parseUnaryExpr();
        if (op == MINUS) {
            return newUnaryExpr(EK_UNARY_MINUS, expr);
        }
        return newUnaryExpr(EK_UNARY_PLUS, expr);
    }
    return parseFactor();
}

// Parse a factor, i.e. an integer literal (decimal, hexadecimal or octal) or
// an expression in parentheses, and return the expression tree. Any other
// token is reported as error through expectedError().
//
// The string of a literal is converted with the base that belongs to its
// kind: with base 10 a hexadecimal literal like "0xaB12" would give 0 and an
// octal literal like "0123" would give 123. With base 16 strtoull accepts
// the leading "0x".
const struct Expr *
parseFactor(void)
{
    if (token.kind == DEC_LITERAL) {
        uint64_t uint = strtoull(token.val.cstr, 0, 10);
        getToken();
        return newUnsignedLiteralExpr(uint);
    } else if (token.kind == HEX_LITERAL) {
        uint64_t uint = strtoull(token.val.cstr, 0, 16);
        getToken();
        return newUnsignedLiteralExpr(uint);
    } else if (token.kind == OCT_LITERAL) {
        uint64_t uint = strtoull(token.val.cstr, 0, 8);
        getToken();
        return newUnsignedLiteralExpr(uint);
    } else if (token.kind == LPAREN) {
        getToken(); // consume the '('
        const struct Expr *expr = parseExpr();
        expected(RPAREN);
        getToken(); // consume the ')'
        return expr;
    } else {
        expectedError("factor");
        return 0; // never reached
    }
}


// test for the parse

// Read expression statements from standard input and print their trees.
int
main(void)
{
    // the parser always inspects the current token, so fetch the first one
    getToken();
    parseInputSequence();
    return 0;
}

session19/git/abc/xtest_lexer.c

#include <stdio.h>

#include "lexer.h"

// Test for the lexer: print position, kind and string value of each token
// read from standard input until the end of input is reached.
int
main(void)
{
    for (;;) {
        if (getToken() == EOI) {
            break;
        }
        const char *kindStr = strTokenKind(token.kind);
        printf("%zu.%zu: %s '%s'\n", token.pos.line, token.pos.col,
               kindStr, token.val.cstr);
    }
    return 0;
}

session19/git/abc/xtest_ustr.c

#include <stdio.h>

#include "ustr.h"

// Test for the pool of unique strings: each line read from standard input
// is added to the pool and reported either as one of the keywords 'if' and
// 'while' or as identifier. At the end the whole pool is printed.
int
main(void)
{
    // add the keywords first; unique strings are compared by their pointers
    const struct UStr *kwIf = UStrAdd("if");
    const struct UStr *kwWhile = UStrAdd("while");

    char *line = 0;
    size_t capacity = 0;
    ssize_t len;

    while ((len = getline(&line, &capacity, stdin)) > 0) {
        // remove the newline; the last line of the input may not have one
        // and in this case no character must be removed
        if (line[len - 1] == '\n') {
            line[len - 1] = 0;
        }
        bool added = false;
        const struct UStr *ident = UStrAdd_(line, &added);

        if (ident == kwIf) {
            printf("keyword 'if'\n");
        } else if (ident == kwWhile) {
            printf("keyword 'while'\n");
        } else {
            printf("identifier '%s'\n", ident->cstr);
            if (added) {
                printf("this is new\n");
            }
        }
    }

    printf("Pool of UStr:\n");
    UStrPrintPool();
}