Getting Started with Git

Exercise: ABC Compiler Project

Create a repository for the project. Initialize it with what we have developed so far:

#
# patch: pick a usable C compiler.
# Only when CC still has make's built-in default (i.e. the user set it neither
# on the command line, in the environment, nor in a makefile) do we probe that
# default with "-v"; if the probe fails we fall back to gcc.
#
ifeq "$(origin CC)" "default"
    cc_check := $(strip $(shell $(CC) -v > /dev/null 2>&1 && echo "sane"))
    ifneq "$(cc_check)" "sane"
        CC := gcc
    endif
endif

#
# File lists: sources, objects, targets and dependency files
#

# every C source file in the current directory
src := $(wildcard *.c)

# one object file per source file
obj := $(src:.c=.o)

# programs to build: sources named xtest*, with the .c suffix removed
target := $(filter xtest%,$(src:.c=))

# objects that are required by the targets (all objects except the xtest* ones)
lib.o := $(filter-out xtest%,$(obj))

# dependency files that will be generated by the compiler: <source>.d
deps := $(addsuffix .d,$(src))

# dependency files left behind by source files that no longer exist
obsolete.deps := $(filter-out $(deps),$(wildcard *.c.d))

#
# Build rules
#
# Note: every recipe line below starts with a hard TAB, as make requires.
#
.PHONY: all
all: $(target) $(obj)

# rule for removing obsolete dependency files; they are phony so the recipe
# runs whenever such a file is listed as an (order-only) prerequisite
.PHONY: $(obsolete.deps)
$(obsolete.deps) :
	$(RM) $(obsolete.deps)

# delete implicit rule for building an executable directly from its source file
% : %.c

# our rule: to build a target link its own object file against the library
# object files only. Using $(lib.o) instead of $(obj) keeps the objects of
# other xtest* programs (each with its own main) out of the link.
%: %.o $(lib.o) | $(obsolete.deps)
	$(CC) -o $@ $(LDFLAGS) $^

# our rule to build objects: also generate a dependency file <source>.d
# (-MMD: list user headers only, -MP: phony target per header,
#  -MT/-MF: name of the target in, and of, the dependency file)
%.o: %.c | $(obsolete.deps)
	$(CC) -c $(CPPFLAGS) $(CFLAGS) -MT $@ -MMD -MP -MF $<.d $<

.PHONY: clean
clean:
	$(RM) $(target) $(obj) $(deps) $(obsolete.deps)

#
# Include dependencies (if already generated)
#
-include $(deps)
#ifndef ABC_LEXER_H
#define ABC_LEXER_H

#include <stddef.h>

/*
   Token kind:

    0 = EOI (end of input)
    1 = BAD_TOKEN
    2 = HEX_LITERAL
    3 = OCT_LITERAL
    4 = DEC_LITERAL
    5 = PLUS ('+')
    6 = MINUS ('-')
    7 = ASTERISK ('*')
    8 = SLASH ('/')
    9 = PERCENT ('%')
   10 = EQUAL ('=')
   11 = LPAREN (left parenthesis '(')
   12 = RPAREN (right parenthesis ')')
   13 = SEMICOLON
   14 = IDENTIFIER
*/

// Scans the next token from standard input and returns its kind, using the
// numbering listed above (0 once the end of input has been reached).
int getToken(void);

// Attributes of the current token.
// token_kind: uses the numbering above (NOTE(review): verify that the lexer
// keeps it in sync with the value returned by getToken()).
// token_line, token_col: position of the first character of the token.
extern int token_kind;
extern size_t token_line;
extern size_t token_col;


#endif // ABC_LEXER_H
#include <stdbool.h>
#include <stdio.h>

#include "lexer.h"

// token attributes declared in lexer.h; getToken() copies the position of the
// token's first character into token_line and token_col
int token_kind;
size_t token_line;
size_t token_col;

//------------------------------------------------------------------------------

// position of current character ch
// (lines are counted from 1; nextCh() advances the column for every character
// read and resets it to 0 when it reads a newline)
static size_t curr_line = 1;
static size_t curr_col;

// most recently read character; it is still 0 before the first call of
// nextCh(), which getToken() uses as its "nothing read yet" marker
static int ch;

static int
nextCh(void)
{
    ++curr_col;
    ch = getchar();
    if (ch == '\n') {
        ++curr_line;
        curr_col = 0;
    }
    return ch;
}

// True for a blank or a horizontal tab. A newline is not treated as white
// space here.
static bool
isWhiteSpace(int ch)
{
    switch (ch) {
        case ' ':
        case '\t':
            return true;
        default:
            return false;
    }
}

// True if ch is one of the decimal digits '0' .. '9'.
static bool
isDecDigit(int ch)
{
    if (ch < '0') {
        return false;
    }
    return !(ch > '9');
}

// True if ch is one of the octal digits '0' .. '7'.
static bool
isOctDigit(int ch)
{
    if (ch > '7') {
        return false;
    }
    return !(ch < '0');
}

// True if ch is a hexadecimal digit: '0' .. '9', 'a' .. 'f' or 'A' .. 'F'.
static bool
isHexDigit(int ch)
{
    if (isDecDigit(ch)) {
        return true;
    }
    if (ch >= 'a' && ch <= 'f') {
        return true;
    }
    return ch >= 'A' && ch <= 'F';
}

// True if ch may start an identifier: 'a' .. 'z', 'A' .. 'Z' or '_'.
static bool
isLetter(int ch)
{
    if (ch == '_') {
        return true;
    }
    if (ch >= 'A' && ch <= 'Z') {
        return true;
    }
    return ch >= 'a' && ch <= 'z';
}

int
getToken(void)
{
    unsigned long long val = 0;

    // init ch, skip white spaces and newlines
    while (ch == 0 || isWhiteSpace(ch) || ch == '\n') {
        nextCh();
    }

    token_line = curr_line;
    token_col = curr_col;

    if (ch == EOF) {
        return 0; // EOI
    } else if (isDecDigit(ch)) {
        // parse literal
        if (ch == '0') {
            nextCh();
            if (ch == 'x') {
                nextCh();
                if (isHexDigit(ch)) {
                    while (isHexDigit(ch)) {
                        nextCh();
                    }
                    return 2; // HEX_LITERAL
                }
                return 1; // BAD_TOKEN
            }
            while (isOctDigit(ch)) {
                ch -= '0';
                nextCh();
            }
            return 3; // OCT_LITERAL
        } else if (isDecDigit(ch)) {
            while (isDecDigit(ch)) {
                nextCh();
            }
            return 4; // DEC_LITERAL
        }
    } else if (ch == '+') {
        nextCh();
        return 5; // PLUS
    } else if (ch == '-') {
        nextCh();
        return 6; // MINUS
    } else if (ch == '*') {
        nextCh();
        return 7; // ASTERISK
    } else if (ch == '/') {
        nextCh();
        return 8; // SLASH
    } else if (ch == '%') {
        nextCh();
        return 9; // PERCENT
    } else if (ch == '=') {
        nextCh();
        return 10; // EQUAL
    } else if (ch == '(') {
        nextCh();
        return 11; // LPAREN
    } else if (ch == ')') {
        nextCh();
        return 12; // RPAREN
    } else if (ch == ';') {
        nextCh();
        return 13; // SEMICOLON
    } else if (isLetter(ch)) {
        do {
            nextCh();
        } while (isLetter(ch) || isDecDigit(ch));
        return 14; // IDENTIFIER
    }

    nextCh();
    return 1; // BAD_TOKEN
}
#include <stdio.h>

#include "lexer.h"

int
main(void)
{
    int token;
    while ((token = getToken()) != 0) {
        printf("%zu.%zu: ", token_line, token_col);
        if (token == 1) {
            printf("BAD_TOKEN\n");
        } else if (token == 2) {
            printf("HEX_LITERAL\n");
        } else if (token == 3) {
            printf("OCT_LITERAL\n");
        } else if (token == 4) {
            printf("DEC_LITERAL\n");
        } else if (token == 5) {
            printf("PLUS\n");
        } else if (token == 6) {
            printf("MINUS\n");
        } else if (token == 7) {
            printf("ASTERISK\n");
        } else if (token == 8) {
            printf("SLASH\n");
        } else if (token == 9) {
            printf("PERCENT\n");
        } else if (token == 10) {
            printf("EQUAL\n");
        } else if (token == 11) {
            printf("LPAREN\n");
        } else if (token == 12) {
            printf("RPAREN\n");
        } else if (token == 13) {
            printf("SEMICOLON\n");
        } else if (token == 14) {
            printf("IDENTIFIER\n");
        } else {
            printf("?? internal error ?? token = %d\n", token);
        }
    }
}
1
2
3
4
a = 5;
b = 42;
c = (a + b) *2;
123 0123 0xaB12 abc +-/*%^()

Some Bugfix

You certainly saw that there was a bug in lexer.c: the column number of a token was not correct. This was fixed with a new commit:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
MCL:abc lehn$ git show 10aef1fe0e95973b6f2e05a59b88cea71b8cd3c2
commit 10aef1fe0e95973b6f2e05a59b88cea71b8cd3c2 (HEAD -> master, tag: v.0.0.2, origin/master, origin/HEAD)
Author: Michael C. Lehn <michel.lehn@uni-ulm.de>
Date:   Fri Jun 10 09:27:29 2022 +0200

    bug fix: col number

diff --git a/lexer.c b/lexer.c
index e202c80..01e552d 100644
--- a/lexer.c
+++ b/lexer.c
@@ -22,7 +22,7 @@ nextCh(void)
     ch = getchar();
     if (ch == '\n') {
        ++curr_line;
-       curr_col = 1;
+       curr_col = 0;
     }
     return ch;
 }

Afterwards the tag v.0.0.2 had to be moved, because it still pointed to the buggy commit. To do that, the tag v.0.0.2 was first deleted in the local repository and also in the remote repository:

1
2
git tag -d v.0.0.2
git push --delete origin v.0.0.2

After that the new commit with the bug fix got the tag v.0.0.2 and tags were also pushed to the remote repository:

1
2
git tag v.0.0.2 -a
git push origin --tags

ABC Compiler Project: Tag v.0.0.2

With git checkout tags/<tag> you can go back to a given tag in your repository. I will use this so that you can see below my state of the project with tag v.0.0.2:

theon$ git checkout tags/v.0.0.2
Note: switching to 'tags/v.0.0.2'.

You are in 'detached HEAD' state. You can look around, make experimental
changes and commit them, and you can discard any commits you make in this
state without impacting any branches by switching back to a branch.

If you want to create a new branch to retain commits you create, you may
do so (now or later) by using -c with the switch command. Example:

  git switch -c <new-branch-name>

Or undo this operation with:

  git switch -

Turn off this advice by setting config variable advice.detachedHead to false

HEAD is now at 10aef1f bug fix: col number
theon$ 

And of course you can follow git's advice to get rid of this message...

#include <stdbool.h>
#include <stdio.h>

#include "lexer.h"

// token attributes declared in lexer.h; getToken() copies the position of the
// token's first character into token_line and token_col
int token_kind;
size_t token_line;
size_t token_col;

//------------------------------------------------------------------------------

// position of current character ch
// (lines are counted from 1; nextCh() advances the column for every character
// read and resets it to 0 when it reads a newline)
static size_t curr_line = 1;
static size_t curr_col;

// most recently read character; it is still 0 before the first call of
// nextCh(), which getToken() uses as its "nothing read yet" marker
static int ch;

static int
nextCh(void)
{
    ++curr_col;
    ch = getchar();
    if (ch == '\n') {
        ++curr_line;
        curr_col = 0;
    }
    return ch;
}

// True for a blank or a horizontal tab. A newline is not treated as white
// space here.
static bool
isWhiteSpace(int ch)
{
    switch (ch) {
        case ' ':
        case '\t':
            return true;
        default:
            return false;
    }
}

// True if ch is one of the decimal digits '0' .. '9'.
static bool
isDecDigit(int ch)
{
    if (ch < '0') {
        return false;
    }
    return !(ch > '9');
}

// True if ch is one of the octal digits '0' .. '7'.
static bool
isOctDigit(int ch)
{
    if (ch > '7') {
        return false;
    }
    return !(ch < '0');
}

// True if ch is a hexadecimal digit: '0' .. '9', 'a' .. 'f' or 'A' .. 'F'.
static bool
isHexDigit(int ch)
{
    if (isDecDigit(ch)) {
        return true;
    }
    if (ch >= 'a' && ch <= 'f') {
        return true;
    }
    return ch >= 'A' && ch <= 'F';
}

// True if ch may start an identifier: 'a' .. 'z', 'A' .. 'Z' or '_'.
static bool
isLetter(int ch)
{
    if (ch == '_') {
        return true;
    }
    if (ch >= 'A' && ch <= 'Z') {
        return true;
    }
    return ch >= 'a' && ch <= 'z';
}

int
getToken(void)
{
    unsigned long long val = 0;

    // init ch, skip white spaces and newlines
    while (ch == 0 || isWhiteSpace(ch) || ch == '\n') {
        nextCh();
    }

    token_line = curr_line;
    token_col = curr_col;

    if (ch == EOF) {
        return 0; // EOI
    } else if (isDecDigit(ch)) {
        // parse literal
        if (ch == '0') {
            nextCh();
            if (ch == 'x') {
                nextCh();
                if (isHexDigit(ch)) {
                    while (isHexDigit(ch)) {
                        nextCh();
                    }
                    return 2; // HEX_LITERAL
                }
                return 1; // BAD_TOKEN
            }
            while (isOctDigit(ch)) {
                ch -= '0';
                nextCh();
            }
            return 3; // OCT_LITERAL
        } else if (isDecDigit(ch)) {
            while (isDecDigit(ch)) {
                nextCh();
            }
            return 4; // DEC_LITERAL
        }
    } else if (ch == '+') {
        nextCh();
        return 5; // PLUS
    } else if (ch == '-') {
        nextCh();
        return 6; // MINUS
    } else if (ch == '*') {
        nextCh();
        return 7; // ASTERISK
    } else if (ch == '/') {
        nextCh();
        return 8; // SLASH
    } else if (ch == '%') {
        nextCh();
        return 9; // PERCENT
    } else if (ch == '=') {
        nextCh();
        return 10; // EQUAL
    } else if (ch == '(') {
        nextCh();
        return 11; // LPAREN
    } else if (ch == ')') {
        nextCh();
        return 12; // RPAREN
    } else if (ch == ';') {
        nextCh();
        return 13; // SEMICOLON
    } else if (isLetter(ch)) {
        do {
            nextCh();
        } while (isLetter(ch) || isDecDigit(ch));
        return 14; // IDENTIFIER
    }

    nextCh();
    return 1; // BAD_TOKEN
}
#ifndef ABC_LEXER_H
#define ABC_LEXER_H

#include <stddef.h>

/*
   Token kind:

    0 = EOI (end of input)
    1 = BAD_TOKEN
    2 = HEX_LITERAL
    3 = OCT_LITERAL
    4 = DEC_LITERAL
    5 = PLUS ('+')
    6 = MINUS ('-')
    7 = ASTERISK ('*')
    8 = SLASH ('/')
    9 = PERCENT ('%')
   10 = EQUAL ('=')
   11 = LPAREN (left parenthesis '(')
   12 = RPAREN (right parenthesis ')')
   13 = SEMICOLON
   14 = IDENTIFIER
*/

// Scans the next token from standard input and returns its kind, using the
// numbering listed above (0 once the end of input has been reached).
int getToken(void);

// Attributes of the current token.
// token_kind: uses the numbering above (NOTE(review): verify that the lexer
// keeps it in sync with the value returned by getToken()).
// token_line, token_col: position of the first character of the token.
extern int token_kind;
extern size_t token_line;
extern size_t token_col;


#endif // ABC_LEXER_H
#
# patch: pick a usable C compiler.
# Only when CC still has make's built-in default (i.e. the user set it neither
# on the command line, in the environment, nor in a makefile) do we probe that
# default with "-v"; if the probe fails we fall back to gcc.
#
ifeq "$(origin CC)" "default"
    cc_check := $(strip $(shell $(CC) -v > /dev/null 2>&1 && echo "sane"))
    ifneq "$(cc_check)" "sane"
        CC := gcc
    endif
endif

#
# File lists: sources, objects, targets and dependency files
#

# every C source file in the current directory
src := $(wildcard *.c)

# one object file per source file
obj := $(src:.c=.o)

# programs to build: sources named xtest*, with the .c suffix removed
target := $(filter xtest%,$(src:.c=))

# objects that are required by the targets (all objects except the xtest* ones)
lib.o := $(filter-out xtest%,$(obj))

# dependency files that will be generated by the compiler: <source>.d
deps := $(addsuffix .d,$(src))

# dependency files left behind by source files that no longer exist
obsolete.deps := $(filter-out $(deps),$(wildcard *.c.d))

#
# Build rules
#
# Note: every recipe line below starts with a hard TAB, as make requires.
#
.PHONY: all
all: $(target) $(obj)

# rule for removing obsolete dependency files; they are phony so the recipe
# runs whenever such a file is listed as an (order-only) prerequisite
.PHONY: $(obsolete.deps)
$(obsolete.deps) :
	$(RM) $(obsolete.deps)

# delete implicit rule for building an executable directly from its source file
% : %.c

# our rule: to build a target link its own object file against the library
# object files only. Using $(lib.o) instead of $(obj) keeps the objects of
# other xtest* programs (each with its own main) out of the link.
%: %.o $(lib.o) | $(obsolete.deps)
	$(CC) -o $@ $(LDFLAGS) $^

# our rule to build objects: also generate a dependency file <source>.d
# (-MMD: list user headers only, -MP: phony target per header,
#  -MT/-MF: name of the target in, and of, the dependency file)
%.o: %.c | $(obsolete.deps)
	$(CC) -c $(CPPFLAGS) $(CFLAGS) -MT $@ -MMD -MP -MF $<.d $<

.PHONY: clean
clean:
	$(RM) $(target) $(obj) $(deps) $(obsolete.deps)

#
# Include dependencies (if already generated)
#
-include $(deps)
ABC: ABC is A Bloody Compiler
=============================

This is our compiler project

## How to use

Use `make` to ...
1
2
3
4
a = 5;
b = 42;
c = (a + b) *2;
123 0123 0xaB12 abc +-/*%^()
#include <stdio.h>

#include "lexer.h"

int
main(void)
{
    int token;
    while ((token = getToken()) != 0) {
        printf("%zu.%zu: ", token_line, token_col);
        if (token == 1) {
            printf("BAD_TOKEN\n");
        } else if (token == 2) {
            printf("HEX_LITERAL\n");
        } else if (token == 3) {
            printf("OCT_LITERAL\n");
        } else if (token == 4) {
            printf("DEC_LITERAL\n");
        } else if (token == 5) {
            printf("PLUS\n");
        } else if (token == 6) {
            printf("MINUS\n");
        } else if (token == 7) {
            printf("ASTERISK\n");
        } else if (token == 8) {
            printf("SLASH\n");
        } else if (token == 9) {
            printf("PERCENT\n");
        } else if (token == 10) {
            printf("EQUAL\n");
        } else if (token == 11) {
            printf("LPAREN\n");
        } else if (token == 12) {
            printf("RPAREN\n");
        } else if (token == 13) {
            printf("SEMICOLON\n");
        } else if (token == 14) {
            printf("IDENTIFIER\n");
        } else {
            printf("?? internal error ?? token = %d\n", token);
        }
    }
}