// lexer.c (session15/abc/lexer.c)

// Overview Home Back Next Content of this session Browse Files
#include <stdbool.h>
#include <stdio.h>

#include "lexer.h"

// Kind of the current token. Nothing in this file assigns it; presumably the
// caller stores the return value of getToken() here -- TODO confirm.
int token_kind;
// Line (counted from 1) of the first character of the token most recently
// scanned by getToken().
size_t token_line;
// Column (counted from 1 within the line) of the first character of the token
// most recently scanned by getToken().
size_t token_col;

//------------------------------------------------------------------------------

// position of current character ch
// Lines are counted from 1. curr_col is incremented for every character read
// and reset to 0 by nextCh() when a newline is read, so the first character of
// each line gets column 1.
static size_t curr_line = 1;
static size_t curr_col;

// Most recently read character (or EOF). Its initial value 0 is used by
// getToken() as "nothing has been read yet".
static int ch;

// Read the next character from stdin into ch and keep the current position
// (curr_line, curr_col) in sync. Returns the character read, or EOF.
static int
nextCh(void)
{
    ch = getchar();
    if (ch == '\n') {
        // a newline starts a new line; the character after it is column 1
        ++curr_line;
        curr_col = 0;
    } else {
        ++curr_col;
    }
    return ch;
}

// True if ch is a blank or a horizontal tab. Newlines are deliberately not
// included here.
static bool
isWhiteSpace(int ch)
{
    switch (ch) {
        case ' ':
        case '\t':
            return true;
        default:
            return false;
    }
}

// True if ch is one of the decimal digits '0'..'9'.
static bool
isDecDigit(int ch)
{
    if (ch < '0' || ch > '9') {
        return false;
    }
    return true;
}

// True if ch is one of the octal digits '0'..'7'.
static bool
isOctDigit(int ch)
{
    return !(ch < '0' || ch > '7');
}

// True if ch is a hexadecimal digit: '0'..'9', 'a'..'f' or 'A'..'F'.
static bool
isHexDigit(int ch)
{
    if (isDecDigit(ch)) {
        return true;
    }
    if (ch >= 'a' && ch <= 'f') {
        return true;
    }
    return ch >= 'A' && ch <= 'F';
}

// True if ch may start an identifier: a letter 'a'..'z' / 'A'..'Z' or an
// underscore.
static bool
isLetter(int ch)
{
    if (ch == '_') {
        return true;
    }
    if (ch >= 'A' && ch <= 'Z') {
        return true;
    }
    return ch >= 'a' && ch <= 'z';
}

// Scan the next token from stdin and return its kind.
//
// Blanks, tabs and newlines in front of the token are skipped. token_line and
// token_col are set to the position of the token's first character (or to the
// position at which end of input was detected). On return, ch holds the first
// character that is not part of the token. token_kind is not modified here.
//
// Returned kinds:
//    0  EOI (end of input)     1  BAD_TOKEN
//    2  HEX_LITERAL            3  OCT_LITERAL       4  DEC_LITERAL
//    5  PLUS                   6  MINUS             7  ASTERISK
//    8  SLASH                  9  PERCENT          10  EQUAL
//   11  LPAREN                12  RPAREN           13  SEMICOLON
//   14  IDENTIFIER
int
getToken(void)
{
    // init ch (0 means nothing was read so far), skip white spaces and newlines
    while (ch == 0 || isWhiteSpace(ch) || ch == '\n') {
        nextCh();
    }

    token_line = curr_line;
    token_col = curr_col;

    if (ch == EOF) {
        return 0; // EOI
    }

    if (isDecDigit(ch)) {
        // parse literal
        if (ch != '0') {
            while (isDecDigit(ch)) {
                nextCh();
            }
            return 4; // DEC_LITERAL
        }

        // leading '0': either a hex literal ("0x...") or an octal literal
        nextCh();
        if (ch == 'x') {
            nextCh();
            if (!isHexDigit(ch)) {
                // "0x" without any hex digit; the offending character is
                // left in ch for the next call
                return 1; // BAD_TOKEN
            }
            while (isHexDigit(ch)) {
                nextCh();
            }
            return 2; // HEX_LITERAL
        }
        // a lone "0" is an octal literal, too
        while (isOctDigit(ch)) {
            nextCh();
        }
        return 3; // OCT_LITERAL
    }

    if (isLetter(ch)) {
        do {
            nextCh();
        } while (isLetter(ch) || isDecDigit(ch));
        return 14; // IDENTIFIER
    }

    // everything else is a single-character token (or a bad character)
    int kind;
    switch (ch) {
        case '+':
            kind = 5; // PLUS
            break;
        case '-':
            kind = 6; // MINUS
            break;
        case '*':
            kind = 7; // ASTERISK
            break;
        case '/':
            kind = 8; // SLASH
            break;
        case '%':
            kind = 9; // PERCENT
            break;
        case '=':
            kind = 10; // EQUAL
            break;
        case '(':
            kind = 11; // LPAREN
            break;
        case ')':
            kind = 12; // RPAREN
            break;
        case ';':
            kind = 13; // SEMICOLON
            break;
        default:
            kind = 1; // BAD_TOKEN
            break;
    }
    nextCh(); // consume the character in either case
    return kind;
}