1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#include <stdbool.h>
#include <stdio.h>

#include "lexer.h"

// Current lookahead character: the most recent getchar() result stored by
// nextCh(). As a file-scope object it starts out as 0, which getToken()
// treats as "nothing has been read yet".
int ch;

// Fetch one character from standard input, remember it in the global `ch`
// and hand the same value back to the caller. The value is exactly what
// getchar() produced, so it may be EOF.
int
nextCh(void)
{
    return ch = getchar();
}

// Report whether ch is a space or a horizontal tab.
// A newline is deliberately not classified as white space here.
bool
isWhiteSpace(int ch)
{
    switch (ch) {
    case ' ':
    case '\t':
        return true;
    default:
        return false;
    }
}

// Report whether ch is one of the decimal digit characters '0'..'9'.
bool
isDecDigit(int ch)
{
    // Inside the range exactly when it is neither below '0' nor above '9'.
    return !(ch < '0' || ch > '9');
}

// Report whether ch is one of the octal digit characters '0'..'7'.
bool
isOctDigit(int ch)
{
    if (ch < '0') {
        return false;
    }
    if (ch > '7') {
        return false;
    }
    return true;
}

// Report whether ch is a hexadecimal digit character: '0'..'9', 'a'..'f'
// or 'A'..'F'.
bool
isHexDigit(int ch)
{
    if (isDecDigit(ch)) {
        return true;
    }
    if (ch >= 'a' && ch <= 'f') {
        return true;
    }
    return ch >= 'A' && ch <= 'F';
}


// Scan the next token from standard input and return its numeric code.
//
// Characters are consumed one at a time through nextCh(). After a token has
// been recognized, the global `ch` holds the first character that is not
// part of it, ready for the next call.
//
// Return codes:
//   0  EOI          - ch is EOF
//   1  BAD_TOKEN    - "0x" not followed by a hex digit (the offending
//                     character stays in ch), or any other unrecognized
//                     character (which is consumed)
//   2  HEX_LITERAL  - "0x" followed by one or more hex digits
//   3  OCT_LITERAL  - '0' followed by zero or more octal digits
//   4  DEC_LITERAL  - a digit '1'..'9' followed by zero or more decimal digits
//   5  PLUS         - '+'
//
// Note: ch == 0 doubles as the "nothing read yet" marker, so a NUL byte in
// the input is skipped just like white space.
int
getToken(void)
{
    // Prime ch on the first call (it starts out as 0), then skip blanks,
    // tabs and newlines.
    while (ch == 0 || isWhiteSpace(ch) || ch == '\n') {
        nextCh();
    }

    if (ch == EOF) {
        return 0; // EOI
    }

    if (ch == '0') {
        // A leading '0' introduces either a hex ("0x...") or an octal literal.
        nextCh();
        if (ch == 'x') {
            nextCh();
            if (!isHexDigit(ch)) {
                return 1; // BAD_TOKEN: "0x" needs at least one hex digit
            }
            while (isHexDigit(ch)) {
                nextCh();
            }
            return 2; // HEX_LITERAL
        }
        // A lone "0" is also reported as an octal literal.
        while (isOctDigit(ch)) {
            nextCh();
        }
        return 3; // OCT_LITERAL
    }

    if (isDecDigit(ch)) {
        // '0' was handled above, so this literal starts with '1'..'9'.
        while (isDecDigit(ch)) {
            nextCh();
        }
        return 4; // DEC_LITERAL
    }

    if (ch == '+') {
        nextCh();
        return 5; // PLUS
    }

    // Unrecognized character: consume it so the next call makes progress.
    nextCh();
    return 1; // BAD_TOKEN
}