#include #include #include "finalize.h" #include "lexer.h" struct Token token; static void cleanup(void) { releaseStr(&token.val); } //------------------------------------------------------------------------------ // position of current character ch static struct TokenPos curr = { 1, 0, }; static int ch; static int nextCh(void) { ++curr.col; ch = getchar(); if (ch == '\n') { ++curr.line; curr.col = 0; } return ch; } static bool isWhiteSpace(int ch) { return ch == ' ' || ch == '\t'; } static bool isDecDigit(int ch) { return ch >= '0' && ch <= '9'; } static bool isOctDigit(int ch) { return ch >= '0' && ch <= '7'; } static bool isHexDigit(int ch) { return isDecDigit(ch) || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'); } static bool isLetter(int ch) { return ((ch >= 'a') && (ch <= 'z')) || ((ch >= 'A' && ch <= 'Z')) || ch == '_'; } enum TokenKind getToken(void) { static bool first = true; if (first) { first = false; finalizeRegister(cleanup); } // init ch, skip white spaces and newlines while (ch == 0 || isWhiteSpace(ch) || ch == '\n') { nextCh(); } token.pos.line = curr.line; token.pos.col = curr.col; clearStr(&token.val); if (ch == EOF) { return token.kind = EOI; } else if (isDecDigit(ch)) { // parse literal if (ch == '0') { appendCharToStr(&token.val, ch); nextCh(); if (ch == 'x') { appendCharToStr(&token.val, ch); nextCh(); if (isHexDigit(ch)) { while (isHexDigit(ch)) { appendCharToStr(&token.val, ch); nextCh(); } return token.kind = HEX_LITERAL; } return token.kind = BAD_TOKEN; } while (isOctDigit(ch)) { appendCharToStr(&token.val, ch); nextCh(); } return token.kind = OCT_LITERAL; } else if (isDecDigit(ch)) { while (isDecDigit(ch)) { appendCharToStr(&token.val, ch); nextCh(); } return token.kind = DEC_LITERAL; } } else if (ch == '&') { appendCharToStr(&token.val, ch); nextCh(); if (ch == '&') { appendCharToStr(&token.val, ch); nextCh(); return token.kind = AMPERSAND2; } return token.kind = AMPERSAND; } else if (ch == '*') { appendCharToStr(&token.val, ch); nextCh(); return token.kind = ASTERISK; } else if (ch == '^') { appendCharToStr(&token.val, ch); nextCh(); return token.kind = CARET; } else if (ch == '$') { appendCharToStr(&token.val, ch); nextCh(); return token.kind = DOLLAR; } else if (ch == '=') { appendCharToStr(&token.val, ch); nextCh(); if (ch == '=') { appendCharToStr(&token.val, ch); nextCh(); return token.kind = EQUAL2; } return token.kind = EQUAL; } else if (ch == '!') { appendCharToStr(&token.val, ch); nextCh(); if (ch == '=') { appendCharToStr(&token.val, ch); nextCh(); return token.kind = NOT_EQUAL; } return token.kind = NOT; } else if (ch == '>') { appendCharToStr(&token.val, ch); nextCh(); if (ch == '=') { appendCharToStr(&token.val, ch); nextCh(); return token.kind = GREATER_EQUAL; } return token.kind = GREATER; } else if (ch == '<') { appendCharToStr(&token.val, ch); nextCh(); if (ch == '=') { appendCharToStr(&token.val, ch); nextCh(); return token.kind = LESS_EQUAL; } return token.kind = LESS; } else if (ch == '(') { appendCharToStr(&token.val, ch); nextCh(); return token.kind = LPAREN; } else if (ch == '-') { appendCharToStr(&token.val, ch); nextCh(); return token.kind = MINUS; } else if (ch == '%') { appendCharToStr(&token.val, ch); nextCh(); return token.kind = PERCENT; } else if (ch == '+') { appendCharToStr(&token.val, ch); nextCh(); return token.kind = PLUS; } else if (ch == ')') { appendCharToStr(&token.val, ch); nextCh(); return token.kind = RPAREN; } else if (ch == ';') { appendCharToStr(&token.val, ch); nextCh(); return token.kind = SEMICOLON; } else if (ch == '/') { appendCharToStr(&token.val, ch); nextCh(); return token.kind = SLASH; } else if (ch == '~') { appendCharToStr(&token.val, ch); nextCh(); return token.kind = TILDE; } else if (ch == '|') { appendCharToStr(&token.val, ch); nextCh(); if (ch == '|') { appendCharToStr(&token.val, ch); nextCh(); return token.kind = VBAR2; } return token.kind = VBAR; } else if (isLetter(ch)) { do { appendCharToStr(&token.val, ch); nextCh(); } while (isLetter(ch) || isDecDigit(ch)); return token.kind = IDENTIFIER; } nextCh(); return token.kind = BAD_TOKEN; }