#include #include #include "lexer.h" struct Token token; //------------------------------------------------------------------------------ // position of current character ch static struct TokenPos curr = { 1, 0, }; static int ch; static int nextCh(void) { ++curr.col; ch = getchar(); if (ch == '\n') { ++curr.line; curr.col = 0; } return ch; } static bool isWhiteSpace(int ch) { return ch == ' ' || ch == '\t'; } static bool isDecDigit(int ch) { return ch >= '0' && ch <= '9'; } static bool isOctDigit(int ch) { return ch >= '0' && ch <= '7'; } static bool isHexDigit(int ch) { return isDecDigit(ch) || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'); } static bool isLetter(int ch) { return ((ch >= 'a') && (ch <= 'z')) || ((ch >= 'A' && ch <= 'Z')) || ch == '_'; } enum TokenKind getToken(void) { unsigned long long val = 0; // init ch, skip white spaces and newlines while (ch == 0 || isWhiteSpace(ch) || ch == '\n') { nextCh(); } token.pos.line = curr.line; token.pos.col = curr.col; clearStr(&token.val); if (ch == EOF) { return token.kind = EOI; } else if (isDecDigit(ch)) { // parse literal if (ch == '0') { appendCharToStr(&token.val, ch); nextCh(); if (ch == 'x') { appendCharToStr(&token.val, ch); nextCh(); if (isHexDigit(ch)) { while (isHexDigit(ch)) { appendCharToStr(&token.val, ch); nextCh(); } return token.kind = HEX_LITERAL; } return token.kind = BAD_TOKEN; } while (isOctDigit(ch)) { appendCharToStr(&token.val, ch); nextCh(); } return token.kind = OCT_LITERAL; } else if (isDecDigit(ch)) { while (isDecDigit(ch)) { appendCharToStr(&token.val, ch); nextCh(); } return token.kind = DEC_LITERAL; } } else if (ch == '+') { appendCharToStr(&token.val, ch); nextCh(); return token.kind = PLUS; } else if (ch == '-') { appendCharToStr(&token.val, ch); nextCh(); return token.kind = MINUS; } else if (ch == '*') { appendCharToStr(&token.val, ch); nextCh(); return token.kind = ASTERISK; } else if (ch == '/') { appendCharToStr(&token.val, ch); nextCh(); return token.kind = SLASH; } else if (ch == '%') { appendCharToStr(&token.val, ch); nextCh(); return token.kind = PERCENT; } else if (ch == '=') { appendCharToStr(&token.val, ch); nextCh(); return token.kind = EQUAL; } else if (ch == '(') { appendCharToStr(&token.val, ch); nextCh(); return token.kind = LPAREN; } else if (ch == ')') { appendCharToStr(&token.val, ch); nextCh(); return token.kind = RPAREN; } else if (ch == ';') { appendCharToStr(&token.val, ch); nextCh(); return token.kind = SEMICOLON; } else if (ch == '^') { appendCharToStr(&token.val, ch); nextCh(); return token.kind = CARET; } else if (isLetter(ch)) { do { appendCharToStr(&token.val, ch); nextCh(); } while (isLetter(ch) || isDecDigit(ch)); return token.kind = IDENTIFIER; } nextCh(); return token.kind = BAD_TOKEN; }