/*
 * Token kind code generator.
 *
 * Reads a whitespace separated token table (one token kind per line) and
 * writes the C fragments named in outFilename[] into the current directory.
 */

/*
 * getline(), strdup() and ssize_t are POSIX, not ISO C. Request them
 * explicitly so the file also builds in strict ISO modes (e.g. -std=c11).
 */
#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200809L
#endif

/*
 * NOTE(review): the header names were missing in the reviewed copy of this
 * file (six bare "#include" directives). The list below is reconstructed
 * from the identifiers the code uses; <sys/types.h> was added for ssize_t.
 * Confirm against the original.
 */
#include <assert.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

/*
 * Print a usage message for program name 'prg' to stderr and exit with
 * status 1. Does not return.
 */
void
usage(const char *prg)
{
    fprintf(stderr, "usage: %s tokenkind.txt\n", prg);
    exit(1);
}

/*
 * Similar to Perl: Chops off the trailing newline (actually '\n' on Unix and
 * '\r\n' on DOS) by overwriting it with a null byte.
 *
 * 'len' is the number of characters in 'line' (as returned by getline()).
 * Only a real line terminator is removed: a line without trailing newline
 * (e.g. the last line of a file) keeps all its characters, and lines of
 * length 0 or 1 are handled without reading in front of the buffer.
 */
void
chopNl(char *line, size_t len)
{
    if (len > 0 && line[len - 1] == '\n') {
        line[--len] = 0;
    }
    if (len > 0 && line[len - 1] == '\r') {
        line[len - 1] = 0;
    }
}

/*
 * Return true if 'ch' separates columns. '\n' is deliberately not in the
 * set; lines are passed through chopNl() before they are split.
 */
bool
isSpace(int ch)
{
    return ch == ' ' || ch == '\r' || ch == '\f' || ch == '\v' || ch == '\t';
}

/*
 * Return true if 'ch' is an ASCII letter or an underscore.
 */
static bool
isLetter(int ch)
{
    return ((ch >= 'a') && (ch <= 'z')) || ((ch >= 'A' && ch <= 'Z'))
           || ch == '_';
}

/*
 * Split white space separated columns with format
 *
 *      tokenKind [ tokenKindVal [ prec exprKind ] ]
 *
 * 'line' is modified in place: null bytes are written behind the first two
 * columns, and the returned strings point into 'line'.
 *
 * On return:
 *  - *tokenKind points to the first column,
 *  - *tokenKindVal points to the second column or is NULL if absent,
 *  - *prec is the parsed third column or 0 if absent,
 *  - *exprKind points to the fourth column or is NULL if absent.
 *
 * Prints a message and exits with status 1 if a column is followed by white
 * space but the next expected column is missing or malformed.
 *
 * NOTE(review): in the reviewed copy of this file the end of this comment,
 * the function signature and the body up to the first error message were
 * damaged. They are reconstructed here from the surviving remainder of the
 * body and from the call in main(); in particular the exact wording of the
 * first error message is a guess. Confirm against the original.
 */
void
split(char *line, char **tokenKind, char **tokenKindVal, int *prec,
      char **exprKind)
{
    *tokenKind = *tokenKindVal = *exprKind = NULL;
    *prec = 0;

    // first column: token kind
    *tokenKind = line;
    for (; *line && !isSpace(*line); ++line) {
    }
    if (!isSpace(*line)) {
        return;
    }
    *line = 0;

    // advance to next column
    for (++line; isSpace(*line); ++line) {
    }
    if (!*line) {
        fprintf(stderr, "%s: <tokenKindVal> field expected\n", *tokenKind);
        exit(1);
    }

    // second column: token spelling
    *tokenKindVal = line;
    for (; *line && !isSpace(*line); ++line) {
    }
    if (!isSpace(*line)) {
        return;
    }
    *line = 0;

    // advance to next column
    for (++line; isSpace(*line); ++line) {
    }
    if (sscanf(line, "%d", prec) != 1) {
        fprintf(stderr, "%s: prec field expected\n", *tokenKind);
        exit(1);
    }

    // advance to next column; never step past the terminating null byte
    for (; *line && !isSpace(*line); ++line) {
    }
    for (; isSpace(*line); ++line) {
    }
    if (!*line) {
        fprintf(stderr, "%s: expression kind expected\n", *tokenKind);
        exit(1);
    }
    *exprKind = line;
}

/*
 * Write 'indentLevel' indentation steps of four spaces each to 'out'.
 */
void
printIndent(FILE *out, int indentLevel)
{
    assert(indentLevel >= 0);
    fprintf(out, "%*s", indentLevel * 4, "");
}

/*
 * Write the indentation for 'indentLevel' followed by the printf-style
 * formatted text to 'out'. With a null 'fmt' only the indentation is
 * written.
 *
 * See https://cplusplus.com/reference/cstdarg/va_list/ for details on how
 * variable arguments can be handled platform independent.
 */
void
printCode(FILE *out, int indentLevel, const char *fmt, ...)
{
    printIndent(out, indentLevel);
    if (!fmt) {
        return;
    }
    va_list argp;
    va_start(argp, fmt);
    vfprintf(out, fmt, argp);
    va_end(argp);
}

/*
 * Selects one of the generated files; used as index for outFilename[] and
 * outFile[].
 */
enum OutSelect
{
    OUT_TOKEN_KIND,
    OUT_STR_TOKEN_KIND,
    OUT_TOKEN_KIND_PREC,
    OUT_MAKE_BINARY_EXPR_KIND,
    OUT_PARSE_PUNCTUATOR,
    OUT_PARSE_KEYWORD,
    NUM_OUT,
};

const char *outFilename[NUM_OUT] = {
    "gen_tokenkind.h",
    "gen_strtokenkind.c",
    "gen_tokenkindprec.c",
    "gen_makebinaryexprkind.c",
    "gen_parsepunctuator.c",
    "gen_parsekeyword.c",
};

// Output streams; an entry is NULL while the file is not open.
FILE *outFile[NUM_OUT];

/*
 * Close every output file that is currently open. Closed entries are reset
 * to NULL so that calling this function twice is harmless.
 */
void
closeOutFiles(void)
{
    for (int i = 0; i < NUM_OUT; ++i) {
        if (outFile[i]) {
            fclose(outFile[i]);
            outFile[i] = NULL;
        }
    }
}

/*
 * Open all output files for writing. If one can not be opened a message is
 * printed, the files opened so far are closed and the program exits with
 * status 1.
 */
void
openOutFiles(void)
{
    for (int i = 0; i < NUM_OUT; ++i) {
        outFile[i] = fopen(outFilename[i], "w");
        if (!outFile[i]) {
            fprintf(stderr, "can not open output file %s\n", outFilename[i]);
            closeOutFiles();
            exit(1);
        }
    }
}

/*
 * Write the fixed text that precedes the per-token entries of the selected
 * output file. The two parser fragments have no header.
 */
void
printHeader(enum OutSelect outSelect)
{
    FILE *out = outFile[outSelect];

    switch (outSelect) {
        case OUT_TOKEN_KIND:
            printCode(out, 0, "enum TokenKind\n");
            printCode(out, 0, "{\n");
            return;
        case OUT_STR_TOKEN_KIND:
            printCode(out, 0, "const char *\n");
            printCode(out, 0, "strTokenKind(enum TokenKind tokenKind)\n");
            printCode(out, 0, "{\n");
            printCode(out, 1, "switch (tokenKind) {\n");
            return;
        case OUT_TOKEN_KIND_PREC:
            printCode(out, 0, "static int\n");
            printCode(out, 0, "tokenKindPrec(enum TokenKind tokenKind)\n");
            printCode(out, 0, "{\n");
            printCode(out, 1, "switch (tokenKind) {\n");
            return;
        case OUT_MAKE_BINARY_EXPR_KIND:
            printCode(out, 0, "static enum ExprKind\n");
            printCode(out, 0, "makeBinaryExprKind(enum TokenKind tokenKind)\n");
            printCode(out, 0, "{\n");
            printCode(out, 1, "switch (tokenKind) {\n");
            return;
        case OUT_PARSE_PUNCTUATOR:
        case OUT_PARSE_KEYWORD:
            return;
        default:
            assert(0);
    }
}

/*
 * Write the fixed text that follows the per-token entries of the selected
 * output file (default branches and closing braces). The two parser
 * fragments have no footer.
 */
void
printFooter(enum OutSelect outSelect)
{
    FILE *out = outFile[outSelect];

    switch (outSelect) {
        case OUT_TOKEN_KIND:
            printCode(out, 0, "};\n");
            return;
        case OUT_STR_TOKEN_KIND:
            printCode(out, 1, "default:\n");
            // "%%d" reaches the generated file as "%d"
            printCode(out, 2, "fprintf(stderr, \"internal error in strTokenKind: "
                              "tokenKind = %%d\",\n");
            printCode(out, 3, "tokenKind);\n");
            printCode(out, 2, "finalizeExit(1);\n");
            printCode(out, 2, "return \"\";\n");
            printCode(out, 1, "}\n");
            printCode(out, 0, "}\n");
            return;
        case OUT_TOKEN_KIND_PREC:
            printCode(out, 1, "default:\n");
            printCode(out, 2, "return 0;\n");
            printCode(out, 1, "}\n");
            printCode(out, 0, "}\n");
            return;
        case OUT_MAKE_BINARY_EXPR_KIND:
            printCode(out, 1, "default:\n");
            printCode(out, 2, "fprintf(stderr, \"internal error in "
                              "makeBinaryExprKind (tokenKind = %%d)\",\n");
            printCode(out, 3, "tokenKind);\n");
            printCode(out, 2, "finalizeExit(1);\n");
            printCode(out, 2, "return 0;\n");
            printCode(out, 1, "}\n");
            printCode(out, 0, "}\n");
            return;
        case OUT_PARSE_PUNCTUATOR:
        case OUT_PARSE_KEYWORD:
            return;
        default:
            assert(0);
    }
}

/*
 * Write the header of every output file.
 */
void
printHeaders(void)
{
    for (int i = 0; i < NUM_OUT; ++i) {
        printHeader((enum OutSelect)i);
    }
}

/*
 * Write the footer of every output file.
 */
void
printFooters(void)
{
    for (int i = 0; i < NUM_OUT; ++i) {
        printFooter((enum OutSelect)i);
    }
}

/*
 * Append the enum constant 'tk' to the generated enum TokenKind.
 */
void
printTokenKind(const char *tk)
{
    FILE *out = outFile[OUT_TOKEN_KIND];

    printCode(out, 1, "%s,\n", tk);
}

/*
 * Append the case for 'tk' to the generated strTokenKind(): it returns the
 * name of the token kind as string.
 */
void
printStrTokenKind(const char *tk)
{
    FILE *out = outFile[OUT_STR_TOKEN_KIND];

    printCode(out, 1, "case %s:\n", tk);
    printCode(out, 2, "return \"%s\";\n", tk);
}

/*
 * Append the case for 'tk' to the generated tokenKindPrec(): it returns
 * 'prec'.
 */
void
printTokenKindPrec(const char *tk, int prec)
{
    FILE *out = outFile[OUT_TOKEN_KIND_PREC];

    printCode(out, 1, "case %s:\n", tk);
    printCode(out, 2, "return %d;\n", prec);
}

/*
 * Append the case for 'tk' to the generated makeBinaryExprKind(): it
 * returns 'exprKind'.
 */
void
printMakeBinaryExprKind(const char *tk, const char *exprKind)
{
    FILE *out = outFile[OUT_MAKE_BINARY_EXPR_KIND];

    printCode(out, 1, "case %s:\n", tk);
    printCode(out, 2, "return %s;\n", exprKind);
}

//------------------------------------------------------------------------------

/*
 * Node of a character tree (trie) over token spellings. next[c] is the
 * child reached with byte value c; tokenKind is non-null if the path from
 * the root to this node spells a complete token. There is one tree for
 * spellings that begin with a letter and one for all others.
 */
struct CharNode
{
    struct CharNode *next[256];
    char *tokenKind;
} lexTreePunctuator, lexTreeKeyword;

/*
 * Insert spelling 'tkVal' with token kind name 'tk'. The spelling goes into
 * lexTreeKeyword if it begins with a letter (see isLetter()), otherwise
 * into lexTreePunctuator. The tree keeps its own copy of 'tk'; if the same
 * spelling is added again the later token kind replaces the earlier one.
 *
 * Exits with status 1 if memory can not be allocated.
 */
void
lexTreeAdd(const char *tk, const char *tkVal)
{
    struct CharNode *n = isLetter(*tkVal) ? &lexTreeKeyword
                                          : &lexTreePunctuator;

    for (const char *s = tkVal; *s; ++s) {
        // plain char may be signed: index with the byte value, never with
        // a negative number
        unsigned char c = (unsigned char)*s;

        if (!n->next[c]) {
            n->next[c] = calloc(1, sizeof(*n->next[c]));
            if (!n->next[c]) {
                fprintf(stderr, "lexTreeAdd: out of memory\n");
                exit(1);
            }
        }
        n = n->next[c];
    }

    char *tkCopy = strdup(tk);
    if (!tkCopy) {
        fprintf(stderr, "lexTreeAdd: out of memory\n");
        exit(1);
    }
    free(n->tokenKind); // drop the name stored by an earlier duplicate
    n->tokenKind = tkCopy;
}

/*
 * Release the subtree below 'n' and the token kind name stored in 'n'. The
 * node itself is released unless it is 'root', which is a statically
 * allocated object.
 */
void
lexTreeDestroy_(struct CharNode *n, struct CharNode *root)
{
    for (size_t i = 0; i < sizeof(n->next) / sizeof(n->next[0]); ++i) {
        if (n->next[i]) {
            lexTreeDestroy_(n->next[i], root);
        }
    }
    free(n->tokenKind);
    if (n != root) {
        free(n);
    }
}

/*
 * Release all memory held by both character trees.
 */
void
lexTreeDestroy(void)
{
    lexTreeDestroy_(&lexTreePunctuator, &lexTreePunctuator);
    lexTreeDestroy_(&lexTreeKeyword, &lexTreeKeyword);
}

/*
 * Emit nested if statements that match the spellings stored below 'n'
 * character by character; 'level' is the depth of 'n' in the tree. A node
 * that completes a token emits the assignment of its token kind after the
 * tests for longer spellings.
 *
 * At level 0 every branch starts with "} else " and its closing brace is
 * not emitted, so the generated text is a fragment.
 * NOTE(review): presumably the file is included between an opening
 * "if (...) {" and a closing "}" in the lexer -- confirm at the include
 * site.
 * NOTE(review): '%c' emits the character verbatim; a spelling that
 * contains ' or \ would produce an invalid character literal.
 */
void
printParsePunctuator_(FILE *out, const struct CharNode *n, int level)
{
    int indentLevel = level + 1;

    for (size_t i = 0; i < sizeof(n->next) / sizeof(n->next[0]); ++i) {
        if (!n->next[i]) {
            continue;
        }
        printCode(out, indentLevel, "%sif (ch == '%c') {\n",
                  level ? "" : "} else ", (char)i);
        printCode(out, indentLevel + 1, "appendCharToStr(&token.val, ch);\n");
        printCode(out, indentLevel + 1, "nextCh();\n");
        printParsePunctuator_(out, n->next[i], level + 1);
        if (level) {
            printCode(out, indentLevel, "}\n");
        }
    }
    if (n->tokenKind) {
        printCode(out, indentLevel, "return token.kind = %s;\n", n->tokenKind);
    }
}

/*
 * Write the punctuator matcher generated from lexTreePunctuator.
 */
void
printParsePunctuator(void)
{
    printParsePunctuator_(outFile[OUT_PARSE_PUNCTUATOR], &lexTreePunctuator, 0);
}

/*
 * Emit nested if statements that match the spellings stored below 'n'
 * character by character; 'level' is the depth of 'n' in the tree. Unlike
 * the punctuator matcher a completed spelling only yields its token kind
 * if the generated guard on the following character holds.
 *
 * NOTE(review): the generated guard "!isLetter(ch) || isDecDigit(ch)" is
 * also true for a digit, so a keyword directly followed by a digit would
 * still be returned as keyword. Possibly "!(isLetter(ch) || isDecDigit(ch))"
 * was intended; left unchanged because it depends on lexer helpers that
 * are not visible here.
 */
void
printParseKeyword_(FILE *out, const struct CharNode *n, int level)
{
    int indentLevel = level + 1;

    for (size_t i = 0; i < sizeof(n->next) / sizeof(n->next[0]); ++i) {
        if (!n->next[i]) {
            continue;
        }
        printCode(out, indentLevel, "%sif (ch == '%c') {\n",
                  level ? "" : "} else ", (char)i);
        printCode(out, indentLevel + 1, "appendCharToStr(&token.val, ch);\n");
        printCode(out, indentLevel + 1, "nextCh();\n");
        printParseKeyword_(out, n->next[i], level + 1);
        if (level) {
            printCode(out, indentLevel, "}\n");
        }
    }
    if (n->tokenKind) {
        printCode(out, indentLevel, "if (!isLetter(ch) || isDecDigit(ch)) {\n");
        printCode(out, indentLevel + 1, "return token.kind = %s;\n",
                  n->tokenKind);
        printCode(out, indentLevel, "}\n");
    }
}

/*
 * Write the keyword matcher generated from lexTreeKeyword. It starts at
 * level 1, so every branch is a complete if statement with closing brace.
 */
void
printParseKeyword(void)
{
    printParseKeyword_(outFile[OUT_PARSE_KEYWORD], &lexTreeKeyword, 1);
}

//------------------------------------------------------------------------------

/*
 * Read the token table named by argv[1] and generate all output files.
 * Returns 0 on success and 1 if the input file can not be opened; all
 * other errors terminate the program with status 1 where they are detected.
 */
int
main(int argc, char *argv[])
{
    if (argc != 2) {
        usage(argv[0]);
    }

    FILE *in = fopen(argv[1], "r");
    if (!in) {
        fprintf(stderr, "can not open input file '%s'\n", argv[1]);
        return 1; // nothing to read; must not hand a null stream to getline
    }

    openOutFiles();
    printHeaders();

    char *line = NULL;
    size_t capacity = 0;
    ssize_t len;
    while ((len = getline(&line, &capacity, in)) > 0) {
        chopNl(line, (size_t)len);

        char *tokenKind, *tokenKindVal, *exprKind;
        int prec;

        split(line, &tokenKind, &tokenKindVal, &prec, &exprKind);
        // empty first column (after split() terminated it): skip the line
        if (!*line) {
            continue;
        }
        if (tokenKind) {
            printTokenKind(tokenKind);
            printStrTokenKind(tokenKind);
        }
        if (tokenKindVal) {
            lexTreeAdd(tokenKind, tokenKindVal);
        }
        if (prec) {
            printTokenKindPrec(tokenKind, prec);
            printMakeBinaryExprKind(tokenKind, exprKind);
        }
    }
    free(line);

    // the matchers need the complete trees, so they are written last
    printParsePunctuator();
    printParseKeyword();
    printFooters();

    fclose(in);
    closeOutFiles();
    lexTreeDestroy();
    return 0;
}