Auto-generated Code for Token Kinds
The source code for strTokenKind() and the enum constants for enum TokenKind can be generated from a text file that contains in each line the identifier for a token kind. In Quiz 22 you wrote a program for that. This now can be integrated into the project. By modifying the makefile we can make sure that these source files always will be up to date.
Solution for Quiz 22 (With Some Improvement)
On Unix systems a newline is encoded with the single character '\n' (ASCII 10). In the Windows world two characters are used: '\r' (ASCII 13) followed by '\n'. In the material provided for Quiz 22 only the Unix case was handled correctly. Lines are read with getline() and then the last character (the character before the null byte) is overwritten with a null byte. So for both Unix and Windows text files this overwrites the '\n'. For a Windows text file we actually have to overwrite the second-to-last character (the '\r').
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 | #include <stdio.h>
#include <stdlib.h>
/*
 * Write the opening lines of the generated `enum TokenKind` definition
 * (the enum head and its opening brace) to out.
 */
void
printEnumHeader(FILE *out)
{
    fputs("enum TokenKind\n"
          "{\n",
          out);
}
/*
 * Write the closing line of the generated `enum TokenKind` definition to out.
 */
void
printEnumFooter(FILE *out)
{
    fputs("};\n", out);
}
/*
 * Write the head of the generated strTokenKind() function to out:
 * return type, signature, opening brace and the start of the switch.
 */
void
printStrHeader(FILE *out)
{
    static const char *const lines[] = {
        "const char *\n",
        "strTokenKind(enum TokenKind tokenKind)\n",
        "{\n",
        "    switch (tokenKind) {\n",
    };

    for (size_t i = 0; i < sizeof(lines) / sizeof(lines[0]); ++i) {
        fputs(lines[i], out);
    }
}
/*
 * Write the tail of the generated strTokenKind() function to out: the
 * `default:` branch that reports an unknown token kind and terminates,
 * followed by the closing braces of the switch and the function.
 *
 * The generated diagnostic ends with a newline and the statements inside
 * the default branch are indented consistently (8 columns).
 */
void
printStrFooter(FILE *out)
{
    // fputs is used so that "%d" and "\\n" end up verbatim in the output
    fputs("    default:\n"
          "        fprintf(stderr,\n"
          "                \"internal error in strTokenKind: "
          "tokenKind = %d\\n\",\n"
          "                tokenKind);\n"
          "        finalizeExit(1);\n"
          "        return \"\";\n"
          "    }\n"
          "}\n",
          out);
}
/*
 * Print the command line synopsis to stderr and terminate with exit code 1.
 * prg is the program name shown in the synopsis.
 */
void
usage(const char *prg)
{
    fprintf(stderr, "usage: %s input output1 output2\n", prg);
    exit(1);
}
/*
 * Open path with the given mode. On failure print a diagnostic that names
 * the file (what is "input" or "output") and terminate with exit code 1.
 */
static FILE *
openOrDie(const char *path, const char *mode, const char *what)
{
    FILE *f = fopen(path, mode);
    if (!f) {
        fprintf(stderr, "can not open %s file '%s'\n", what, path);
        exit(1);
    }
    return f;
}

/*
 * Generator for the token kind sources.
 *
 * argv[1]: text file with one token kind identifier per line
 * argv[2]: output file receiving the `enum TokenKind` definition
 * argv[3]: output file receiving the strTokenKind() implementation
 *
 * Returns 0 on success and 1 if an output file could not be written.
 */
int
main(int argc, char *argv[])
{
    if (argc != 4) {
        usage(argv[0]);
    }
    FILE *in = openOrDie(argv[1], "r", "input");
    FILE *outEnum = openOrDie(argv[2], "w", "output");
    FILE *outStr = openOrDie(argv[3], "w", "output");

    printEnumHeader(outEnum);
    printStrHeader(outStr);

    char *line = 0;
    size_t capacity = 0;
    ssize_t len;
    while ((len = getline(&line, &capacity, in)) > 0) {
        // strip the line ending: "\n" (Unix), "\r\n" (Windows) or nothing
        // at all if the last line of the file is not terminated
        while (len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r')) {
            line[--len] = 0;
        }
        if (len == 0) {
            continue; // blank line: there is no identifier to emit
        }
        fprintf(outEnum, "%*s%s,\n", 4, "", line);
        fprintf(outStr, "%*scase %s:\n", 4, "", line);
        fprintf(outStr, "%*sreturn \"%s\";\n", 8, "", line);
    }
    free(line); // ok even if line is a null pointer

    printEnumFooter(outEnum);
    printStrFooter(outStr);

    fclose(in);
    // fclose flushes buffered output, so this is where write errors show up
    int rcEnum = fclose(outEnum);
    int rcStr = fclose(outStr);
    if (rcEnum != 0 || rcStr != 0) {
        fprintf(stderr, "error while writing output files\n");
        return 1;
    }
    return 0;
}
|
My Status of the ABC Project
In my project the makefile was modified so that programs with the prefix xgen are considered to be generators for required source files. Hence, the program make_tokenkind.c was renamed to xgen_tokenkind.c.
The source files that need to be generated can be specified in a variable. Here you can see that first the executable xgen_tokenkind gets created and then, from tokenkind.txt, the source files gen_tokenkind.h and gen_strtokenkind.c are generated. If xgen_tokenkind.c or tokenkind.txt is changed, then the generated source files will be updated, and so will all object files and targets that depend on the generated source files.
theon$ make gcc -o xgen_tokenkind xgen_tokenkind.c -lm xgen_tokenkind tokenkind.txt gen_tokenkind.h gen_strtokenkind.c gcc -c -Wall -Wcast-qual -MT xtest_calc.o -MMD -MP -MF xtest_calc.c.d xtest_calc.c gcc -c -Wall -Wcast-qual -MT expr.o -MMD -MP -MF expr.c.d expr.c gcc -c -Wall -Wcast-qual -MT finalize.o -MMD -MP -MF finalize.c.d finalize.c gcc -c -Wall -Wcast-qual -MT lexer.o -MMD -MP -MF lexer.c.d lexer.c gcc -c -Wall -Wcast-qual -MT parser.o -MMD -MP -MF parser.c.d parser.c gcc -c -Wall -Wcast-qual -MT str.o -MMD -MP -MF str.c.d str.c gcc -c -Wall -Wcast-qual -MT sym.o -MMD -MP -MF sym.c.d sym.c gcc -c -Wall -Wcast-qual -MT tokenkind.o -MMD -MP -MF tokenkind.c.d tokenkind.c gcc -c -Wall -Wcast-qual -MT ustr.o -MMD -MP -MF ustr.c.d ustr.c gcc -o xtest_calc xtest_calc.o expr.o finalize.o lexer.o parser.o str.o sym.o tokenkind.o ustr.o -lm gcc -c -Wall -Wcast-qual -MT xtest_expr.o -MMD -MP -MF xtest_expr.c.d xtest_expr.c gcc -o xtest_expr xtest_expr.o expr.o finalize.o lexer.o parser.o str.o sym.o tokenkind.o ustr.o -lm gcc -c -Wall -Wcast-qual -MT xtest_finalize.o -MMD -MP -MF xtest_finalize.c.d xtest_finalize.c gcc -o xtest_finalize xtest_finalize.o expr.o finalize.o lexer.o parser.o str.o sym.o tokenkind.o ustr.o -lm gcc -c -Wall -Wcast-qual -MT xtest_lexer_sanity.o -MMD -MP -MF xtest_lexer_sanity.c.d xtest_lexer_sanity.c gcc -o xtest_lexer_sanity xtest_lexer_sanity.o expr.o finalize.o lexer.o parser.o str.o sym.o tokenkind.o ustr.o -lm gcc -c -Wall -Wcast-qual -MT xtest_lexer.o -MMD -MP -MF xtest_lexer.c.d xtest_lexer.c gcc -o xtest_lexer xtest_lexer.o expr.o finalize.o lexer.o parser.o str.o sym.o tokenkind.o ustr.o -lm gcc -c -Wall -Wcast-qual -MT xtest_sym.o -MMD -MP -MF xtest_sym.c.d xtest_sym.c gcc -o xtest_sym xtest_sym.o expr.o finalize.o lexer.o parser.o str.o sym.o tokenkind.o ustr.o -lm gcc -c -Wall -Wcast-qual -MT xtest_ustr.o -MMD -MP -MF xtest_ustr.c.d xtest_ustr.c gcc -o xtest_ustr xtest_ustr.o expr.o finalize.o lexer.o parser.o str.o sym.o 
tokenkind.o ustr.o -lm gcc -c -Wall -Wcast-qual -MT xgen_tokenkind.o -MMD -MP -MF xgen_tokenkind.c.d xgen_tokenkind.c theon$
Here all files from my project at the current status:
#include <assert.h>
#include <inttypes.h>
#include <math.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include "expr.h"
#include "finalize.h"
#include "sym.h"
// Node of an expression tree. The value of kind decides which member of the
// anonymous union is used by the functions in this file.
struct Expr
{
enum ExprKind kind;
union
{
// operands of a binary expression (kinds in [EK_BINARY, EK_BINARY_END))
struct
{
const struct Expr *left, *right;
} binary;
// operand of a unary expression (kinds in [EK_UNARY, EK_UNARY_END))
const struct Expr *unary;
// payload of a primary expression: literal.uint is used for
// EK_UNSIGNED_LITERAL, identifier for EK_IDENTIFIER
struct
{
struct
{
uint64_t uint;
} literal;
const struct UStr *identifier;
} primary;
};
};
// constructors
// Allocate an uninitialized expression node. If no memory is available an
// error is printed and the program is terminated through finalizeExit().
static struct Expr *
newExpr(void)
{
    struct Expr *node = malloc(sizeof(*node));
    if (node) {
        return node;
    }
    fprintf(stderr, "newExpr: out of memory\n");
    finalizeExit(1);
    return node;
}
// Create a primary expression node of kind EK_UNSIGNED_LITERAL that holds
// the value uint.
struct Expr *
newUnsignedLiteralExpr(uint64_t uint)
{
    struct Expr *node = newExpr();

    node->primary.literal.uint = uint;
    node->kind = EK_UNSIGNED_LITERAL;
    return node;
}
// Create a primary expression node of kind EK_IDENTIFIER that refers to
// identifier (must not be a null pointer).
struct Expr *
newIdentifierExpr(const struct UStr *identifier)
{
    assert(identifier);

    struct Expr *node = newExpr();
    node->primary.identifier = identifier;
    node->kind = EK_IDENTIFIER;
    return node;
}
// Create a unary expression node. kind has to be one of the unary kinds and
// unary (the operand) must not be a null pointer.
struct Expr *
newUnaryExpr(enum ExprKind kind, const struct Expr *unary)
{
    assert(kind >= EK_UNARY && kind < EK_UNARY_END);
    assert(unary);

    struct Expr *node = newExpr();
    node->unary = unary;
    node->kind = kind;
    return node;
}
// Create a binary expression node. kind has to be one of the binary kinds;
// left and right (the operands) must not be null pointers.
struct Expr *
newBinaryExpr(enum ExprKind kind, const struct Expr *left,
              const struct Expr *right)
{
    assert(kind >= EK_BINARY && kind < EK_BINARY_END);
    assert(left);
    assert(right);

    struct Expr *node = newExpr();
    node->binary.right = right;
    node->binary.left = left;
    node->kind = kind;
    return node;
}
// destructor
void
deleteExpr(const struct Expr *expr)
{
assert(expr);
assert(expr->kind >= EK_BINARY && expr->kind < EK_PRIMARY_END);
if (expr->kind >= EK_BINARY && expr->kind < EK_BINARY_END) {
deleteExpr(expr->binary.left);
deleteExpr(expr->binary.right);
} else if (expr->kind >= EK_UNARY && expr->kind < EK_UNARY_END) {
deleteExpr(expr->unary);
}
free((struct Expr *)(uintptr_t)expr);
}
// methods
bool
isLValueExpr(const struct Expr *expr)
{
return expr->kind == EK_IDENTIFIER;
}
// Evaluate the expression tree expr and return its value.
//
// - Arithmetic is carried out with doubles.
// - EK_ASSIGN stores the value of the right operand in the symbol named by
//   the left operand (which has to be an l-value) and returns that value.
// - EK_IDENTIFIER returns the value stored for the symbol. The symbol has to
//   exist in the symbol table; this is asserted for reads and for writes.
// - An unknown kind is reported as an internal error and terminates the
//   program through finalizeExit().
double
evalExpr(const struct Expr *expr)
{
    assert(expr);
    assert(expr->kind >= EK_BINARY && expr->kind < EK_PRIMARY_END);

    switch (expr->kind) {
        // binary expressions
        case EK_ADD:
            return evalExpr(expr->binary.left) +
                   evalExpr(expr->binary.right);
        case EK_ASSIGN:
            {
                const struct Expr *left = expr->binary.left;
                assert(isLValueExpr(left));
                struct Sym *lValue = SymFind(left->primary.identifier);
                // same precondition as for reading an identifier below
                assert(lValue);
                lValue->value = evalExpr(expr->binary.right);
                return lValue->value;
            }
        case EK_SUB:
            return evalExpr(expr->binary.left) -
                   evalExpr(expr->binary.right);
        case EK_MUL:
            return evalExpr(expr->binary.left) *
                   evalExpr(expr->binary.right);
        case EK_DIV:
            return evalExpr(expr->binary.left) /
                   evalExpr(expr->binary.right);
        case EK_POW:
            return pow(evalExpr(expr->binary.left),
                       evalExpr(expr->binary.right));

        // unary expressions
        case EK_UNARY_MINUS:
            return -evalExpr(expr->unary);
        case EK_UNARY_PLUS:
            return evalExpr(expr->unary);

        // primary expressions
        case EK_IDENTIFIER:
            {
                struct Sym *sym = SymFind(expr->primary.identifier);
                assert(sym);
                return sym->value;
            }
        case EK_UNSIGNED_LITERAL:
            return expr->primary.literal.uint;

        default:
            break;
    }
    fprintf(stderr, "evalExpr: internal error. kind = %d\n", expr->kind);
    finalizeExit(1);
    return 0; // never reached; prevent compiler warning
}
// Write the indentation for nesting level indent to out: four blanks per
// level.
static void
printIndent(size_t indent, FILE *out)
{
    size_t remaining = indent * 4;

    while (remaining > 0) {
        fputc(' ', out);
        --remaining;
    }
}
// Print the node expr (without its operands) at nesting level indent in the
// bracket notation of the LaTeX forest package. Operator nodes are left
// open ("[ op"); literal and identifier nodes are closed right away. An
// unknown kind is an internal error and terminates the program.
static void
printExprNode(const struct Expr *expr, size_t indent, FILE *out)
{
    assert(expr);
    printIndent(indent, out);

    const char *opening = 0;
    switch (expr->kind) {
        case EK_ADD:
        case EK_UNARY_PLUS:
            opening = "[ +\n";
            break;
        case EK_SUB:
        case EK_UNARY_MINUS:
            opening = "[ -\n";
            break;
        case EK_ASSIGN:
            opening = "[ {=}\n";
            break;
        case EK_MUL:
            opening = "[ *\n";
            break;
        case EK_DIV:
            opening = "[ /\n";
            break;
        case EK_POW:
            opening = "[ $\\hat{}$\n";
            break;
        case EK_UNSIGNED_LITERAL:
            fprintf(out, "[ %" PRIu64 "]\n", expr->primary.literal.uint);
            return;
        case EK_IDENTIFIER:
            fprintf(out, "[ %s ]\n", expr->primary.identifier->cstr);
            return;
        default:
            break;
    }
    if (opening) {
        fputs(opening, out);
        return;
    }
    fprintf(stderr, "printExprNode: internal error. kind = %d\n", expr->kind);
    finalizeExit(1);
}
// Recursively print the tree expr starting at nesting level indent. For
// operator nodes the operands follow one level deeper and the bracket that
// printExprNode() left open is closed afterwards.
static void
printExprTree_(const struct Expr *expr, size_t indent, FILE *out)
{
    assert(expr);
    assert(expr->kind >= EK_BINARY && expr->kind < EK_PRIMARY_END);

    // every kind of node starts with its own line
    printExprNode(expr, indent, out);

    if (expr->kind >= EK_BINARY && expr->kind < EK_BINARY_END) {
        printExprTree_(expr->binary.left, indent + 1, out);
        printExprTree_(expr->binary.right, indent + 1, out);
    } else if (expr->kind >= EK_UNARY && expr->kind < EK_UNARY_END) {
        printExprTree_(expr->unary, indent + 1, out);
    } else {
        // primary expression: node is already complete
        return;
    }
    printIndent(indent, out);
    fputs("]\n", out);
}
// Print the expression tree expr to out as a centered LaTeX forest
// environment.
void
printExprTree(const struct Expr *expr, FILE *out)
{
    fputs("\\begin{center}\n"
          "\\begin{forest}\n"
          "for tree={draw,circle,calign=fixed edge angles}\n",
          out);
    printExprTree_(expr, 0, out);
    fputs("\\end{forest}\n"
          "\\end{center}\n",
          out);
}
#ifndef ABC_EXPR_H
#define ABC_EXPR_H
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include "ustr.h"
// Kinds of expression nodes. The kinds are grouped so that every group
// occupies a contiguous range [EK_X, EK_X_END); the code in expr.c classifies
// a kind with range checks like `kind >= EK_UNARY && kind < EK_UNARY_END`.
// The end marker of one group has the same value as the first kind of the
// next group.
enum ExprKind
{
EK_BINARY,
// binary expression
EK_ADD = EK_BINARY,
EK_ASSIGN,
EK_SUB,
EK_MUL,
EK_DIV,
EK_POW,
EK_BINARY_END,
EK_UNARY = EK_BINARY_END,
// unary expression
EK_UNARY_MINUS = EK_UNARY,
EK_UNARY_PLUS,
EK_UNARY_END,
EK_PRIMARY = EK_UNARY_END,
// primary expression
EK_UNSIGNED_LITERAL = EK_PRIMARY,
EK_IDENTIFIER,
EK_PRIMARY_END,
};
struct Expr;
// constructors: each returns a newly allocated node; operands passed in
// become part of the tree and are released together with it by deleteExpr()
struct Expr *newUnsignedLiteralExpr(uint64_t uint);
struct Expr *newIdentifierExpr(const struct UStr *identifier);
struct Expr *newUnaryExpr(enum ExprKind kind, const struct Expr *unary);
struct Expr *newBinaryExpr(enum ExprKind kind, const struct Expr *left,
const struct Expr *right);
// destructor: releases the node and, recursively, its operands
void deleteExpr(const struct Expr *expr);
// methods
// true if expr is an identifier expression
bool isLValueExpr(const struct Expr *expr);
// evaluate the tree; assignments update the symbol table
double evalExpr(const struct Expr *expr);
// print the tree as a LaTeX forest environment
void printExprTree(const struct Expr *expr, FILE *out);
#endif // ABC_EXPR_H
#include <stdlib.h>
#include <stdio.h>
#include "finalize.h"
// Element of the singly linked list of registered callbacks.
struct Node
{
struct Node *next;
void (*callback)(void);
};
// head of the callback list; new callbacks are prepended
static struct Node *list;
// Register callback so that it gets called by finalize(). Callbacks are
// prepended to the list, so the one registered last is called first. If no
// memory is available for the list node the program is terminated through
// finalizeExit().
void
finalizeRegister(void (*callback)(void))
{
    struct Node *entry = malloc(sizeof(*entry));

    if (!entry) {
        fprintf(stderr, "finalizeRegister: out of memory\n");
        finalizeExit(1);
    }
    entry->callback = callback;
    entry->next = list;
    list = entry;
}
// Call all registered callbacks (most recently registered first) and release
// the list of callbacks.
//
// Each node is detached from the list before its callback runs. Therefore
// the list never refers to released memory: calling finalize() again, or a
// callback that ends up in finalizeExit(), only sees the callbacks that have
// not been called yet.
void
finalize(void)
{
    while (list) {
        struct Node *n = list;
        void (*callback)(void) = n->callback;

        // detach and release the node first, then run its callback
        list = n->next;
        free(n);
        callback();
    }
}
// Run all registered callbacks and then terminate the program with exit
// code `code`. Does not return.
void
finalizeExit(int code)
{
    finalize(); // cleanup first ...
    exit(code); // ... then leave
}
#ifndef ABC_FINALIZE_H
#define ABC_FINALIZE_H
// register a callback that finalize() will call; callbacks are called in
// reverse order of registration
void finalizeRegister(void (*callback)(void));
// call all registered callbacks and release the callback list
void finalize(void);
// call finalize() and then exit(code)
void finalizeExit(int code);
#endif // ABC_FINALIZE_H
#include <stdbool.h>
#include <stdio.h>
#include "finalize.h"
#include "lexer.h"
struct Token token;
// Callback for finalize(): release the string buffer of the global token.
static void
cleanup(void)
{
    struct Str *text = &token.val;
    releaseStr(text);
}
//------------------------------------------------------------------------------
// position of current character ch
// line is initialized with 1 and col with 0; nextCh() updates both
static struct TokenPos curr = {
1,
0,
};
// most recently read character; stays 0 until the first call of nextCh(),
// which getToken() uses to detect that nothing has been read yet
static int ch;
// Read the next character from stdin into ch, keep the position curr up to
// date and return the character. After a newline the line number is
// incremented and the column restarts at 0.
static int
nextCh(void)
{
    ch = getchar();
    if (ch == '\n') {
        curr.col = 0;
        ++curr.line;
    } else {
        ++curr.col;
    }
    return ch;
}
// Return true if ch is a blank or a horizontal tab.
static bool
isWhiteSpace(int ch)
{
    switch (ch) {
        case ' ':
        case '\t':
            return true;
        default:
            return false;
    }
}
// Return true if ch is one of the decimal digits '0', ..., '9'.
static bool
isDecDigit(int ch)
{
    if (ch < '0') {
        return false;
    }
    return ch <= '9';
}
// Return true if ch is one of the octal digits '0', ..., '7'.
static bool
isOctDigit(int ch)
{
    if (ch < '0') {
        return false;
    }
    return ch <= '7';
}
// Return true if ch is a hexadecimal digit: a decimal digit or one of the
// letters a-f in lower or upper case.
static bool
isHexDigit(int ch)
{
    if (isDecDigit(ch)) {
        return true;
    }
    if (ch >= 'a' && ch <= 'f') {
        return true;
    }
    return ch >= 'A' && ch <= 'F';
}
// Return true if ch can start an identifier: a letter a-z, A-Z or an
// underscore.
static bool
isLetter(int ch)
{
    if (ch == '_') {
        return true;
    }
    if (ch >= 'a' && ch <= 'z') {
        return true;
    }
    return ch >= 'A' && ch <= 'Z';
}
enum TokenKind
getToken(void)
{
static bool first = true;
if (first) {
first = false;
finalizeRegister(cleanup);
}
// init ch, skip white spaces and newlines
while (ch == 0 || isWhiteSpace(ch) || ch == '\n') {
nextCh();
}
token.pos.line = curr.line;
token.pos.col = curr.col;
clearStr(&token.val);
if (ch == EOF) {
return token.kind = EOI;
} else if (isDecDigit(ch)) {
// parse literal
if (ch == '0') {
appendCharToStr(&token.val, ch);
nextCh();
if (ch == 'x') {
appendCharToStr(&token.val, ch);
nextCh();
if (isHexDigit(ch)) {
while (isHexDigit(ch)) {
appendCharToStr(&token.val, ch);
nextCh();
}
return token.kind = HEX_LITERAL;
}
return token.kind = BAD_TOKEN;
}
while (isOctDigit(ch)) {
appendCharToStr(&token.val, ch);
nextCh();
}
return token.kind = OCT_LITERAL;
} else if (isDecDigit(ch)) {
while (isDecDigit(ch)) {
appendCharToStr(&token.val, ch);
nextCh();
}
return token.kind = DEC_LITERAL;
}
} else if (ch == '+') {
appendCharToStr(&token.val, ch);
nextCh();
return token.kind = PLUS;
} else if (ch == '-') {
appendCharToStr(&token.val, ch);
nextCh();
return token.kind = MINUS;
} else if (ch == '*') {
appendCharToStr(&token.val, ch);
nextCh();
return token.kind = ASTERISK;
} else if (ch == '/') {
appendCharToStr(&token.val, ch);
nextCh();
return token.kind = SLASH;
} else if (ch == '%') {
appendCharToStr(&token.val, ch);
nextCh();
return token.kind = PERCENT;
} else if (ch == '=') {
appendCharToStr(&token.val, ch);
nextCh();
return token.kind = EQUAL;
} else if (ch == '(') {
appendCharToStr(&token.val, ch);
nextCh();
return token.kind = LPAREN;
} else if (ch == ')') {
appendCharToStr(&token.val, ch);
nextCh();
return token.kind = RPAREN;
} else if (ch == ';') {
appendCharToStr(&token.val, ch);
nextCh();
return token.kind = SEMICOLON;
} else if (ch == '^') {
appendCharToStr(&token.val, ch);
nextCh();
return token.kind = CARET;
} else if (isLetter(ch)) {
do {
appendCharToStr(&token.val, ch);
nextCh();
} while (isLetter(ch) || isDecDigit(ch));
return token.kind = IDENTIFIER;
}
nextCh();
return token.kind = BAD_TOKEN;
}
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 | #ifndef ABC_LEXER_H
#define ABC_LEXER_H
#include <stddef.h>
#include "str.h"
#include "tokenkind.h"
enum TokenKind getToken(void);
// Description of the token found by the most recent call of getToken().
struct Token
{
// kind of the token (also the return value of getToken())
enum TokenKind kind;
// position of the first character of the token
struct TokenPos
{
size_t line, col;
} pos;
// text of the token
struct Str val;
};
extern struct Token token;
#endif // ABC_LEXER_H
|
CPPFLAGS += -Wall -Wcast-qual
LDFLAGS += -lm
#
# patch: If user has not defined CC and default value does not exist use gcc
#
ifeq ($(origin CC),default)
cc_check := $(shell $(CC) -v > /dev/null 2>&1 && echo "sane")
ifneq ($(strip $(cc_check)),sane)
CC := gcc
endif
endif
#
# List of files that need to be generated before compilation and rules to
# generate them
#
generated_files := gen_tokenkind.h gen_strtokenkind.c
# The generator is a normal prerequisite (not order-only), so the files are
# regenerated when tokenkind.txt OR the generator itself changes. It is run
# as ./xgen_tokenkind so that the build does not rely on "." being in PATH.
gen_tokenkind.h gen_strtokenkind.c : tokenkind.txt xgen_tokenkind
	./xgen_tokenkind $< gen_tokenkind.h gen_strtokenkind.c
#
# Define list of source files, object files, targets, etc
#
# all source files
src :=\
$(filter-out gen_%,\
$(wildcard *.c))
# all object files
obj :=\
$(patsubst %.c,%.o,\
$(src))
# all targets (test programs)
target :=\
$(filter xtest%,\
$(patsubst %.c,%,\
$(src)))
# all generators for source files
generator :=\
$(filter xgen%,\
$(patsubst %.c,%,\
$(src)))
# objects that are required by the targets
lib.o :=\
$(filter-out xtest% xgen%,\
$(obj))
# dependency file that will be generated by compiler
deps :=\
$(patsubst %,%.d,\
$(src))
# dependency file leftovers of gone source files
obsolete.deps:=\
$(filter-out $(deps),\
$(wildcard *.c.d))
#
# Build rules
#
.PHONY: all
.DEFAULT_GOAL := all
all: $(target) $(obj) $(generator)
# rule for removing obsolete dependency files
.PHONY: $(obsolete.deps)
$(obsolete.deps) :
$(RM) $(obsolete.deps)
# delete implicit rule for building an executable directly from its source file
% : %.c
# rule for source file generators
xgen% : xgen%.c
$(CC) -o $@ $^ $(LDFLAGS)
# our rule: to build target link its object file against library object files
%: %.o $(lib.o) | $(obsolete.deps)
$(CC) -o $@ $^ $(LDFLAGS)
# our rule to build objects: also generate a dependency file
%.o: %.c | $(obsolete.deps) $(generated_files)
$(CC) -c $(CPPFLAGS) $(CFLAGS) -MT $@ -MMD -MP -MF $<.d $<
.PHONY: clean
clean:
$(RM) $(target) $(generator) $(obj) $(deps) $(obsolete.deps)
$(RM) $(generated_files)
#
# Include dependencies (if already generated)
#
-include $(deps)
#include <assert.h>
#include <inttypes.h>
#include <math.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include "expr.h"
#include "finalize.h"
#include "lexer.h"
#include "parser.h"
#include "sym.h"
#include "tokenkind.h"
// for log support
static FILE *out;
// Select the stream that receives the expression tree of every parsed
// statement. With a null pointer no trees are printed.
void
setParserLog(FILE *out_)
{
    out = out_; // file-scope log stream used by parseExprStatement()
}
// error handling
// Report at the position of the current token that expectedStr was expected
// but the current token was found; then terminate through finalizeExit().
void
expectedError(const char *expectedStr)
{
    const size_t line = token.pos.line;
    const size_t col = token.pos.col;
    const char *got = strTokenKind(token.kind);

    fprintf(stderr, "%zu.%zu: error expected '%s' got '%s'\n", line, col,
            expectedStr, got);
    finalizeExit(1);
}
// Print the message msg prefixed with the position pos ("line.col: ") to
// stderr and terminate through finalizeExit().
void
errorAtPos(struct TokenPos pos, const char *msg)
{
    const size_t line = pos.line;
    const size_t col = pos.col;

    fprintf(stderr, "%zu.%zu: %s\n", line, col, msg);
    finalizeExit(1);
}
// Check that the current token has the kind tokenKind. Otherwise report an
// error (which terminates the program).
void
expected(enum TokenKind tokenKind)
{
    if (tokenKind == token.kind) {
        return;
    }
    expectedError(strTokenKind(tokenKind));
}
// parse functions
void parseExprStatement(void);
const struct Expr *parseAssignmentExpr(void);
const struct Expr *parseExpr(void);
const struct Expr *parseTerm(void);
const struct Expr *parsePowerExpr(void);
const struct Expr *parseUnaryExpr(void);
const struct Expr *parseFactor(void);
// Parse expression statements until the end of the input is reached. The
// check is made on the current token before each statement.
void
parse(void)
{
    for (;;) {
        if (token.kind == EOI) {
            break;
        }
        parseExprStatement();
    }
}
// Parse one statement of the form "assignment-expression ;". The value of
// the expression is printed to stdout; if a log stream was set, the
// expression tree is printed to it as well.
void
parseExprStatement(void)
{
    const struct Expr *tree = parseAssignmentExpr();
    const double value = evalExpr(tree);

    printf("> %lf\n", value);
    if (out) {
        printExprTree(tree, out);
    }
    deleteExpr(tree);

    // the statement has to end with a semicolon
    expected(SEMICOLON);
    getToken();
}
const struct Expr *
parseAssignmentExpr(void)
{
struct TokenPos pos = token.pos;
const struct Expr *expr = parseExpr();
while (token.kind == EQUAL) {
if (!isLValueExpr(expr)) {
// instead of many error functions we need a one error handling
// function that is more flexible to use -> CBE about ellipse
errorAtPos(pos, "L-value expected");
}
getToken();
const struct Expr *exprRight = parseAssignmentExpr();
expr = newBinaryExpr(EK_ASSIGN, expr, exprRight);
}
return expr;
}
const struct Expr *
parseExpr(void)
{
const struct Expr *expr = parseTerm();
while (token.kind == PLUS || token.kind == MINUS) {
enum TokenKind op = token.kind;
getToken();
const struct Expr *exprRight = parseTerm();
if (op == PLUS) {
expr = newBinaryExpr(EK_ADD, expr, exprRight);
} else {
expr = newBinaryExpr(EK_SUB, expr, exprRight);
}
}
return expr;
}
const struct Expr *
parseTerm(void)
{
const struct Expr *expr = parsePowerExpr();
while (token.kind == ASTERISK || token.kind == SLASH) {
enum TokenKind op = token.kind;
getToken();
const struct Expr *exprRight = parsePowerExpr();
if (op == ASTERISK) {
expr = newBinaryExpr(EK_MUL, expr, exprRight);
} else {
expr = newBinaryExpr(EK_DIV, expr, exprRight);
}
}
return expr;
}
// Parse "unary-expr [ ^ power-expr ]". The operator is right associative
// because the right operand is parsed recursively.
const struct Expr *
parsePowerExpr(void)
{
    const struct Expr *base = parseUnaryExpr();

    while (token.kind == CARET) {
        getToken();
        const struct Expr *exponent = parsePowerExpr();
        base = newBinaryExpr(EK_POW, base, exponent);
    }
    return base;
}
const struct Expr *
parseUnaryExpr(void)
{
if (token.kind == PLUS || token.kind == MINUS) {
enum TokenKind op = token.kind;
getToken();
const struct Expr *expr = parseUnaryExpr();
if (op == MINUS) {
return newUnaryExpr(EK_UNARY_MINUS, expr);
}
return newUnaryExpr(EK_UNARY_PLUS, expr);
}
return parseFactor();
}
// Convert the text of the current literal token to an unsigned value using
// the given base, consume the token and return a literal expression node.
// The conversion has to happen before getToken() replaces the token text.
static const struct Expr *
parseUnsignedLiteral(int base)
{
    uint64_t uint = strtoull(token.val.cstr, 0, base);
    getToken();
    return newUnsignedLiteralExpr(uint);
}

// Parse a factor: an identifier, a decimal, hexadecimal or octal literal, or
// an assignment expression in parentheses.
//
// - An identifier is added to the symbol table if it is not known yet.
// - Literals are converted with the base that belongs to their token kind:
//   10 for DEC_LITERAL, 16 for HEX_LITERAL (strtoull accepts the "0x"
//   prefix for base 16) and 8 for OCT_LITERAL.
// - Anything else is reported as an error, which terminates the program.
const struct Expr *
parseFactor(void)
{
    switch (token.kind) {
        case IDENTIFIER:
            {
                const struct UStr *identifier = UStrAdd(token.val.cstr);
                if (!SymFind(identifier)) {
                    SymAdd(identifier);
                }
                getToken();
                return newIdentifierExpr(identifier);
            }
        case DEC_LITERAL:
            return parseUnsignedLiteral(10);
        case HEX_LITERAL:
            return parseUnsignedLiteral(16);
        case OCT_LITERAL:
            return parseUnsignedLiteral(8);
        case LPAREN:
            {
                getToken();
                const struct Expr *expr = parseAssignmentExpr();
                expected(RPAREN);
                getToken();
                return expr;
            }
        default:
            expectedError("factor");
            return 0; // never reached
    }
}
#ifndef ABC_PARSER_H
#define ABC_PARSER_H
#include <stdio.h>
// set the stream that receives the expression tree of each statement
// (no trees are printed as long as no stream or a null pointer is set)
void setParserLog(FILE *out);
// parse statements until the end of input; the check is made on the current
// token, so the first token has to be fetched with getToken() before
void parse(void);
#endif // ABC_PARSER_H
ABC: ABC is A Bloody Compiler
=============================
This is our compiler project
## How to use
Use `make` to ...
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "finalize.h"
#include "str.h"
// smallest buffer size (in bytes) that gets allocated for a string
enum
{
MIN_CAPACITY = 8
};
// Release the buffer of str. The members of str are not modified.
void
releaseStr(const struct Str *str)
{
    // cast through uintptr_t to pass the buffer as plain pointer to free()
    char *buffer = (char *)(uintptr_t)str->cstr;
    free(buffer);
}
// Make str the empty string. A string with capacity 0 gets its initial
// buffer of MIN_CAPACITY bytes here; if that allocation fails the program is
// terminated through finalizeExit().
void
clearStr(struct Str *str)
{
    if (str->capacity == 0) {
        char *buffer = malloc(MIN_CAPACITY);

        str->cstr = buffer;
        str->end = buffer;
        if (!buffer) {
            fprintf(stderr, "clearStr: out of memory\n");
            finalizeExit(1);
        }
        str->capacity = MIN_CAPACITY;
    }
    // empty string: end points to the terminating 0 at the very beginning
    str->end = str->cstr;
    *str->end = 0;
}
// Append the character c to str and keep the string 0-terminated.
//
// If the buffer is too small it grows to at least MIN_CAPACITY bytes, or to
// twice the required size. If growing fails the program is terminated
// through finalizeExit(); str then still owns its old buffer, so a cleanup
// callback can release it.
void
appendCharToStr(struct Str *str, char c)
{
    size_t len = str->end - str->cstr; // length without terminating 0

    // check if another character and 0 byte fits into string
    if (len + 2 > str->capacity) {
        size_t capacity = len + 2;
        if (capacity < MIN_CAPACITY) {
            capacity = MIN_CAPACITY;
        } else {
            capacity *= 2;
        }
        // keep the old pointer until we know that realloc succeeded
        char *buffer = realloc(str->cstr, capacity);
        if (!buffer) {
            fprintf(stderr, "appendCharToStr: out of memory\n");
            finalizeExit(1);
        }
        str->cstr = buffer;
        str->capacity = capacity;
        str->end = str->cstr + len;
    }
    *str->end++ = c;
    *str->end = 0;
}
#ifndef ABC_STR_H
#define ABC_STR_H
#include <stddef.h>
// Growable, 0-terminated string.
struct Str
{
// cstr: begin of the buffer; end: position of the terminating 0
char *cstr, *end;
// size of the buffer in bytes; 0 means that no buffer was allocated yet
size_t capacity;
};
// destructor
void releaseStr(const struct Str *str);
// set str->cstr to empty string
void clearStr(struct Str *str);
// append character to str->cstr
void appendCharToStr(struct Str *str, char c);
#endif // ABC_STR_H
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "finalize.h"
#include "sym.h"
// Element of the singly linked list that implements the symbol table.
struct SymTabNode
{
struct SymTabNode *next;
struct Sym sym;
};
// head of the symbol table; new symbols are prepended
static struct SymTabNode *symTab;
// Callback for finalize(): release all nodes of the symbol table. The head
// of the list is advanced before a node is released, so symTab never refers
// to released memory and is a null pointer (empty table) afterwards.
static void
cleanup(void)
{
    while (symTab) {
        struct SymTabNode *n = symTab;
        symTab = n->next;
        free(n);
    }
}
// Return the symbol for identifier. If the symbol table has no entry for it
// yet, a new entry with value 0 is created first. The first call registers
// the cleanup of the symbol table with finalizeRegister(). If no memory is
// available the program is terminated through finalizeExit().
struct Sym *
SymAdd(const struct UStr *identifier)
{
    assert(identifier);

    static bool cleanupPending = true;
    if (cleanupPending) {
        cleanupPending = false;
        finalizeRegister(cleanup);
    }

    struct Sym *existing = SymFind(identifier);
    if (existing) {
        return existing;
    }

    struct SymTabNode *entry = malloc(sizeof(*entry));
    if (!entry) {
        fprintf(stderr, "SymAdd: out of memory\n");
        finalizeExit(1);
    }
    // the member identifier is const: initialize it through a cast
    *(const struct UStr **)(uintptr_t)&entry->sym.identifier = identifier;
    entry->sym.value = 0;

    // new entry becomes the head of the list
    entry->next = symTab;
    symTab = entry;
    return &entry->sym;
}
// Return the symbol for identifier or a null pointer if the symbol table has
// no entry for it. Identifiers are compared by address.
struct Sym *
SymFind(const struct UStr *identifier)
{
    assert(identifier);

    struct SymTabNode *entry = symTab;
    while (entry && entry->sym.identifier != identifier) {
        entry = entry->next;
    }
    return entry ? &entry->sym : 0;
}
#ifndef ABC_SYM_H
#define ABC_SYM_H
#include "ustr.h"
// Entry of the symbol table.
struct Sym
{
// name of the symbol; SymFind() compares these pointers by address
const struct UStr * const identifier;
// current value; SymAdd() initializes it with 0
double value;
};
struct Sym *SymAdd(const struct UStr *identifier);
struct Sym *SymFind(const struct UStr *identifier);
#endif // ABC_SYM_H
2 + 5 / 3 * 2;
(2 + 5) / 3;
((10 - 213) * 25) + 27;
(7 - 3) * 4^3;
(1 + 1) * 1 - 2;
--1;
1--1^2;
1--1^3;
1++2^2;
4 + 1 * 3;
-1^2;
1-1^2;
1+-1^2;
a = 5;
b = 42;
c = (a + b) *2;
123 0123 0xaB12 abc +-/*%^()
#include <stdlib.h>
#include <stdio.h>
#include "finalize.h"
#include "tokenkind.h"
#include "gen_strtokenkind.c"
1 2 3 4 5 6 7 8 | #ifndef TOKENKIND_H
#define TOKENKIND_H
#include "gen_tokenkind.h"
const char *strTokenKind(enum TokenKind tokenKind);
#endif // TOKENKIND_H
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 | EOI
BAD_TOKEN
HEX_LITERAL
OCT_LITERAL
DEC_LITERAL
PLUS
MINUS
ASTERISK
SLASH
PERCENT
EQUAL
LPAREN
RPAREN
SEMICOLON
CARET
IDENTIFIER
|
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "finalize.h"
#include "ustr.h"
// Element of the singly linked list that stores the unique strings.
struct Node
{
struct Node *next;
struct UStr ustr;
};
// head of the list of unique strings; new strings are prepended
static struct Node *node;
static void
cleanup(void)
{
for (struct Node *n = node, *next; n; n = next) {
next = n->next;
free(n);
}
}
const struct UStr *
UStrAdd_(const char *s, bool *added)
{
static bool first = true;
if (first) {
first = false;
finalizeRegister(cleanup);
}
size_t len = strlen(s);
if (added) {
*added = true;
}
for (struct Node *n = node; n; n = n->next) {
if (len == n->ustr.len && !strcmp(s, n->ustr.cstr)) {
if (added) {
*added = false;
}
return &n->ustr;
}
}
struct Node *n = malloc(len + 1 + sizeof(size_t) + sizeof(struct Node *));
if (!n) {
fprintf(stderr, "makeUStr: out of memory\n");
abort();
}
n->next = node;
n->ustr.len = len;
strcpy(n->ustr.cstr, s);
node = n;
return &node->ustr;
}
// Return the unique string that equals s (added to the pool if necessary).
// Convenience wrapper for callers that do not need to know whether the
// string was new.
const struct UStr *
UStrAdd(const char *s)
{
    bool *notInterested = 0;
    return UStrAdd_(s, notInterested);
}
// Print all unique strings to stdout, one per line, starting with the
// string that was added last.
void
UStrPrintPool(void)
{
    const struct Node *entry = node;

    while (entry) {
        puts(entry->ustr.cstr);
        entry = entry->next;
    }
}
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 | #ifndef UTILS_USTR_H
#define UTILS_USTR_H
#include <stdbool.h>
#include <stddef.h>
// Unique string: equal strings are stored only once by UStrAdd_(), so two
// unique strings can be compared by comparing their addresses.
struct UStr
{
// length of cstr (without the terminating 0)
size_t len;
// 0-terminated copy of the string (flexible array member)
char cstr[];
};
const struct UStr *UStrAdd_(const char *s, bool *added);
const struct UStr *UStrAdd(const char *s);
void UStrPrintPool(void);
#endif // UTILS_USTR_H
|
#include <stdio.h>
#include <stdlib.h>
/*
 * Write the opening lines of the generated `enum TokenKind` definition
 * (the enum head and its opening brace) to out.
 */
void
printEnumHeader(FILE *out)
{
    fputs("enum TokenKind\n"
          "{\n",
          out);
}
/*
 * Write the closing line of the generated `enum TokenKind` definition to out.
 */
void
printEnumFooter(FILE *out)
{
    fputs("};\n", out);
}
/*
 * Write the head of the generated strTokenKind() function to out:
 * return type, signature, opening brace and the start of the switch.
 */
void
printStrHeader(FILE *out)
{
    static const char *const lines[] = {
        "const char *\n",
        "strTokenKind(enum TokenKind tokenKind)\n",
        "{\n",
        "    switch (tokenKind) {\n",
    };

    for (size_t i = 0; i < sizeof(lines) / sizeof(lines[0]); ++i) {
        fputs(lines[i], out);
    }
}
/*
 * Write the tail of the generated strTokenKind() function to out: the
 * `default:` branch that reports an unknown token kind and terminates,
 * followed by the closing braces of the switch and the function.
 *
 * The generated diagnostic ends with a newline and the statements inside
 * the default branch are indented consistently (8 columns).
 */
void
printStrFooter(FILE *out)
{
    // fputs is used so that "%d" and "\\n" end up verbatim in the output
    fputs("    default:\n"
          "        fprintf(stderr,\n"
          "                \"internal error in strTokenKind: "
          "tokenKind = %d\\n\",\n"
          "                tokenKind);\n"
          "        finalizeExit(1);\n"
          "        return \"\";\n"
          "    }\n"
          "}\n",
          out);
}
/*
 * Print the command line synopsis to stderr and terminate with exit code 1.
 * prg is the program name shown in the synopsis.
 */
void
usage(const char *prg)
{
    fprintf(stderr, "usage: %s input output1 output2\n", prg);
    exit(1);
}
/*
 * Open path with the given mode. On failure print a diagnostic that names
 * the file (what is "input" or "output") and terminate with exit code 1.
 */
static FILE *
openOrDie(const char *path, const char *mode, const char *what)
{
    FILE *f = fopen(path, mode);
    if (!f) {
        fprintf(stderr, "can not open %s file '%s'\n", what, path);
        exit(1);
    }
    return f;
}

/*
 * Generator for the token kind sources.
 *
 * argv[1]: text file with one token kind identifier per line
 * argv[2]: output file receiving the `enum TokenKind` definition
 * argv[3]: output file receiving the strTokenKind() implementation
 *
 * Returns 0 on success and 1 if an output file could not be written.
 */
int
main(int argc, char *argv[])
{
    if (argc != 4) {
        usage(argv[0]);
    }
    FILE *in = openOrDie(argv[1], "r", "input");
    FILE *outEnum = openOrDie(argv[2], "w", "output");
    FILE *outStr = openOrDie(argv[3], "w", "output");

    printEnumHeader(outEnum);
    printStrHeader(outStr);

    char *line = 0;
    size_t capacity = 0;
    ssize_t len;
    while ((len = getline(&line, &capacity, in)) > 0) {
        // strip the line ending: "\n" (Unix), "\r\n" (Windows) or nothing
        // at all if the last line of the file is not terminated
        while (len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r')) {
            line[--len] = 0;
        }
        if (len == 0) {
            continue; // blank line: there is no identifier to emit
        }
        fprintf(outEnum, "%*s%s,\n", 4, "", line);
        fprintf(outStr, "%*scase %s:\n", 4, "", line);
        fprintf(outStr, "%*sreturn \"%s\";\n", 8, "", line);
    }
    free(line); // ok even if line is a null pointer

    printEnumFooter(outEnum);
    printStrFooter(outStr);

    fclose(in);
    // fclose flushes buffered output, so this is where write errors show up
    int rcEnum = fclose(outEnum);
    int rcStr = fclose(outStr);
    if (rcEnum != 0 || rcStr != 0) {
        fprintf(stderr, "error while writing output files\n");
        return 1;
    }
    return 0;
}
#include <stdlib.h>
#include <stdio.h>
#include "finalize.h"
#include "lexer.h"
#include "parser.h"
// Write the LaTeX preamble (document class, packages) and the begin of the
// document environment to out.
void
printHeader(FILE *out)
{
    static const char *const lines[] = {
        "\\documentclass[preview, margin=0.2cm]{standalone}\n",
        "\\usepackage{amsmath}\n",
        "\\usepackage{forest}\n",
        "\\begin{document}\n",
    };

    for (size_t i = 0; i < sizeof(lines) / sizeof(lines[0]); ++i) {
        fputs(lines[i], out);
    }
}
// Write the end of the LaTeX document environment to out.
void
printFooter(FILE *out)
{
    fputs("\\end{document}\n", out);
}
// Print the command line synopsis for program prg to stderr and terminate
// with exit code 1 (after running the registered cleanup callbacks).
void
usage(const char *prg)
{
    fprintf(stderr, "usage: %s [out]\n", prg);

    finalizeExit(1);
}
// Calculator: reads statements from stdin and prints their values. If a
// file name is given as (the only) argument, a LaTeX document with the
// expression trees of all statements is written to that file.
int
main(int argc, char *argv[])
{
    if (argc > 2) {
        usage(argv[0]); // does not return
    }

    FILE *out = 0;
    if (argc == 2) {
        out = fopen(argv[1], "w");
        if (!out) {
            fprintf(stderr, "can not open output file %s\n", argv[1]);
            finalizeExit(1);
        }
        setParserLog(out);
        printHeader(out);
    }

    // fetch the first token, then parse until end of input
    getToken();
    parse();

    if (out) {
        printFooter(out);
        fclose(out);
    }
    finalize();
}
#include <stdio.h>
#include "expr.h"
#include "finalize.h"
#include "sym.h"
// Test for expression trees: builds the trees for "y = x + 42" and
// "(y + 1) = 2" by hand, prints both to stderr and evaluates an assignment
// only if its left operand is an l-value.
int
main(void)
{
    // identifiers x and y (unique strings; each is looked up once)
    const struct UStr *nameX = UStrAdd("x");
    SymAdd(nameX);
    struct Expr *exprX = newIdentifierExpr(nameX);

    const struct UStr *nameY = UStrAdd("y");
    SymAdd(nameY);
    struct Expr *exprY = newIdentifierExpr(nameY);

    // literals
    struct Expr *one = newUnsignedLiteralExpr(1);
    struct Expr *two = newUnsignedLiteralExpr(2);
    struct Expr *fortyTwo = newUnsignedLiteralExpr(42);

    // x + 42 and y + 1
    struct Expr *xPlus42 = newBinaryExpr(EK_ADD, exprX, fortyTwo);
    struct Expr *yPlus1 = newBinaryExpr(EK_ADD, exprY, one);

    // y = x + 42 (valid) and (y + 1) = 2 (left operand is not an l-value)
    struct Expr *validAssign = newBinaryExpr(EK_ASSIGN, exprY, xPlus42);
    struct Expr *invalidAssign = newBinaryExpr(EK_ASSIGN, yPlus1, two);

    printExprTree(validAssign, stderr);
    printExprTree(invalidAssign, stderr);

    if (!isLValueExpr(exprY)) {
        printf("Lvalue expected\n");
    } else {
        printf("assign1 = %lf\n", evalExpr(validAssign));
    }

    if (!isLValueExpr(yPlus1)) {
        printf("Lvalue expected\n");
    } else {
        printf("assign2 = %lf\n", evalExpr(invalidAssign));
    }

    finalize();
}
#include <stdio.h>
#include "finalize.h"
// Example callback: reports on stdout that it was called.
void
foo(void)
{
    puts("foo called!");
}
// Example callback: reports on stdout that it was called.
void
bar(void)
{
    puts("bar called!");
}
// Test for the finalize module: registers two callbacks and terminates
// through finalizeExit(), which calls them before the program exits.
int
main(void)
{
    // callbacks to be called at exit
    void (*const callbacks[])(void) = {foo, bar};

    for (size_t i = 0; i < sizeof(callbacks) / sizeof(callbacks[0]); ++i) {
        finalizeRegister(callbacks[i]);
    }

    // ... the actual work of a program would go here ...

    // run the callbacks and leave with exit code 1
    finalizeExit(1);
}
#include <stdio.h>
#include "finalize.h"
#include "lexer.h"
// Test for the lexer: prints position, kind and text of every token found
// on stdin until the end of input is reached.
int
main(void)
{
    for (;;) {
        if (getToken() == EOI) {
            break;
        }
        const char *kindName = strTokenKind(token.kind);
        printf("%zu.%zu: %s '%s'\n", token.pos.line, token.pos.col, kindName,
               token.val.cstr);
    }
    finalize();
}
#include <stdio.h>
#include <stdlib.h>
#include "finalize.h"
#include "lexer.h"
int
main(void)
{
enum TokenKind kind;
while ((kind = getToken()) != EOI) {
if (kind != token.kind) {
fprintf(stderr, "getToken() returned %d but token.kind equals %d\n", kind,
token.kind);
finalizeExit(1);
}
}
finalize();
}
#include <stdio.h>
#include "finalize.h"
#include "sym.h"
#include "ustr.h"
void
assignOrInsert(const struct UStr *identifier, double value)
{
SymAdd(identifier)->value = value;
}
// Print name and value of the symbol identifier, or a note that it is not
// defined.
void
lookup(const struct UStr *identifier)
{
    const struct Sym *sym = SymFind(identifier);

    if (!sym) {
        printf("%s not defined\n", identifier->cstr);
        return;
    }
    printf("%s = %lf\n", sym->identifier->cstr, sym->value);
}
// Test for the symbol table: defines some symbols, overwrites one of them
// and looks up defined and undefined symbols.
int
main(void)
{
    // x = 42; x = 2; y = 5;
    assignOrInsert(UStrAdd("x"), 42);
    assignOrInsert(UStrAdd("x"), 2);
    assignOrInsert(UStrAdd("y"), 5);

    // a gets the default value
    SymAdd(UStrAdd("a"));

    // look up symbols; z was never defined
    static const char *const names[] = {"a", "x", "y", "z"};
    for (size_t i = 0; i < sizeof(names) / sizeof(names[0]); ++i) {
        lookup(UStrAdd(names[i]));
    }

    finalize();
}
#include <stdlib.h>
#include <stdio.h>
#include "finalize.h"
#include "ustr.h"
// Test for unique strings: every line read from stdin is treated as an
// identifier. Keywords are rejected; for other identifiers it is reported
// whether they are new. At the end all unique strings are listed.
//
// Line endings are stripped for Unix ("\n") and Windows ("\r\n") text files,
// and a last line without line ending is kept complete.
int
main(void)
{
    const struct UStr *kw[] = {
        UStrAdd("if"),
        UStrAdd("for"),
        UStrAdd("while"),
    };

    char *line = 0;
    size_t capacity = 0;
    ssize_t len;
    while ((len = getline(&line, &capacity, stdin)) > 0) {
        // only remove characters that actually belong to the line ending
        while (len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r')) {
            line[--len] = 0;
        }

        bool added, error = false;
        const struct UStr *identifier = UStrAdd_(line, &added);
        for (size_t i = 0; i < sizeof(kw) / sizeof(kw[0]); ++i) {
            if (kw[i] == identifier) {
                printf("can not use keyword as identifier\n");
                error = true;
            }
        }
        if (error) {
            continue;
        }
        if (added) {
            printf("new ");
        }
        printf("identifier '%s'\n", identifier->cstr);
    }
    free(line); // ok even if line is null pointer

    printf("List of unique strings:\n");
    UStrPrintPool();
    finalize();
}