aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Smeding <tom.smeding@gmail.com>2018-01-03 23:10:59 +0100
committerTom Smeding <tom.smeding@gmail.com>2018-01-03 23:10:59 +0100
commit9911f9a73c7dc46069199e52f2bc54082d10366c (patch)
tree914cd4fc2367207271c1c53c7f11a96ed9bbc9b7
Initial
-rw-r--r--.gitignore4
-rw-r--r--Makefile28
-rw-r--r--c.l119
-rw-r--r--c.y248
-rw-r--r--compiler.c279
-rw-r--r--compiler.h7
-rw-r--r--ir.c110
-rw-r--r--ir.h83
-rw-r--r--main.c37
-rw-r--r--node.c140
-rw-r--r--node.h56
-rw-r--r--symbol.c23
-rw-r--r--symbol.h29
-rw-r--r--symtab.c60
-rw-r--r--symtab.h23
-rw-r--r--test/t1.c21
-rw-r--r--type.c79
-rw-r--r--type.h32
18 files changed, 1378 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b711b6b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+as1
+as2
+bison-out
+ccomp
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..e206eea
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,28 @@
+CC = gcc
+LEX = flex
+YAXX = bison
+
+CFLAGS = -Wall -Wextra -std=c11 -g -I. -D_GNU_SOURCE
+LDFLAGS = -lfl
+
+TARGET = ccomp
+
+.PHONY: all clean
+
+all: $(TARGET)
+
+clean:
+ rm -f $(TARGET) *.o
+ rm -rf bison-out
+
+
+bison-out/y.tab.c bison-out/y.tab.h: c.y
+ @mkdir -p bison-out
+ bison -d -b bison-out/y c.y
+
+bison-out/lex.yy.c: c.l
+ @mkdir -p bison-out
+ flex -o bison-out/lex.yy.c c.l
+
+$(TARGET): $(wildcard *.c) $(wildcard *.h) bison-out/y.tab.c bison-out/lex.yy.c
+ $(CC) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@
diff --git a/c.l b/c.l
new file mode 100644
index 0000000..376ed4c
--- /dev/null
+++ b/c.l
@@ -0,0 +1,119 @@
+%{
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include "type.h"
+#include "y.tab.h"
+
+#define DEBUG
+
+int yylex(void);
+
+int lineno=1;
+
+static enum state {
+ SOP,
+ STERM,
+ SPTR
+} state = STERM;
+
+__attribute__((format (printf, 1, 2)))
+void pdebug(const char *format, ...);
+
+#define RETS(val_) {pdebug(#val_ ": %s\n", yytext); yylval.id = strdup(yytext); return val_;}
+#define RET_TYPE(val_, ty_) {pdebug(#val_ ": %s (" #ty_ ")\n", yytext); yylval.type = ty_; return val_;}
+#define RET(val_) {pdebug(#val_ "\n"); return val_;}
+
+%}
+
+%option noinput nounput
+
+LETTER [a-zA-Z]
+DIGIT [0-9]
+ID {LETTER}({LETTER}|{DIGIT}|_)*
+DIGITS {DIGIT}{DIGIT}*
+
+NUM -?{DIGITS}
+
+%x C_COMMENT
+
+
+%%
+
+{NUM} { state = SOP; RETS(NUM); }
+
+int { state = SPTR; RET_TYPE(INT, type_int(16)); }
+void { state = SPTR; RET_TYPE(VOID, type_void()); }
+if { RET(IF); }
+else { RET(ELSE); }
+while { RET(WHILE); }
+for { RET(FOR); }
+return { state = STERM; RET(RETURN); }
+
+{ID} { state = SOP; RETS(ID); }
+
+"//"[^\n]* {}
+
+"/*" { pdebug("C_COMMENT... "); fflush(stdout); BEGIN(C_COMMENT); }
+<C_COMMENT>"*/" { pdebug("done\n"); BEGIN(INITIAL); }
+<C_COMMENT>. {}
+<C_COMMENT>\n { lineno++; }
+
+\n { lineno++; }
+[ \t]+ {}
+
+"+" { state = STERM; RETS(ADDOP); }
+[/%] { state = STERM; RETS(MULOP); }
+"<"=?|">"=?|==|!= { state = STERM; RETS(RELOP); }
+&&|"||" { state = STERM; RETS(BOOLOP); }
+= { state = STERM; RET(ASSIGN); }
+"!" { state = STERM; RET(NOT); }
+
+"-" {
+ switch (state) {
+ case SOP: state = STERM; RETS(ADDOP);
+ case STERM: state = STERM; RETS(NEGATE);
+ default: fprintf(stderr, "Unexpected '-'\n"); exit(1);
+ }
+ }
+
+"*" {
+ switch (state) {
+ case SOP: state = STERM; RETS(MULOP);
+ case STERM: state = STERM; RET(DEREF);
+ case SPTR: state = SPTR; RET(PTR);
+ default: fprintf(stderr, "Unexpected '*'\n"); exit(1);
+ }
+ }
+
+"&" {
+ switch (state) {
+ case SOP: fprintf(stderr, "Bit-and not implemented\n"); exit(1);
+ case STERM: state = STERM; RET(ADDROF);
+ default: fprintf(stderr, "Unexpected '&'\n"); exit(1);
+ }
+ }
+
+")" { state = SOP; pdebug("')'\n"); return ')'; }
+
+. { pdebug(".: %c\n", yytext[0]); return yytext[0]; }
+
+
+%%
+
+
+void pdebug(const char *format, ...) {
+ (void)format;
+#ifdef DEBUG
+ va_list ap;
+ va_start(ap, format);
+ vfprintf(stderr, format, ap);
+ va_end(ap);
+#endif
+}
+
+
+/* vim:et:ts=4:sw=4
+*/
diff --git a/c.y b/c.y
new file mode 100644
index 0000000..5aafe26
--- /dev/null
+++ b/c.y
@@ -0,0 +1,248 @@
+%{
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include "node.h"
+
+static void yyerror(const char*);
+
+int yylex(void);
+
+static enum operator addop_to_oper(const char *op);
+static enum operator mulop_to_oper(const char *op);
+static enum operator relop_to_oper(const char *op);
+static enum operator boolop_to_oper(const char *op);
+
+extern int lineno;
+
+struct node *root_node;
+
+%}
+
+%start start
+
+%token ID NUM INT VOID IF ELSE WHILE FOR RETURN PTR
+%token DEREF ADDROF NEGATE NOT ADDOP MULOP RELOP BOOLOP ASSIGN
+
+%left ASSIGN
+%left BOOLOP
+%left RELOP
+%left ADDOP
+%left MULOP
+%left NOT NEGATE DEREF ADDROF
+
+%type <id> ID NUM ADDOP MULOP RELOP BOOLOP
+%type <node> toplevel toplevel_decl var_decl func_decl parameter_list parameter block
+%type <node> statement statement_list open_statement matched_statement other_statement
+%type <node> expression atom_expr expression_list
+%type <type> type INT VOID
+
+%union {
+ char *id;
+ struct node *node;
+ struct type *type;
+}
+
+%destructor { free($$); } <id>
+%destructor { node_delete_recursive($$); } <node>
+
+%%
+
+start: toplevel {
+ root_node = $1;
+ } ;
+
+toplevel: toplevel_decl toplevel {
+ $$ = node_make_2(N_LIST, $1, $2);
+ }
+ | toplevel_decl {
+ $$ = $1;
+ } ;
+
+toplevel_decl: var_decl | func_decl ;
+
+var_decl: type ID ASSIGN expression ';' {
+ $$ = node_make_1(N_VAR_DECL_INIT, $4);
+ $$->rtype = $1;
+ $$->name = $2;
+ }
+ | type ID ';' {
+ $$ = node_make_0(N_VAR_DECL);
+ $$->rtype = $1;
+ $$->name = $2;
+ } ;
+
+func_decl: type ID '(' parameter_list ')' block {
+ $$ = node_make_2(N_FUNC_DECL, $4, $6);
+ $$->rtype = $1;
+ $$->name = $2;
+ } ;
+
+type: type PTR { $$ = type_ptr($1); }
+ | INT { $$ = type_int(16); }
+ | VOID { $$ = type_void(); } ;
+
+parameter_list: parameter {
+ $$ = node_make_2(N_LIST, $1, node_make_0(N_LIST_END));
+ }
+ | parameter ',' parameter_list {
+ $$ = node_make_2(N_LIST, $1, $3);
+ } ;
+
+parameter: type ID {
+ $$ = node_make_0(N_VAR_DECL);
+ $$->rtype = $1;
+ $$->name = $2;
+ } ;
+
+block: '{' statement_list '}' {
+ $$ = node_make_1(N_BLOCK, $2);
+ } ;
+
+statement_list: statement {
+ $$ = node_make_2(N_LIST, $1, node_make_0(N_LIST_END));
+ }
+ | statement statement_list {
+ $$ = node_make_2(N_LIST, $1, $2);
+ } ;
+
+statement: open_statement | matched_statement ;
+
+open_statement: IF '(' expression ')' statement {
+ $$ = node_make_3(N_IF, $3, $5, node_make_0(N_LIST_END));
+ }
+ | IF '(' expression ')' matched_statement ELSE open_statement {
+ $$ = node_make_3(N_IF, $3, $5, $7);
+ }
+ | WHILE '(' expression ')' open_statement {
+ $$ = node_make_2(N_WHILE, $3, $5);
+ }
+ /*| FOR '(' statement ';' expression ';' statement ')' open_statement {
+ $$ = node_make_1(N_BLOCK,
+ node_make_list(
+ $3, node_make_2(N_WHILE, $5, node_make_list($9, $7))));
+ }*/ ;
+
+matched_statement:
+ IF '(' expression ')' matched_statement ELSE matched_statement {
+ $$ = node_make_3(N_IF, $3, $5, $7);
+ }
+ | WHILE '(' expression ')' matched_statement {
+ $$ = node_make_2(N_WHILE, $3, $5);
+ }
+ /*| FOR '(' statement ';' expression ';' statement ')' matched_statement {
+ $$ = node_make_1(N_BLOCK,
+ node_make_list(
+ $3, node_make_2(N_WHILE, $5, node_make_list($9, $7))));
+ }*/
+ | other_statement ;
+
+other_statement:
+ var_decl | expression ';' | block
+ | RETURN expression ';' {
+ $$ = node_make_1(N_RETURN, $2);
+ } ;
+
+expression: atom_expr
+ | NOT expression { $$ = node_make_1(N_UNOP, $2); $$->oper = OP_NOT; }
+ | NEGATE expression { $$ = node_make_1(N_UNOP, $2); $$->oper = OP_NEG; }
+ | DEREF expression { $$ = node_make_1(N_UNOP, $2); $$->oper = OP_DEREF; }
+ | ADDROF expression { $$ = node_make_1(N_UNOP, $2); $$->oper = OP_ADDROF; }
+ | expression ADDOP expression {
+ $$ = node_make_2(N_BINOP, $1, $3);
+ $$->oper = addop_to_oper($2);
+ free($2);
+ }
+ | expression MULOP expression {
+ $$ = node_make_2(N_BINOP, $1, $3);
+ $$->oper = mulop_to_oper($2);
+ free($2);
+ }
+ | expression RELOP expression {
+ $$ = node_make_2(N_BINOP, $1, $3);
+ $$->oper = relop_to_oper($2);
+ free($2);
+ }
+ | expression BOOLOP expression {
+ $$ = node_make_2(N_BINOP, $1, $3);
+ $$->oper = boolop_to_oper($2);
+ free($2);
+ }
+ | atom_expr ASSIGN expression {
+ $$ = node_make_2(N_BINOP, $1, $3);
+ $$->oper = OP_ASSIGN;
+ } ;
+
+atom_expr: NUM {
+ $$ = node_make_0(N_NUM);
+ $$->value = strtol($1, NULL, 10);
+ free($1);
+ }
+ | ID {
+ $$ = node_make_0(N_VAR);
+ $$->name = $1;
+ }
+ | '(' expression ')' {
+ $$ = $2;
+ }
+ | ID '(' expression_list ')' {
+ $$ = node_make_1(N_CALL, $3);
+ $$->name = $1;
+ }
+ | atom_expr '[' expression ']' {
+ $$ = node_make_1(N_UNOP, node_make_2(N_BINOP, $1, $3));
+ $$->child2->oper = OP_ADD;
+ $$->oper = OP_DEREF;
+ } ;
+
+expression_list: expression {
+ $$ = node_make_2(N_LIST, $1, node_make_0(N_LIST_END));
+ }
+ | expression ',' expression_list {
+ $$ = node_make_2(N_LIST, $1, $3);
+ } ;
+
+
+%%
+
+
+static void yyerror(const char *s) {
+ fprintf(stderr, "Parse error: %s\n", s);
+ exit(1);
+}
+
+static enum operator addop_to_oper(const char *op) {
+ if (strcmp(op, "+") == 0) return OP_ADD;
+ else if (strcmp(op, "-") == 0) return OP_SUB;
+ else assert(false);
+}
+
+static enum operator mulop_to_oper(const char *op) {
+ if (strcmp(op, "*") == 0) return OP_MUL;
+ else if (strcmp(op, "/") == 0) return OP_DIV;
+ else if (strcmp(op, "%") == 0) return OP_MOD;
+ else assert(false);
+}
+
+static enum operator relop_to_oper(const char *op) {
+ if (strcmp(op, "==") == 0) return OP_EQ;
+ else if (strcmp(op, "!=") == 0) return OP_NEQ;
+ else if (strcmp(op, "<") == 0) return OP_LT;
+ else if (strcmp(op, ">") == 0) return OP_GT;
+ else if (strcmp(op, "<=") == 0) return OP_LEQ;
+ else if (strcmp(op, ">=") == 0) return OP_GEQ;
+ else assert(false);
+}
+
+static enum operator boolop_to_oper(const char *op) {
+ if (strcmp(op, "&&") == 0) return OP_AND;
+ else if (strcmp(op, "||") == 0) return OP_OR;
+ else assert(false);
+}
+
+
+/* vim:et:ts=4:sw=4
+*/
diff --git a/compiler.c b/compiler.c
new file mode 100644
index 0000000..42ad25c
--- /dev/null
+++ b/compiler.c
@@ -0,0 +1,279 @@
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include "compiler.h"
+#include "symtab.h"
+
+
+static void write_func_params(struct pair_tn *params, struct node *list) {
+ if (list->type == N_LIST_END) return;
+ assert(list->child1->type == N_VAR_DECL);
+ params[0].type = list->child1->rtype;
+ params[0].name = strdup(list->child1->name);
+ write_func_params(params + 1, list->child2);
+}
+
+static bool oper_is_basic_arith(enum operator oper) {
+ return oper == OP_ADD || oper == OP_SUB || oper == OP_MUL || oper == OP_DIV || oper == OP_MOD;
+}
+
+static bool oper_is_comparison(enum operator oper) {
+ return oper == OP_EQ || oper == OP_NEQ ||
+ oper == OP_LT || oper == OP_GT ||
+ oper == OP_LEQ || oper == OP_GEQ;
+}
+
+static struct ref compile_expr(struct ir *ir, struct symtab *symtab, struct node *node);
+
+static void compile_boolexpr(
+ struct ir *ir, struct symtab *symtab, struct node *node, char *truedest, char *falsedest) {
+
+ if (node->type == N_BINOP && oper_is_comparison(node->oper)) {
+ struct ref r1 = compile_expr(ir, symtab, node->child1);
+ struct ref r2 = compile_expr(ir, symtab, node->child2);
+ ir_append(ir, irins_make_12(INS_CMP, r1, r2));
+ enum condcode condcode;
+ switch (node->oper) {
+ case OP_EQ: condcode = CCNZ; break;
+ case OP_NEQ: condcode = CCZ; break;
+ case OP_LT: condcode = CCGE; break;
+ case OP_GT: condcode = CCLE; break;
+ case OP_LEQ: condcode = CCG; break;
+ case OP_GEQ: condcode = CCL; break;
+ default: assert(false);
+ }
+ ir_append(ir, irins_make_jcc(falsedest, condcode));
+ ir_append(ir, irins_make_name(INS_JMP, truedest));
+ } else if (node->type == N_BINOP && node->oper == OP_AND) {
+ char *try2lbl = gen_label_name();
+ compile_boolexpr(ir, symtab, node->child1, try2lbl, falsedest);
+ ir_append(ir, irins_make_name(INS_LBL, try2lbl));
+ compile_boolexpr(ir, symtab, node->child2, truedest, falsedest);
+ } else if (node->type == N_BINOP && node->oper == OP_OR) {
+ char *try2lbl = gen_label_name();
+ compile_boolexpr(ir, symtab, node->child1, truedest, try2lbl);
+ ir_append(ir, irins_make_name(INS_LBL, try2lbl));
+ compile_boolexpr(ir, symtab, node->child2, truedest, falsedest);
+ } else if (node->type == N_UNOP && node->oper == OP_NOT) {
+ compile_boolexpr(ir, symtab, node->child1, falsedest, truedest);
+ } else {
+ struct ref ref = compile_expr(ir, symtab, node);
+ ir_append(ir, irins_make_12(INS_TEST, ref, ref));
+ ir_append(ir, irins_make_jcc(falsedest, CCZ));
+ ir_append(ir, irins_make_name(INS_JMP, truedest));
+ }
+}
+
+static struct ref compile_expr(struct ir *ir, struct symtab *symtab, struct node *node) {
+ switch (node->type) {
+ case N_NUM: {
+ struct ref ref = ref_next_register();
+ ir_append(ir, irins_make_01(INS_MOV, ref, ref_imm(node->value)));
+ return ref;
+ }
+
+ case N_VAR: {
+ struct symbol *sym = symtab_find(symtab, node->name);
+ if (!sym) {
+ fprintf(stderr, "Use of undeclared variable '%s'\n", node->name);
+ exit(1);
+ }
+ return sym->ref;
+ }
+
+ case N_BINOP: {
+ if (oper_is_basic_arith(node->oper)) {
+ struct ref r0 = ref_next_register();
+ struct ref r1 = compile_expr(ir, symtab, node->child1);
+ struct ref r2 = compile_expr(ir, symtab, node->child2);
+ enum instype instype;
+ switch (node->oper) {
+ case OP_ADD: instype = INS_ADD; break;
+ case OP_SUB: instype = INS_SUB; break;
+ case OP_MUL: instype = INS_MUL; break;
+ case OP_DIV: instype = INS_DIV; break;
+ case OP_MOD: instype = INS_MOD; break;
+ default: assert(false);
+ }
+ ir_append(ir, irins_make_012(instype, r0, r1, r2));
+ return r0;
+ } else if (oper_is_comparison(node->oper) || node->oper == OP_AND || node->oper == OP_OR) {
+ char *truelbl = gen_label_name();
+ char *falselbl = gen_label_name();
+ char *afterlbl = gen_label_name();
+ compile_boolexpr(ir, symtab, node, truelbl, falselbl);
+ struct ref cval = ref_next_register();
+ ir_append(ir, irins_make_name(INS_LBL, truelbl));
+ ir_append(ir, irins_make_01(INS_MOV, cval, ref_imm(1)));
+ ir_append(ir, irins_make_name(INS_JMP, afterlbl));
+ ir_append(ir, irins_make_name(INS_LBL, falselbl));
+ ir_append(ir, irins_make_01(INS_MOV, cval, ref_imm(0)));
+ ir_append(ir, irins_make_name(INS_LBL, afterlbl));
+ return cval;
+ } else if (node->oper == OP_ASSIGN) {
+ if (node->child1->type == N_VAR ||
+ (node->child1->type == N_UNOP && node->child1->oper == OP_DEREF)) {
+ struct ref r0 = compile_expr(ir, symtab, node->child1);
+ struct ref r1 = compile_expr(ir, symtab, node->child2);
+ ir_append(ir, irins_make_01(INS_MOV, r0, r1));
+ return r0;
+ } else {
+ fprintf(stderr, "Invalid left-hand side in assignment.\n");
+ exit(1);
+ }
+ } else {
+ assert(false);
+ }
+ }
+
+ case N_UNOP: {
+ switch (node->oper) {
+ case OP_NEG: {
+ struct ref r0 = ref_next_register();
+ struct ref r1 = compile_expr(ir, symtab, node->child1);
+ ir_append(ir, irins_make_01(INS_NEG, r0, r1));
+ return r0;
+ }
+
+ case OP_NOT: {
+ struct ref r0 = ref_next_register();
+ struct ref r1 = compile_expr(ir, symtab, node->child1);
+ ir_append(ir, irins_make_01(INS_NOT, r0, r1));
+ return r0;
+ }
+
+ case OP_DEREF: {
+ struct ref addr = compile_expr(ir, symtab, node->child1);
+ struct ref r1;
+ switch (addr.type) {
+ case REF_REG: r1 = ref_mem(addr.reg, 0, false); break;
+ case REF_IMM: r1 = ref_mem(-1, addr.imm, false); break;
+ case REF_MEM: {
+ r1 = ref_next_register();
+ ir_append(ir, irins_make_01(INS_MOV, r1, addr));
+ r1 = ref_mem(r1.reg, 0, false); break;
+ break;
+ }
+ }
+ struct ref r0 = ref_next_register();
+ ir_append(ir, irins_make_01(INS_MOV, r0, r1));
+ return r0;
+ }
+
+ case OP_ADDROF:
+ fprintf(stderr, "OP_ADDROF not implemented\n");
+ exit(1);
+
+ default:
+ assert(false);
+ }
+ }
+
+ case N_CALL: {
+ fprintf(stderr, "function calls not implemented\n");
+ exit(1);
+ /*struct symbol *sym = symtab_find(symtab, node->name);
+ if (!sym) {
+ fprintf(stderr, "Call of undeclared function '%s'\n", node->name);
+ exit(1);
+ }
+ ;*/
+ }
+
+ default:
+ fprintf(stderr, "Invalid node type %d in expression\n", node->type);
+ exit(1);
+ }
+}
+
+static void compile_node(struct ir *ir, struct symtab *symtab, struct node *node) {
+ switch (node->type) {
+ case N_LIST:
+ compile_node(ir, symtab, node->child1);
+ compile_node(ir, symtab, node->child2);
+ break;
+
+ case N_LIST_END:
+ break;
+
+ case N_BLOCK: {
+ symtab = symtab_sub(symtab);
+ compile_node(ir, symtab, node->child1);
+ symtab = symtab_delete_get_parent(symtab);
+ break;
+ }
+
+ case N_VAR_DECL_INIT:
+ case N_VAR_DECL: {
+ if (symtab_find_local(symtab, node->name)) {
+ fprintf(stderr, "Redeclaration of variable '%s'\n", node->name);
+ exit(1);
+ }
+ struct symbol *sym = symbol_make_var(node->name, node->rtype);
+ sym->ref = ref_next_register();
+ symtab_insert(symtab, sym);
+ ir_append(ir, irins_make_01(INS_MOV, sym->ref, ref_imm(node->value)));
+ break;
+ }
+
+ case N_FUNC_DECL: {
+ if (symtab_find(symtab, node->name)) {
+ fprintf(stderr, "Redeclaration of function '%s'\n", node->name);
+ exit(1);
+ }
+ int numparams = node_list_length(node->child1);
+ struct symbol *sym = symbol_make_func(node->name, node->rtype, numparams);
+ write_func_params(sym->params, node->child1);
+ symtab_insert(symtab_root(symtab), sym);
+ ir_append(ir, irins_make_name(INS_LBL, strdup(node->name)));
+ symtab = symtab_sub(symtab);
+ compile_node(ir, symtab, node->child2);
+ symtab = symtab_delete_get_parent(symtab);
+ break;
+ }
+
+ case N_IF: {
+ char *elselbl = gen_label_name();
+ char *afterlbl = gen_label_name();
+ struct ref condref = compile_expr(ir, symtab, node->child1);
+ ir_append(ir, irins_make_12(INS_TEST, condref, condref));
+ ir_append(ir, irins_make_jcc(elselbl, CCZ));
+ compile_node(ir, symtab, node->child2);
+ ir_append(ir, irins_make_name(INS_JMP, afterlbl));
+ ir_append(ir, irins_make_name(INS_LBL, elselbl));
+ compile_node(ir, symtab, node->child3);
+ ir_append(ir, irins_make_name(INS_LBL, afterlbl));
+ break;
+ }
+
+ case N_WHILE: {
+ char *startlbl = gen_label_name();
+ char *afterlbl = gen_label_name();
+ ir_append(ir, irins_make_name(INS_LBL, startlbl));
+ struct ref condref = compile_expr(ir, symtab, node->child1);
+ ir_append(ir, irins_make_12(INS_TEST, condref, condref));
+ ir_append(ir, irins_make_jcc(afterlbl, CCZ));
+ compile_node(ir, symtab, node->child2);
+ ir_append(ir, irins_make_name(INS_JMP, startlbl));
+ ir_append(ir, irins_make_name(INS_LBL, afterlbl));
+ break;
+ }
+
+ case N_RETURN: {
+ struct ref ref = compile_expr(ir, symtab, node->child1);
+ ir_append(ir, irins_make_1(INS_RETV, ref));
+ break;
+ }
+
+ default: // hmm, maybe it's an expression statement?
+ compile_expr(ir, symtab, node);
+ break;
+ }
+}
+
+struct ir* compile(struct node *node) {
+ struct ir *ir = ir_make();
+ struct symtab *symtab = symtab_make();
+ compile_node(ir, symtab, node);
+ return ir;
+}
diff --git a/compiler.h b/compiler.h
new file mode 100644
index 0000000..7465c9e
--- /dev/null
+++ b/compiler.h
@@ -0,0 +1,7 @@
+#pragma once
+
+#include "ir.h"
+#include "node.h"
+
+
+struct ir* compile(struct node *node);
diff --git a/ir.c b/ir.c
new file mode 100644
index 0000000..9d28e2f
--- /dev/null
+++ b/ir.c
@@ -0,0 +1,110 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "ir.h"
+
+
+struct ir* ir_make(void) {
+ struct ir *ir = malloc(sizeof(struct ir));
+ ir->cap = 32;
+ ir->len = 0;
+ ir->inss = malloc(ir->cap * sizeof(struct irins*));
+ ir->globspace = 0;
+ return ir;
+}
+
+void ir_delete(struct ir *ir) {
+ for (int i = 0; i < ir->len; i++) {
+ irins_delete(ir->inss[i]);
+ }
+ free(ir->inss);
+ free(ir);
+}
+
+struct ref ir_reserve_global(struct ir *ir, int size) {
+ int offset = ir->globspace;
+ ir->globspace += size;
+ return ref_mem(-1, offset, true);
+}
+
+void ir_append(struct ir *ir, struct irins *ins) {
+ if (ir->len == ir->cap) {
+ ir->cap *= 2;
+ ir->inss = realloc(ir->inss, ir->cap * sizeof(struct irins*));
+ }
+ ir->inss[ir->len++] = ins;
+}
+
+struct irins* irins_make(enum instype type) {
+ struct irins *ins = malloc(sizeof(struct irins));
+ ins->type = type;
+ ins->name = NULL;
+ return ins;
+}
+
+struct irins* irins_make_name(enum instype type, char *name) {
+ struct irins *ins = irins_make(type);
+ ins->name = name;
+ return ins;
+}
+
+struct irins* irins_make_01(enum instype type, struct ref r0, struct ref r1) {
+ struct irins *ins = irins_make(type);
+ ins->r0 = r0;
+ ins->r1 = r1;
+ return ins;
+}
+
+struct irins* irins_make_012(enum instype type, struct ref r0, struct ref r1, struct ref r2) {
+ struct irins *ins = irins_make_01(type, r0, r1);
+ ins->r2 = r2;
+ return ins;
+}
+
+struct irins* irins_make_1(enum instype type, struct ref r1) {
+ struct irins *ins = irins_make(type);
+ ins->r1 = r1;
+ return ins;
+}
+
+struct irins* irins_make_12(enum instype type, struct ref r1, struct ref r2) {
+ struct irins *ins = irins_make(type);
+ ins->r1 = r1;
+ ins->r2 = r2;
+ return ins;
+}
+
+struct irins* irins_make_jcc(char *name, enum condcode condcode) {
+ struct irins *ins = irins_make(INS_JCC);
+ ins->name = name;
+ ins->condcode = condcode;
+ return ins;
+}
+
+void irins_delete(struct irins *ins) {
+ if (ins->name) free(ins->name);
+ free(ins);
+}
+
+char* gen_label_name(void) {
+ static int next = 0;
+ char *buf = malloc(16);
+ sprintf(buf, "L%d", next++);
+ return buf;
+}
+
+struct ref ref_reg(int reg) {
+ return (struct ref){REF_REG, reg, 0, false, 0};
+}
+
+struct ref ref_mem(int reg, int offset, bool rel_heap) {
+ return (struct ref){REF_MEM, reg, offset, rel_heap, 0};
+}
+
+struct ref ref_imm(int imm) {
+ return (struct ref){REF_IMM, -1, 0, false, imm};
+}
+
+struct ref ref_next_register(void) {
+ static int next = 0;
+ return ref_reg(next++);
+}
diff --git a/ir.h b/ir.h
new file mode 100644
index 0000000..b359d00
--- /dev/null
+++ b/ir.h
@@ -0,0 +1,83 @@
+#pragma once
+
+#include <stdbool.h>
+
+
+// IR uses an unlimited number of registers. This will later be fixed by a regalloc.
+
+enum instype {
+ INS_ADD, // r0 = r1 + r2
+ INS_SUB, // r0 = r1 - r2
+ INS_MUL, // r0 = r1 * r2
+ INS_DIV, // r0 = r1 / r2
+ INS_MOD, // r0 = r1 % r2
+ INS_NEG, // r0 = -r1
+ INS_NOT, // r0 = !r1
+ INS_TEST, // r1 & r2 (flags)
+ INS_PUSH, // push r1
+ INS_POP, // r0 = pop
+ INS_CMP, // r1 - r2 (flags)
+ INS_LBL, // label name
+ INS_JMP, // jmp name
+ INS_JCC, // if condcode, jmp name
+ INS_CALL, // call name
+ INS_RET, // return
+ INS_RETV, // return r1
+ INS_MOV, // r0 = r1
+};
+
+enum condcode {
+ CCZ, CCNZ, CCL, CCG, CCLE, CCGE
+};
+
+enum reftype {
+ REF_REG, // reg
+ REF_MEM, // [reg + offset + (rel_heap ? heap_base : 0)]
+ REF_IMM, // imm
+};
+
+struct ref {
+ enum reftype type;
+ int reg; // if -1 in a REF_MEM, unused
+ int offset;
+ bool rel_heap; // whether the heap base address should be added to the offset
+ int imm;
+};
+
+struct irins {
+ enum instype type;
+ struct ref r0, r1, r2;
+ char *name;
+ enum condcode condcode;
+};
+
+struct ir {
+ int cap, len;
+ struct irins **inss;
+ int globspace;
+};
+
+
+struct ir* ir_make(void);
+void ir_delete(struct ir *ir);
+
+// returns offset
+struct ref ir_reserve_global(struct ir *ir, int size);
+
+void ir_append(struct ir *ir, struct irins *ins);
+
+struct irins* irins_make(enum instype type);
+struct irins* irins_make_name(enum instype type, char *name);
+struct irins* irins_make_01(enum instype type, struct ref r0, struct ref r1);
+struct irins* irins_make_012(enum instype type, struct ref r0, struct ref r1, struct ref r2);
+struct irins* irins_make_1(enum instype type, struct ref r1);
+struct irins* irins_make_12(enum instype type, struct ref r1, struct ref r2);
+struct irins* irins_make_jcc(char *name, enum condcode condcode);
+void irins_delete(struct irins *ins);
+
+char* gen_label_name(void);
+
+struct ref ref_reg(int reg);
+struct ref ref_mem(int reg, int offset, bool rel_heap);
+struct ref ref_imm(int imm);
+struct ref ref_next_register(void);
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..173c19b
--- /dev/null
+++ b/main.c
@@ -0,0 +1,37 @@
+#include <stdio.h>
+#include <stdbool.h>
+#include <assert.h>
+#include "compiler.h"
+#include "node.h"
+
+
+extern FILE *yyin;
+extern int yyparse(void);
+extern struct node *root_node;
+
+
+int main(int argc, char **argv) {
+ if (argc != 2) {
+ fprintf(stderr, "Usage: %s <source.c>\n", argv[0]);
+ return 1;
+ }
+ yyin = fopen(argv[1], "r");
+ if (!yyin) {
+ fprintf(stderr, "Cannot open file '%s'\n", argv[1]);
+ return 1;
+ }
+
+ int result = yyparse();
+ if(!feof(yyin)) {
+ assert(false);
+ }
+
+ node_print(root_node, stdout, 0);
+ printf("\n");
+
+ struct ir *ir = compile(root_node);
+
+ type_cache_cleanup();
+
+ return result;
+}
diff --git a/node.c b/node.c
new file mode 100644
index 0000000..b20c38e
--- /dev/null
+++ b/node.c
@@ -0,0 +1,140 @@
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdarg.h>
+#include <assert.h>
+#include "node.h"
+
+
+struct node* node_make_0(enum node_type type) {
+ struct node *node = malloc(sizeof(struct node));
+ node->type = type;
+ node->child1 = node->child2 = node->child3 = NULL;
+ node->rtype = NULL;
+ node->name = NULL;
+ node->value = 0;
+ return node;
+}
+
+struct node* node_make_1(enum node_type type, struct node *c1) {
+ struct node *node = node_make_0(type);
+ node->child1 = c1;
+ return node;
+}
+
+struct node* node_make_2(enum node_type type, struct node *c1, struct node *c2) {
+ struct node *node = node_make_1(type, c1);
+ node->child2 = c2;
+ return node;
+}
+
+struct node* node_make_3(enum node_type type, struct node *c1, struct node *c2, struct node *c3) {
+ struct node *node = node_make_2(type, c1, c2);
+ node->child3 = c3;
+ return node;
+}
+
+struct node* node_make_list_(struct node *first, ...) {
+ va_list ap;
+ va_start(ap, first);
+ struct node *cur = node_make_0(N_LIST_END);
+ struct node *root = node_make_2(N_LIST, first, cur);
+ struct node *next;
+ while ((next = va_arg(ap, struct node*))) {
+ cur->type = N_LIST;
+ cur->child1 = next;
+ cur->child2 = node_make_0(N_LIST_END);
+ cur = cur->child2;
+ }
+ return root;
+}
+
+int node_list_length(struct node *node) {
+ if (node->type == N_LIST_END) return 0;
+ assert(node->type == N_LIST);
+ return 1 + node_list_length(node->child2);
+}
+
+void node_delete_recursive(struct node *node) {
+ if (node->child1) node_delete_recursive(node->child1);
+ if (node->child2) node_delete_recursive(node->child2);
+ if (node->child3) node_delete_recursive(node->child3);
+ if (node->name) free(node->name);
+ free(node);
+}
+
+static void write_spaces(int num, FILE *f) {
+ for (int i = 0; i < num; i++) fputc(' ', f);
+}
+
+static const char* node_type_string(enum node_type type) {
+ switch (type) {
+ case N_LIST: return "N_LIST"; break;
+ case N_LIST_END: return "N_LIST_END"; break;
+ case N_BLOCK: return "N_BLOCK"; break;
+ case N_VAR_DECL_INIT: return "N_VAR_DECL_INIT"; break;
+ case N_VAR_DECL: return "N_VAR_DECL"; break;
+ case N_FUNC_DECL: return "N_FUNC_DECL"; break;
+ case N_NUM: return "N_NUM"; break;
+ case N_VAR: return "N_VAR"; break;
+ case N_IF: return "N_IF"; break;
+ case N_WHILE: return "N_WHILE"; break;
+ case N_RETURN: return "N_RETURN"; break;
+ case N_BINOP: return "N_BINOP"; break;
+ case N_UNOP: return "N_UNOP"; break;
+ case N_CALL: return "N_CALL"; break;
+ default: assert(false);
+ }
+}
+
+static const char* oper_string(enum operator oper) {
+ switch (oper) {
+ case OP_ADD: return "OP_ADD";
+ case OP_SUB: return "OP_SUB";
+ case OP_MUL: return "OP_MUL";
+ case OP_DIV: return "OP_DIV";
+ case OP_MOD: return "OP_MOD";
+ case OP_EQ: return "OP_EQ";
+ case OP_NEQ: return "OP_NEQ";
+ case OP_LT: return "OP_LT";
+ case OP_GT: return "OP_GT";
+ case OP_LEQ: return "OP_LEQ";
+ case OP_GEQ: return "OP_GEQ";
+ case OP_AND: return "OP_AND";
+ case OP_OR: return "OP_OR";
+ case OP_ASSIGN: return "OP_ASSIGN";
+ case OP_NEG: return "OP_NEG";
+ case OP_NOT: return "OP_NOT";
+ default: assert(false);
+ }
+}
+
+void node_print(const struct node *node, FILE *f, int indent) {
+ const char *t = node_type_string(node->type);
+ fprintf(f, "(%s", t);
+ if (node->rtype) {
+ fprintf(f, "[rtype=");
+ type_print(node->rtype, f);
+ fprintf(f, "]");
+ }
+ if (node->name) fprintf(f, "[name=%s]", node->name);
+ if (node->type == N_NUM) fprintf(f, "[value=%d]", node->value);
+ if (node->type == N_BINOP || node->type == N_UNOP) {
+ fprintf(f, "[oper=%s]", oper_string(node->oper));
+ }
+ if (node->child1) {
+ fprintf(f, "\n");
+ write_spaces(2 * (indent + 1), f);
+ node_print(node->child1, f, indent + 1);
+ }
+ if (node->child2) {
+ fprintf(f, "\n");
+ write_spaces(2 * (indent + 1), f);
+ node_print(node->child2, f, indent + 1);
+ }
+ if (node->child3) {
+ fprintf(f, "\n");
+ write_spaces(2 * (indent + 1), f);
+ node_print(node->child3, f, indent + 1);
+ }
+ fprintf(f, ")");
+}
diff --git a/node.h b/node.h
new file mode 100644
index 0000000..81d0b26
--- /dev/null
+++ b/node.h
@@ -0,0 +1,56 @@
+#pragma once
+
+#include "type.h"
+
+
+enum node_type {
+ N_LIST, // [head] [tail]
+ N_LIST_END, // --
+ N_BLOCK, // [body]
+ N_VAR_DECL_INIT, // rtype name [value]
+ N_VAR_DECL, // rtype name
+ N_FUNC_DECL, // rtype name [params] [body]
+ N_IF, // [cond] [body] [else-body]
+ N_WHILE, // [cond] [body]
+ N_RETURN, // [value]
+ N_NUM, // value
+ N_VAR, // name
+ N_BINOP, // [left] oper [right]
+ N_UNOP, // oper [arg]
+ N_CALL, // name [args]
+};
+
+enum operator {
+ // binary
+ OP_ADD, OP_SUB, OP_MUL, OP_DIV, OP_MOD,
+ OP_EQ, OP_NEQ, OP_LT, OP_GT, OP_LEQ, OP_GEQ,
+ OP_AND, OP_OR,
+ OP_ASSIGN,
+ // unary
+ OP_NEG, OP_NOT, OP_DEREF, OP_ADDROF,
+};
+
+struct node {
+ enum node_type type;
+ struct node *child1, *child2, *child3;
+ struct type *rtype;
+ char *name;
+ int value;
+ enum operator oper;
+};
+
+
+struct node* node_make_0(enum node_type type);
+struct node* node_make_1(enum node_type type, struct node *c1);
+struct node* node_make_2(enum node_type type, struct node *c1, struct node *c2);
+struct node* node_make_3(enum node_type type, struct node *c1, struct node *c2, struct node *c3);
+
+__attribute__((sentinel))
+struct node* node_make_list_(struct node *first, ...);
+#define node_make_list(...) node_make_list_(__VA_ARGS__, NULL)
+
+int node_list_length(struct node *node);
+
+void node_delete_recursive(struct node *node);
+
+void node_print(const struct node *node, FILE *f, int indent);
diff --git a/symbol.c b/symbol.c
new file mode 100644
index 0000000..2f4adb7
--- /dev/null
+++ b/symbol.c
@@ -0,0 +1,23 @@
+#include <stdlib.h>
+#include "symbol.h"
+
+
+struct symbol* symbol_make_var(char *name, struct type *rtype) {
+ struct symbol *sym = malloc(sizeof(struct symbol));
+ sym->name = name;
+ sym->stype = ST_VAR;
+ sym->rtype = rtype;
+ sym->numparams = 0;
+ sym->params = NULL;
+ return sym;
+}
+
+struct symbol* symbol_make_func(char *name, struct type *rtype, int numparams) {
+ struct symbol *sym = malloc(sizeof(struct symbol));
+ sym->name = name;
+ sym->stype = ST_FUNC;
+ sym->rtype = rtype;
+ sym->numparams = numparams;
+ sym->params = malloc(numparams * sizeof(struct pair_tn));
+ return sym;
+}
diff --git a/symbol.h b/symbol.h
new file mode 100644
index 0000000..454e8bc
--- /dev/null
+++ b/symbol.h
@@ -0,0 +1,29 @@
+#pragma once
+
+#include "ir.h"
+#include "type.h"
+
+
+enum symbol_type {
+ ST_VAR,
+ ST_FUNC
+};
+
+struct pair_tn {
+ struct type *type;
+ char *name;
+};
+
+struct symbol {
+ char *name;
+ enum symbol_type stype;
+ struct type *rtype;
+ struct ref ref;
+
+ int numparams;
+ struct pair_tn *params;
+};
+
+
+struct symbol* symbol_make_var(char *name, struct type *rtype);
+struct symbol* symbol_make_func(char *name, struct type *rtype, int numparams);
diff --git a/symtab.c b/symtab.c
new file mode 100644
index 0000000..4229a63
--- /dev/null
+++ b/symtab.c
@@ -0,0 +1,60 @@
+#include <stdlib.h>
+#include <string.h>
+#include "symtab.h"
+
+
+struct symtab* symtab_make() {
+ struct symtab *symtab = malloc(sizeof(struct symtab));
+ symtab->parent = NULL;
+ symtab->cap = 16;
+ symtab->len = 0;
+ symtab->syms = malloc(symtab->cap * sizeof(struct symbol*));
+ return symtab;
+}
+
+void symtab_delete_recursive(struct symtab *symtab) {
+ free(symtab->syms);
+ struct symtab *parent = symtab->parent;
+ free(symtab);
+ if (parent) symtab_delete_recursive(parent);
+}
+
+struct symbol* symtab_find(const struct symtab *symtab, const char *name) {
+ struct symbol *sym = symtab_find_local(symtab, name);
+ if (sym) return sym;
+ else if (symtab->parent) return symtab_find(symtab->parent, name);
+ else return NULL;
+}
+
+struct symbol* symtab_find_local(const struct symtab *symtab, const char *name) {
+ for (int i = 0; i < symtab->len; i++) {
+ if (strcmp(symtab->syms[i]->name, name) == 0) return symtab->syms[i];
+ }
+ return NULL;
+}
+
+void symtab_insert(struct symtab *symtab, struct symbol *sym) {
+ if (symtab->len == symtab->cap) {
+ symtab->cap *= 2;
+ symtab->syms = realloc(symtab->syms, symtab->cap * sizeof(struct symbol*));
+ }
+ symtab->syms[symtab->len++] = sym;
+}
+
+struct symtab* symtab_root(struct symtab *symtab) {
+ if (symtab->parent) return symtab_root(symtab->parent);
+ else return symtab;
+}
+
+struct symtab* symtab_sub(struct symtab *symtab) {
+ struct symtab *sub = symtab_make();
+ sub->parent = symtab;
+ return sub;
+}
+
+struct symtab* symtab_delete_get_parent(struct symtab *symtab) {
+ struct symtab *parent = symtab->parent;
+ symtab->parent = NULL;
+ symtab_delete_recursive(symtab);
+ return parent;
+}
diff --git a/symtab.h b/symtab.h
new file mode 100644
index 0000000..18dd8e0
--- /dev/null
+++ b/symtab.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include "symbol.h"
+#include "type.h"
+
+
+struct symtab {
+ struct symtab *parent;
+ int cap,len;
+ struct symbol **syms;
+};
+
+
+struct symtab* symtab_make();
+void symtab_delete_recursive(struct symtab *symtab);
+
+struct symbol* symtab_find(const struct symtab *symtab, const char *name);
+struct symbol* symtab_find_local(const struct symtab *symtab, const char *name);
+void symtab_insert(struct symtab *symtab, struct symbol *sym);
+
+struct symtab* symtab_root(struct symtab *symtab);
+struct symtab* symtab_sub(struct symtab *symtab);
+struct symtab* symtab_delete_get_parent(struct symtab *symtab);
diff --git a/test/t1.c b/test/t1.c
new file mode 100644
index 0000000..229d5e9
--- /dev/null
+++ b/test/t1.c
@@ -0,0 +1,21 @@
+int glob;
+
+int fibo(int n) {
+ if (n <= 0) return 0;
+ int a = 0;
+ int b = 1;
+ int i = 1;
+ while (i < n) {
+ int c = a + b;
+ a = b;
+ b = c;
+ i = i + 1;
+ }
+ return b;
+}
+
+int main(int argc, int **argv) {
+ int a = 1;
+ int b = 2 + a * 3;
+ holo_dec(a);
+}
diff --git a/type.c b/type.c
new file mode 100644
index 0000000..6dd4dd0
--- /dev/null
+++ b/type.c
@@ -0,0 +1,79 @@
+#include <stdlib.h>
+#include <stdbool.h>
+#include <assert.h>
+#include "type.h"
+
+
+static struct {
+ int cap, num;
+ struct type **types;
+} cache;
+
+static struct type *voidtype;
+
+static void init_cache(void) {
+ if (voidtype) return;
+ voidtype = malloc(sizeof(struct type));
+ voidtype->tag = T_VOID;
+ cache.cap = 8;
+ cache.num = 0;
+ cache.types = malloc(cache.cap * sizeof(struct type*));
+}
+
+void type_cache_cleanup(void) {
+ for (int i = 0; i < cache.num; i++) {
+ free(cache.types[i]);
+ }
+ free(cache.types);
+ free(voidtype);
+}
+
+static void cache_add(struct type *type) {
+ if (cache.num == cache.cap) {
+ cache.cap *= 2;
+ cache.types = realloc(cache.types, cache.cap * sizeof(struct type*));
+ }
+ cache.types[cache.num++] = type;
+}
+
+
+struct type* type_int(int size) {
+ init_cache();
+ for (int i = 0; i < cache.num; i++) {
+ if (cache.types[i]->tag == T_INT && cache.types[i]->size == size) {
+ return cache.types[i];
+ }
+ }
+ struct type *type = malloc(sizeof(struct type));
+ type->tag = T_INT;
+ type->size = size;
+ cache_add(type);
+ return type;
+}
+
+struct type* type_void(void) {
+ return voidtype;
+}
+
+struct type* type_ptr(struct type *target) {
+ init_cache();
+ for (int i = 0; i < cache.num; i++) {
+ if (cache.types[i]->tag == T_PTR && cache.types[i]->target == target) {
+ return cache.types[i];
+ }
+ }
+ struct type *type = malloc(sizeof(struct type));
+ type->tag = T_PTR;
+ type->target = target;
+ cache_add(type);
+ return type;
+}
+
+void type_print(struct type *type, FILE *f) {
+ switch (type->tag) {
+ case T_INT: fprintf(f, "i%d", type->size); break;
+ case T_VOID: fprintf(f, "void"); break;
+ case T_PTR: type_print(type->target, f); fprintf(f, "*"); break;
+ default: assert(false);
+ }
+}
diff --git a/type.h b/type.h
new file mode 100644
index 0000000..6e6e0f6
--- /dev/null
+++ b/type.h
@@ -0,0 +1,32 @@
+#pragma once
+
+#include <stdio.h>
+
+
+enum type_tag {
+ T_INT,
+ T_VOID,
+ T_PTR
+};
+
+struct type {
+ enum type_tag tag;
+ union {
+ struct { // T_INT
+ int size;
+ };
+ struct {}; // T_VOID
+ struct { // T_PTR
+ struct type *target;
+ };
+ };
+};
+
+
+void type_cache_cleanup(void);
+
+struct type* type_int(int size);
+struct type* type_void(void);
+struct type* type_ptr(struct type *type);
+
+void type_print(struct type *type, FILE *f);