diff options
author | Tom Smeding <tom.smeding@gmail.com> | 2018-01-03 23:10:59 +0100 |
---|---|---|
committer | Tom Smeding <tom.smeding@gmail.com> | 2018-01-03 23:10:59 +0100 |
commit | 9911f9a73c7dc46069199e52f2bc54082d10366c (patch) | |
tree | 914cd4fc2367207271c1c53c7f11a96ed9bbc9b7 |
Initial
-rw-r--r-- | .gitignore | 4 | ||||
-rw-r--r-- | Makefile | 28 | ||||
-rw-r--r-- | c.l | 119 | ||||
-rw-r--r-- | c.y | 248 | ||||
-rw-r--r-- | compiler.c | 279 | ||||
-rw-r--r-- | compiler.h | 7 | ||||
-rw-r--r-- | ir.c | 110 | ||||
-rw-r--r-- | ir.h | 83 | ||||
-rw-r--r-- | main.c | 37 | ||||
-rw-r--r-- | node.c | 140 | ||||
-rw-r--r-- | node.h | 56 | ||||
-rw-r--r-- | symbol.c | 23 | ||||
-rw-r--r-- | symbol.h | 29 | ||||
-rw-r--r-- | symtab.c | 60 | ||||
-rw-r--r-- | symtab.h | 23 | ||||
-rw-r--r-- | test/t1.c | 21 | ||||
-rw-r--r-- | type.c | 79 | ||||
-rw-r--r-- | type.h | 32 |
18 files changed, 1378 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b711b6b --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +as1 +as2 +bison-out +ccomp diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..e206eea --- /dev/null +++ b/Makefile @@ -0,0 +1,28 @@ +CC = gcc +LEX = flex +YAXX = bison + +CFLAGS = -Wall -Wextra -std=c11 -g -I. -D_GNU_SOURCE +LDFLAGS = -lfl + +TARGET = ccomp + +.PHONY: all clean + +all: $(TARGET) + +clean: + rm -f $(TARGET) *.o + rm -rf bison-out + + +bison-out/y.tab.c bison-out/y.tab.h: c.y + @mkdir -p bison-out + bison -d -b bison-out/y c.y + +bison-out/lex.yy.c: c.l + @mkdir -p bison-out + flex -o bison-out/lex.yy.c c.l + +$(TARGET): $(wildcard *.c) $(wildcard *.h) bison-out/y.tab.c bison-out/lex.yy.c + $(CC) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@ @@ -0,0 +1,119 @@ +%{ + +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> +#include "type.h" +#include "y.tab.h" + +#define DEBUG + +int yylex(void); + +int lineno=1; + +static enum state { + SOP, + STERM, + SPTR +} state = STERM; + +__attribute__((format (printf, 1, 2))) +void pdebug(const char *format, ...); + +#define RETS(val_) {pdebug(#val_ ": %s\n", yytext); yylval.id = strdup(yytext); return val_;} +#define RET_TYPE(val_, ty_) {pdebug(#val_ ": %s (" #ty_ ")\n", yytext); yylval.type = ty_; return val_;} +#define RET(val_) {pdebug(#val_ "\n"); return val_;} + +%} + +%option noinput nounput + +LETTER [a-zA-Z] +DIGIT [0-9] +ID {LETTER}({LETTER}|{DIGIT}|_)* +DIGITS {DIGIT}{DIGIT}* + +NUM -?{DIGITS} + +%x C_COMMENT + + +%% + +{NUM} { state = SOP; RETS(NUM); } + +int { state = SPTR; RET_TYPE(INT, type_int(16)); } +void { state = SPTR; RET_TYPE(VOID, type_void()); } +if { RET(IF); } +else { RET(ELSE); } +while { RET(WHILE); } +for { RET(FOR); } +return { state = STERM; RET(RETURN); } + +{ID} { state = SOP; RETS(ID); } + +"//"[^\n]* {} + +"/*" { pdebug("C_COMMENT... "); fflush(stdout); BEGIN(C_COMMENT); } +<C_COMMENT>"*/" { pdebug("done\n"); BEGIN(INITIAL); } +<C_COMMENT>. {} +<C_COMMENT>\n { lineno++; } + +\n { lineno++; } +[ \t]+ {} + +"+" { state = STERM; RETS(ADDOP); } +[/%] { state = STERM; RETS(MULOP); } +"<"=?|">"=?|==|!= { state = STERM; RETS(RELOP); } +&&|"||" { state = STERM; RETS(BOOLOP); } += { state = STERM; RET(ASSIGN); } +"!" { state = STERM; RET(NOT); } + +"-" { + switch (state) { + case SOP: state = STERM; RETS(ADDOP); + case STERM: state = STERM; RETS(NEGATE); + default: fprintf(stderr, "Unexpected '-'\n"); exit(1); + } + } + +"*" { + switch (state) { + case SOP: state = STERM; RETS(MULOP); + case STERM: state = STERM; RET(DEREF); + case SPTR: state = SPTR; RET(PTR); + default: fprintf(stderr, "Unexpected '*'\n"); exit(1); + } + } + +"&" { + switch (state) { + case SOP: fprintf(stderr, "Bit-and not implemented\n"); exit(1); + case STERM: state = STERM; RET(ADDROF); + default: fprintf(stderr, "Unexpected '&'\n"); exit(1); + } + } + +")" { state = SOP; pdebug("')'\n"); return ')'; } + +. { pdebug(".: %c\n", yytext[0]); return yytext[0]; } + + +%% + + +void pdebug(const char *format, ...) { + (void)format; +#ifdef DEBUG + va_list ap; + va_start(ap, format); + vfprintf(stderr, format, ap); + va_end(ap); +#endif +} + + +/* vim:et:ts=4:sw=4 +*/ @@ -0,0 +1,248 @@ +%{ + +#include <stdio.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include "node.h" + +static void yyerror(const char*); + +int yylex(void); + +static enum operator addop_to_oper(const char *op); +static enum operator mulop_to_oper(const char *op); +static enum operator relop_to_oper(const char *op); +static enum operator boolop_to_oper(const char *op); + +extern int lineno; + +struct node *root_node; + +%} + +%start start + +%token ID NUM INT VOID IF ELSE WHILE FOR RETURN PTR +%token DEREF ADDROF NEGATE NOT ADDOP MULOP RELOP BOOLOP ASSIGN + +%left ASSIGN +%left BOOLOP +%left RELOP +%left ADDOP +%left MULOP +%left NOT NEGATE DEREF ADDROF + +%type <id> ID NUM ADDOP MULOP RELOP BOOLOP +%type <node> toplevel toplevel_decl var_decl func_decl parameter_list parameter block +%type <node> statement statement_list open_statement matched_statement other_statement +%type <node> expression atom_expr expression_list +%type <type> type INT VOID + +%union { + char *id; + struct node *node; + struct type *type; +} + +%destructor { free($$); } <id> +%destructor { node_delete_recursive($$); } <node> + +%% + +start: toplevel { + root_node = $1; + } ; + +toplevel: toplevel_decl toplevel { + $$ = node_make_2(N_LIST, $1, $2); + } + | toplevel_decl { + $$ = $1; + } ; + +toplevel_decl: var_decl | func_decl ; + +var_decl: type ID ASSIGN expression ';' { + $$ = node_make_1(N_VAR_DECL_INIT, $4); + $$->rtype = $1; + $$->name = $2; + } + | type ID ';' { + $$ = node_make_0(N_VAR_DECL); + $$->rtype = $1; + $$->name = $2; + } ; + +func_decl: type ID '(' parameter_list ')' block { + $$ = node_make_2(N_FUNC_DECL, $4, $6); + $$->rtype = $1; + $$->name = $2; + } ; + +type: type PTR { $$ = type_ptr($1); } + | INT { $$ = type_int(16); } + | VOID { $$ = type_void(); } ; + +parameter_list: parameter { + $$ = node_make_2(N_LIST, $1, node_make_0(N_LIST_END)); + } + | parameter ',' parameter_list { + $$ = node_make_2(N_LIST, $1, $3); + } ; + +parameter: type ID { + $$ = node_make_0(N_VAR_DECL); + $$->rtype = $1; + $$->name = $2; + } ; + +block: '{' statement_list '}' { + $$ = node_make_1(N_BLOCK, $2); + } ; + +statement_list: statement { + $$ = node_make_2(N_LIST, $1, node_make_0(N_LIST_END)); + } + | statement statement_list { + $$ = node_make_2(N_LIST, $1, $2); + } ; + +statement: open_statement | matched_statement ; + +open_statement: IF '(' expression ')' statement { + $$ = node_make_3(N_IF, $3, $5, node_make_0(N_LIST_END)); + } + | IF '(' expression ')' matched_statement ELSE open_statement { + $$ = node_make_3(N_IF, $3, $5, $7); + } + | WHILE '(' expression ')' open_statement { + $$ = node_make_2(N_WHILE, $3, $5); + } + /*| FOR '(' statement ';' expression ';' statement ')' open_statement { + $$ = node_make_1(N_BLOCK, + node_make_list( + $3, node_make_2(N_WHILE, $5, node_make_list($9, $7)))); + }*/ ; + +matched_statement: + IF '(' expression ')' matched_statement ELSE matched_statement { + $$ = node_make_3(N_IF, $3, $5, $7); + } + | WHILE '(' expression ')' matched_statement { + $$ = node_make_2(N_WHILE, $3, $5); + } + /*| FOR '(' statement ';' expression ';' statement ')' matched_statement { + $$ = node_make_1(N_BLOCK, + node_make_list( + $3, node_make_2(N_WHILE, $5, node_make_list($9, $7)))); + }*/ + | other_statement ; + +other_statement: + var_decl | expression ';' | block + | RETURN expression ';' { + $$ = node_make_1(N_RETURN, $2); + } ; + +expression: atom_expr + | NOT expression { $$ = node_make_1(N_UNOP, $2); $$->oper = OP_NOT; } + | NEGATE expression { $$ = node_make_1(N_UNOP, $2); $$->oper = OP_NEG; } + | DEREF expression { $$ = node_make_1(N_UNOP, $2); $$->oper = OP_DEREF; } + | ADDROF expression { $$ = node_make_1(N_UNOP, $2); $$->oper = OP_ADDROF; } + | expression ADDOP expression { + $$ = node_make_2(N_BINOP, $1, $3); + $$->oper = addop_to_oper($2); + free($2); + } + | expression MULOP expression { + $$ = node_make_2(N_BINOP, $1, $3); + $$->oper = mulop_to_oper($2); + free($2); + } + | expression RELOP expression { + $$ = node_make_2(N_BINOP, $1, $3); + $$->oper = relop_to_oper($2); + free($2); + } + | expression BOOLOP expression { + $$ = node_make_2(N_BINOP, $1, $3); + $$->oper = boolop_to_oper($2); + free($2); + } + | atom_expr ASSIGN expression { + $$ = node_make_2(N_BINOP, $1, $3); + $$->oper = OP_ASSIGN; + } ; + +atom_expr: NUM { + $$ = node_make_0(N_NUM); + $$->value = strtol($1, NULL, 10); + free($1); + } + | ID { + $$ = node_make_0(N_VAR); + $$->name = $1; + } + | '(' expression ')' { + $$ = $2; + } + | ID '(' expression_list ')' { + $$ = node_make_1(N_CALL, $3); + $$->name = $1; + } + | atom_expr '[' expression ']' { + $$ = node_make_1(N_UNOP, node_make_2(N_BINOP, $1, $3)); + $$->child2->oper = OP_ADD; + $$->oper = OP_DEREF; + } ; + +expression_list: expression { + $$ = node_make_2(N_LIST, $1, node_make_0(N_LIST_END)); + } + | expression ',' expression_list { + $$ = node_make_2(N_LIST, $1, $3); + } ; + + +%% + + +static void yyerror(const char *s) { + fprintf(stderr, "Parse error: %s\n", s); + exit(1); +} + +static enum operator addop_to_oper(const char *op) { + if (strcmp(op, "+") == 0) return OP_ADD; + else if (strcmp(op, "-") == 0) return OP_SUB; + else assert(false); +} + +static enum operator mulop_to_oper(const char *op) { + if (strcmp(op, "*") == 0) return OP_MUL; + else if (strcmp(op, "/") == 0) return OP_DIV; + else if (strcmp(op, "%") == 0) return OP_MOD; + else assert(false); +} + +static enum operator relop_to_oper(const char *op) { + if (strcmp(op, "==") == 0) return OP_EQ; + else if (strcmp(op, "!=") == 0) return OP_NEQ; + else if (strcmp(op, "<") == 0) return OP_LT; + else if (strcmp(op, ">") == 0) return OP_GT; + else if (strcmp(op, "<=") == 0) return OP_LEQ; + else if (strcmp(op, ">=") == 0) return OP_GEQ; + else assert(false); +} + +static enum operator boolop_to_oper(const char *op) { + if (strcmp(op, "&&") == 0) return OP_AND; + else if (strcmp(op, "||") == 0) return OP_OR; + else assert(false); +} + + +/* vim:et:ts=4:sw=4 +*/ diff --git a/compiler.c b/compiler.c new file mode 100644 index 0000000..42ad25c --- /dev/null +++ b/compiler.c @@ -0,0 +1,279 @@ +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include "compiler.h" +#include "symtab.h" + + +static void write_func_params(struct pair_tn *params, struct node *list) { + if (list->type == N_LIST_END) return; + assert(list->child1->type == N_VAR_DECL); + params[0].type = list->child1->rtype; + params[0].name = strdup(list->child1->name); + write_func_params(params + 1, list->child2); +} + +static bool oper_is_basic_arith(enum operator oper) { + return oper == OP_ADD || oper == OP_SUB || oper == OP_MUL || oper == OP_DIV || oper == OP_MOD; +} + +static bool oper_is_comparison(enum operator oper) { + return oper == OP_EQ || oper == OP_NEQ || + oper == OP_LT || oper == OP_GT || + oper == OP_LEQ || oper == OP_GEQ; +} + +static struct ref compile_expr(struct ir *ir, struct symtab *symtab, struct node *node); + +static void compile_boolexpr( + struct ir *ir, struct symtab *symtab, struct node *node, char *truedest, char *falsedest) { + + if (node->type == N_BINOP && oper_is_comparison(node->oper)) { + struct ref r1 = compile_expr(ir, symtab, node->child1); + struct ref r2 = compile_expr(ir, symtab, node->child2); + ir_append(ir, irins_make_12(INS_CMP, r1, r2)); + enum condcode condcode; + switch (node->oper) { + case OP_EQ: condcode = CCNZ; break; + case OP_NEQ: condcode = CCZ; break; + case OP_LT: condcode = CCGE; break; + case OP_GT: condcode = CCLE; break; + case OP_LEQ: condcode = CCG; break; + case OP_GEQ: condcode = CCL; break; + default: assert(false); + } + ir_append(ir, irins_make_jcc(falsedest, condcode)); + ir_append(ir, irins_make_name(INS_JMP, truedest)); + } else if (node->type == N_BINOP && node->oper == OP_AND) { + char *try2lbl = gen_label_name(); + compile_boolexpr(ir, symtab, node->child1, try2lbl, falsedest); + ir_append(ir, irins_make_name(INS_LBL, try2lbl)); + compile_boolexpr(ir, symtab, node->child2, truedest, falsedest); + } else if (node->type == N_BINOP && node->oper == OP_OR) { + char *try2lbl = gen_label_name(); + compile_boolexpr(ir, symtab, node->child1, truedest, try2lbl); + ir_append(ir, irins_make_name(INS_LBL, try2lbl)); + compile_boolexpr(ir, symtab, node->child2, truedest, falsedest); + } else if (node->type == N_UNOP && node->oper == OP_NOT) { + compile_boolexpr(ir, symtab, node->child1, falsedest, truedest); + } else { + struct ref ref = compile_expr(ir, symtab, node); + ir_append(ir, irins_make_12(INS_TEST, ref, ref)); + ir_append(ir, irins_make_jcc(falsedest, CCZ)); + ir_append(ir, irins_make_name(INS_JMP, truedest)); + } +} + +static struct ref compile_expr(struct ir *ir, struct symtab *symtab, struct node *node) { + switch (node->type) { + case N_NUM: { + struct ref ref = ref_next_register(); + ir_append(ir, irins_make_01(INS_MOV, ref, ref_imm(node->value))); + return ref; + } + + case N_VAR: { + struct symbol *sym = symtab_find(symtab, node->name); + if (!sym) { + fprintf(stderr, "Use of undeclared variable '%s'\n", node->name); + exit(1); + } + return sym->ref; + } + + case N_BINOP: { + if (oper_is_basic_arith(node->oper)) { + struct ref r0 = ref_next_register(); + struct ref r1 = compile_expr(ir, symtab, node->child1); + struct ref r2 = compile_expr(ir, symtab, node->child2); + enum instype instype; + switch (node->oper) { + case OP_ADD: instype = INS_ADD; break; + case OP_SUB: instype = INS_SUB; break; + case OP_MUL: instype = INS_MUL; break; + case OP_DIV: instype = INS_DIV; break; + case OP_MOD: instype = INS_MOD; break; + default: assert(false); + } + ir_append(ir, irins_make_012(instype, r0, r1, r2)); + return r0; + } else if (oper_is_comparison(node->oper) || node->oper == OP_AND || node->oper == OP_OR) { + char *truelbl = gen_label_name(); + char *falselbl = gen_label_name(); + char *afterlbl = gen_label_name(); + compile_boolexpr(ir, symtab, node, truelbl, falselbl); + struct ref cval = ref_next_register(); + ir_append(ir, irins_make_name(INS_LBL, truelbl)); + ir_append(ir, irins_make_01(INS_MOV, cval, ref_imm(1))); + ir_append(ir, irins_make_name(INS_JMP, afterlbl)); + ir_append(ir, irins_make_name(INS_LBL, falselbl)); + ir_append(ir, irins_make_01(INS_MOV, cval, ref_imm(0))); + ir_append(ir, irins_make_name(INS_LBL, afterlbl)); + return cval; + } else if (node->oper == OP_ASSIGN) { + if (node->child1->type == N_VAR || + (node->child1->type == N_UNOP && node->child1->oper == OP_DEREF)) { + struct ref r0 = compile_expr(ir, symtab, node->child1); + struct ref r1 = compile_expr(ir, symtab, node->child2); + ir_append(ir, irins_make_01(INS_MOV, r0, r1)); + return r0; + } else { + fprintf(stderr, "Invalid left-hand side in assignment.\n"); + exit(1); + } + } else { + assert(false); + } + } + + case N_UNOP: { + switch (node->oper) { + case OP_NEG: { + struct ref r0 = ref_next_register(); + struct ref r1 = compile_expr(ir, symtab, node->child1); + ir_append(ir, irins_make_01(INS_NEG, r0, r1)); + return r0; + } + + case OP_NOT: { + struct ref r0 = ref_next_register(); + struct ref r1 = compile_expr(ir, symtab, node->child1); + ir_append(ir, irins_make_01(INS_NOT, r0, r1)); + return r0; + } + + case OP_DEREF: { + struct ref addr = compile_expr(ir, symtab, node->child1); + struct ref r1; + switch (addr.type) { + case REF_REG: r1 = ref_mem(addr.reg, 0, false); break; + case REF_IMM: r1 = ref_mem(-1, addr.imm, false); break; + case REF_MEM: { + r1 = ref_next_register(); + ir_append(ir, irins_make_01(INS_MOV, r1, addr)); + r1 = ref_mem(r1.reg, 0, false); break; + break; + } + } + struct ref r0 = ref_next_register(); + ir_append(ir, irins_make_01(INS_MOV, r0, r1)); + return r0; + } + + case OP_ADDROF: + fprintf(stderr, "OP_ADDROF not implemented\n"); + exit(1); + + default: + assert(false); + } + } + + case N_CALL: { + fprintf(stderr, "function calls not implemented\n"); + exit(1); + /*struct symbol *sym = symtab_find(symtab, node->name); + if (!sym) { + fprintf(stderr, "Call of undeclared function '%s'\n", node->name); + exit(1); + } + ;*/ + } + + default: + fprintf(stderr, "Invalid node type %d in expression\n", node->type); + exit(1); + } +} + +static void compile_node(struct ir *ir, struct symtab *symtab, struct node *node) { + switch (node->type) { + case N_LIST: + compile_node(ir, symtab, node->child1); + compile_node(ir, symtab, node->child2); + break; + + case N_LIST_END: + break; + + case N_BLOCK: { + symtab = symtab_sub(symtab); + compile_node(ir, symtab, node->child1); + symtab = symtab_delete_get_parent(symtab); + break; + } + + case N_VAR_DECL_INIT: + case N_VAR_DECL: { + if (symtab_find_local(symtab, node->name)) { + fprintf(stderr, "Redeclaration of variable '%s'\n", node->name); + exit(1); + } + struct symbol *sym = symbol_make_var(node->name, node->rtype); + sym->ref = ref_next_register(); + symtab_insert(symtab, sym); + ir_append(ir, irins_make_01(INS_MOV, sym->ref, ref_imm(node->value))); + break; + } + + case N_FUNC_DECL: { + if (symtab_find(symtab, node->name)) { + fprintf(stderr, "Redeclaration of function '%s'\n", node->name); + exit(1); + } + int numparams = node_list_length(node->child1); + struct symbol *sym = symbol_make_func(node->name, node->rtype, numparams); + write_func_params(sym->params, node->child1); + symtab_insert(symtab_root(symtab), sym); + ir_append(ir, irins_make_name(INS_LBL, strdup(node->name))); + symtab = symtab_sub(symtab); + compile_node(ir, symtab, node->child2); + symtab = symtab_delete_get_parent(symtab); + break; + } + + case N_IF: { + char *elselbl = gen_label_name(); + char *afterlbl = gen_label_name(); + struct ref condref = compile_expr(ir, symtab, node->child1); + ir_append(ir, irins_make_12(INS_TEST, condref, condref)); + ir_append(ir, irins_make_jcc(elselbl, CCZ)); + compile_node(ir, symtab, node->child2); + ir_append(ir, irins_make_name(INS_JMP, afterlbl)); + ir_append(ir, irins_make_name(INS_LBL, elselbl)); + compile_node(ir, symtab, node->child3); + ir_append(ir, irins_make_name(INS_LBL, afterlbl)); + break; + } + + case N_WHILE: { + char *startlbl = gen_label_name(); + char *afterlbl = gen_label_name(); + ir_append(ir, irins_make_name(INS_LBL, startlbl)); + struct ref condref = compile_expr(ir, symtab, node->child1); + ir_append(ir, irins_make_12(INS_TEST, condref, condref)); + ir_append(ir, irins_make_jcc(afterlbl, CCZ)); + compile_node(ir, symtab, node->child2); + ir_append(ir, irins_make_name(INS_JMP, startlbl)); + ir_append(ir, irins_make_name(INS_LBL, afterlbl)); + break; + } + + case N_RETURN: { + struct ref ref = compile_expr(ir, symtab, node->child1); + ir_append(ir, irins_make_1(INS_RETV, ref)); + break; + } + + default: // hmm, maybe it's an expression statement? + compile_expr(ir, symtab, node); + break; + } +} + +struct ir* compile(struct node *node) { + struct ir *ir = ir_make(); + struct symtab *symtab = symtab_make(); + compile_node(ir, symtab, node); + return ir; +} diff --git a/compiler.h b/compiler.h new file mode 100644 index 0000000..7465c9e --- /dev/null +++ b/compiler.h @@ -0,0 +1,7 @@ +#pragma once + +#include "ir.h" +#include "node.h" + + +struct ir* compile(struct node *node); @@ -0,0 +1,110 @@ +#include <stdio.h> +#include <stdlib.h> +#include "ir.h" + + +struct ir* ir_make(void) { + struct ir *ir = malloc(sizeof(struct ir)); + ir->cap = 32; + ir->len = 0; + ir->inss = malloc(ir->cap * sizeof(struct irins*)); + ir->globspace = 0; + return ir; +} + +void ir_delete(struct ir *ir) { + for (int i = 0; i < ir->len; i++) { + irins_delete(ir->inss[i]); + } + free(ir->inss); + free(ir); +} + +struct ref ir_reserve_global(struct ir *ir, int size) { + int offset = ir->globspace; + ir->globspace += size; + return ref_mem(-1, offset, true); +} + +void ir_append(struct ir *ir, struct irins *ins) { + if (ir->len == ir->cap) { + ir->cap *= 2; + ir->inss = realloc(ir->inss, ir->cap * sizeof(struct irins*)); + } + ir->inss[ir->len++] = ins; +} + +struct irins* irins_make(enum instype type) { + struct irins *ins = malloc(sizeof(struct irins)); + ins->type = type; + ins->name = NULL; + return ins; +} + +struct irins* irins_make_name(enum instype type, char *name) { + struct irins *ins = irins_make(type); + ins->name = name; + return ins; +} + +struct irins* irins_make_01(enum instype type, struct ref r0, struct ref r1) { + struct irins *ins = irins_make(type); + ins->r0 = r0; + ins->r1 = r1; + return ins; +} + +struct irins* irins_make_012(enum instype type, struct ref r0, struct ref r1, struct ref r2) { + struct irins *ins = irins_make_01(type, r0, r1); + ins->r2 = r2; + return ins; +} + +struct irins* irins_make_1(enum instype type, struct ref r1) { + struct irins *ins = irins_make(type); + ins->r1 = r1; + return ins; +} + +struct irins* irins_make_12(enum instype type, struct ref r1, struct ref r2) { + struct irins *ins = irins_make(type); + ins->r1 = r1; + ins->r2 = r2; + return ins; +} + +struct irins* irins_make_jcc(char *name, enum condcode condcode) { + struct irins *ins = irins_make(INS_JCC); + ins->name = name; + ins->condcode = condcode; + return ins; +} + +void irins_delete(struct irins *ins) { + if (ins->name) free(ins->name); + free(ins); +} + +char* gen_label_name(void) { + static int next = 0; + char *buf = malloc(16); + sprintf(buf, "L%d", next++); + return buf; +} + +struct ref ref_reg(int reg) { + return (struct ref){REF_REG, reg, 0, false, 0}; +} + +struct ref ref_mem(int reg, int offset, bool rel_heap) { + return (struct ref){REF_MEM, reg, offset, rel_heap, 0}; +} + +struct ref ref_imm(int imm) { + return (struct ref){REF_IMM, -1, 0, false, imm}; +} + +struct ref ref_next_register(void) { + static int next = 0; + return ref_reg(next++); +} @@ -0,0 +1,83 @@ +#pragma once + +#include <stdbool.h> + + +// IR uses an unlimited number of registers. This will later be fixed by a regalloc. + +enum instype { + INS_ADD, // r0 = r1 + r2 + INS_SUB, // r0 = r1 - r2 + INS_MUL, // r0 = r1 * r2 + INS_DIV, // r0 = r1 / r2 + INS_MOD, // r0 = r1 % r2 + INS_NEG, // r0 = -r1 + INS_NOT, // r0 = !r1 + INS_TEST, // r1 & r2 (flags) + INS_PUSH, // push r1 + INS_POP, // r0 = pop + INS_CMP, // r1 - r2 (flags) + INS_LBL, // label name + INS_JMP, // jmp name + INS_JCC, // if condcode, jmp name + INS_CALL, // call name + INS_RET, // return + INS_RETV, // return r1 + INS_MOV, // r0 = r1 +}; + +enum condcode { + CCZ, CCNZ, CCL, CCG, CCLE, CCGE +}; + +enum reftype { + REF_REG, // reg + REF_MEM, // [reg + offset + (rel_heap ? heap_base : 0)] + REF_IMM, // imm +}; + +struct ref { + enum reftype type; + int reg; // if -1 in a REF_MEM, unused + int offset; + bool rel_heap; // whether the heap base address should be added to the offset + int imm; +}; + +struct irins { + enum instype type; + struct ref r0, r1, r2; + char *name; + enum condcode condcode; +}; + +struct ir { + int cap, len; + struct irins **inss; + int globspace; +}; + + +struct ir* ir_make(void); +void ir_delete(struct ir *ir); + +// returns offset +struct ref ir_reserve_global(struct ir *ir, int size); + +void ir_append(struct ir *ir, struct irins *ins); + +struct irins* irins_make(enum instype type); +struct irins* irins_make_name(enum instype type, char *name); +struct irins* irins_make_01(enum instype type, struct ref r0, struct ref r1); +struct irins* irins_make_012(enum instype type, struct ref r0, struct ref r1, struct ref r2); +struct irins* irins_make_1(enum instype type, struct ref r1); +struct irins* irins_make_12(enum instype type, struct ref r1, struct ref r2); +struct irins* irins_make_jcc(char *name, enum condcode condcode); +void irins_delete(struct irins *ins); + +char* gen_label_name(void); + +struct ref ref_reg(int reg); +struct ref ref_mem(int reg, int offset, bool rel_heap); +struct ref ref_imm(int imm); +struct ref ref_next_register(void); @@ -0,0 +1,37 @@ +#include <stdio.h> +#include <stdbool.h> +#include <assert.h> +#include "compiler.h" +#include "node.h" + + +extern FILE *yyin; +extern int yyparse(void); +extern struct node *root_node; + + +int main(int argc, char **argv) { + if (argc != 2) { + fprintf(stderr, "Usage: %s <source.c>\n", argv[0]); + return 1; + } + yyin = fopen(argv[1], "r"); + if (!yyin) { + fprintf(stderr, "Cannot open file '%s'\n", argv[1]); + return 1; + } + + int result = yyparse(); + if(!feof(yyin)) { + assert(false); + } + + node_print(root_node, stdout, 0); + printf("\n"); + + struct ir *ir = compile(root_node); + + type_cache_cleanup(); + + return result; +} @@ -0,0 +1,140 @@ +#include <stdlib.h> +#include <stdbool.h> +#include <stdarg.h> +#include <assert.h> +#include "node.h" + + +struct node* node_make_0(enum node_type type) { + struct node *node = malloc(sizeof(struct node)); + node->type = type; + node->child1 = node->child2 = node->child3 = NULL; + node->rtype = NULL; + node->name = NULL; + node->value = 0; + return node; +} + +struct node* node_make_1(enum node_type type, struct node *c1) { + struct node *node = node_make_0(type); + node->child1 = c1; + return node; +} + +struct node* node_make_2(enum node_type type, struct node *c1, struct node *c2) { + struct node *node = node_make_1(type, c1); + node->child2 = c2; + return node; +} + +struct node* node_make_3(enum node_type type, struct node *c1, struct node *c2, struct node *c3) { + struct node *node = node_make_2(type, c1, c2); + node->child3 = c3; + return node; +} + +struct node* node_make_list_(struct node *first, ...) { + va_list ap; + va_start(ap, first); + struct node *cur = node_make_0(N_LIST_END); + struct node *root = node_make_2(N_LIST, first, cur); + struct node *next; + while ((next = va_arg(ap, struct node*))) { + cur->type = N_LIST; + cur->child1 = next; + cur->child2 = node_make_0(N_LIST_END); + cur = cur->child2; + } + return root; +} + +int node_list_length(struct node *node) { + if (node->type == N_LIST_END) return 0; + assert(node->type == N_LIST); + return 1 + node_list_length(node->child2); +} + +void node_delete_recursive(struct node *node) { + if (node->child1) node_delete_recursive(node->child1); + if (node->child2) node_delete_recursive(node->child2); + if (node->child3) node_delete_recursive(node->child3); + if (node->name) free(node->name); + free(node); +} + +static void write_spaces(int num, FILE *f) { + for (int i = 0; i < num; i++) fputc(' ', f); +} + +static const char* node_type_string(enum node_type type) { + switch (type) { + case N_LIST: return "N_LIST"; break; + case N_LIST_END: return "N_LIST_END"; break; + case N_BLOCK: return "N_BLOCK"; break; + case N_VAR_DECL_INIT: return "N_VAR_DECL_INIT"; break; + case N_VAR_DECL: return "N_VAR_DECL"; break; + case N_FUNC_DECL: return "N_FUNC_DECL"; break; + case N_NUM: return "N_NUM"; break; + case N_VAR: return "N_VAR"; break; + case N_IF: return "N_IF"; break; + case N_WHILE: return "N_WHILE"; break; + case N_RETURN: return "N_RETURN"; break; + case N_BINOP: return "N_BINOP"; break; + case N_UNOP: return "N_UNOP"; break; + case N_CALL: return "N_CALL"; break; + default: assert(false); + } +} + +static const char* oper_string(enum operator oper) { + switch (oper) { + case OP_ADD: return "OP_ADD"; + case OP_SUB: return "OP_SUB"; + case OP_MUL: return "OP_MUL"; + case OP_DIV: return "OP_DIV"; + case OP_MOD: return "OP_MOD"; + case OP_EQ: return "OP_EQ"; + case OP_NEQ: return "OP_NEQ"; + case OP_LT: return "OP_LT"; + case OP_GT: return "OP_GT"; + case OP_LEQ: return "OP_LEQ"; + case OP_GEQ: return "OP_GEQ"; + case OP_AND: return "OP_AND"; + case OP_OR: return "OP_OR"; + case OP_ASSIGN: return "OP_ASSIGN"; + case OP_NEG: return "OP_NEG"; + case OP_NOT: return "OP_NOT"; + default: assert(false); + } +} + +void node_print(const struct node *node, FILE *f, int indent) { + const char *t = node_type_string(node->type); + fprintf(f, "(%s", t); + if (node->rtype) { + fprintf(f, "[rtype="); + type_print(node->rtype, f); + fprintf(f, "]"); + } + if (node->name) fprintf(f, "[name=%s]", node->name); + if (node->type == N_NUM) fprintf(f, "[value=%d]", node->value); + if (node->type == N_BINOP || node->type == N_UNOP) { + fprintf(f, "[oper=%s]", oper_string(node->oper)); + } + if (node->child1) { + fprintf(f, "\n"); + write_spaces(2 * (indent + 1), f); + node_print(node->child1, f, indent + 1); + } + if (node->child2) { + fprintf(f, "\n"); + write_spaces(2 * (indent + 1), f); + node_print(node->child2, f, indent + 1); + } + if (node->child3) { + fprintf(f, "\n"); + write_spaces(2 * (indent + 1), f); + node_print(node->child3, f, indent + 1); + } + fprintf(f, ")"); +} @@ -0,0 +1,56 @@ +#pragma once + +#include "type.h" + + +enum node_type { + N_LIST, // [head] [tail] + N_LIST_END, // -- + N_BLOCK, // [body] + N_VAR_DECL_INIT, // rtype name [value] + N_VAR_DECL, // rtype name + N_FUNC_DECL, // rtype name [params] [body] + N_IF, // [cond] [body] [else-body] + N_WHILE, // [cond] [body] + N_RETURN, // [value] + N_NUM, // value + N_VAR, // name + N_BINOP, // [left] oper [right] + N_UNOP, // oper [arg] + N_CALL, // name [args] +}; + +enum operator { + // binary + OP_ADD, OP_SUB, OP_MUL, OP_DIV, OP_MOD, + OP_EQ, OP_NEQ, OP_LT, OP_GT, OP_LEQ, OP_GEQ, + OP_AND, OP_OR, + OP_ASSIGN, + // unary + OP_NEG, OP_NOT, OP_DEREF, OP_ADDROF, +}; + +struct node { + enum node_type type; + struct node *child1, *child2, *child3; + struct type *rtype; + char *name; + int value; + enum operator oper; +}; + + +struct node* node_make_0(enum node_type type); +struct node* node_make_1(enum node_type type, struct node *c1); +struct node* node_make_2(enum node_type type, struct node *c1, struct node *c2); +struct node* node_make_3(enum node_type type, struct node *c1, struct node *c2, struct node *c3); + +__attribute__((sentinel)) +struct node* node_make_list_(struct node *first, ...); +#define node_make_list(...) node_make_list_(__VA_ARGS__, NULL) + +int node_list_length(struct node *node); + +void node_delete_recursive(struct node *node); + +void node_print(const struct node *node, FILE *f, int indent); diff --git a/symbol.c b/symbol.c new file mode 100644 index 0000000..2f4adb7 --- /dev/null +++ b/symbol.c @@ -0,0 +1,23 @@ +#include <stdlib.h> +#include "symbol.h" + + +struct symbol* symbol_make_var(char *name, struct type *rtype) { + struct symbol *sym = malloc(sizeof(struct symbol)); + sym->name = name; + sym->stype = ST_VAR; + sym->rtype = rtype; + sym->numparams = 0; + sym->params = NULL; + return sym; +} + +struct symbol* symbol_make_func(char *name, struct type *rtype, int numparams) { + struct symbol *sym = malloc(sizeof(struct symbol)); + sym->name = name; + sym->stype = ST_FUNC; + sym->rtype = rtype; + sym->numparams = numparams; + sym->params = malloc(numparams * sizeof(struct pair_tn)); + return sym; +} diff --git a/symbol.h b/symbol.h new file mode 100644 index 0000000..454e8bc --- /dev/null +++ b/symbol.h @@ -0,0 +1,29 @@ +#pragma once + +#include "ir.h" +#include "type.h" + + +enum symbol_type { + ST_VAR, + ST_FUNC +}; + +struct pair_tn { + struct type *type; + char *name; +}; + +struct symbol { + char *name; + enum symbol_type stype; + struct type *rtype; + struct ref ref; + + int numparams; + struct pair_tn *params; +}; + + +struct symbol* symbol_make_var(char *name, struct type *rtype); +struct symbol* symbol_make_func(char *name, struct type *rtype, int numparams); diff --git a/symtab.c b/symtab.c new file mode 100644 index 0000000..4229a63 --- /dev/null +++ b/symtab.c @@ -0,0 +1,60 @@ +#include <stdlib.h> +#include <string.h> +#include "symtab.h" + + +struct symtab* symtab_make() { + struct symtab *symtab = malloc(sizeof(struct symtab)); + symtab->parent = NULL; + symtab->cap = 16; + symtab->len = 0; + symtab->syms = malloc(symtab->cap * sizeof(struct symbol*)); + return symtab; +} + +void symtab_delete_recursive(struct symtab *symtab) { + free(symtab->syms); + struct symtab *parent = symtab->parent; + free(symtab); + if (parent) symtab_delete_recursive(parent); +} + +struct symbol* symtab_find(const struct symtab *symtab, const char *name) { + struct symbol *sym = symtab_find_local(symtab, name); + if (sym) return sym; + else if (symtab->parent) return symtab_find(symtab->parent, name); + else return NULL; +} + +struct symbol* symtab_find_local(const struct symtab *symtab, const char *name) { + for (int i = 0; i < symtab->len; i++) { + if (strcmp(symtab->syms[i]->name, name) == 0) return symtab->syms[i]; + } + return NULL; +} + +void symtab_insert(struct symtab *symtab, struct symbol *sym) { + if (symtab->len == symtab->cap) { + symtab->cap *= 2; + symtab->syms = realloc(symtab->syms, symtab->cap * sizeof(struct symbol*)); + } + symtab->syms[symtab->len++] = sym; +} + +struct symtab* symtab_root(struct symtab *symtab) { + if (symtab->parent) return symtab_root(symtab->parent); + else return symtab; +} + +struct symtab* symtab_sub(struct symtab *symtab) { + struct symtab *sub = symtab_make(); + sub->parent = symtab; + return sub; +} + +struct symtab* symtab_delete_get_parent(struct symtab *symtab) { + struct symtab *parent = symtab->parent; + symtab->parent = NULL; + symtab_delete_recursive(symtab); + return parent; +} diff --git a/symtab.h b/symtab.h new file mode 100644 index 0000000..18dd8e0 --- /dev/null +++ b/symtab.h @@ -0,0 +1,23 @@ +#pragma once + +#include "symbol.h" +#include "type.h" + + +struct symtab { + struct symtab *parent; + int cap,len; + struct symbol **syms; +}; + + +struct symtab* symtab_make(); +void symtab_delete_recursive(struct symtab *symtab); + +struct symbol* symtab_find(const struct symtab *symtab, const char *name); +struct symbol* symtab_find_local(const struct symtab *symtab, const char *name); +void symtab_insert(struct symtab *symtab, struct symbol *sym); + +struct symtab* symtab_root(struct symtab *symtab); +struct symtab* symtab_sub(struct symtab *symtab); +struct symtab* symtab_delete_get_parent(struct symtab *symtab); diff --git a/test/t1.c b/test/t1.c new file mode 100644 index 0000000..229d5e9 --- /dev/null +++ b/test/t1.c @@ -0,0 +1,21 @@ +int glob; + +int fibo(int n) { + if (n <= 0) return 0; + int a = 0; + int b = 1; + int i = 1; + while (i < n) { + int c = a + b; + a = b; + b = c; + i = i + 1; + } + return b; +} + +int main(int argc, int **argv) { + int a = 1; + int b = 2 + a * 3; + holo_dec(a); +} @@ -0,0 +1,79 @@ +#include <stdlib.h> +#include <stdbool.h> +#include <assert.h> +#include "type.h" + + +static struct { + int cap, num; + struct type **types; +} cache; + +static struct type *voidtype; + +static void init_cache(void) { + if (voidtype) return; + voidtype = malloc(sizeof(struct type)); + voidtype->tag = T_VOID; + cache.cap = 8; + cache.num = 0; + cache.types = malloc(cache.cap * sizeof(struct type*)); +} + +void type_cache_cleanup(void) { + for (int i = 0; i < cache.num; i++) { + free(cache.types[i]); + } + free(cache.types); + free(voidtype); +} + +static void cache_add(struct type *type) { + if (cache.num == cache.cap) { + cache.cap *= 2; + cache.types = realloc(cache.types, cache.cap * sizeof(struct type*)); + } + cache.types[cache.num++] = type; +} + + +struct type* type_int(int size) { + init_cache(); + for (int i = 0; i < cache.num; i++) { + if (cache.types[i]->tag == T_INT && cache.types[i]->size == size) { + return cache.types[i]; + } + } + struct type *type = malloc(sizeof(struct type)); + type->tag = T_INT; + type->size = size; + cache_add(type); + return type; +} + +struct type* type_void(void) { + return voidtype; +} + +struct type* type_ptr(struct type *target) { + init_cache(); + for (int i = 0; i < cache.num; i++) { + if (cache.types[i]->tag == T_PTR && cache.types[i]->target == target) { + return cache.types[i]; + } + } + struct type *type = malloc(sizeof(struct type)); + type->tag = T_PTR; + type->target = target; + cache_add(type); + return type; +} + +void type_print(struct type *type, FILE *f) { + switch (type->tag) { + case T_INT: fprintf(f, "i%d", type->size); break; + case T_VOID: fprintf(f, "void"); break; + case T_PTR: type_print(type->target, f); fprintf(f, "*"); break; + default: assert(false); + } +} @@ -0,0 +1,32 @@ +#pragma once + +#include <stdio.h> + + +enum type_tag { + T_INT, + T_VOID, + T_PTR +}; + +struct type { + enum type_tag tag; + union { + struct { // T_INT + int size; + }; + struct {}; // T_VOID + struct { // T_PTR + struct type *target; + }; + }; +}; + + +void type_cache_cleanup(void); + +struct type* type_int(int size); +struct type* type_void(void); +struct type* type_ptr(struct type *type); + +void type_print(struct type *type, FILE *f); |