From 9911f9a73c7dc46069199e52f2bc54082d10366c Mon Sep 17 00:00:00 2001 From: Tom Smeding Date: Wed, 3 Jan 2018 23:10:59 +0100 Subject: Initial --- .gitignore | 4 + Makefile | 28 +++++++ c.l | 119 ++++++++++++++++++++++++++ c.y | 248 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ compiler.c | 279 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ compiler.h | 7 ++ ir.c | 110 ++++++++++++++++++++++++ ir.h | 83 ++++++++++++++++++ main.c | 37 ++++++++ node.c | 140 +++++++++++++++++++++++++++++++ node.h | 56 +++++++++++++ symbol.c | 23 +++++ symbol.h | 29 +++++++ symtab.c | 60 +++++++++++++ symtab.h | 23 +++++ test/t1.c | 21 +++++ type.c | 79 +++++++++++++++++ type.h | 32 +++++++ 18 files changed, 1378 insertions(+) create mode 100644 .gitignore create mode 100644 Makefile create mode 100644 c.l create mode 100644 c.y create mode 100644 compiler.c create mode 100644 compiler.h create mode 100644 ir.c create mode 100644 ir.h create mode 100644 main.c create mode 100644 node.c create mode 100644 node.h create mode 100644 symbol.c create mode 100644 symbol.h create mode 100644 symtab.c create mode 100644 symtab.h create mode 100644 test/t1.c create mode 100644 type.c create mode 100644 type.h diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b711b6b --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +as1 +as2 +bison-out +ccomp diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..e206eea --- /dev/null +++ b/Makefile @@ -0,0 +1,28 @@ +CC = gcc +LEX = flex +YAXX = bison + +CFLAGS = -Wall -Wextra -std=c11 -g -I. -D_GNU_SOURCE +LDFLAGS = -lfl + +TARGET = ccomp + +.PHONY: all clean + +all: $(TARGET) + +clean: + rm -f $(TARGET) *.o + rm -rf bison-out + + +bison-out/y.tab.c bison-out/y.tab.h: c.y + @mkdir -p bison-out + bison -d -b bison-out/y c.y + +bison-out/lex.yy.c: c.l + @mkdir -p bison-out + flex -o bison-out/lex.yy.c c.l + +$(TARGET): $(wildcard *.c) $(wildcard *.h) bison-out/y.tab.c bison-out/lex.yy.c + $(CC) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@ diff --git a/c.l b/c.l new file mode 100644 index 0000000..376ed4c --- /dev/null +++ b/c.l @@ -0,0 +1,119 @@ +%{ + +#include +#include +#include +#include +#include "type.h" +#include "y.tab.h" + +#define DEBUG + +int yylex(void); + +int lineno=1; + +static enum state { + SOP, + STERM, + SPTR +} state = STERM; + +__attribute__((format (printf, 1, 2))) +void pdebug(const char *format, ...); + +#define RETS(val_) {pdebug(#val_ ": %s\n", yytext); yylval.id = strdup(yytext); return val_;} +#define RET_TYPE(val_, ty_) {pdebug(#val_ ": %s (" #ty_ ")\n", yytext); yylval.type = ty_; return val_;} +#define RET(val_) {pdebug(#val_ "\n"); return val_;} + +%} + +%option noinput nounput + +LETTER [a-zA-Z] +DIGIT [0-9] +ID {LETTER}({LETTER}|{DIGIT}|_)* +DIGITS {DIGIT}{DIGIT}* + +NUM -?{DIGITS} + +%x C_COMMENT + + +%% + +{NUM} { state = SOP; RETS(NUM); } + +int { state = SPTR; RET_TYPE(INT, type_int(16)); } +void { state = SPTR; RET_TYPE(VOID, type_void()); } +if { RET(IF); } +else { RET(ELSE); } +while { RET(WHILE); } +for { RET(FOR); } +return { state = STERM; RET(RETURN); } + +{ID} { state = SOP; RETS(ID); } + +"//"[^\n]* {} + +"/*" { pdebug("C_COMMENT... "); fflush(stdout); BEGIN(C_COMMENT); } +"*/" { pdebug("done\n"); BEGIN(INITIAL); } +. {} +\n { lineno++; } + +\n { lineno++; } +[ \t]+ {} + +"+" { state = STERM; RETS(ADDOP); } +[/%] { state = STERM; RETS(MULOP); } +"<"=?|">"=?|==|!= { state = STERM; RETS(RELOP); } +&&|"||" { state = STERM; RETS(BOOLOP); } += { state = STERM; RET(ASSIGN); } +"!" { state = STERM; RET(NOT); } + +"-" { + switch (state) { + case SOP: state = STERM; RETS(ADDOP); + case STERM: state = STERM; RETS(NEGATE); + default: fprintf(stderr, "Unexpected '-'\n"); exit(1); + } + } + +"*" { + switch (state) { + case SOP: state = STERM; RETS(MULOP); + case STERM: state = STERM; RET(DEREF); + case SPTR: state = SPTR; RET(PTR); + default: fprintf(stderr, "Unexpected '*'\n"); exit(1); + } + } + +"&" { + switch (state) { + case SOP: fprintf(stderr, "Bit-and not implemented\n"); exit(1); + case STERM: state = STERM; RET(ADDROF); + default: fprintf(stderr, "Unexpected '&'\n"); exit(1); + } + } + +")" { state = SOP; pdebug("')'\n"); return ')'; } + +. { pdebug(".: %c\n", yytext[0]); return yytext[0]; } + + +%% + + +void pdebug(const char *format, ...) { + (void)format; +#ifdef DEBUG + va_list ap; + va_start(ap, format); + vfprintf(stderr, format, ap); + va_end(ap); +#endif +} + + +/* vim:et:ts=4:sw=4 +*/ diff --git a/c.y b/c.y new file mode 100644 index 0000000..5aafe26 --- /dev/null +++ b/c.y @@ -0,0 +1,248 @@ +%{ + +#include +#include +#include +#include +#include +#include "node.h" + +static void yyerror(const char*); + +int yylex(void); + +static enum operator addop_to_oper(const char *op); +static enum operator mulop_to_oper(const char *op); +static enum operator relop_to_oper(const char *op); +static enum operator boolop_to_oper(const char *op); + +extern int lineno; + +struct node *root_node; + +%} + +%start start + +%token ID NUM INT VOID IF ELSE WHILE FOR RETURN PTR +%token DEREF ADDROF NEGATE NOT ADDOP MULOP RELOP BOOLOP ASSIGN + +%left ASSIGN +%left BOOLOP +%left RELOP +%left ADDOP +%left MULOP +%left NOT NEGATE DEREF ADDROF + +%type ID NUM ADDOP MULOP RELOP BOOLOP +%type toplevel toplevel_decl var_decl func_decl parameter_list parameter block +%type statement statement_list open_statement matched_statement other_statement +%type expression atom_expr expression_list +%type type INT VOID + +%union { + char *id; + struct node *node; + struct type *type; +} + +%destructor { free($$); } +%destructor { node_delete_recursive($$); } + +%% + +start: toplevel { + root_node = $1; + } ; + +toplevel: toplevel_decl toplevel { + $$ = node_make_2(N_LIST, $1, $2); + } + | toplevel_decl { + $$ = $1; + } ; + +toplevel_decl: var_decl | func_decl ; + +var_decl: type ID ASSIGN expression ';' { + $$ = node_make_1(N_VAR_DECL_INIT, $4); + $$->rtype = $1; + $$->name = $2; + } + | type ID ';' { + $$ = node_make_0(N_VAR_DECL); + $$->rtype = $1; + $$->name = $2; + } ; + +func_decl: type ID '(' parameter_list ')' block { + $$ = node_make_2(N_FUNC_DECL, $4, $6); + $$->rtype = $1; + $$->name = $2; + } ; + +type: type PTR { $$ = type_ptr($1); } + | INT { $$ = type_int(16); } + | VOID { $$ = type_void(); } ; + +parameter_list: parameter { + $$ = node_make_2(N_LIST, $1, node_make_0(N_LIST_END)); + } + | parameter ',' parameter_list { + $$ = node_make_2(N_LIST, $1, $3); + } ; + +parameter: type ID { + $$ = node_make_0(N_VAR_DECL); + $$->rtype = $1; + $$->name = $2; + } ; + +block: '{' statement_list '}' { + $$ = node_make_1(N_BLOCK, $2); + } ; + +statement_list: statement { + $$ = node_make_2(N_LIST, $1, node_make_0(N_LIST_END)); + } + | statement statement_list { + $$ = node_make_2(N_LIST, $1, $2); + } ; + +statement: open_statement | matched_statement ; + +open_statement: IF '(' expression ')' statement { + $$ = node_make_3(N_IF, $3, $5, node_make_0(N_LIST_END)); + } + | IF '(' expression ')' matched_statement ELSE open_statement { + $$ = node_make_3(N_IF, $3, $5, $7); + } + | WHILE '(' expression ')' open_statement { + $$ = node_make_2(N_WHILE, $3, $5); + } + /*| FOR '(' statement ';' expression ';' statement ')' open_statement { + $$ = node_make_1(N_BLOCK, + node_make_list( + $3, node_make_2(N_WHILE, $5, node_make_list($9, $7)))); + }*/ ; + +matched_statement: + IF '(' expression ')' matched_statement ELSE matched_statement { + $$ = node_make_3(N_IF, $3, $5, $7); + } + | WHILE '(' expression ')' matched_statement { + $$ = node_make_2(N_WHILE, $3, $5); + } + /*| FOR '(' statement ';' expression ';' statement ')' matched_statement { + $$ = node_make_1(N_BLOCK, + node_make_list( + $3, node_make_2(N_WHILE, $5, node_make_list($9, $7)))); + }*/ + | other_statement ; + +other_statement: + var_decl | expression ';' | block + | RETURN expression ';' { + $$ = node_make_1(N_RETURN, $2); + } ; + +expression: atom_expr + | NOT expression { $$ = node_make_1(N_UNOP, $2); $$->oper = OP_NOT; } + | NEGATE expression { $$ = node_make_1(N_UNOP, $2); $$->oper = OP_NEG; } + | DEREF expression { $$ = node_make_1(N_UNOP, $2); $$->oper = OP_DEREF; } + | ADDROF expression { $$ = node_make_1(N_UNOP, $2); $$->oper = OP_ADDROF; } + | expression ADDOP expression { + $$ = node_make_2(N_BINOP, $1, $3); + $$->oper = addop_to_oper($2); + free($2); + } + | expression MULOP expression { + $$ = node_make_2(N_BINOP, $1, $3); + $$->oper = mulop_to_oper($2); + free($2); + } + | expression RELOP expression { + $$ = node_make_2(N_BINOP, $1, $3); + $$->oper = relop_to_oper($2); + free($2); + } + | expression BOOLOP expression { + $$ = node_make_2(N_BINOP, $1, $3); + $$->oper = boolop_to_oper($2); + free($2); + } + | atom_expr ASSIGN expression { + $$ = node_make_2(N_BINOP, $1, $3); + $$->oper = OP_ASSIGN; + } ; + +atom_expr: NUM { + $$ = node_make_0(N_NUM); + $$->value = strtol($1, NULL, 10); + free($1); + } + | ID { + $$ = node_make_0(N_VAR); + $$->name = $1; + } + | '(' expression ')' { + $$ = $2; + } + | ID '(' expression_list ')' { + $$ = node_make_1(N_CALL, $3); + $$->name = $1; + } + | atom_expr '[' expression ']' { + $$ = node_make_1(N_UNOP, node_make_2(N_BINOP, $1, $3)); + $$->child2->oper = OP_ADD; + $$->oper = OP_DEREF; + } ; + +expression_list: expression { + $$ = node_make_2(N_LIST, $1, node_make_0(N_LIST_END)); + } + | expression ',' expression_list { + $$ = node_make_2(N_LIST, $1, $3); + } ; + + +%% + + +static void yyerror(const char *s) { + fprintf(stderr, "Parse error: %s\n", s); + exit(1); +} + +static enum operator addop_to_oper(const char *op) { + if (strcmp(op, "+") == 0) return OP_ADD; + else if (strcmp(op, "-") == 0) return OP_SUB; + else assert(false); +} + +static enum operator mulop_to_oper(const char *op) { + if (strcmp(op, "*") == 0) return OP_MUL; + else if (strcmp(op, "/") == 0) return OP_DIV; + else if (strcmp(op, "%") == 0) return OP_MOD; + else assert(false); +} + +static enum operator relop_to_oper(const char *op) { + if (strcmp(op, "==") == 0) return OP_EQ; + else if (strcmp(op, "!=") == 0) return OP_NEQ; + else if (strcmp(op, "<") == 0) return OP_LT; + else if (strcmp(op, ">") == 0) return OP_GT; + else if (strcmp(op, "<=") == 0) return OP_LEQ; + else if (strcmp(op, ">=") == 0) return OP_GEQ; + else assert(false); +} + +static enum operator boolop_to_oper(const char *op) { + if (strcmp(op, "&&") == 0) return OP_AND; + else if (strcmp(op, "||") == 0) return OP_OR; + else assert(false); +} + + +/* vim:et:ts=4:sw=4 +*/ diff --git a/compiler.c b/compiler.c new file mode 100644 index 0000000..42ad25c --- /dev/null +++ b/compiler.c @@ -0,0 +1,279 @@ +#include +#include +#include +#include "compiler.h" +#include "symtab.h" + + +static void write_func_params(struct pair_tn *params, struct node *list) { + if (list->type == N_LIST_END) return; + assert(list->child1->type == N_VAR_DECL); + params[0].type = list->child1->rtype; + params[0].name = strdup(list->child1->name); + write_func_params(params + 1, list->child2); +} + +static bool oper_is_basic_arith(enum operator oper) { + return oper == OP_ADD || oper == OP_SUB || oper == OP_MUL || oper == OP_DIV || oper == OP_MOD; +} + +static bool oper_is_comparison(enum operator oper) { + return oper == OP_EQ || oper == OP_NEQ || + oper == OP_LT || oper == OP_GT || + oper == OP_LEQ || oper == OP_GEQ; +} + +static struct ref compile_expr(struct ir *ir, struct symtab *symtab, struct node *node); + +static void compile_boolexpr( + struct ir *ir, struct symtab *symtab, struct node *node, char *truedest, char *falsedest) { + + if (node->type == N_BINOP && oper_is_comparison(node->oper)) { + struct ref r1 = compile_expr(ir, symtab, node->child1); + struct ref r2 = compile_expr(ir, symtab, node->child2); + ir_append(ir, irins_make_12(INS_CMP, r1, r2)); + enum condcode condcode; + switch (node->oper) { + case OP_EQ: condcode = CCNZ; break; + case OP_NEQ: condcode = CCZ; break; + case OP_LT: condcode = CCGE; break; + case OP_GT: condcode = CCLE; break; + case OP_LEQ: condcode = CCG; break; + case OP_GEQ: condcode = CCL; break; + default: assert(false); + } + ir_append(ir, irins_make_jcc(falsedest, condcode)); + ir_append(ir, irins_make_name(INS_JMP, truedest)); + } else if (node->type == N_BINOP && node->oper == OP_AND) { + char *try2lbl = gen_label_name(); + compile_boolexpr(ir, symtab, node->child1, try2lbl, falsedest); + ir_append(ir, irins_make_name(INS_LBL, try2lbl)); + compile_boolexpr(ir, symtab, node->child2, truedest, falsedest); + } else if (node->type == N_BINOP && node->oper == OP_OR) { + char *try2lbl = gen_label_name(); + compile_boolexpr(ir, symtab, node->child1, truedest, try2lbl); + ir_append(ir, irins_make_name(INS_LBL, try2lbl)); + compile_boolexpr(ir, symtab, node->child2, truedest, falsedest); + } else if (node->type == N_UNOP && node->oper == OP_NOT) { + compile_boolexpr(ir, symtab, node->child1, falsedest, truedest); + } else { + struct ref ref = compile_expr(ir, symtab, node); + ir_append(ir, irins_make_12(INS_TEST, ref, ref)); + ir_append(ir, irins_make_jcc(falsedest, CCZ)); + ir_append(ir, irins_make_name(INS_JMP, truedest)); + } +} + +static struct ref compile_expr(struct ir *ir, struct symtab *symtab, struct node *node) { + switch (node->type) { + case N_NUM: { + struct ref ref = ref_next_register(); + ir_append(ir, irins_make_01(INS_MOV, ref, ref_imm(node->value))); + return ref; + } + + case N_VAR: { + struct symbol *sym = symtab_find(symtab, node->name); + if (!sym) { + fprintf(stderr, "Use of undeclared variable '%s'\n", node->name); + exit(1); + } + return sym->ref; + } + + case N_BINOP: { + if (oper_is_basic_arith(node->oper)) { + struct ref r0 = ref_next_register(); + struct ref r1 = compile_expr(ir, symtab, node->child1); + struct ref r2 = compile_expr(ir, symtab, node->child2); + enum instype instype; + switch (node->oper) { + case OP_ADD: instype = INS_ADD; break; + case OP_SUB: instype = INS_SUB; break; + case OP_MUL: instype = INS_MUL; break; + case OP_DIV: instype = INS_DIV; break; + case OP_MOD: instype = INS_MOD; break; + default: assert(false); + } + ir_append(ir, irins_make_012(instype, r0, r1, r2)); + return r0; + } else if (oper_is_comparison(node->oper) || node->oper == OP_AND || node->oper == OP_OR) { + char *truelbl = gen_label_name(); + char *falselbl = gen_label_name(); + char *afterlbl = gen_label_name(); + compile_boolexpr(ir, symtab, node, truelbl, falselbl); + struct ref cval = ref_next_register(); + ir_append(ir, irins_make_name(INS_LBL, truelbl)); + ir_append(ir, irins_make_01(INS_MOV, cval, ref_imm(1))); + ir_append(ir, irins_make_name(INS_JMP, afterlbl)); + ir_append(ir, irins_make_name(INS_LBL, falselbl)); + ir_append(ir, irins_make_01(INS_MOV, cval, ref_imm(0))); + ir_append(ir, irins_make_name(INS_LBL, afterlbl)); + return cval; + } else if (node->oper == OP_ASSIGN) { + if (node->child1->type == N_VAR || + (node->child1->type == N_UNOP && node->child1->oper == OP_DEREF)) { + struct ref r0 = compile_expr(ir, symtab, node->child1); + struct ref r1 = compile_expr(ir, symtab, node->child2); + ir_append(ir, irins_make_01(INS_MOV, r0, r1)); + return r0; + } else { + fprintf(stderr, "Invalid left-hand side in assignment.\n"); + exit(1); + } + } else { + assert(false); + } + } + + case N_UNOP: { + switch (node->oper) { + case OP_NEG: { + struct ref r0 = ref_next_register(); + struct ref r1 = compile_expr(ir, symtab, node->child1); + ir_append(ir, irins_make_01(INS_NEG, r0, r1)); + return r0; + } + + case OP_NOT: { + struct ref r0 = ref_next_register(); + struct ref r1 = compile_expr(ir, symtab, node->child1); + ir_append(ir, irins_make_01(INS_NOT, r0, r1)); + return r0; + } + + case OP_DEREF: { + struct ref addr = compile_expr(ir, symtab, node->child1); + struct ref r1; + switch (addr.type) { + case REF_REG: r1 = ref_mem(addr.reg, 0, false); break; + case REF_IMM: r1 = ref_mem(-1, addr.imm, false); break; + case REF_MEM: { + r1 = ref_next_register(); + ir_append(ir, irins_make_01(INS_MOV, r1, addr)); + r1 = ref_mem(r1.reg, 0, false); break; + break; + } + } + struct ref r0 = ref_next_register(); + ir_append(ir, irins_make_01(INS_MOV, r0, r1)); + return r0; + } + + case OP_ADDROF: + fprintf(stderr, "OP_ADDROF not implemented\n"); + exit(1); + + default: + assert(false); + } + } + + case N_CALL: { + fprintf(stderr, "function calls not implemented\n"); + exit(1); + /*struct symbol *sym = symtab_find(symtab, node->name); + if (!sym) { + fprintf(stderr, "Call of undeclared function '%s'\n", node->name); + exit(1); + } + ;*/ + } + + default: + fprintf(stderr, "Invalid node type %d in expression\n", node->type); + exit(1); + } +} + +static void compile_node(struct ir *ir, struct symtab *symtab, struct node *node) { + switch (node->type) { + case N_LIST: + compile_node(ir, symtab, node->child1); + compile_node(ir, symtab, node->child2); + break; + + case N_LIST_END: + break; + + case N_BLOCK: { + symtab = symtab_sub(symtab); + compile_node(ir, symtab, node->child1); + symtab = symtab_delete_get_parent(symtab); + break; + } + + case N_VAR_DECL_INIT: + case N_VAR_DECL: { + if (symtab_find_local(symtab, node->name)) { + fprintf(stderr, "Redeclaration of variable '%s'\n", node->name); + exit(1); + } + struct symbol *sym = symbol_make_var(node->name, node->rtype); + sym->ref = ref_next_register(); + symtab_insert(symtab, sym); + ir_append(ir, irins_make_01(INS_MOV, sym->ref, ref_imm(node->value))); + break; + } + + case N_FUNC_DECL: { + if (symtab_find(symtab, node->name)) { + fprintf(stderr, "Redeclaration of function '%s'\n", node->name); + exit(1); + } + int numparams = node_list_length(node->child1); + struct symbol *sym = symbol_make_func(node->name, node->rtype, numparams); + write_func_params(sym->params, node->child1); + symtab_insert(symtab_root(symtab), sym); + ir_append(ir, irins_make_name(INS_LBL, strdup(node->name))); + symtab = symtab_sub(symtab); + compile_node(ir, symtab, node->child2); + symtab = symtab_delete_get_parent(symtab); + break; + } + + case N_IF: { + char *elselbl = gen_label_name(); + char *afterlbl = gen_label_name(); + struct ref condref = compile_expr(ir, symtab, node->child1); + ir_append(ir, irins_make_12(INS_TEST, condref, condref)); + ir_append(ir, irins_make_jcc(elselbl, CCZ)); + compile_node(ir, symtab, node->child2); + ir_append(ir, irins_make_name(INS_JMP, afterlbl)); + ir_append(ir, irins_make_name(INS_LBL, elselbl)); + compile_node(ir, symtab, node->child3); + ir_append(ir, irins_make_name(INS_LBL, afterlbl)); + break; + } + + case N_WHILE: { + char *startlbl = gen_label_name(); + char *afterlbl = gen_label_name(); + ir_append(ir, irins_make_name(INS_LBL, startlbl)); + struct ref condref = compile_expr(ir, symtab, node->child1); + ir_append(ir, irins_make_12(INS_TEST, condref, condref)); + ir_append(ir, irins_make_jcc(afterlbl, CCZ)); + compile_node(ir, symtab, node->child2); + ir_append(ir, irins_make_name(INS_JMP, startlbl)); + ir_append(ir, irins_make_name(INS_LBL, afterlbl)); + break; + } + + case N_RETURN: { + struct ref ref = compile_expr(ir, symtab, node->child1); + ir_append(ir, irins_make_1(INS_RETV, ref)); + break; + } + + default: // hmm, maybe it's an expression statement? + compile_expr(ir, symtab, node); + break; + } +} + +struct ir* compile(struct node *node) { + struct ir *ir = ir_make(); + struct symtab *symtab = symtab_make(); + compile_node(ir, symtab, node); + return ir; +} diff --git a/compiler.h b/compiler.h new file mode 100644 index 0000000..7465c9e --- /dev/null +++ b/compiler.h @@ -0,0 +1,7 @@ +#pragma once + +#include "ir.h" +#include "node.h" + + +struct ir* compile(struct node *node); diff --git a/ir.c b/ir.c new file mode 100644 index 0000000..9d28e2f --- /dev/null +++ b/ir.c @@ -0,0 +1,110 @@ +#include +#include +#include "ir.h" + + +struct ir* ir_make(void) { + struct ir *ir = malloc(sizeof(struct ir)); + ir->cap = 32; + ir->len = 0; + ir->inss = malloc(ir->cap * sizeof(struct irins*)); + ir->globspace = 0; + return ir; +} + +void ir_delete(struct ir *ir) { + for (int i = 0; i < ir->len; i++) { + irins_delete(ir->inss[i]); + } + free(ir->inss); + free(ir); +} + +struct ref ir_reserve_global(struct ir *ir, int size) { + int offset = ir->globspace; + ir->globspace += size; + return ref_mem(-1, offset, true); +} + +void ir_append(struct ir *ir, struct irins *ins) { + if (ir->len == ir->cap) { + ir->cap *= 2; + ir->inss = realloc(ir->inss, ir->cap * sizeof(struct irins*)); + } + ir->inss[ir->len++] = ins; +} + +struct irins* irins_make(enum instype type) { + struct irins *ins = malloc(sizeof(struct irins)); + ins->type = type; + ins->name = NULL; + return ins; +} + +struct irins* irins_make_name(enum instype type, char *name) { + struct irins *ins = irins_make(type); + ins->name = name; + return ins; +} + +struct irins* irins_make_01(enum instype type, struct ref r0, struct ref r1) { + struct irins *ins = irins_make(type); + ins->r0 = r0; + ins->r1 = r1; + return ins; +} + +struct irins* irins_make_012(enum instype type, struct ref r0, struct ref r1, struct ref r2) { + struct irins *ins = irins_make_01(type, r0, r1); + ins->r2 = r2; + return ins; +} + +struct irins* irins_make_1(enum instype type, struct ref r1) { + struct irins *ins = irins_make(type); + ins->r1 = r1; + return ins; +} + +struct irins* irins_make_12(enum instype type, struct ref r1, struct ref r2) { + struct irins *ins = irins_make(type); + ins->r1 = r1; + ins->r2 = r2; + return ins; +} + +struct irins* irins_make_jcc(char *name, enum condcode condcode) { + struct irins *ins = irins_make(INS_JCC); + ins->name = name; + ins->condcode = condcode; + return ins; +} + +void irins_delete(struct irins *ins) { + if (ins->name) free(ins->name); + free(ins); +} + +char* gen_label_name(void) { + static int next = 0; + char *buf = malloc(16); + sprintf(buf, "L%d", next++); + return buf; +} + +struct ref ref_reg(int reg) { + return (struct ref){REF_REG, reg, 0, false, 0}; +} + +struct ref ref_mem(int reg, int offset, bool rel_heap) { + return (struct ref){REF_MEM, reg, offset, rel_heap, 0}; +} + +struct ref ref_imm(int imm) { + return (struct ref){REF_IMM, -1, 0, false, imm}; +} + +struct ref ref_next_register(void) { + static int next = 0; + return ref_reg(next++); +} diff --git a/ir.h b/ir.h new file mode 100644 index 0000000..b359d00 --- /dev/null +++ b/ir.h @@ -0,0 +1,83 @@ +#pragma once + +#include + + +// IR uses an unlimited number of registers. This will later be fixed by a regalloc. + +enum instype { + INS_ADD, // r0 = r1 + r2 + INS_SUB, // r0 = r1 - r2 + INS_MUL, // r0 = r1 * r2 + INS_DIV, // r0 = r1 / r2 + INS_MOD, // r0 = r1 % r2 + INS_NEG, // r0 = -r1 + INS_NOT, // r0 = !r1 + INS_TEST, // r1 & r2 (flags) + INS_PUSH, // push r1 + INS_POP, // r0 = pop + INS_CMP, // r1 - r2 (flags) + INS_LBL, // label name + INS_JMP, // jmp name + INS_JCC, // if condcode, jmp name + INS_CALL, // call name + INS_RET, // return + INS_RETV, // return r1 + INS_MOV, // r0 = r1 +}; + +enum condcode { + CCZ, CCNZ, CCL, CCG, CCLE, CCGE +}; + +enum reftype { + REF_REG, // reg + REF_MEM, // [reg + offset + (rel_heap ? heap_base : 0)] + REF_IMM, // imm +}; + +struct ref { + enum reftype type; + int reg; // if -1 in a REF_MEM, unused + int offset; + bool rel_heap; // whether the heap base address should be added to the offset + int imm; +}; + +struct irins { + enum instype type; + struct ref r0, r1, r2; + char *name; + enum condcode condcode; +}; + +struct ir { + int cap, len; + struct irins **inss; + int globspace; +}; + + +struct ir* ir_make(void); +void ir_delete(struct ir *ir); + +// returns offset +struct ref ir_reserve_global(struct ir *ir, int size); + +void ir_append(struct ir *ir, struct irins *ins); + +struct irins* irins_make(enum instype type); +struct irins* irins_make_name(enum instype type, char *name); +struct irins* irins_make_01(enum instype type, struct ref r0, struct ref r1); +struct irins* irins_make_012(enum instype type, struct ref r0, struct ref r1, struct ref r2); +struct irins* irins_make_1(enum instype type, struct ref r1); +struct irins* irins_make_12(enum instype type, struct ref r1, struct ref r2); +struct irins* irins_make_jcc(char *name, enum condcode condcode); +void irins_delete(struct irins *ins); + +char* gen_label_name(void); + +struct ref ref_reg(int reg); +struct ref ref_mem(int reg, int offset, bool rel_heap); +struct ref ref_imm(int imm); +struct ref ref_next_register(void); diff --git a/main.c b/main.c new file mode 100644 index 0000000..173c19b --- /dev/null +++ b/main.c @@ -0,0 +1,37 @@ +#include +#include +#include +#include "compiler.h" +#include "node.h" + + +extern FILE *yyin; +extern int yyparse(void); +extern struct node *root_node; + + +int main(int argc, char **argv) { + if (argc != 2) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return 1; + } + yyin = fopen(argv[1], "r"); + if (!yyin) { + fprintf(stderr, "Cannot open file '%s'\n", argv[1]); + return 1; + } + + int result = yyparse(); + if(!feof(yyin)) { + assert(false); + } + + node_print(root_node, stdout, 0); + printf("\n"); + + struct ir *ir = compile(root_node); + + type_cache_cleanup(); + + return result; +} diff --git a/node.c b/node.c new file mode 100644 index 0000000..b20c38e --- /dev/null +++ b/node.c @@ -0,0 +1,140 @@ +#include +#include +#include +#include +#include "node.h" + + +struct node* node_make_0(enum node_type type) { + struct node *node = malloc(sizeof(struct node)); + node->type = type; + node->child1 = node->child2 = node->child3 = NULL; + node->rtype = NULL; + node->name = NULL; + node->value = 0; + return node; +} + +struct node* node_make_1(enum node_type type, struct node *c1) { + struct node *node = node_make_0(type); + node->child1 = c1; + return node; +} + +struct node* node_make_2(enum node_type type, struct node *c1, struct node *c2) { + struct node *node = node_make_1(type, c1); + node->child2 = c2; + return node; +} + +struct node* node_make_3(enum node_type type, struct node *c1, struct node *c2, struct node *c3) { + struct node *node = node_make_2(type, c1, c2); + node->child3 = c3; + return node; +} + +struct node* node_make_list_(struct node *first, ...) { + va_list ap; + va_start(ap, first); + struct node *cur = node_make_0(N_LIST_END); + struct node *root = node_make_2(N_LIST, first, cur); + struct node *next; + while ((next = va_arg(ap, struct node*))) { + cur->type = N_LIST; + cur->child1 = next; + cur->child2 = node_make_0(N_LIST_END); + cur = cur->child2; + } + return root; +} + +int node_list_length(struct node *node) { + if (node->type == N_LIST_END) return 0; + assert(node->type == N_LIST); + return 1 + node_list_length(node->child2); +} + +void node_delete_recursive(struct node *node) { + if (node->child1) node_delete_recursive(node->child1); + if (node->child2) node_delete_recursive(node->child2); + if (node->child3) node_delete_recursive(node->child3); + if (node->name) free(node->name); + free(node); +} + +static void write_spaces(int num, FILE *f) { + for (int i = 0; i < num; i++) fputc(' ', f); +} + +static const char* node_type_string(enum node_type type) { + switch (type) { + case N_LIST: return "N_LIST"; break; + case N_LIST_END: return "N_LIST_END"; break; + case N_BLOCK: return "N_BLOCK"; break; + case N_VAR_DECL_INIT: return "N_VAR_DECL_INIT"; break; + case N_VAR_DECL: return "N_VAR_DECL"; break; + case N_FUNC_DECL: return "N_FUNC_DECL"; break; + case N_NUM: return "N_NUM"; break; + case N_VAR: return "N_VAR"; break; + case N_IF: return "N_IF"; break; + case N_WHILE: return "N_WHILE"; break; + case N_RETURN: return "N_RETURN"; break; + case N_BINOP: return "N_BINOP"; break; + case N_UNOP: return "N_UNOP"; break; + case N_CALL: return "N_CALL"; break; + default: assert(false); + } +} + +static const char* oper_string(enum operator oper) { + switch (oper) { + case OP_ADD: return "OP_ADD"; + case OP_SUB: return "OP_SUB"; + case OP_MUL: return "OP_MUL"; + case OP_DIV: return "OP_DIV"; + case OP_MOD: return "OP_MOD"; + case OP_EQ: return "OP_EQ"; + case OP_NEQ: return "OP_NEQ"; + case OP_LT: return "OP_LT"; + case OP_GT: return "OP_GT"; + case OP_LEQ: return "OP_LEQ"; + case OP_GEQ: return "OP_GEQ"; + case OP_AND: return "OP_AND"; + case OP_OR: return "OP_OR"; + case OP_ASSIGN: return "OP_ASSIGN"; + case OP_NEG: return "OP_NEG"; + case OP_NOT: return "OP_NOT"; + default: assert(false); + } +} + +void node_print(const struct node *node, FILE *f, int indent) { + const char *t = node_type_string(node->type); + fprintf(f, "(%s", t); + if (node->rtype) { + fprintf(f, "[rtype="); + type_print(node->rtype, f); + fprintf(f, "]"); + } + if (node->name) fprintf(f, "[name=%s]", node->name); + if (node->type == N_NUM) fprintf(f, "[value=%d]", node->value); + if (node->type == N_BINOP || node->type == N_UNOP) { + fprintf(f, "[oper=%s]", oper_string(node->oper)); + } + if (node->child1) { + fprintf(f, "\n"); + write_spaces(2 * (indent + 1), f); + node_print(node->child1, f, indent + 1); + } + if (node->child2) { + fprintf(f, "\n"); + write_spaces(2 * (indent + 1), f); + node_print(node->child2, f, indent + 1); + } + if (node->child3) { + fprintf(f, "\n"); + write_spaces(2 * (indent + 1), f); + node_print(node->child3, f, indent + 1); + } + fprintf(f, ")"); +} diff --git a/node.h b/node.h new file mode 100644 index 0000000..81d0b26 --- /dev/null +++ b/node.h @@ -0,0 +1,56 @@ +#pragma once + +#include "type.h" + + +enum node_type { + N_LIST, // [head] [tail] + N_LIST_END, // -- + N_BLOCK, // [body] + N_VAR_DECL_INIT, // rtype name [value] + N_VAR_DECL, // rtype name + N_FUNC_DECL, // rtype name [params] [body] + N_IF, // [cond] [body] [else-body] + N_WHILE, // [cond] [body] + N_RETURN, // [value] + N_NUM, // value + N_VAR, // name + N_BINOP, // [left] oper [right] + N_UNOP, // oper [arg] + N_CALL, // name [args] +}; + +enum operator { + // binary + OP_ADD, OP_SUB, OP_MUL, OP_DIV, OP_MOD, + OP_EQ, OP_NEQ, OP_LT, OP_GT, OP_LEQ, OP_GEQ, + OP_AND, OP_OR, + OP_ASSIGN, + // unary + OP_NEG, OP_NOT, OP_DEREF, OP_ADDROF, +}; + +struct node { + enum node_type type; + struct node *child1, *child2, *child3; + struct type *rtype; + char *name; + int value; + enum operator oper; +}; + + +struct node* node_make_0(enum node_type type); +struct node* node_make_1(enum node_type type, struct node *c1); +struct node* node_make_2(enum node_type type, struct node *c1, struct node *c2); +struct node* node_make_3(enum node_type type, struct node *c1, struct node *c2, struct node *c3); + +__attribute__((sentinel)) +struct node* node_make_list_(struct node *first, ...); +#define node_make_list(...) node_make_list_(__VA_ARGS__, NULL) + +int node_list_length(struct node *node); + +void node_delete_recursive(struct node *node); + +void node_print(const struct node *node, FILE *f, int indent); diff --git a/symbol.c b/symbol.c new file mode 100644 index 0000000..2f4adb7 --- /dev/null +++ b/symbol.c @@ -0,0 +1,23 @@ +#include +#include "symbol.h" + + +struct symbol* symbol_make_var(char *name, struct type *rtype) { + struct symbol *sym = malloc(sizeof(struct symbol)); + sym->name = name; + sym->stype = ST_VAR; + sym->rtype = rtype; + sym->numparams = 0; + sym->params = NULL; + return sym; +} + +struct symbol* symbol_make_func(char *name, struct type *rtype, int numparams) { + struct symbol *sym = malloc(sizeof(struct symbol)); + sym->name = name; + sym->stype = ST_FUNC; + sym->rtype = rtype; + sym->numparams = numparams; + sym->params = malloc(numparams * sizeof(struct pair_tn)); + return sym; +} diff --git a/symbol.h b/symbol.h new file mode 100644 index 0000000..454e8bc --- /dev/null +++ b/symbol.h @@ -0,0 +1,29 @@ +#pragma once + +#include "ir.h" +#include "type.h" + + +enum symbol_type { + ST_VAR, + ST_FUNC +}; + +struct pair_tn { + struct type *type; + char *name; +}; + +struct symbol { + char *name; + enum symbol_type stype; + struct type *rtype; + struct ref ref; + + int numparams; + struct pair_tn *params; +}; + + +struct symbol* symbol_make_var(char *name, struct type *rtype); +struct symbol* symbol_make_func(char *name, struct type *rtype, int numparams); diff --git a/symtab.c b/symtab.c new file mode 100644 index 0000000..4229a63 --- /dev/null +++ b/symtab.c @@ -0,0 +1,60 @@ +#include +#include +#include "symtab.h" + + +struct symtab* symtab_make() { + struct symtab *symtab = malloc(sizeof(struct symtab)); + symtab->parent = NULL; + symtab->cap = 16; + symtab->len = 0; + symtab->syms = malloc(symtab->cap * sizeof(struct symbol*)); + return symtab; +} + +void symtab_delete_recursive(struct symtab *symtab) { + free(symtab->syms); + struct symtab *parent = symtab->parent; + free(symtab); + if (parent) symtab_delete_recursive(parent); +} + +struct symbol* symtab_find(const struct symtab *symtab, const char *name) { + struct symbol *sym = symtab_find_local(symtab, name); + if (sym) return sym; + else if (symtab->parent) return symtab_find(symtab->parent, name); + else return NULL; +} + +struct symbol* symtab_find_local(const struct symtab *symtab, const char *name) { + for (int i = 0; i < symtab->len; i++) { + if (strcmp(symtab->syms[i]->name, name) == 0) return symtab->syms[i]; + } + return NULL; +} + +void symtab_insert(struct symtab *symtab, struct symbol *sym) { + if (symtab->len == symtab->cap) { + symtab->cap *= 2; + symtab->syms = realloc(symtab->syms, symtab->cap * sizeof(struct symbol*)); + } + symtab->syms[symtab->len++] = sym; +} + +struct symtab* symtab_root(struct symtab *symtab) { + if (symtab->parent) return symtab_root(symtab->parent); + else return symtab; +} + +struct symtab* symtab_sub(struct symtab *symtab) { + struct symtab *sub = symtab_make(); + sub->parent = symtab; + return sub; +} + +struct symtab* symtab_delete_get_parent(struct symtab *symtab) { + struct symtab *parent = symtab->parent; + symtab->parent = NULL; + symtab_delete_recursive(symtab); + return parent; +} diff --git a/symtab.h b/symtab.h new file mode 100644 index 0000000..18dd8e0 --- /dev/null +++ b/symtab.h @@ -0,0 +1,23 @@ +#pragma once + +#include "symbol.h" +#include "type.h" + + +struct symtab { + struct symtab *parent; + int cap,len; + struct symbol **syms; +}; + + +struct symtab* symtab_make(); +void symtab_delete_recursive(struct symtab *symtab); + +struct symbol* symtab_find(const struct symtab *symtab, const char *name); +struct symbol* symtab_find_local(const struct symtab *symtab, const char *name); +void symtab_insert(struct symtab *symtab, struct symbol *sym); + +struct symtab* symtab_root(struct symtab *symtab); +struct symtab* symtab_sub(struct symtab *symtab); +struct symtab* symtab_delete_get_parent(struct symtab *symtab); diff --git a/test/t1.c b/test/t1.c new file mode 100644 index 0000000..229d5e9 --- /dev/null +++ b/test/t1.c @@ -0,0 +1,21 @@ +int glob; + +int fibo(int n) { + if (n <= 0) return 0; + int a = 0; + int b = 1; + int i = 1; + while (i < n) { + int c = a + b; + a = b; + b = c; + i = i + 1; + } + return b; +} + +int main(int argc, int **argv) { + int a = 1; + int b = 2 + a * 3; + holo_dec(a); +} diff --git a/type.c b/type.c new file mode 100644 index 0000000..6dd4dd0 --- /dev/null +++ b/type.c @@ -0,0 +1,79 @@ +#include +#include +#include +#include "type.h" + + +static struct { + int cap, num; + struct type **types; +} cache; + +static struct type *voidtype; + +static void init_cache(void) { + if (voidtype) return; + voidtype = malloc(sizeof(struct type)); + voidtype->tag = T_VOID; + cache.cap = 8; + cache.num = 0; + cache.types = malloc(cache.cap * sizeof(struct type*)); +} + +void type_cache_cleanup(void) { + for (int i = 0; i < cache.num; i++) { + free(cache.types[i]); + } + free(cache.types); + free(voidtype); +} + +static void cache_add(struct type *type) { + if (cache.num == cache.cap) { + cache.cap *= 2; + cache.types = realloc(cache.types, cache.cap * sizeof(struct type*)); + } + cache.types[cache.num++] = type; +} + + +struct type* type_int(int size) { + init_cache(); + for (int i = 0; i < cache.num; i++) { + if (cache.types[i]->tag == T_INT && cache.types[i]->size == size) { + return cache.types[i]; + } + } + struct type *type = malloc(sizeof(struct type)); + type->tag = T_INT; + type->size = size; + cache_add(type); + return type; +} + +struct type* type_void(void) { + return voidtype; +} + +struct type* type_ptr(struct type *target) { + init_cache(); + for (int i = 0; i < cache.num; i++) { + if (cache.types[i]->tag == T_PTR && cache.types[i]->target == target) { + return cache.types[i]; + } + } + struct type *type = malloc(sizeof(struct type)); + type->tag = T_PTR; + type->target = target; + cache_add(type); + return type; +} + +void type_print(struct type *type, FILE *f) { + switch (type->tag) { + case T_INT: fprintf(f, "i%d", type->size); break; + case T_VOID: fprintf(f, "void"); break; + case T_PTR: type_print(type->target, f); fprintf(f, "*"); break; + default: assert(false); + } +} diff --git a/type.h b/type.h new file mode 100644 index 0000000..6e6e0f6 --- /dev/null +++ b/type.h @@ -0,0 +1,32 @@ +#pragma once + +#include + + +enum type_tag { + T_INT, + T_VOID, + T_PTR +}; + +struct type { + enum type_tag tag; + union { + struct { // T_INT + int size; + }; + struct {}; // T_VOID + struct { // T_PTR + struct type *target; + }; + }; +}; + + +void type_cache_cleanup(void); + +struct type* type_int(int size); +struct type* type_void(void); +struct type* type_ptr(struct type *type); + +void type_print(struct type *type, FILE *f); -- cgit v1.2.3