#include #include #include #include "compiler.h" #include "symtab.h" static void write_func_params(struct pair_tn *params, struct node *list) { if (list->type == N_LIST_END) return; assert(list->child1->type == N_VAR_DECL); params[0].type = list->child1->rtype; params[0].name = strdup(list->child1->name); write_func_params(params + 1, list->child2); } static void write_node_list(struct node **dest, struct node *list) { if (list->type == N_LIST_END) return; dest[0] = list->child1; write_node_list(dest + 1, list->child2); } static bool oper_is_basic_arith(enum operator oper) { return oper == OP_ADD || oper == OP_SUB || oper == OP_MUL || oper == OP_DIV || oper == OP_MOD; } static bool oper_is_comparison(enum operator oper) { return oper == OP_EQ || oper == OP_NEQ || oper == OP_LT || oper == OP_GT || oper == OP_LEQ || oper == OP_GEQ; } struct info { const char *name; struct type *rettype; }; static struct ref compile_expr(struct ir *ir, struct symtab *symtab, struct info info, struct node *node); static void compile_boolexpr( struct ir *ir, struct symtab *symtab, struct info info, struct node *node, const char *truedest, const char *falsedest) { if (node->type == N_BINOP && oper_is_comparison(node->oper)) { struct ref r1 = compile_expr(ir, symtab, info, node->child1); struct ref r2 = compile_expr(ir, symtab, info, node->child2); if (node->child1->valuetype != node->child2->valuetype || (node->child1->valuetype->tag != T_INT && node->child1->valuetype->tag != T_PTR) || (node->child2->valuetype->tag != T_INT && node->child2->valuetype->tag != T_PTR)) { fprintf(stderr, "Invalid types "); type_print(node->child1->valuetype, stderr); fprintf(stderr, " and "); type_print(node->child2->valuetype, stderr); fprintf(stderr, " to operator %s\n", oper_string(node->oper)); exit(1); } ir_append(ir, irins_make_12(INS_CMP, r1, r2)); enum condcode condcode; switch (node->oper) { case OP_EQ: condcode = CCNZ; break; case OP_NEQ: condcode = CCZ; break; case OP_LT: condcode = CCGE; break; case OP_GT: condcode = CCLE; break; case OP_LEQ: condcode = CCG; break; case OP_GEQ: condcode = CCL; break; default: assert(false); } ir_append(ir, irins_make_jcc(falsedest, condcode)); ir_append(ir, irins_make_name(INS_JMP, truedest)); } else if (node->type == N_BINOP && node->oper == OP_AND) { const char *try2lbl = gen_label_name(); compile_boolexpr(ir, symtab, info, node->child1, try2lbl, falsedest); ir_append(ir, irins_make_name(INS_LBL, try2lbl)); compile_boolexpr(ir, symtab, info, node->child2, truedest, falsedest); } else if (node->type == N_BINOP && node->oper == OP_OR) { const char *try2lbl = gen_label_name(); compile_boolexpr(ir, symtab, info, node->child1, truedest, try2lbl); ir_append(ir, irins_make_name(INS_LBL, try2lbl)); compile_boolexpr(ir, symtab, info, node->child2, truedest, falsedest); } else if (node->type == N_UNOP && node->oper == OP_NOT) { compile_boolexpr(ir, symtab, info, node->child1, falsedest, truedest); } else { struct ref ref = compile_expr(ir, symtab, info, node); ir_append(ir, irins_make_12(INS_TEST, ref, ref)); ir_append(ir, irins_make_jcc(falsedest, CCZ)); ir_append(ir, irins_make_name(INS_JMP, truedest)); } } static struct ref compile_expr(struct ir *ir, struct symtab *symtab, struct info info, struct node *node) { switch (node->type) { case N_NUM: { struct ref ref = ref_next_register(); ir_append(ir, irins_make_01(INS_MOV, ref, ref_imm(node->value))); node->valuetype = type_int(16); return ref; } case N_VAR: { struct symbol *sym = symtab_find(symtab, node->name); if (!sym) { fprintf(stderr, "Use of undeclared variable '%s'\n", node->name); exit(1); } if (sym->stype != ST_VAR) { fprintf(stderr, "Cannot use function name in expression\n"); exit(1); } node->valuetype = sym->rtype; return sym->ref; } case N_BINOP: { if (oper_is_basic_arith(node->oper)) { struct ref r0 = ref_next_register(); struct ref r1 = compile_expr(ir, symtab, info, node->child1); struct ref r2 = compile_expr(ir, symtab, info, node->child2); if (node->child1->valuetype != type_int(16) || node->child2->valuetype != type_int(16)) { fprintf(stderr, "Invalid operand types "); type_print(node->child1->valuetype, stderr); fprintf(stderr, " and "); type_print(node->child2->valuetype, stderr); fprintf(stderr, " to arithmetic operator %s\n", oper_string(node->oper)); exit(1); } enum instype instype; switch (node->oper) { case OP_ADD: instype = INS_ADD; break; case OP_SUB: instype = INS_SUB; break; case OP_MUL: instype = INS_MUL; break; case OP_DIV: instype = INS_DIV; break; case OP_MOD: instype = INS_MOD; break; default: assert(false); } ir_append(ir, irins_make_012(instype, r0, r1, r2)); node->valuetype = node->child1->valuetype; return r0; } else if (oper_is_comparison(node->oper) || node->oper == OP_AND || node->oper == OP_OR) { const char *truelbl = gen_label_name(); const char *falselbl = gen_label_name(); const char *afterlbl = gen_label_name(); compile_boolexpr(ir, symtab, info, node, truelbl, falselbl); struct ref cval = ref_next_register(); ir_append(ir, irins_make_name(INS_LBL, truelbl)); ir_append(ir, irins_make_01(INS_MOV, cval, ref_imm(1))); ir_append(ir, irins_make_name(INS_JMP, afterlbl)); ir_append(ir, irins_make_name(INS_LBL, falselbl)); ir_append(ir, irins_make_01(INS_MOV, cval, ref_imm(0))); ir_append(ir, irins_make_name(INS_LBL, afterlbl)); return cval; } else if (node->oper == OP_ASSIGN) { if (node->child1->type == N_VAR || (node->child1->type == N_UNOP && node->child1->oper == OP_DEREF)) { struct ref r0 = compile_expr(ir, symtab, info, node->child1); struct ref r1 = compile_expr(ir, symtab, info, node->child2); if (node->child1->valuetype != node->child2->valuetype) { fprintf(stderr, "Cannot assign type "); type_print(node->child2->valuetype, stderr); fprintf(stderr, " to destination of type "); type_print(node->child1->valuetype, stderr); fprintf(stderr, "\n"); exit(1); } ir_append(ir, irins_make_01(INS_MOV, r0, r1)); node->valuetype = node->child1->valuetype; return r0; } else { fprintf(stderr, "Invalid left-hand side in assignment.\n"); exit(1); } } else { assert(false); } } case N_UNOP: { switch (node->oper) { case OP_NEG: { struct ref r0 = ref_next_register(); struct ref r1 = compile_expr(ir, symtab, info, node->child1); if (node->child1->valuetype != type_int(16)) { fprintf(stderr, "Cannot negate value of type "); type_print(node->child1->valuetype, stderr); fprintf(stderr, "\n"); exit(1); } ir_append(ir, irins_make_01(INS_NEG, r0, r1)); node->valuetype = node->child1->valuetype; return r0; } case OP_NOT: { struct ref r0 = ref_next_register(); struct ref r1 = compile_expr(ir, symtab, info, node->child1); if (node->child1->valuetype->tag != T_INT && node->child1->valuetype->tag != T_PTR) { fprintf(stderr, "Cannot apply OP_NOT to value of type "); type_print(node->child1->valuetype, stderr); fprintf(stderr, "\n"); exit(1); } const char *afterlbl = gen_label_name(); ir_append(ir, irins_make_12(INS_TEST, r1, r1)); ir_append(ir, irins_make_01(INS_MOV, r0, ref_imm(0))); ir_append(ir, irins_make_jcc(afterlbl, CCNZ)); ir_append(ir, irins_make_01(INS_MOV, r0, ref_imm(1))); ir_append(ir, irins_make_name(INS_LBL, afterlbl)); node->valuetype = type_int(16); return r0; } case OP_DEREF: { struct ref addr = compile_expr(ir, symtab, info, node->child1); if (node->child1->valuetype->tag != T_PTR) { fprintf(stderr, "Cannot dereference value of type "); type_print(node->child1->valuetype, stderr); fprintf(stderr, "\n"); exit(1); } struct ref r1; switch (addr.type) { case REF_REG: r1 = ref_mem(addr.reg, 0, REFREL_ZERO); break; case REF_IMM: r1 = ref_mem(REG_UNUSED, addr.imm, REFREL_ZERO); break; case REF_MEM: { r1 = ref_next_register(); ir_append(ir, irins_make_01(INS_MOV, r1, addr)); r1 = ref_mem(r1.reg, 0, REFREL_ZERO); break; break; } case REF_VARNAME: assert(false); } struct ref r0 = ref_next_register(); ir_append(ir, irins_make_01(INS_MOV, r0, r1)); assert(node->child1->valuetype->tag == T_PTR); node->valuetype = node->child1->valuetype->target; return r0; } case OP_ADDROF: fprintf(stderr, "OP_ADDROF not implemented\n"); exit(1); default: assert(false); } } case N_CALL: { struct symbol *sym = symtab_find(symtab, node->name); if (!sym) { fprintf(stderr, "Call of undeclared function '%s'\n", node->name); exit(1); } if (sym->stype != ST_FUNC) { fprintf(stderr, "Cannot call non-function\n"); exit(1); } int actual_numparams = node_list_length(node->child1); if (actual_numparams != sym->numparams) { fprintf(stderr, "Invalid number of parameters to function '%s' (expected %d, got %d)\n", node->name, sym->numparams, actual_numparams); exit(1); } struct node **args = malloc(sym->numparams * sizeof(struct node*)); write_node_list(args, node->child1); for (int i = sym->numparams - 1; i >= 0; i--) { struct ref r = compile_expr(ir, symtab, info, args[i]); if (args[i]->valuetype != sym->params[i].type) { fprintf(stderr, "Invalid argument type "); type_print(args[i]->valuetype, stderr); fprintf(stderr, "to parameter %d (type ", i + 1); type_print(sym->params[i].type, stderr); fprintf(stderr, ") of function '%s'\n", node->name); exit(1); } ir_append(ir, irins_make_1(INS_PUSH, r)); } free(args); struct ref retref; if (node->rtype == type_void()) { ir_append(ir, irins_make_name(INS_CALL, node->name)); retref = ref_reg(REG_UNUSED); // TODO make nicer; typechecker should've caught usages though } else { retref = ref_next_register(); struct irins *ins = irins_make_0(INS_CALLV, retref); ins->name = ir_str(node->name); ir_append(ir, ins); } ir_append(ir, irins_make_012(INS_ADD, ref_reg(REG_SP), ref_reg(REG_SP), ref_imm(sym->numparams))); node->valuetype = sym->rtype; return retref; } default: fprintf(stderr, "Invalid node type %d in expression\n", node->type); exit(1); } } static void compile_node(struct ir *ir, struct symtab *symtab, struct info info, struct node *node) { switch (node->type) { case N_LIST: compile_node(ir, symtab, info, node->child1); compile_node(ir, symtab, info, node->child2); break; case N_LIST_END: break; case N_BLOCK: { symtab = symtab_sub(symtab); compile_node(ir, symtab, info, node->child1); symtab = symtab_delete_get_parent(symtab); break; } case N_VAR_DECL_INIT: case N_VAR_DECL: { if (symtab_find_local(symtab, node->name)) { fprintf(stderr, "Redeclaration of variable '%s'\n", node->name); exit(1); } struct ref vref; if (node->type == N_VAR_DECL_INIT) { struct info info2 = info; info2.name = node->name; vref = compile_expr(ir, symtab, info2, node->child1); if (node->child1->valuetype != node->rtype) { fprintf(stderr, "Invalid type in initialisation of variable '%s' (expected ", node->name); type_print(node->rtype, stderr); fprintf(stderr, ", got "); type_print(node->child1->valuetype, stderr); fprintf(stderr, ")\n"); exit(1); } } else { vref = ref_imm(0); } struct symbol *sym = symbol_make_var(strdup(node->name), node->rtype); if (symtab->parent == NULL) sym->ref = ir_reserve_global(ir, 1); else sym->ref = ref_next_register(); symtab_insert(symtab, sym); ir_append(ir, irins_make_01(INS_MOV, sym->ref, vref)); break; } case N_FUNC_DECL: { if (memcmp(node->name, "__", 2) == 0) { fprintf(stderr, "Invalid function name '%s' (cannot start with '__')\n", node->name); exit(1); } if (symtab_find(symtab, node->name)) { fprintf(stderr, "Redeclaration of function '%s'\n", node->name); exit(1); } int numparams = node_list_length(node->child1); struct symbol *sym = symbol_make_func(strdup(node->name), node->rtype, numparams); write_func_params(sym->params, node->child1); symtab_insert(symtab_root(symtab), sym); ir_append(ir, irins_make_name(INS_LBL, node->name)); symtab = symtab_sub(symtab); for (int i = 0; i < numparams; i++) { struct symbol *parsym = symbol_make_var(strdup(sym->params[i].name), sym->params[i].type); parsym->ref = ref_mem(REG_BP, 2 + i, REFREL_ZERO); symtab_insert(symtab, parsym); } struct info info2 = info; info2.name = node->name; info2.rettype = node->rtype; compile_node(ir, symtab, info2, node->child2); symtab = symtab_delete_get_parent(symtab); // necessary for void functions, to be sure otherwise ir_append(ir, irins_make(INS_RET)); break; } case N_IF: { const char *thenlbl = gen_label_name(); const char *elselbl = gen_label_name(); const char *afterlbl = gen_label_name(); compile_boolexpr(ir, symtab, info, node->child1, thenlbl, elselbl); ir_append(ir, irins_make_name(INS_LBL, thenlbl)); compile_node(ir, symtab, info, node->child2); ir_append(ir, irins_make_name(INS_JMP, afterlbl)); ir_append(ir, irins_make_name(INS_LBL, elselbl)); compile_node(ir, symtab, info, node->child3); ir_append(ir, irins_make_name(INS_LBL, afterlbl)); break; } case N_WHILE: { const char *startlbl = gen_label_name(); const char *bodylbl = gen_label_name(); const char *afterlbl = gen_label_name(); ir_append(ir, irins_make_name(INS_LBL, startlbl)); compile_boolexpr(ir, symtab, info, node->child1, bodylbl, afterlbl); ir_append(ir, irins_make_name(INS_LBL, bodylbl)); compile_node(ir, symtab, info, node->child2); ir_append(ir, irins_make_name(INS_JMP, startlbl)); ir_append(ir, irins_make_name(INS_LBL, afterlbl)); break; } case N_RETURN: if (info.rettype != type_void()) { fprintf(stderr, "Return without value from '%s'\n", info.name); exit(1); } ir_append(ir, irins_make(INS_RET)); break; case N_RETURNV: { struct ref ref = compile_expr(ir, symtab, info, node->child1); if (node->child1->valuetype != info.rettype) { fprintf(stderr, "Return with type "); type_print(node->child1->valuetype, stderr); fprintf(stderr, " from function '%s' with return type ", info.name); type_print(info.rettype, stderr); fprintf(stderr, "\n"); exit(1); } ir_append(ir, irins_make_1(INS_RETV, ref)); break; } case N_IRINS: { bool haveref[3]; irins_which_refs(node->irins, haveref); for (int i = 0; i < 3; i++) { if (!haveref[i]) continue; struct ref *ref = &node->irins->three_refs[i]; if (ref->type == REF_VARNAME) { struct symbol *sym = symtab_find(symtab, ref->name); if (!sym) { fprintf(stderr, "Use of undeclared variable '%s' in asm block\n", node->name); exit(1); } if (sym->stype != ST_VAR) { fprintf(stderr, "Variable reference in asm block should be variable\n"); exit(1); } *ref = sym->ref; } } ir_append(ir, node->irins); node->irins = NULL; break; } default: // hmm, maybe it's an expression statement? compile_expr(ir, symtab, info, node); break; } } static void compile_data_setup_node(struct ir *ir, struct symtab *symtab, struct info info, struct node *node) { switch (node->type) { case N_LIST: if (node->child1->type == N_VAR_DECL || node->child1->type == N_VAR_DECL_INIT) { compile_node(ir, symtab, info, node->child1); node_delete_recursive(node->child1); node->child1 = node_make_0(N_LIST_END); } compile_data_setup_node(ir, symtab, info, node->child2); break; case N_LIST_END: break; case N_VAR_DECL: case N_VAR_DECL_INIT: compile_node(ir, symtab, info, node); break; case N_FUNC_DECL: break; default: assert(false); } } static void compile_data_setup(struct ir *ir, struct symtab *symtab, struct info info, struct node *root) { struct ref flag = ir_reserve_global(ir, 1); ir_append(ir, irins_make_12(INS_TEST, flag, flag)); const char *afterlbl = gen_label_name(); ir_append(ir, irins_make_jcc(afterlbl, CCNZ)); compile_data_setup_node(ir, symtab, info, root); ir_append(ir, irins_make_01(INS_MOV, flag, ref_imm(1))); ir_append(ir, irins_make_name(INS_LBL, afterlbl)); ir_append(ir, irins_make_name(INS_CALL, "main")); ir_append(ir, irins_make(INS_BRK)); } struct ir* compile(struct node *node) { struct ir *ir = ir_make(); struct symtab *symtab = symtab_make(); struct info info; info.name = "<>"; info.rettype = type_void(); compile_data_setup(ir, symtab, info, node); compile_node(ir, symtab, info, node); return ir; }