diff options
author | tomsmeding <tom.smeding@gmail.com> | 2018-01-06 21:38:40 +0100 |
---|---|---|
committer | tomsmeding <tom.smeding@gmail.com> | 2018-01-06 21:38:40 +0100 |
commit | f4b60f43cf636d48f8857676b072371f1575a5b2 (patch) | |
tree | d185ecd5d15b379e64474030bad8a2ab386c4284 | |
parent | c4a376d1c7263993f13e9cf276ebd9e530fd419c (diff) |
Working compiler
-rw-r--r-- | assemble.c | 117 | ||||
-rw-r--r-- | assemble.h | 7 | ||||
-rw-r--r-- | compiler.c | 3 | ||||
-rw-r--r-- | ir.c | 101 | ||||
-rw-r--r-- | ir.h | 4 | ||||
-rw-r--r-- | main.c | 2 | ||||
-rw-r--r-- | test/t1.c | 5 | ||||
-rw-r--r-- | to_assembly.c | 82 |
8 files changed, 273 insertions, 48 deletions
diff --git a/assemble.c b/assemble.c new file mode 100644 index 0000000..cdc584c --- /dev/null +++ b/assemble.c @@ -0,0 +1,117 @@ +#include <stdlib.h> +#include <assert.h> +#include "assemble.h" + + +static void aRef(struct ref ref, FILE *f) { + char buf[40]; + ref_show(ref, buf); + fprintf(f, "%s", buf); +} + +static void assemble_ins(const struct irins *ins, FILE *f) { + switch (ins->type) { + case INS_ADD: + assert(ref_equal(ins->r0, ins->r1)); + fprintf(f, "\tadd "); aRef(ins->r1, f); fprintf(f, ", "); aRef(ins->r2, f); fprintf(f, "\n"); + break; + + case INS_SUB: + assert(ref_equal(ins->r0, ins->r1)); + fprintf(f, "\tsub "); aRef(ins->r1, f); fprintf(f, ", "); aRef(ins->r2, f); fprintf(f, "\n"); + break; + + case INS_MUL: + assert(ref_equal(ins->r0, ref_reg(REG_A))); + assert(ref_equal(ins->r0, ins->r1)); + fprintf(f, "\tmul "); aRef(ins->r2, f); fprintf(f, "\n"); + break; + + case INS_DIV: + fprintf(stderr, "DIV not implemented...\n"); exit(1); + assert(ref_equal(ins->r0, ins->r1)); + fprintf(f, "\tdiv "); aRef(ins->r1, f); fprintf(f, ", "); aRef(ins->r2, f); fprintf(f, "\n"); + break; + + case INS_MOD: + fprintf(stderr, "MOD not implemented...\n"); exit(1); + assert(ref_equal(ins->r0, ins->r1)); + fprintf(f, "\tmod "); aRef(ins->r1, f); fprintf(f, ", "); aRef(ins->r2, f); fprintf(f, "\n"); + break; + + case INS_NEG: + fprintf(f, "\tneg "); aRef(ins->r1, f); fprintf(f, "\n"); + break; + + case INS_NOT: + fprintf(f, "\tnot "); aRef(ins->r1, f); fprintf(f, "\n"); + break; + + case INS_TEST: + fprintf(f, "\ttest "); aRef(ins->r1, f); fprintf(f, ", "); aRef(ins->r2, f); fprintf(f, "\n"); + break; + + case INS_PUSH: + fprintf(f, "\tpush "); aRef(ins->r1, f); fprintf(f, "\n"); + break; + + case INS_POP: + fprintf(f, "\tpop "); aRef(ins->r0, f); fprintf(f, "\n"); + break; + + case INS_CMP: + fprintf(f, "\tcmp "); aRef(ins->r1, f); fprintf(f, ", "); aRef(ins->r2, f); fprintf(f, "\n"); + break; + + case INS_LBL: + fprintf(f, "%s:\n", ins->name); + break; + + case INS_JMP: + fprintf(f, "\tjmp %s\n", ins->name); + break; + + case INS_JCC: + fprintf(f, "\tj%s %s\n", condcode_show(ins->condcode), ins->name); + break; + + case INS_CALL: + fprintf(f, "\tcall %s\n", ins->name); + break; + + case INS_RET: + fprintf(f, "\tret\n"); + break; + + case INS_MOV: + fprintf(f, "\tmov "); aRef(ins->r0, f); fprintf(f, ", "); aRef(ins->r1, f); fprintf(f, "\n"); + break; + + case INS_BRK: + fprintf(f, "\tbrk\n"); + break; + + case INS_CALLV: + case INS_RETV: + assert(false); + + default: + assert(false); + } +} + +void assemble(const struct ir *ir, FILE *f) { + if (ir->globspace > 0) { + fprintf(f, ".data\n\tdw "); + for (int i = 0; i < ir->globspace; i++) { + if (i != 0) fprintf(f, ", "); + fprintf(f, "0"); + } + fprintf(f, "\n"); + } + fprintf(f, ".text\n"); + + for (int i = 0; i < ir->len; i++) { + assemble_ins(ir->inss[i], f); + } +} diff --git a/assemble.h b/assemble.h new file mode 100644 index 0000000..4e97f29 --- /dev/null +++ b/assemble.h @@ -0,0 +1,7 @@ +#pragma once + +#include <stdio.h> +#include "ir.h" + + +void assemble(const struct ir *ir, FILE *f); @@ -278,7 +278,7 @@ static void compile_node(struct ir *ir, struct symtab *symtab, struct node *node symtab = symtab_sub(symtab); for (int i = 0; i < numparams; i++) { struct symbol *parsym = symbol_make_var(strdup(sym->params[i].name), sym->params[i].type); - parsym->ref = ref_mem(REG_BP, -2 - i, REFREL_ZERO); + parsym->ref = ref_mem(REG_BP, 2 + i, REFREL_ZERO); symtab_insert(symtab, parsym); } @@ -368,6 +368,7 @@ static void compile_data_setup(struct ir *ir, struct symtab *symtab, struct node ir_append(ir, irins_make_jcc(afterlbl, CCNZ)); compile_data_setup_node(ir, symtab, root); + ir_append(ir, irins_make_01(INS_MOV, flag, ref_imm(1))); ir_append(ir, irins_make_name(INS_LBL, afterlbl)); ir_append(ir, irins_make_name(INS_CALL, "main")); @@ -48,7 +48,7 @@ void ir_print(struct ir *ir, FILE *f) { } } -static const char* condcode_show(enum condcode condcode) { +const char* condcode_show(enum condcode condcode) { switch (condcode) { case CCZ: return "z"; case CCNZ: return "nz"; @@ -60,67 +60,86 @@ static const char* condcode_show(enum condcode condcode) { } } -static const char* ref_show(struct ref ref) { - static char cbuf[3][16]; - static int cbufi = 0; +void ref_show(struct ref ref, char *dest) { static const char *special[8] = {"A", "B", "C", "D", "X", "Y", "SP", "BP"}; - const char *suffix; - if (ref.rel == REFREL_ZERO) suffix = ""; - else if (ref.rel == REFREL_DATA) suffix = ":data"; - else if (ref.rel == REFREL_HEAP) suffix = ":heap"; - else assert(false); + const char *suffix = NULL; + if (ref.type == REF_MEM) { + if (ref.rel == REFREL_ZERO) suffix = ""; + else if (ref.rel == REFREL_DATA) suffix = ":data"; + else if (ref.rel == REFREL_HEAP) suffix = ":heap"; + else assert(false); + } switch (ref.type) { case REF_REG: - if (ref.reg == REG_UNUSED) return "<<UNUSED?>>"; - if (ref.reg < 0) return special[-(ref.reg - REG_A)]; - cbufi = (cbufi + 1) % 3; - sprintf(cbuf[cbufi], "t%d", ref.reg); - return cbuf[cbufi]; + if (ref.reg == REG_UNUSED) { + strcpy(dest, "<<UNUSED?>>"); + } else if (ref.reg < 0) { + strcpy(dest, special[-(ref.reg - REG_A)]); + } else { + sprintf(dest, "t%d", ref.reg); + } + break; case REF_MEM: - cbufi = (cbufi + 1) % 3; if (ref.reg == REG_UNUSED) { - sprintf(cbuf[cbufi], "[%d%s]", ref.offset, suffix); + sprintf(dest, "[0x%x%s]", ref.offset, suffix); } else if (ref.reg < 0) { - sprintf(cbuf[cbufi], "[%s + %d%s]", special[-(ref.reg - REG_A)], ref.offset, suffix); + if (ref.offset < 0) { + sprintf(dest, "[%s - %d%s]", special[-(ref.reg - REG_A)], -ref.offset, suffix); + } else { + sprintf(dest, "[%s + %d%s]", special[-(ref.reg - REG_A)], ref.offset, suffix); + } } else { - sprintf(cbuf[cbufi], "[t%d + %d%s]", ref.reg, ref.offset, suffix); + if (ref.offset < 0) { + sprintf(dest, "[t%d - %d%s]", ref.reg, -ref.offset, suffix); + } else { + sprintf(dest, "[t%d + %d%s]", ref.reg, ref.offset, suffix); + } } - return cbuf[cbufi]; + break; case REF_IMM: - cbufi = (cbufi + 1) % 3; - sprintf(cbuf[cbufi], "%d", ref.imm); - return cbuf[cbufi]; + sprintf(dest, "%d", ref.imm); + break; default: assert(false); } } +static const char* ref_show_c(struct ref ref) { + static char cbuf[3][40]; + static int cbufi = 0; + + char *buf = cbuf[cbufi]; + cbufi = (cbufi + 1) % 3; + ref_show(ref, buf); + return buf; +} + void irins_print(struct irins *ins, FILE *f) { switch (ins->type) { - case INS_ADD: fprintf(f, "\t%s <- %s + %s\n", ref_show(ins->r0), ref_show(ins->r1), ref_show(ins->r2)); break; - case INS_SUB: fprintf(f, "\t%s <- %s - %s\n", ref_show(ins->r0), ref_show(ins->r1), ref_show(ins->r2)); break; - case INS_MUL: fprintf(f, "\t%s <- %s * %s\n", ref_show(ins->r0), ref_show(ins->r1), ref_show(ins->r2)); break; - case INS_DIV: fprintf(f, "\t%s <- %s / %s\n", ref_show(ins->r0), ref_show(ins->r1), ref_show(ins->r2)); break; - case INS_MOD: fprintf(f, "\t%s <- %s %% %s\n", ref_show(ins->r0), ref_show(ins->r1), ref_show(ins->r2)); break; - case INS_NEG: fprintf(f, "\t%s <- -%s\n", ref_show(ins->r0), ref_show(ins->r1)); break; - case INS_NOT: fprintf(f, "\t%s <- ~%s\n", ref_show(ins->r0), ref_show(ins->r1)); break; - case INS_TEST: fprintf(f, "\ttest %s, %s\n", ref_show(ins->r1), ref_show(ins->r2)); break; - case INS_PUSH: fprintf(f, "\tpush %s\n", ref_show(ins->r1)); break; - case INS_POP: fprintf(f, "\t%s <- pop\n", ref_show(ins->r0)); break; - case INS_CMP: fprintf(f, "\tcmp %s, %s\n", ref_show(ins->r1), ref_show(ins->r2)); break; + case INS_ADD: fprintf(f, "\t%s <- %s + %s\n", ref_show_c(ins->r0), ref_show_c(ins->r1), ref_show_c(ins->r2)); break; + case INS_SUB: fprintf(f, "\t%s <- %s - %s\n", ref_show_c(ins->r0), ref_show_c(ins->r1), ref_show_c(ins->r2)); break; + case INS_MUL: fprintf(f, "\t%s <- %s * %s\n", ref_show_c(ins->r0), ref_show_c(ins->r1), ref_show_c(ins->r2)); break; + case INS_DIV: fprintf(f, "\t%s <- %s / %s\n", ref_show_c(ins->r0), ref_show_c(ins->r1), ref_show_c(ins->r2)); break; + case INS_MOD: fprintf(f, "\t%s <- %s %% %s\n", ref_show_c(ins->r0), ref_show_c(ins->r1), ref_show_c(ins->r2)); break; + case INS_NEG: fprintf(f, "\t%s <- -%s\n", ref_show_c(ins->r0), ref_show_c(ins->r1)); break; + case INS_NOT: fprintf(f, "\t%s <- ~%s\n", ref_show_c(ins->r0), ref_show_c(ins->r1)); break; + case INS_TEST: fprintf(f, "\ttest %s, %s\n", ref_show_c(ins->r1), ref_show_c(ins->r2)); break; + case INS_PUSH: fprintf(f, "\tpush %s\n", ref_show_c(ins->r1)); break; + case INS_POP: fprintf(f, "\t%s <- pop\n", ref_show_c(ins->r0)); break; + case INS_CMP: fprintf(f, "\tcmp %s, %s\n", ref_show_c(ins->r1), ref_show_c(ins->r2)); break; case INS_LBL: fprintf(f, "%s:\n", ins->name); break; case INS_JMP: fprintf(f, "\tjmp %s\n", ins->name); break; case INS_JCC: fprintf(f, "\tj%s %s\n", condcode_show(ins->condcode), ins->name); break; case INS_CALL: fprintf(f, "\tcall %s\n", ins->name); break; - case INS_CALLV: fprintf(f, "\t%s <- call %s\n", ref_show(ins->r0), ins->name); break; + case INS_CALLV: fprintf(f, "\t%s <- call %s\n", ref_show_c(ins->r0), ins->name); break; case INS_RET: fprintf(f, "\tret\n"); break; - case INS_RETV: fprintf(f, "\tret %s\n", ref_show(ins->r1)); break; - case INS_MOV: fprintf(f, "\t%s <- %s\n", ref_show(ins->r0), ref_show(ins->r1)); break; + case INS_RETV: fprintf(f, "\tret %s\n", ref_show_c(ins->r1)); break; + case INS_MOV: fprintf(f, "\t%s <- %s\n", ref_show_c(ins->r0), ref_show_c(ins->r1)); break; case INS_BRK: fprintf(f, "\tbrk\n"); break; default: assert(false); } @@ -253,3 +272,13 @@ struct ref ref_next_register(void) { static int next = 0; return ref_reg(next++); } + +bool ref_equal(struct ref r1, struct ref r2) { + if (r1.type != r2.type) return false; + switch (r1.type) { + case REF_REG: return r1.reg == r2.reg; + case REF_MEM: return r1.reg == r2.reg && r1.offset == r2.offset && r1.rel == r2.rel; + case REF_IMM: return r1.imm == r2.imm; + default: assert(false); + } +} @@ -85,6 +85,8 @@ void ir_delete(struct ir *ir); void ir_print(struct ir *ir, FILE *f); void irins_print(struct irins *ins, FILE *f); +const char* condcode_show(enum condcode condcode); +void ref_show(struct ref ref, char *dest); // dest must have size at least 40 const char* ir_str(const char *str); // returns interned string @@ -112,3 +114,5 @@ struct ref ref_reg(int reg); struct ref ref_mem(int reg, int offset, enum refrel rel); struct ref ref_imm(int imm); struct ref ref_next_register(void); + +bool ref_equal(struct ref r1, struct ref r2); @@ -1,6 +1,7 @@ #include <stdio.h> #include <stdbool.h> #include <assert.h> +#include "assemble.h" #include "compiler.h" #include "node.h" #include "regalloc.h" @@ -40,6 +41,7 @@ int main(int argc, char **argv) { allocation_delete(alloc); ir_print(ir, stdout); printf("\n"); + assemble(ir, stdout); return result; } @@ -1,4 +1,4 @@ -int glob; +int glob = 1; int fibo(int n) { if (n <= 0) return 0; @@ -17,6 +17,7 @@ int fibo(int n) { void main(int argc, int **argv) { int a = 1; int b = 2 + a * 3; - fibo(5); + glob = glob + 1; + fibo(glob); // holo_dec(a); } diff --git a/to_assembly.c b/to_assembly.c index ab5c845..bd680b3 100644 --- a/to_assembly.c +++ b/to_assembly.c @@ -1,4 +1,6 @@ +#include <stdlib.h> #include <string.h> +#include <assert.h> #include "to_assembly.h" @@ -6,10 +8,24 @@ static bool is_function_label(const char *lbl) { return memcmp(lbl, "__", 2) != 0; } -static void fix_everything(struct ir *ir, const int startidx, struct allocation *ral) { +static bool needs_0_eq_1(const struct irins *ins) { + return ins->type == INS_ADD || ins->type == INS_SUB || ins->type == INS_MUL || + ins->type == INS_DIV || ins->type == INS_MOD || ins->type == INS_NEG || + ins->type == INS_NOT; +} + +static void fix_function(struct ir *ir, int startidx, struct allocation *ral) { int spillspace = 0; - for (int idx = startidx + 1; idx < ir->len; idx++) { + int of_cap = 32; + int *offsets = malloc(of_cap * sizeof(int)); + memset(offsets, 0xff, of_cap * sizeof(int)); + + if (ir->inss[startidx]->type == INS_LBL && is_function_label(ir->inss[startidx]->name)) { + startidx++; + } + + for (int idx = startidx; idx < ir->len; idx++) { struct irins *ins = ir->inss[idx]; if (ins->type == INS_LBL && is_function_label(ins->name)) break; @@ -27,26 +43,50 @@ static void fix_everything(struct ir *ir, const int startidx, struct allocation continue; } + // Apply the register allocation bool haveref[3]; irins_which_refs(ins, haveref); for (int ri = 0; ri < 3; ri++) { struct ref *ref = &ins->three_refs[ri]; if (haveref[ri] && ref->type == REF_REG && ref->reg >= 0) { if (ral->allocs[ref->reg].spill) { - *ref = ref_mem(REG_BP, spillspace + 1, REFREL_ZERO); - spillspace++; + int old_cap = of_cap; + while (ref->reg >= of_cap) { + of_cap *= 2; + offsets = realloc(offsets, of_cap * sizeof(int)); + } + memset(offsets + old_cap * sizeof(int), 0xff, (of_cap - old_cap) * sizeof(int)); + if (offsets[ref->reg] == -1) offsets[ref->reg] = ++spillspace; + *ref = ref_mem(REG_BP, -offsets[ref->reg], REFREL_ZERO); } else { ref->reg = ral->allocs[ref->reg].reg; } } } + + if (ins->type == INS_MUL) { + if (!ref_equal(ins->r1, ref_reg(REG_A))) { + ir_insert_before(ir, idx, irins_make_01(INS_MOV, ref_reg(REG_A), ins->r1)); + ins->r1 = ref_reg(REG_A); + idx++; + } + if (!ref_equal(ins->r0, ref_reg(REG_A))) { + ir_insert_before(ir, idx + 1, irins_make_01(INS_MOV, ins->r0, ref_reg(REG_A))); + ins->r0 = ref_reg(REG_A); + } + } else if (needs_0_eq_1(ins) && !ref_equal(ins->r0, ins->r1)) { + ir_insert_before(ir, idx, irins_make_01(INS_MOV, ins->r0, ins->r1)); + ins->r1 = ins->r0; + } } + + free(offsets); if (spillspace == 0) return; - ir_insert_before(ir, startidx + 1, irins_make_1(INS_PUSH, ref_reg(REG_BP))); - ir_insert_before(ir, startidx + 2, irins_make_01(INS_MOV, ref_reg(REG_BP), ref_reg(REG_SP))); - ir_insert_before(ir, startidx + 3, + ir_insert_before(ir, startidx + 0, irins_make_1(INS_PUSH, ref_reg(REG_BP))); + ir_insert_before(ir, startidx + 1, irins_make_01(INS_MOV, ref_reg(REG_BP), ref_reg(REG_SP))); + ir_insert_before(ir, startidx + 2, irins_make_012(INS_SUB, ref_reg(REG_SP), ref_reg(REG_SP), ref_imm(spillspace))); for (int idx = startidx + 1; idx < ir->len; idx++) { @@ -54,7 +94,7 @@ static void fix_everything(struct ir *ir, const int startidx, struct allocation if (ins->type == INS_LBL && is_function_label(ins->name)) break; - if (ins->type == INS_RET) { + if (ins->type == INS_RET || ins->type == INS_BRK) { ir_insert_before(ir, idx++, irins_make_01(INS_MOV, ref_reg(REG_SP), ref_reg(REG_BP))); ir_insert_before(ir, idx++, irins_make_0(INS_POP, ref_reg(REG_BP))); continue; @@ -63,9 +103,33 @@ static void fix_everything(struct ir *ir, const int startidx, struct allocation } void to_assembly(struct ir *ir, struct allocation *ral) { + if (ir->len > 0 && (ir->inss[0]->type != INS_LBL || !is_function_label(ir->inss[0]->name))) { + fix_function(ir, 0, ral); + } + for (int idx = 0; idx < ir->len; idx++) { if (ir->inss[idx]->type == INS_LBL && is_function_label(ir->inss[idx]->name)) { - fix_everything(ir, idx, ral); + fix_function(ir, idx, ral); + } + } + + for (int idx = 0; idx < ir->len; idx++) { + struct irins *ins = ir->inss[idx]; + + bool haveref[3]; + irins_which_refs(ins, haveref); + for (int ri = 0; ri < 3; ri++) { + struct ref *ref = &ins->three_refs[ri]; + + if (haveref[ri] && ref->type == REF_MEM) { + switch (ref->rel) { + case REFREL_ZERO: break; + case REFREL_DATA: ref->offset += 0x0200; break; + case REFREL_HEAP: ref->offset += 0x5000; fprintf(stderr, "WARNING: Using hard-coded heap start address\n"); break; + default: assert(false); + } + ref->rel = REFREL_ZERO; + } } } } |