From f36879c1d42e34d593dff204e26ecfb10eb6dfac Mon Sep 17 00:00:00 2001 From: tomsmeding Date: Wed, 24 Jan 2018 22:46:14 +0100 Subject: Asm literals and hwi --- Makefile | 4 +++ assemble.c | 8 +++-- c.l | 69 ++++++++++++++++++++++++++++++++++------ c.y | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--- compiler.c | 25 +++++++++++++++ ir.c | 66 +++++++++++++++++++++++++++++--------- ir.h | 8 ++++- main.c | 2 -- node.c | 7 +++++ node.h | 3 ++ test/t2.c | 11 +++++++ 11 files changed, 274 insertions(+), 34 deletions(-) create mode 100644 test/t2.c diff --git a/Makefile b/Makefile index 411fbf3..079f321 100644 --- a/Makefile +++ b/Makefile @@ -5,6 +5,10 @@ YAXX = bison CFLAGS = -Wall -Wextra -std=c11 -g -I. -D_GNU_SOURCE LDFLAGS = -lfl +ifneq ($(DEBUG),) + CFLAGS += -DDEBUG=$(value DEBUG) +endif + ifeq ($(shell uname), Darwin) LDFLAGS += -L/usr/local/opt/flex/lib CFLAGS += -I/usr/local/opt/flex/include diff --git a/assemble.c b/assemble.c index cdc584c..a5ae6b4 100644 --- a/assemble.c +++ b/assemble.c @@ -4,9 +4,7 @@ static void aRef(struct ref ref, FILE *f) { - char buf[40]; - ref_show(ref, buf); - fprintf(f, "%s", buf); + fprintf(f, "%s", ref_show(ref)); } static void assemble_ins(const struct irins *ins, FILE *f) { @@ -91,6 +89,10 @@ static void assemble_ins(const struct irins *ins, FILE *f) { fprintf(f, "\tbrk\n"); break; + case INS_HWI: + fprintf(f, "\thwi "); aRef(ins->r1, f); fprintf(f, "\n"); + break; + case INS_CALLV: case INS_RETV: assert(false); diff --git a/c.l b/c.l index 77381fb..4bac236 100644 --- a/c.l +++ b/c.l @@ -2,13 +2,14 @@ #include #include +#include #include #include +#include #include "type.h" +#include "ir.h" #include "y.tab.h" -#undef DEBUG - int yylex(void); int lineno=1; @@ -19,12 +20,17 @@ static enum state { SPTR } state = STERM; +static bool asm_mode = false; + __attribute__((format (printf, 1, 2))) void pdebug(const char *format, ...); -#define RETS(val_) {pdebug(#val_ ": %s\n", yytext); yylval.id = strdup(yytext); return val_;} -#define RET_TYPE(val_, ty_) {pdebug(#val_ ": %s (" #ty_ ")\n", yytext); yylval.type = ty_; return val_;} -#define RET(val_) {pdebug(#val_ "\n"); return val_;} +#define RETS(val_) RETS_S(val_, yytext) +#define RETS_S(val_, str_) do {pdebug(#val_ ": %s\n", (str_)); yylval.id = strdup((str_)); return (val_);} while (0) +#define RET_TYPE(val_, ty_) do {pdebug(#val_ ": %s (" #ty_ ")\n", yytext); yylval.type = ty_; return (val_);} while (0) +#define RET_INSTYPE(val_, it_) do {pdebug(#val_ ": " #it_ "\n"); yylval.instype = (it_); return (val_);} while (0) +#define RET_REF(val_, r_) do {pdebug(#val_ ": " #r_ "\n"); yylval.ref = (r_); return (val_);} while (0) +#define RET(val_) do {pdebug(#val_ "\n"); return val_;} while (0) %} @@ -37,6 +43,9 @@ DIGITS {DIGIT}{DIGIT}* NUM -?{DIGITS} +/* An inclusive state; all other rules will still fire */ +%s ASM_MODE +/* An exclusive state; no other rules will fire */ %x C_COMMENT @@ -51,17 +60,57 @@ else { RET(ELSE); } while { RET(WHILE); } for { RET(FOR); } return { state = STERM; RET(RETURN); } +asm { asm_mode = true; BEGIN(ASM_MODE); RET(ASM); } + +{ID} { + state = SOP; + if (asm_mode) { + int len = strlen(yytext); + char name[len + 1]; + for (int i = 0; i < len; i++) name[i] = tolower(yytext[i]); + name[len] = '\0'; + if (strcmp(name, "add") == 0) RET_INSTYPE(ASMINS2, INS_ADD); + else if (strcmp(name, "sub") == 0) RET_INSTYPE(ASMINS2, INS_SUB); + else if (strcmp(name, "mul") == 0) RET_INSTYPE(ASMINS2, INS_MUL); + else if (strcmp(name, "div") == 0) RET_INSTYPE(ASMINS2, INS_DIV); + else if (strcmp(name, "mod") == 0) RET_INSTYPE(ASMINS2, INS_MOD); + else if (strcmp(name, "test") == 0) RET_INSTYPE(ASMINS2, INS_TEST); + else if (strcmp(name, "cmp") == 0) RET_INSTYPE(ASMINS2, INS_CMP); + else if (strcmp(name, "mov") == 0) RET_INSTYPE(ASMINS2, INS_MOV); + else if (strcmp(name, "neg") == 0) RET_INSTYPE(ASMINS1, INS_NEG); + else if (strcmp(name, "not") == 0) RET_INSTYPE(ASMINS1, INS_NOT); + else if (strcmp(name, "push") == 0) RET_INSTYPE(ASMINS1, INS_PUSH); + else if (strcmp(name, "pop") == 0) RET_INSTYPE(ASMINS1, INS_POP); + else if (strcmp(name, "hwi") == 0) RET_INSTYPE(ASMINS1, INS_HWI); + else if (strcmp(name, "ret") == 0) RET_INSTYPE(ASMINS0, INS_RET); + else if (strcmp(name, "brk") == 0) RET_INSTYPE(ASMINS0, INS_BRK); + else if (strcmp(name, "jmp") == 0) RET_INSTYPE(ASMINS_NAME, INS_JMP); + else if (strcmp(name, "call") == 0) RET_INSTYPE(ASMINS_NAME, INS_CALL); + else if (strcmp(name, "a") == 0) RET_REF(ASMREG, ref_reg(REG_A)); + else if (strcmp(name, "b") == 0) RET_REF(ASMREG, ref_reg(REG_B)); + else if (strcmp(name, "c") == 0) RET_REF(ASMREG, ref_reg(REG_C)); + else if (strcmp(name, "d") == 0) RET_REF(ASMREG, ref_reg(REG_D)); + else if (strcmp(name, "x") == 0) RET_REF(ASMREG, ref_reg(REG_X)); + else if (strcmp(name, "y") == 0) RET_REF(ASMREG, ref_reg(REG_Y)); + else if (strcmp(name, "sp") == 0) RET_REF(ASMREG, ref_reg(REG_SP)); + else if (strcmp(name, "bp") == 0) RET_REF(ASMREG, ref_reg(REG_BP)); + else RETS(ID); + } else { + RETS(ID); + } + } -{ID} { state = SOP; RETS(ID); } +[+-] { pdebug("%c in ASM_MODE\n", yytext[0]); return yytext[0]; } +@{ID} { pdebug("'%s'\n", yytext); RETS_S(ASMVARNAME, yytext + 1); } "//"[^\n]* {} "/*" { pdebug("C_COMMENT... "); fflush(stdout); BEGIN(C_COMMENT); } "*/" { pdebug("done\n"); BEGIN(INITIAL); } . {} -\n { lineno++; } +\n { lineno++; if (asm_mode) RET(NEWLINE); } -\n { lineno++; } +\n { lineno++; if (asm_mode) RET(NEWLINE); } [ \t]+ {} "+" { state = STERM; RETS(ADDOP); } @@ -74,7 +123,7 @@ return { state = STERM; RET(RETURN); } "-" { switch (state) { case SOP: state = STERM; RETS(ADDOP); - case STERM: state = STERM; RETS(NEGATE); + case STERM: state = STERM; RET(NEGATE); default: fprintf(stderr, "Unexpected '-'\n"); exit(1); } } @@ -98,6 +147,8 @@ return { state = STERM; RET(RETURN); } ")" { state = SOP; pdebug("')'\n"); return ')'; } +"}" { state = STERM; pdebug("'}'\n"); asm_mode = false; BEGIN(INITIAL); return '}'; } + . { pdebug(".: %c\n", yytext[0]); return yytext[0]; } diff --git a/c.y b/c.y index 4de4cae..96866ef 100644 --- a/c.y +++ b/c.y @@ -6,6 +6,7 @@ #include #include #include "node.h" +#include "ir.h" static void yyerror(const char*); @@ -24,8 +25,10 @@ struct node *root_node; %start start -%token ID NUM INT VOID IF ELSE WHILE FOR RETURN PTR +%token ID NUM INT VOID IF ELSE WHILE FOR RETURN PTR ASM %token DEREF ADDROF NEGATE NOT ADDOP MULOP RELOP BOOLOP ASSIGN +%token NEWLINE // only sent in asm mode +%token ASMINS0 ASMINS1 ASMINS2 ASMINS_NAME ASMREG ASMVARNAME %left ASSIGN %left BOOLOP @@ -34,20 +37,26 @@ struct node *root_node; %left MULOP %left NOT NEGATE DEREF ADDROF -%type ID NUM ADDOP MULOP RELOP BOOLOP +%type ID NUM ADDOP MULOP RELOP BOOLOP ASMVARNAME %type toplevel toplevel_decl var_decl func_decl parameter_list parameter_list_rest %type parameter block statement statement_list open_statement matched_statement %type other_statement expression atom_expr expression_list +%type asm_statement_list_terminated asm_statement_list_terminated_trailer asm_statement %type type INT VOID +%type ASMINS0 ASMINS1 ASMINS2 ASMINS_NAME +%type ASMREG asm_arg %union { char *id; struct node *node; struct type *type; + enum instype instype; + struct ref ref; } %destructor { free($$); } %destructor { node_delete_recursive($$); } +%destructor { irins_delete($$); } %% @@ -155,7 +164,14 @@ other_statement: } | RETURN expression ';' { $$ = node_make_1(N_RETURNV, $2); - } ; + } + | ASM '{' newlines '}' { + $$ = node_make_0(N_LIST_END); + } + | ASM '{' newlines asm_statement_list_terminated { + $$ = $4; + } + ; expression: atom_expr | NOT expression { $$ = node_make_1(N_UNOP, $2); $$->oper = OP_NOT; } @@ -209,13 +225,94 @@ atom_expr: NUM { $$->oper = OP_DEREF; } ; -expression_list: expression { +expression_list: + expression { $$ = node_make_2(N_LIST, $1, node_make_0(N_LIST_END)); } | expression ',' expression_list { $$ = node_make_2(N_LIST, $1, $3); } ; +newlines: | + NEWLINE newlines ; + +asm_statement_list_terminated: + asm_statement NEWLINE newlines asm_statement_list_terminated_trailer { + $$ = node_make_2(N_LIST, $1, $4); + } + | asm_statement '}' { + $$ = node_make_2(N_LIST, $1, node_make_0(N_LIST_END)); + } ; + +asm_statement_list_terminated_trailer: + asm_statement_list_terminated + | '}' { + $$ = node_make_0(N_LIST_END); + } ; + +asm_statement: ID ':' { + $$ = node_make_0(N_IRINS); + $$->irins = irins_make_name(INS_LBL, $1); + } + | ASMINS0 + { + $$ = node_make_0(N_IRINS); + $$->irins = irins_make($1); + } + | ASMINS1 asm_arg { + $$ = node_make_0(N_IRINS); + struct irins *ins; + switch ($1) { + case INS_NEG: ins = irins_make_01($1, $2, $2); break; + case INS_NOT: ins = irins_make_01($1, $2, $2); break; + case INS_PUSH: ins = irins_make_1($1, $2); break; + case INS_POP: ins = irins_make_0($1, $2); break; + case INS_HWI: ins = irins_make_1($1, $2); break; + default: assert(false); + } + $$->irins = ins; + } + | ASMINS2 asm_arg ',' asm_arg { + $$ = node_make_0(N_IRINS); + struct irins *ins; + switch ($1) { + case INS_ADD: ins = irins_make_012($1, $2, $2, $4); break; + case INS_SUB: ins = irins_make_012($1, $2, $2, $4); break; + case INS_MUL: ins = irins_make_012($1, $2, $2, $4); break; + case INS_DIV: ins = irins_make_012($1, $2, $2, $4); break; + case INS_MOD: ins = irins_make_012($1, $2, $2, $4); break; + case INS_TEST: ins = irins_make_12($1, $2, $4); break; + case INS_CMP: ins = irins_make_12($1, $2, $4); break; + case INS_MOV: ins = irins_make_01($1, $2, $4); break; + default: assert(false); + } + $$->irins = ins; + } + | ASMINS_NAME ID { + $$ = node_make_0(N_IRINS); + $$->irins = irins_make_name($1, $2); + } ; + +asm_arg: ASMREG + | NUM { + $$ = ref_imm(strtol($1, NULL, 0)); + } + | ASMVARNAME { + $$ = ref_varname($1); + } + | '[' NUM ']' { + $$ = ref_mem(REG_UNUSED, strtol($2, NULL, 0), REFREL_ZERO); + } + | '[' ASMREG ']' { + $$ = ref_mem($2.reg, 0, REFREL_ZERO); + } + | '[' ASMREG '+' NUM ']' { + $$ = ref_mem($2.reg, strtol($4, NULL, 0), REFREL_ZERO); + } + | '[' ASMREG '-' NUM ']' { + $$ = ref_mem($2.reg, -strtol($4, NULL, 0), REFREL_ZERO); + } + %% diff --git a/compiler.c b/compiler.c index adcca46..bdbbb8b 100644 --- a/compiler.c +++ b/compiler.c @@ -226,6 +226,7 @@ static struct ref compile_expr(struct ir *ir, struct symtab *symtab, struct info r1 = ref_mem(r1.reg, 0, REFREL_ZERO); break; break; } + case REF_VARNAME: assert(false); } struct ref r0 = ref_next_register(); ir_append(ir, irins_make_01(INS_MOV, r0, r1)); @@ -430,6 +431,30 @@ static void compile_node(struct ir *ir, struct symtab *symtab, struct info info, break; } + case N_IRINS: { + bool haveref[3]; + irins_which_refs(node->irins, haveref); + for (int i = 0; i < 3; i++) { + if (!haveref[i]) continue; + struct ref *ref = &node->irins->three_refs[i]; + if (ref->type == REF_VARNAME) { + struct symbol *sym = symtab_find(symtab, ref->name); + if (!sym) { + fprintf(stderr, "Use of undeclared variable '%s' in asm block\n", node->name); + exit(1); + } + if (sym->stype != ST_VAR) { + fprintf(stderr, "Variable reference in asm block should be variable\n"); + exit(1); + } + *ref = sym->ref; + } + } + ir_append(ir, node->irins); + node->irins = NULL; + break; + } + default: // hmm, maybe it's an expression statement? compile_expr(ir, symtab, info, node); break; diff --git a/ir.c b/ir.c index e3a3171..4dca5b0 100644 --- a/ir.c +++ b/ir.c @@ -60,8 +60,15 @@ const char* condcode_show(enum condcode condcode) { } } -void ref_show(struct ref ref, char *dest) { +const char* ref_show(struct ref ref) { static const char *special[8] = {"A", "B", "C", "D", "X", "Y", "SP", "BP"}; + static char *buffer = NULL; + static int buffer_size = 0; + + if (buffer == NULL) { + buffer_size = 40; // enough for all non-name refs + buffer = malloc(buffer_size + 1); + } const char *suffix = NULL; if (ref.type == REF_MEM) { @@ -74,48 +81,70 @@ void ref_show(struct ref ref, char *dest) { switch (ref.type) { case REF_REG: if (ref.reg == REG_UNUSED) { - strcpy(dest, "<>"); + strcpy(buffer, "<>"); } else if (ref.reg < 0) { - strcpy(dest, special[-(ref.reg - REG_A)]); + strcpy(buffer, special[-(ref.reg - REG_A)]); } else { - sprintf(dest, "t%d", ref.reg); + sprintf(buffer, "t%d", ref.reg); } break; case REF_MEM: if (ref.reg == REG_UNUSED) { - sprintf(dest, "[0x%x%s]", ref.offset, suffix); + sprintf(buffer, "[0x%x%s]", ref.offset, suffix); } else if (ref.reg < 0) { if (ref.offset < 0) { - sprintf(dest, "[%s - %d%s]", special[-(ref.reg - REG_A)], -ref.offset, suffix); + sprintf(buffer, "[%s - %d%s]", special[-(ref.reg - REG_A)], -ref.offset, suffix); } else { - sprintf(dest, "[%s + %d%s]", special[-(ref.reg - REG_A)], ref.offset, suffix); + sprintf(buffer, "[%s + %d%s]", special[-(ref.reg - REG_A)], ref.offset, suffix); } } else { if (ref.offset < 0) { - sprintf(dest, "[t%d - %d%s]", ref.reg, -ref.offset, suffix); + sprintf(buffer, "[t%d - %d%s]", ref.reg, -ref.offset, suffix); } else { - sprintf(dest, "[t%d + %d%s]", ref.reg, ref.offset, suffix); + sprintf(buffer, "[t%d + %d%s]", ref.reg, ref.offset, suffix); } } break; case REF_IMM: - sprintf(dest, "%d", ref.imm); + sprintf(buffer, "%d", ref.imm); + break; + + case REF_VARNAME: { + int namelen = strlen(ref.name); + if (buffer_size < 1 + namelen) { + buffer_size = 1 + namelen; + buffer = realloc(buffer, 1 + namelen + 1); + } + buffer[0] = '@'; + memcpy(buffer + 1, ref.name, namelen + 1); break; + } default: assert(false); } + + return buffer; } static const char* ref_show_c(struct ref ref) { - static char cbuf[3][40]; + static char *cbuf[3] = {NULL, NULL, NULL}; + static int cbufsize[3] = {-1, -1, -1}; static int cbufi = 0; + const char *sbuf = ref_show(ref); + int len = strlen(sbuf); + if (cbufsize[cbufi] < len) { + cbufsize[cbufi] = len; + if (cbuf[cbufi]) cbuf[cbufi] = realloc(cbuf[cbufi], len + 1); + else cbuf[cbufi] = malloc(len + 1); + } + memcpy(cbuf[cbufi], sbuf, len + 1); + char *buf = cbuf[cbufi]; cbufi = (cbufi + 1) % 3; - ref_show(ref, buf); return buf; } @@ -141,6 +170,7 @@ void irins_print(struct irins *ins, FILE *f) { case INS_RETV: fprintf(f, "\tret %s\n", ref_show_c(ins->r1)); break; case INS_MOV: fprintf(f, "\t%s <- %s\n", ref_show_c(ins->r0), ref_show_c(ins->r1)); break; case INS_BRK: fprintf(f, "\tbrk\n"); break; + case INS_HWI: fprintf(f, "\thwi %s\n", ref_show_c(ins->r1)); break; default: assert(false); } } @@ -245,6 +275,7 @@ void irins_which_refs(const struct irins *ins, bool have[3]) { case INS_RETV: have[1] = true; break; case INS_MOV: have[0] = true; have[1] = true; break; case INS_BRK: break; + case INS_HWI: have[1] = true; break; default: assert(false); } } @@ -257,15 +288,19 @@ const char* gen_label_name(void) { } struct ref ref_reg(int reg) { - return (struct ref){REF_REG, reg, 0, REFREL_ZERO, 0}; + return (struct ref){REF_REG, reg, 0, REFREL_ZERO, 0, NULL}; } struct ref ref_mem(int reg, int offset, enum refrel rel) { - return (struct ref){REF_MEM, reg, offset, rel, 0}; + return (struct ref){REF_MEM, reg, offset, rel, 0, NULL}; } struct ref ref_imm(int imm) { - return (struct ref){REF_IMM, REG_UNUSED, 0, REFREL_ZERO, imm}; + return (struct ref){REF_IMM, REG_UNUSED, 0, REFREL_ZERO, imm, NULL}; +} + +struct ref ref_varname(const char *name) { + return (struct ref){REF_VARNAME, REG_UNUSED, 0, REFREL_ZERO, 0, ir_str(name)}; } struct ref ref_next_register(void) { @@ -279,6 +314,7 @@ bool ref_equal(struct ref r1, struct ref r2) { case REF_REG: return r1.reg == r2.reg; case REF_MEM: return r1.reg == r2.reg && r1.offset == r2.offset && r1.rel == r2.rel; case REF_IMM: return r1.imm == r2.imm; + case REF_VARNAME: return strcmp(r1.name, r2.name) == 0; default: assert(false); } } diff --git a/ir.h b/ir.h index 213793f..8496a06 100644 --- a/ir.h +++ b/ir.h @@ -27,6 +27,7 @@ enum instype { INS_RETV, // return r1 INS_MOV, // r0 = r1 INS_BRK, // halt + INS_HWI, // hwi r1 }; enum condcode { @@ -37,6 +38,9 @@ enum reftype { REF_REG, // reg REF_MEM, // [reg + offset + {0, .data, heap base}[rel]] REF_IMM, // imm + // This can only appear in user-entered asm literals and will be eliminated in the compiler. + // It should refer to a symbol that is in scope, otherwise an error will be reported. + REF_VARNAME, // name }; enum refrel { @@ -61,6 +65,7 @@ struct ref { int offset; enum refrel rel; int imm; + const char *name; // should be a return value from ir_str() }; struct irins { @@ -86,7 +91,7 @@ void ir_delete(struct ir *ir); void ir_print(struct ir *ir, FILE *f); void irins_print(struct irins *ins, FILE *f); const char* condcode_show(enum condcode condcode); -void ref_show(struct ref ref, char *dest); // dest must have size at least 40 +const char* ref_show(struct ref ref); // returns pointer to static buffer const char* ir_str(const char *str); // returns interned string @@ -113,6 +118,7 @@ const char* gen_label_name(void); struct ref ref_reg(int reg); struct ref ref_mem(int reg, int offset, enum refrel rel); struct ref ref_imm(int imm); +struct ref ref_varname(const char *name); struct ref ref_next_register(void); bool ref_equal(struct ref r1, struct ref r2); diff --git a/main.c b/main.c index 94d155e..61475dc 100644 --- a/main.c +++ b/main.c @@ -7,8 +7,6 @@ #include "regalloc.h" #include "to_assembly.h" -#undef DEBUG - extern FILE *yyin; extern int yyparse(void); diff --git a/node.c b/node.c index 3e7d945..dd0e3bb 100644 --- a/node.c +++ b/node.c @@ -13,6 +13,7 @@ struct node* node_make_0(enum node_type type) { node->name = NULL; node->value = 0; node->oper = -1; + node->irins = NULL; node->valuetype = NULL; return node; } @@ -61,6 +62,7 @@ void node_delete_recursive(struct node *node) { if (node->child2) node_delete_recursive(node->child2); if (node->child3) node_delete_recursive(node->child3); if (node->name) free(node->name); + if (node->irins) irins_delete(node->irins); free(node); } @@ -85,6 +87,7 @@ const char* node_type_string(enum node_type type) { case N_BINOP: return "N_BINOP"; break; case N_UNOP: return "N_UNOP"; break; case N_CALL: return "N_CALL"; break; + case N_IRINS: return "N_IRINS"; break; default: assert(false); } } @@ -114,6 +117,10 @@ const char* oper_string(enum operator oper) { void node_print(const struct node *node, FILE *f, int indent) { const char *t = node_type_string(node->type); fprintf(f, "(%s", t); + if (node->irins) { + fprintf(f, " "); + irins_print(node->irins, f); + } if (node->rtype) { fprintf(f, "[rtype="); type_print(node->rtype, f); diff --git a/node.h b/node.h index b6489d0..63575f8 100644 --- a/node.h +++ b/node.h @@ -1,6 +1,7 @@ #pragma once #include "type.h" +#include "ir.h" enum node_type { @@ -19,6 +20,7 @@ enum node_type { N_BINOP, // [left] oper [right] N_UNOP, // oper [arg] N_CALL, // name [args] + N_IRINS, // irins }; enum operator { @@ -38,6 +40,7 @@ struct node { char *name; int value; enum operator oper; + struct irins *irins; struct type *valuetype; // filled in by type checker }; diff --git a/test/t2.c b/test/t2.c new file mode 100644 index 0000000..1091313 --- /dev/null +++ b/test/t2.c @@ -0,0 +1,11 @@ +int dir = 0; + +void main() { + asm { + mov a, 2 + mov b, @dir + hwi 1 // legs + } + dir = dir + 1; + if (dir == 4) dir = 0; +} -- cgit v1.2.3