diff options
author | tomsmeding <tom.smeding@gmail.com> | 2016-08-04 21:53:16 +0200 |
---|---|---|
committer | tomsmeding <tom.smeding@gmail.com> | 2016-08-04 21:53:16 +0200 |
commit | e6bb770a52980ef3d85c2d4b93fb240c026ce7f7 (patch) | |
tree | 3fbfbad81222598292a6aaf70c7b7bd1f32272f8 | |
parent | f83ea28ae6a04f1121b8328f7bdc5dad94628328 (diff) |
Advance parser
-rw-r--r-- | LANGUAGE.txt | 26 | ||||
-rwxr-xr-x | genops.js | 88 | ||||
-rw-r--r-- | parser.c | 131 | ||||
-rw-r--r-- | parser.h | 9 |
4 files changed, 230 insertions, 24 deletions
diff --git a/LANGUAGE.txt b/LANGUAGE.txt index 9d72f08..0e7066b 100644 --- a/LANGUAGE.txt +++ b/LANGUAGE.txt @@ -1,21 +1,21 @@ Statements are terminated by ';'. The usual infix expression rules apply, with the following precedence table: -(lower precedence number means tighter binding) +(higher precedence number means tighter binding) Operators Precedence Associativity - ** 1 Right - - ! ~ 2 Prefix unary - * / % 3 Left - + - 4 Left - & 5 Left - ^ 6 Left + ** 14 Right + - ! ~ 12 Prefix (unary) + * / // % 11 Left + + - 10 Left + & 9 Left + ^ 8 Left | 7 Left - < > <= >= 8 Nonassociative - == != 9 Nonassociative - && 10 Left (short-circuiting) - ^^ 11 Left - || 12 Left (short-circuiting) - = 13 Right (also += -= *= /= %= **= &= ^= |=) + < > <= >= 6 Nonassociative + == != 5 Nonassociative + && 4 Left (short-circuiting) + ^^ 3 Left + || 2 Left (short-circuiting) + = 1 Right (also += -= *= /= %= **= &= ^= |=) break and continue get parsed to calls to the __break() and __continue() diff --git a/genops.js b/genops.js new file mode 100755 index 0000000..33b688d --- /dev/null +++ b/genops.js @@ -0,0 +1,88 @@ +#!/usr/bin/env node +const fs=require("fs"); + + +function print(/*arguments*/){ + //console.log.apply(console,arguments); + process.stdout.write.apply(process.stdout,arguments); +} + +function pad(s,w,c){ + s=s+""; + if(s.length>=Math.abs(w))return s; + if(w<0)return s+Array(-w-s.length+1).join(c?c:" "); + else return Array(w-s.length+1).join(c?c:" ")+s; +} + + +function readopmap(fname){ + let table=String(fs.readFileSync(fname)).split("\n"); + let i; + for(i=0;i<table.length;i++){ + if(/Oper.*Prec.*Assoc/.test(table[i]))break; + } + let header=table[i]; + table=table.slice(i+1); + for(i=0;i<table.length;i++){ + if(table[i].length==0)break; + } + table=table.slice(0,i); + + let opsidx=header.indexOf("Oper"), + precidx=header.indexOf("Prec"), + associdx=header.indexOf("Assoc"); + + let opmap={}; + + for(let row of table){ + let ops=row.slice(opsidx,precidx).trim().split(" "), + prec=parseInt(row.slice(precidx,associdx).trim(),10), + assoc=row.slice(associdx).replace(/ *([^ ]+).*/,"$1"); + for(let op of ops){ + opmap[op]={prec,assoc}; + } + } + return opmap; +} + +function outputfunc(opmap,name,gen,padw){ + print("int "+name+"(const char *op){\n"); + print("\tswitch(op[0]){\n"); + + let firstchars={}; + for(let k in opmap){ + if(firstchars[k[0]])firstchars[k[0]].push(k); + else firstchars[k[0]]=[k]; + } + let arr=[]; + for(k in firstchars){ + arr.push([k,firstchars[k].sort()]); + } + arr=arr.sort(); + for(let tup of arr){ + let k=tup[0],ops=tup[1]; + print("\t\tcase '"+k+"': return "); + for(let op of ops){ + let cond=""; + for(j=1;j<op.length;j++)cond+="op["+j+"]=='"+op[j]+"'&&"; + cond+="op["+j+"]=='\\0'"; + print(cond+" ? "+pad(gen(opmap[op]),padw)+" : "); + } + print("-1;\n"); + } + + print("\t\tdefault: return -1;\n"); + print("\t}\n"); + print("}\n"); +} + + +const opmap=readopmap("LANGUAGE.txt"); +outputfunc(opmap,"precedence",o=>o.prec,2); +print("\n"); +const assocenum={ + "Prefix": "AS_PREFIX", "Suffix": "AS_SUFFIX", + "Left": "AS_LEFT", "Right": "AS_RIGHT", + "Nonassociative": "AS_NONASSOC" +}; +outputfunc(opmap,"associativity",o=>assocenum[o.assoc],-11); @@ -7,6 +7,115 @@ #include "parser.h" +typedef enum Tokentype{ + TT_NUM, + TT_STR, + TT_WORD, + TT_SYM +} Tokentype; + +typedef struct Token{ + const char *str; + int len; +} Token; + +Token nexttoken(const char **sourcep){ + const char *source=*sourcep; + while(isspace(*source))source++; + if(isdigit(*source)||(*source=='-'&&isdigit(source[1]))){ + char *endp; + strtod(source,&endp); + assert(endp!=source); + Token tok={source,endp-source}; + return tok; + } +} + + +int precedence(const char *op){ + switch(op[0]){ + case '!': return op[1]=='\0' ? 12 : op[1]=='='&&op[2]=='\0' ? 5 : -1; + case '%': return op[1]=='\0' ? 11 : -1; + case '&': return op[1]=='\0' ? 9 : op[1]=='&'&&op[2]=='\0' ? 4 : -1; + case '*': return op[1]=='\0' ? 11 : op[1]=='*'&&op[2]=='\0' ? 14 : -1; + case '+': return op[1]=='\0' ? 10 : -1; + case '-': return op[1]=='\0' ? 10 : -1; + case '/': return op[1]=='\0' ? 11 : op[1]=='/'&&op[2]=='\0' ? 11 : -1; + case '<': return op[1]=='\0' ? 6 : op[1]=='='&&op[2]=='\0' ? 6 : -1; + case '=': return op[1]=='\0' ? 1 : op[1]=='='&&op[2]=='\0' ? 5 : -1; + case '>': return op[1]=='\0' ? 6 : op[1]=='='&&op[2]=='\0' ? 6 : -1; + case '^': return op[1]=='\0' ? 8 : op[1]=='^'&&op[2]=='\0' ? 3 : -1; + case '|': return op[1]=='\0' ? 7 : op[1]=='|'&&op[2]=='\0' ? 2 : -1; + case '~': return op[1]=='\0' ? 12 : -1; + default: return -1; + } +} + +int associativity(const char *op){ + switch(op[0]){ + case '!': return op[1]=='\0' ? AS_PREFIX : op[1]=='='&&op[2]=='\0' ? AS_NONASSOC : -1; + case '%': return op[1]=='\0' ? AS_LEFT : -1; + case '&': return op[1]=='\0' ? AS_LEFT : op[1]=='&'&&op[2]=='\0' ? AS_LEFT : -1; + case '*': return op[1]=='\0' ? AS_LEFT : op[1]=='*'&&op[2]=='\0' ? AS_RIGHT : -1; + case '+': return op[1]=='\0' ? AS_LEFT : -1; + case '-': return op[1]=='\0' ? AS_LEFT : -1; + case '/': return op[1]=='\0' ? AS_LEFT : op[1]=='/'&&op[2]=='\0' ? AS_LEFT : -1; + case '<': return op[1]=='\0' ? AS_NONASSOC : op[1]=='='&&op[2]=='\0' ? AS_NONASSOC : -1; + case '=': return op[1]=='\0' ? AS_RIGHT : op[1]=='='&&op[2]=='\0' ? AS_NONASSOC : -1; + case '>': return op[1]=='\0' ? AS_NONASSOC : op[1]=='='&&op[2]=='\0' ? AS_NONASSOC : -1; + case '^': return op[1]=='\0' ? AS_LEFT : op[1]=='^'&&op[2]=='\0' ? AS_LEFT : -1; + case '|': return op[1]=='\0' ? AS_LEFT : op[1]=='|'&&op[2]=='\0' ? AS_LEFT : -1; + case '~': return op[1]=='\0' ? AS_PREFIX : -1; + default: return -1; + } +} + + +static bool parsecomment(const char *source,int *reslen){ + int cursor=0; + if(source[cursor]!='#')return false; + if(source[cursor+1]=='#'&&source[cursor+2]=='#'){ + cursor+=3; + while(source[cursor]&& + (source[cursor]!='#'||source[cursor+1]!='#'||source[cursor+2]!='#')){ + cursor++; + } + if(!source[cursor])return false; //unclosed block comment + cursor+=2; + } else { + while(source[cursor]&&source[cursor]!='\n')cursor++; + if(source[cursor])cursor++; + } + *reslen=cursor; + return true; +} + +static void parseintermediate(const char *source,int *reslen){ + int cursor=0; + bool acted; + do { + acted=false; + while(source[cursor]&&isspace(source[cursor])){ + cursor++; + acted=true; + } + int partlen; + if(parsecomment(source+cursor,&partlen)){ + cursor+=partlen; + acted=true; + } + } while(acted); + *reslen=cursor; +} + +static AST* parseexpr(const char *source,int *reslen,int minprec){ + ; +} + +static AST* parsestmt(const char *source,int *reslen){ + return parseexpr(source,reslen,0); +} + ASTblock* parse(const char *source){ ASTblock *bl=malloc(sizeof(ASTblock)); int sz=32; @@ -21,7 +130,7 @@ ASTblock* parse(const char *source){ bl->exprs=realloc(bl->exprs,sz*sizeof(AST*)); if(!bl->exprs)outofmem(); } - while(source[cursor]&isspace(source[cursor]))cursor++; + parseintermediate(source+cursor,&reslen); if(!source[cursor])break; AST *node=parsestmt(source+cursor,&reslen); if(!node){ @@ -34,15 +143,15 @@ ASTblock* parse(const char *source){ return bl; } -void ast_free(AST *ast){ - switch(ast->type){ - case AST_BLOCK:{ ASTblock *ast=ast; +void ast_free(AST *ast_){ + switch(ast_->type){ + case AST_BLOCK:{ ASTblock *ast=(ASTblock*)ast_; for(int i=0;i<ast->len;i++)if(ast->exprs[i])ast_free(ast->exprs[i]); free(ast->exprs); break; } - case AST_OP:{ ASTop *ast=ast; + case AST_OP:{ ASTop *ast=(ASTop*)ast_; if(ast->left)ast_free(ast->left); if(ast->right)ast_free(ast->right); break; @@ -51,35 +160,35 @@ void ast_free(AST *ast){ case AST_NUM: break; - case AST_STR:{ ASTstr *ast=ast; + case AST_STR:{ ASTstr *ast=(ASTstr*)ast_; if(ast->str)free(ast->str); break; } - case AST_VAR:{ ASTvar *ast=ast; + case AST_VAR:{ ASTvar *ast=(ASTvar*)ast_; if(ast->name)free(ast->name); break; } - case AST_CALL:{ ASTcall *ast=ast; + case AST_CALL:{ ASTcall *ast=(ASTcall*)ast_; if(ast->func)free(ast->func); for(int i=0;i<ast->nargs;i++)if(ast->args[i])ast_free(ast->args[i]); free(ast->args); break; } - case AST_IF:{ ASTif *ast=ast; + case AST_IF:{ ASTif *ast=(ASTif*)ast_; if(ast->cond)free(ast->cond); if(ast->thenb)free(ast->thenb); if(ast->elseb)free(ast->elseb); break; } - case AST_WHILE:{ ASTwhile *ast=ast; + case AST_WHILE:{ ASTwhile *ast=(ASTwhile*)ast_; if(ast->cond)free(ast->cond); if(ast->body)free(ast->body); break; } } - free(ast); + free(ast_); } @@ -77,5 +77,14 @@ typedef struct AST{ } AST; +typedef enum Associativity{ + AS_PREFIX, + AS_SUFFIX, + AS_LEFT, + AS_RIGHT, + AS_NONASSOC +} Associativity; + + ASTblock* parse(const char *source); void ast_free(AST *ast); |