summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author tomsmeding <tom.smeding@gmail.com> 2016-08-04 21:53:16 +0200
committer tomsmeding <tom.smeding@gmail.com> 2016-08-04 21:53:16 +0200
commit e6bb770a52980ef3d85c2d4b93fb240c026ce7f7 (patch)
tree 3fbfbad81222598292a6aaf70c7b7bd1f32272f8
parent f83ea28ae6a04f1121b8328f7bdc5dad94628328 (diff)
Advance parser
-rw-r--r-- LANGUAGE.txt 26
-rwxr-xr-x genops.js 88
-rw-r--r-- parser.c 131
-rw-r--r-- parser.h 9
4 files changed, 230 insertions, 24 deletions
diff --git a/LANGUAGE.txt b/LANGUAGE.txt
index 9d72f08..0e7066b 100644
--- a/LANGUAGE.txt
+++ b/LANGUAGE.txt
@@ -1,21 +1,21 @@
Statements are terminated by ';'.
The usual infix expression rules apply, with the following precedence table:
-(lower precedence number means tighter binding)
+(higher precedence number means tighter binding)
Operators Precedence Associativity
- ** 1 Right
- - ! ~ 2 Prefix unary
- * / % 3 Left
- + - 4 Left
- & 5 Left
- ^ 6 Left
+ ** 14 Right
+ - ! ~ 12 Prefix (unary)
+ * / // % 11 Left
+ + - 10 Left
+ & 9 Left
+ ^ 8 Left
| 7 Left
- < > <= >= 8 Nonassociative
- == != 9 Nonassociative
- && 10 Left (short-circuiting)
- ^^ 11 Left
- || 12 Left (short-circuiting)
- = 13 Right (also += -= *= /= %= **= &= ^= |=)
+ < > <= >= 6 Nonassociative
+ == != 5 Nonassociative
+ && 4 Left (short-circuiting)
+ ^^ 3 Left
+ || 2 Left (short-circuiting)
+ = 1 Right (also += -= *= /= %= **= &= ^= |=)
break and continue get parsed to calls to the __break() and __continue()
diff --git a/genops.js b/genops.js
new file mode 100755
index 0000000..33b688d
--- /dev/null
+++ b/genops.js
@@ -0,0 +1,88 @@
+#!/usr/bin/env node
+const fs=require("fs");
+
+
+// Write the given text to stdout exactly as-is (unlike console.log, nothing
+// is appended). Every argument is forwarded unchanged to process.stdout.write.
+function print(...args){
+	process.stdout.write(...args);
+}
+
+// Pad the string form of `s` to |w| characters using the fill string `c`
+// (a single space when `c` is falsy).
+//   w >= 0: fill is added on the left  (right-aligned result)
+//   w <  0: fill is added on the right (left-aligned result)
+// Values already at least |w| long are returned unchanged (never truncated).
+function pad(s,w,c){
+	const text=""+s;
+	const width=Math.abs(w);
+	if(text.length>=width)return text;
+	const filler=Array(width-text.length+1).join(c?c:" ");
+	return w<0 ? text+filler : filler+text;
+}
+
+
+// Parse the operator table in the text file `fname` into a map
+//   operator string -> {prec: <integer>, assoc: <first word of the Assoc column>}
+// The table starts on the line after the header line matching
+// "Oper...Prec...Assoc" and ends at the first empty line (or end of file).
+// Columns are located by the character offsets of those three header words,
+// so every table row must be aligned with the header.
+// If an operator is listed in several rows, the last row wins.
+// Throws an Error when the file contains no table header.
+function readopmap(fname){
+	const lines=String(fs.readFileSync(fname)).split("\n");
+
+	const headeridx=lines.findIndex(line=>/Oper.*Prec.*Assoc/.test(line));
+	if(headeridx===-1){
+		// Without this check the code below would fail on undefined.indexOf
+		throw new Error("No operator table header (Oper.../Prec.../Assoc...) found in "+fname);
+	}
+	const header=lines[headeridx];
+
+	let table=lines.slice(headeridx+1);
+	const endidx=table.findIndex(line=>line.length===0);
+	if(endidx!==-1)table=table.slice(0,endidx);
+
+	const opsidx=header.indexOf("Oper"),
+	      precidx=header.indexOf("Prec"),
+	      associdx=header.indexOf("Assoc");
+
+	const opmap={};
+
+	for(const row of table){
+		// Split on runs of whitespace and drop empties, so that wider spacing
+		// between operators cannot produce an empty-string "operator".
+		const ops=row.slice(opsidx,precidx).trim().split(/\s+/).filter(op=>op.length>0),
+		      prec=parseInt(row.slice(precidx,associdx).trim(),10),
+		      // Keep only the first word, e.g. "Left (short-circuiting)" -> "Left"
+		      assoc=row.slice(associdx).replace(/ *([^ ]+).*/,"$1");
+		for(const op of ops){
+			opmap[op]={prec,assoc};
+		}
+	}
+	return opmap;
+}
+
+// Print to stdout the C source of a lookup function
+//   int <name>(const char *op)
+// that switches on the first character of `op` and then tests the remaining
+// characters (including the terminating '\0') with a chain of ?: expressions.
+// The generated function returns -1 for any operator not in the table.
+//   opmap: operator string -> info object (as returned by readopmap)
+//   name:  name of the generated C function
+//   gen:   callback(info) producing the C expression to return for an operator
+//   padw:  width handed to pad() for each generated value (negative = left-aligned)
+// Cases and the operators within a case are emitted in sorted order, so the
+// output is deterministic.
+function outputfunc(opmap,name,gen,padw){
+	print("int "+name+"(const char *op){\n");
+	print("\tswitch(op[0]){\n");
+
+	// Group the operators by their first character
+	const firstchars={};
+	for(const op in opmap){
+		if(firstchars[op[0]])firstchars[op[0]].push(op);
+		else firstchars[op[0]]=[op];
+	}
+	const arr=[];
+	for(const ch in firstchars){
+		arr.push([ch,firstchars[ch].sort()]);
+	}
+	arr.sort();
+	for(const tup of arr){
+		const ch=tup[0],ops=tup[1];
+		print("\t\tcase '"+ch+"': return ");
+		for(const op of ops){
+			let cond="";
+			// j is declared outside the loop because its final value (op.length)
+			// is needed for the terminating '\0' test below
+			let j;
+			for(j=1;j<op.length;j++)cond+="op["+j+"]=='"+op[j]+"'&&";
+			cond+="op["+j+"]=='\\0'";
+			print(cond+" ? "+pad(gen(opmap[op]),padw)+" : ");
+		}
+		print("-1;\n");
+	}
+
+	print("\t\tdefault: return -1;\n");
+	print("\t}\n");
+	print("}\n");
+}
+
+
+// Script body: read the operator table from LANGUAGE.txt and print the C
+// functions precedence() and associativity(), separated by a blank line.
+
+// First word of the table's associativity column -> C enum constant name
+const assocenum={
+	"Prefix": "AS_PREFIX",
+	"Suffix": "AS_SUFFIX",
+	"Left": "AS_LEFT",
+	"Right": "AS_RIGHT",
+	"Nonassociative": "AS_NONASSOC"
+};
+
+const opmap=readopmap("LANGUAGE.txt");
+outputfunc(opmap,"precedence",info=>info.prec,2);
+print("\n");
+outputfunc(opmap,"associativity",info=>assocenum[info.assoc],-11);
diff --git a/parser.c b/parser.c
index e099e98..96b9bf7 100644
--- a/parser.c
+++ b/parser.c
@@ -7,6 +7,115 @@
#include "parser.h"
+/* Kinds of lexical token.
+ * NOTE(review): nothing visible in this diff uses this enum yet (Token has no
+ * type field) -- presumably number / string / word / symbol for the tokenizer
+ * that is still being written; confirm once nexttoken() is completed. */
+typedef enum Tokentype{
+ TT_NUM,
+ TT_STR,
+ TT_WORD,
+ TT_SYM
+} Tokentype;
+
+/* A token, represented as a slice of text.
+ * As filled in by nexttoken(), str points directly into the source text (so
+ * it is not NUL-terminated at the token's end) and len is the number of
+ * characters belonging to the token. */
+typedef struct Token{
+ const char *str;
+ int len;
+} Token;
+
+/* Scans one token from the text at *sourcep, after skipping leading whitespace.
+ * Only numbers are recognised so far: a digit, or a '-' immediately followed
+ * by a digit, starts a number whose extent is whatever strtod() consumes.
+ * The returned token points into the source text (no copy is made).
+ * If no number starts at that position -- including at the end of the input --
+ * a zero-length token pointing at the first non-whitespace character is
+ * returned, so callers can test tok.len==0.
+ * NOTE(review): *sourcep is only read, never advanced past the token; confirm
+ * whether advancing is meant to happen here or in the caller. */
+Token nexttoken(const char **sourcep){
+	const char *source=*sourcep;
+	/* The <ctype.h> functions are only defined for unsigned char values (or
+	 * EOF), so plain char must be converted before the call. */
+	while(isspace((unsigned char)*source))source++;
+	if(isdigit((unsigned char)*source)||(*source=='-'&&isdigit((unsigned char)source[1]))){
+		char *endp;
+		strtod(source,&endp);
+		assert(endp!=source);
+		Token tok={source,(int)(endp-source)};
+		return tok;
+	}
+	/* Nothing recognised: return an explicit empty token rather than falling
+	 * off the end of a non-void function. */
+	Token none={source,0};
+	return none;
+}
+
+
+/* Returns the precedence number of the operator string op (a higher number
+ * binds tighter, per LANGUAGE.txt), or -1 if op is not a known operator.
+ * The whole string must match, including its terminating '\0'.
+ * This function has the exact shape printed by genops.js from the table in
+ * LANGUAGE.txt; regenerate it from there rather than editing it by hand.
+ * Note that "-" yields 10 (the binary operator's row), not the prefix row's 12. */
+int precedence(const char *op){
+ switch(op[0]){
+ case '!': return op[1]=='\0' ? 12 : op[1]=='='&&op[2]=='\0' ? 5 : -1;
+ case '%': return op[1]=='\0' ? 11 : -1;
+ case '&': return op[1]=='\0' ? 9 : op[1]=='&'&&op[2]=='\0' ? 4 : -1;
+ case '*': return op[1]=='\0' ? 11 : op[1]=='*'&&op[2]=='\0' ? 14 : -1;
+ case '+': return op[1]=='\0' ? 10 : -1;
+ case '-': return op[1]=='\0' ? 10 : -1;
+ case '/': return op[1]=='\0' ? 11 : op[1]=='/'&&op[2]=='\0' ? 11 : -1;
+ case '<': return op[1]=='\0' ? 6 : op[1]=='='&&op[2]=='\0' ? 6 : -1;
+ case '=': return op[1]=='\0' ? 1 : op[1]=='='&&op[2]=='\0' ? 5 : -1;
+ case '>': return op[1]=='\0' ? 6 : op[1]=='='&&op[2]=='\0' ? 6 : -1;
+ case '^': return op[1]=='\0' ? 8 : op[1]=='^'&&op[2]=='\0' ? 3 : -1;
+ case '|': return op[1]=='\0' ? 7 : op[1]=='|'&&op[2]=='\0' ? 2 : -1;
+ case '~': return op[1]=='\0' ? 12 : -1;
+ default: return -1;
+ }
+}
+
+/* Returns the associativity of the operator string op as one of the AS_*
+ * constants, or -1 if op is not a known operator. The whole string must
+ * match, including its terminating '\0'.
+ * This function has the exact shape printed by genops.js from the table in
+ * LANGUAGE.txt; regenerate it from there rather than editing it by hand.
+ * Note that "-" yields AS_LEFT (the binary operator's row), not AS_PREFIX. */
+int associativity(const char *op){
+ switch(op[0]){
+ case '!': return op[1]=='\0' ? AS_PREFIX : op[1]=='='&&op[2]=='\0' ? AS_NONASSOC : -1;
+ case '%': return op[1]=='\0' ? AS_LEFT : -1;
+ case '&': return op[1]=='\0' ? AS_LEFT : op[1]=='&'&&op[2]=='\0' ? AS_LEFT : -1;
+ case '*': return op[1]=='\0' ? AS_LEFT : op[1]=='*'&&op[2]=='\0' ? AS_RIGHT : -1;
+ case '+': return op[1]=='\0' ? AS_LEFT : -1;
+ case '-': return op[1]=='\0' ? AS_LEFT : -1;
+ case '/': return op[1]=='\0' ? AS_LEFT : op[1]=='/'&&op[2]=='\0' ? AS_LEFT : -1;
+ case '<': return op[1]=='\0' ? AS_NONASSOC : op[1]=='='&&op[2]=='\0' ? AS_NONASSOC : -1;
+ case '=': return op[1]=='\0' ? AS_RIGHT : op[1]=='='&&op[2]=='\0' ? AS_NONASSOC : -1;
+ case '>': return op[1]=='\0' ? AS_NONASSOC : op[1]=='='&&op[2]=='\0' ? AS_NONASSOC : -1;
+ case '^': return op[1]=='\0' ? AS_LEFT : op[1]=='^'&&op[2]=='\0' ? AS_LEFT : -1;
+ case '|': return op[1]=='\0' ? AS_LEFT : op[1]=='|'&&op[2]=='\0' ? AS_LEFT : -1;
+ case '~': return op[1]=='\0' ? AS_PREFIX : -1;
+ default: return -1;
+ }
+}
+
+
+/* Tries to parse a comment that starts exactly at source[0].
+ * Two forms are recognised:
+ *   - block comment: opened by "###" and closed by the next "###";
+ *   - line comment: '#' up to and including the next newline (or up to the
+ *     end of the input if there is no newline).
+ * On success returns true and stores the number of characters consumed,
+ * delimiters included, in *reslen.
+ * Returns false and leaves *reslen untouched if source does not start with
+ * '#' or if a block comment is never closed. */
+static bool parsecomment(const char *source,int *reslen){
+	int cursor=0;
+	if(source[cursor]!='#')return false;
+	if(source[cursor+1]=='#'&&source[cursor+2]=='#'){
+		cursor+=3;
+		/* The || chain short-circuits at a NUL, so this never reads past the
+		 * end of the string. */
+		while(source[cursor]&&
+				(source[cursor]!='#'||source[cursor+1]!='#'||source[cursor+2]!='#')){
+			cursor++;
+		}
+		if(!source[cursor])return false; //unclosed block comment
+		/* Skip all three characters of the closing "###". Skipping only two
+		 * would leave the last '#' to be re-read as a line comment. */
+		cursor+=3;
+	} else {
+		while(source[cursor]&&source[cursor]!='\n')cursor++;
+		if(source[cursor])cursor++;
+	}
+	*reslen=cursor;
+	return true;
+}
+
+/* Skips everything that may appear between tokens: any mix of whitespace
+ * and comments (as recognised by parsecomment) starting at source[0].
+ * Stores the number of characters skipped in *reslen; this is 0 when source
+ * starts with neither whitespace nor a comment. */
+static void parseintermediate(const char *source,int *reslen){
+	int cursor=0;
+	bool acted;
+	/* Alternate between whitespace and comments until a full pass consumes
+	 * nothing. */
+	do {
+		acted=false;
+		/* isspace() is only defined for unsigned char values (or EOF); the
+		 * cast keeps non-ASCII bytes from being passed as negative ints. */
+		while(source[cursor]&&isspace((unsigned char)source[cursor])){
+			cursor++;
+			acted=true;
+		}
+		int partlen;
+		if(parsecomment(source+cursor,&partlen)){
+			cursor+=partlen;
+			acted=true;
+		}
+	} while(acted);
+	*reslen=cursor;
+}
+
+/* Parses an expression starting at source[0].
+ * NOTE(review): not implemented yet. Judging by the signature, reslen is
+ * meant to receive the consumed length and minprec the lowest operator
+ * precedence to accept -- confirm when the body is written.
+ * Until then this always reports failure by returning NULL (and leaves
+ * *reslen untouched), instead of flowing off the end of a non-void function
+ * whose result the caller uses. */
+static AST* parseexpr(const char *source,int *reslen,int minprec){
+	(void)source;
+	(void)reslen;
+	(void)minprec;
+	return NULL;
+}
+
+/* Parses one statement starting at source[0]. For now a statement is just an
+ * expression parsed with a minimum precedence of 0; source and reslen are
+ * passed straight through to parseexpr and its result is returned as-is. */
+static AST* parsestmt(const char *source,int *reslen){
+	AST *stmt=parseexpr(source,reslen,0);
+	return stmt;
+}
+
ASTblock* parse(const char *source){
ASTblock *bl=malloc(sizeof(ASTblock));
int sz=32;
@@ -21,7 +130,7 @@ ASTblock* parse(const char *source){
bl->exprs=realloc(bl->exprs,sz*sizeof(AST*));
if(!bl->exprs)outofmem();
}
- while(source[cursor]&isspace(source[cursor]))cursor++;
+ parseintermediate(source+cursor,&reslen);
if(!source[cursor])break;
AST *node=parsestmt(source+cursor,&reslen);
if(!node){
@@ -34,15 +143,15 @@ ASTblock* parse(const char *source){
return bl;
}
-void ast_free(AST *ast){
- switch(ast->type){
- case AST_BLOCK:{ ASTblock *ast=ast;
+void ast_free(AST *ast_){
+ switch(ast_->type){
+ case AST_BLOCK:{ ASTblock *ast=(ASTblock*)ast_;
for(int i=0;i<ast->len;i++)if(ast->exprs[i])ast_free(ast->exprs[i]);
free(ast->exprs);
break;
}
- case AST_OP:{ ASTop *ast=ast;
+ case AST_OP:{ ASTop *ast=(ASTop*)ast_;
if(ast->left)ast_free(ast->left);
if(ast->right)ast_free(ast->right);
break;
@@ -51,35 +160,35 @@ void ast_free(AST *ast){
case AST_NUM:
break;
- case AST_STR:{ ASTstr *ast=ast;
+ case AST_STR:{ ASTstr *ast=(ASTstr*)ast_;
if(ast->str)free(ast->str);
break;
}
- case AST_VAR:{ ASTvar *ast=ast;
+ case AST_VAR:{ ASTvar *ast=(ASTvar*)ast_;
if(ast->name)free(ast->name);
break;
}
- case AST_CALL:{ ASTcall *ast=ast;
+ case AST_CALL:{ ASTcall *ast=(ASTcall*)ast_;
if(ast->func)free(ast->func);
for(int i=0;i<ast->nargs;i++)if(ast->args[i])ast_free(ast->args[i]);
free(ast->args);
break;
}
- case AST_IF:{ ASTif *ast=ast;
+ case AST_IF:{ ASTif *ast=(ASTif*)ast_;
if(ast->cond)free(ast->cond);
if(ast->thenb)free(ast->thenb);
if(ast->elseb)free(ast->elseb);
break;
}
- case AST_WHILE:{ ASTwhile *ast=ast;
+ case AST_WHILE:{ ASTwhile *ast=(ASTwhile*)ast_;
if(ast->cond)free(ast->cond);
if(ast->body)free(ast->body);
break;
}
}
- free(ast);
+ free(ast_);
}
diff --git a/parser.h b/parser.h
index a1b655f..7d9b347 100644
--- a/parser.h
+++ b/parser.h
@@ -77,5 +77,14 @@ typedef struct AST{
} AST;
+/* Operator associativity classes; these are the values returned by
+ * associativity() in parser.c. The names correspond to the first word of the
+ * Associativity column in LANGUAGE.txt ("Prefix", "Suffix", "Left", "Right",
+ * "Nonassociative"), as mapped by genops.js. */
+typedef enum Associativity{
+ AS_PREFIX,
+ AS_SUFFIX,
+ AS_LEFT,
+ AS_RIGHT,
+ AS_NONASSOC
+} Associativity;
+
+
ASTblock* parse(const char *source);
void ast_free(AST *ast);