summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: tomsmeding <tom.smeding@gmail.com> 2016-08-05 20:26:05 +0200
committer: tomsmeding <tom.smeding@gmail.com> 2016-08-06 10:10:32 +0200
commit: f67988fbfde6ad8a91466ef5d4227dcf9e5db6ce (patch)
tree: db85d3936f717331c3eeed4fae43e5ed43324be9
parent: e6bb770a52980ef3d85c2d4b93fb240c026ce7f7 (diff)
Working preliminary version of parser
-rw-r--r-- LANGUAGE.txt 31
-rw-r--r-- Makefile 5
-rw-r--r-- code.txt 1
-rwxr-xr-x genops.js 94
-rw-r--r-- main.c 8
-rw-r--r-- opfuncs.c 127
-rw-r--r-- opfuncs.h 8
-rw-r--r-- parser.c 538
-rw-r--r-- parser.h 4
9 files changed, 677 insertions, 139 deletions
diff --git a/LANGUAGE.txt b/LANGUAGE.txt
index 0e7066b..3259182 100644
--- a/LANGUAGE.txt
+++ b/LANGUAGE.txt
@@ -2,20 +2,23 @@ Statements are terminated by ';'.
The usual infix expression rules apply, with the following precedence table:
(higher precedence number means tighter binding)
- Operators Precedence Associativity
- ** 14 Right
- - ! ~ 12 Prefix (unary)
- * / // % 11 Left
- + - 10 Left
- & 9 Left
- ^ 8 Left
- | 7 Left
- < > <= >= 6 Nonassociative
- == != 5 Nonassociative
- && 4 Left (short-circuiting)
- ^^ 3 Left
- || 2 Left (short-circuiting)
- = 1 Right (also += -= *= /= %= **= &= ^= |=)
+ Operators Precedence Associativity
+ = += -= *= 1 Right
+ /= //= %= **= 1 Right
+ &= ^= |= 1 Right
+ || 2 Left (short-circuiting)
+ ^^ 3 Left
+ && 4 Left (short-circuiting)
+ == != 5 Nonassociative
+ < > <= >= 6 Nonassociative
+ | 7 Left
+ ^ 8 Left
+ & 9 Left
+ + - 10 Left
+ * / // % 11 Left
+ (-) ! ~ 12 Prefix (unary)
+ (!) 13 Suffix (unary)
+ ** 14 Right
break and continue get parsed to calls to the __break() and __continue()
diff --git a/Makefile b/Makefile
index 55b2ef8..68912f8 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
CC = gcc
-CFLAGS = -Wall -Wextra -std=c11 -O2 -fwrapv
+CFLAGS = -Wall -Wextra -std=c11 -g -fwrapv
BIN = main
.PHONY: all clean remake
@@ -7,7 +7,8 @@ BIN = main
all: $(BIN)
clean:
- rm -f $(BIN) *.o *.dSYM
+ rm -f $(BIN) *.o
+ rm -rf *.dSYM
remake: clean all
diff --git a/code.txt b/code.txt
index 471fb1b..3b04c0f 100644
--- a/code.txt
+++ b/code.txt
@@ -1,2 +1,3 @@
a = 1;
b = 2;
+c = 1 + -x - 3 > -1;
diff --git a/genops.js b/genops.js
index 33b688d..9b88cbc 100755
--- a/genops.js
+++ b/genops.js
@@ -3,7 +3,6 @@ const fs=require("fs");
function print(/*arguments*/){
- //console.log.apply(console,arguments);
process.stdout.write.apply(process.stdout,arguments);
}
@@ -45,8 +44,12 @@ function readopmap(fname){
return opmap;
}
-function outputfunc(opmap,name,gen,padw){
- print("int "+name+"(const char *op){\n");
+function outputfunc(_ /*opmap,name,gen,padw,dolen,rettype,defval,checkend*/){
+ let opmap=_.opmap, name=_.name, gen=_.gen,
+ padw=_.padw, dolen=_.dolen, rettype=_.rettype,
+ defval=_.defval, checkend=_.checkend;
+ print("\n"+rettype+" "+name+"(const char *op"+(dolen?",const int len":"")+"){\n");
+ if(dolen)print("\tif(len<=0)return "+defval+";\n");
print("\tswitch(op[0]){\n");
let firstchars={};
@@ -56,33 +59,98 @@ function outputfunc(opmap,name,gen,padw){
}
let arr=[];
for(k in firstchars){
- arr.push([k,firstchars[k].sort()]);
+ arr.push([k,firstchars[k].sort(function(a,b){
+ return b.length-a.length;
+ })]);
}
arr=arr.sort();
for(let tup of arr){
let k=tup[0],ops=tup[1];
print("\t\tcase '"+k+"': return ");
+ let expr="";
for(let op of ops){
- let cond="";
- for(j=1;j<op.length;j++)cond+="op["+j+"]=='"+op[j]+"'&&";
- cond+="op["+j+"]=='\\0'";
- print(cond+" ? "+pad(gen(opmap[op]),padw)+" : ");
+ let cond;
+ if(dolen){
+ cond="len=="+op.length;
+ for(j=1;j<op.length;j++)cond+="&&op["+j+"]=='"+op[j]+"'";
+ } else {
+ cond="";
+ for(j=1;j<op.length;j++)cond+="op["+j+"]=='"+op[j]+"'&&";
+ if(checkend)cond+="!op["+j+"]";
+ else cond=cond.slice(0,-2);
+ if(cond=="")cond="true";
+ }
+ expr+=cond+"?"+pad(gen(op,opmap[op]),padw)+":";
}
- print("-1;\n");
+ expr+=defval;
+ expr=expr.replace(/true\?([^:]*):[^)]*/,"$1");
+ print(expr+";\n");
}
- print("\t\tdefault: return -1;\n");
+ print("\t\tdefault: return "+defval+";\n");
print("\t}\n");
print("}\n");
}
const opmap=readopmap("LANGUAGE.txt");
-outputfunc(opmap,"precedence",o=>o.prec,2);
-print("\n");
const assocenum={
"Prefix": "AS_PREFIX", "Suffix": "AS_SUFFIX",
"Left": "AS_LEFT", "Right": "AS_RIGHT",
"Nonassociative": "AS_NONASSOC"
};
-outputfunc(opmap,"associativity",o=>assocenum[o.assoc],-11);
+
+print("#include <stddef.h>\n\n");
+print("#include \"opfuncs.h\"\n");
+print("#include \"parser.h\"\n");
+
+function alsolen(_){
+ outputfunc(_);
+ _.name+="_len";
+ _.dolen=true;
+ outputfunc(_);
+}
+
+alsolen({
+ opmap:opmap,
+ name:"precedence",
+ gen:(op,o)=>o.prec,
+ padw:2,
+ dolen:false,
+ rettype:"int",
+ defval:"-1",
+ checkend:true
+});
+
+alsolen({
+ opmap:opmap,
+ name:"associativity",
+ gen:(op,o)=>assocenum[o.assoc],
+ padw:-11,
+ dolen:false,
+ rettype:"int",
+ defval:"-1",
+ checkend:true
+});
+
+outputfunc({
+ opmap:opmap,
+ name:"parseoplength",
+ gen:(op,o)=>op.length,
+ padw:1,
+ dolen:false,
+ rettype:"int",
+ defval:"-1",
+ checkend:false
+});
+
+outputfunc({
+ opmap:opmap,
+ name:"opconststring_len",
+ gen:(op,o)=>'"'+op+'"',
+ padw:3,
+ dolen:true,
+ rettype:"const char*",
+ defval:"NULL",
+ checkend:true
+});
diff --git a/main.c b/main.c
index 6945950..b2f31dd 100644
--- a/main.c
+++ b/main.c
@@ -77,5 +77,11 @@ int main(int argc,char **argv){
return 1;
}
- ;
+ AST *ast=parse(source);
+ if(ast==NULL){
+ fprintf(stderr,"Parsing error!\n");
+ return 1;
+ }
+ ast_debug(stderr,ast);
+ ast_free(ast);
}
diff --git a/opfuncs.c b/opfuncs.c
new file mode 100644
index 0000000..a5607ae
--- /dev/null
+++ b/opfuncs.c
@@ -0,0 +1,127 @@
+#include <stddef.h>
+
+#include "opfuncs.h"
+#include "parser.h"
+
+int precedence(const char *op){
+ switch(op[0]){
+ case '!': return op[1]=='='&&!op[2]? 5:!op[1]?12:-1;
+ case '%': return op[1]=='='&&!op[2]? 1:!op[1]?11:-1;
+ case '&': return op[1]=='='&&!op[2]? 1:op[1]=='&'&&!op[2]? 4:!op[1]? 9:-1;
+ case '(': return op[1]=='-'&&op[2]==')'&&!op[3]?12:op[1]=='!'&&op[2]==')'&&!op[3]?13:-1;
+ case '*': return op[1]=='*'&&op[2]=='='&&!op[3]? 1:op[1]=='='&&!op[2]? 1:op[1]=='*'&&!op[2]?14:!op[1]?11:-1;
+ case '+': return op[1]=='='&&!op[2]? 1:!op[1]?10:-1;
+ case '-': return op[1]=='='&&!op[2]? 1:!op[1]?10:-1;
+ case '/': return op[1]=='/'&&op[2]=='='&&!op[3]? 1:op[1]=='='&&!op[2]? 1:op[1]=='/'&&!op[2]?11:!op[1]?11:-1;
+ case '<': return op[1]=='='&&!op[2]? 6:!op[1]? 6:-1;
+ case '=': return op[1]=='='&&!op[2]? 5:!op[1]? 1:-1;
+ case '>': return op[1]=='='&&!op[2]? 6:!op[1]? 6:-1;
+ case '^': return op[1]=='='&&!op[2]? 1:op[1]=='^'&&!op[2]? 3:!op[1]? 8:-1;
+ case '|': return op[1]=='='&&!op[2]? 1:op[1]=='|'&&!op[2]? 2:!op[1]? 7:-1;
+ case '~': return !op[1]?12:-1;
+ default: return -1;
+ }
+}
+
+int precedence_len(const char *op,const int len){
+ if(len<=0)return -1;
+ switch(op[0]){
+ case '!': return len==2&&op[1]=='='? 5:len==1?12:-1;
+ case '%': return len==2&&op[1]=='='? 1:len==1?11:-1;
+ case '&': return len==2&&op[1]=='='? 1:len==2&&op[1]=='&'? 4:len==1? 9:-1;
+ case '(': return len==3&&op[1]=='-'&&op[2]==')'?12:len==3&&op[1]=='!'&&op[2]==')'?13:-1;
+ case '*': return len==3&&op[1]=='*'&&op[2]=='='? 1:len==2&&op[1]=='='? 1:len==2&&op[1]=='*'?14:len==1?11:-1;
+ case '+': return len==2&&op[1]=='='? 1:len==1?10:-1;
+ case '-': return len==2&&op[1]=='='? 1:len==1?10:-1;
+ case '/': return len==3&&op[1]=='/'&&op[2]=='='? 1:len==2&&op[1]=='='? 1:len==2&&op[1]=='/'?11:len==1?11:-1;
+ case '<': return len==2&&op[1]=='='? 6:len==1? 6:-1;
+ case '=': return len==2&&op[1]=='='? 5:len==1? 1:-1;
+ case '>': return len==2&&op[1]=='='? 6:len==1? 6:-1;
+ case '^': return len==2&&op[1]=='='? 1:len==2&&op[1]=='^'? 3:len==1? 8:-1;
+ case '|': return len==2&&op[1]=='='? 1:len==2&&op[1]=='|'? 2:len==1? 7:-1;
+ case '~': return len==1?12:-1;
+ default: return -1;
+ }
+}
+
+int associativity(const char *op){
+ switch(op[0]){
+ case '!': return op[1]=='='&&!op[2]?AS_NONASSOC:!op[1]?AS_PREFIX :-1;
+ case '%': return op[1]=='='&&!op[2]?AS_RIGHT :!op[1]?AS_LEFT :-1;
+ case '&': return op[1]=='='&&!op[2]?AS_RIGHT :op[1]=='&'&&!op[2]?AS_LEFT :!op[1]?AS_LEFT :-1;
+ case '(': return op[1]=='-'&&op[2]==')'&&!op[3]?AS_PREFIX :op[1]=='!'&&op[2]==')'&&!op[3]?AS_SUFFIX :-1;
+ case '*': return op[1]=='*'&&op[2]=='='&&!op[3]?AS_RIGHT :op[1]=='='&&!op[2]?AS_RIGHT :op[1]=='*'&&!op[2]?AS_RIGHT :!op[1]?AS_LEFT :-1;
+ case '+': return op[1]=='='&&!op[2]?AS_RIGHT :!op[1]?AS_LEFT :-1;
+ case '-': return op[1]=='='&&!op[2]?AS_RIGHT :!op[1]?AS_LEFT :-1;
+ case '/': return op[1]=='/'&&op[2]=='='&&!op[3]?AS_RIGHT :op[1]=='='&&!op[2]?AS_RIGHT :op[1]=='/'&&!op[2]?AS_LEFT :!op[1]?AS_LEFT :-1;
+ case '<': return op[1]=='='&&!op[2]?AS_NONASSOC:!op[1]?AS_NONASSOC:-1;
+ case '=': return op[1]=='='&&!op[2]?AS_NONASSOC:!op[1]?AS_RIGHT :-1;
+ case '>': return op[1]=='='&&!op[2]?AS_NONASSOC:!op[1]?AS_NONASSOC:-1;
+ case '^': return op[1]=='='&&!op[2]?AS_RIGHT :op[1]=='^'&&!op[2]?AS_LEFT :!op[1]?AS_LEFT :-1;
+ case '|': return op[1]=='='&&!op[2]?AS_RIGHT :op[1]=='|'&&!op[2]?AS_LEFT :!op[1]?AS_LEFT :-1;
+ case '~': return !op[1]?AS_PREFIX :-1;
+ default: return -1;
+ }
+}
+
+int associativity_len(const char *op,const int len){
+ if(len<=0)return -1;
+ switch(op[0]){
+ case '!': return len==2&&op[1]=='='?AS_NONASSOC:len==1?AS_PREFIX :-1;
+ case '%': return len==2&&op[1]=='='?AS_RIGHT :len==1?AS_LEFT :-1;
+ case '&': return len==2&&op[1]=='='?AS_RIGHT :len==2&&op[1]=='&'?AS_LEFT :len==1?AS_LEFT :-1;
+ case '(': return len==3&&op[1]=='-'&&op[2]==')'?AS_PREFIX :len==3&&op[1]=='!'&&op[2]==')'?AS_SUFFIX :-1;
+ case '*': return len==3&&op[1]=='*'&&op[2]=='='?AS_RIGHT :len==2&&op[1]=='='?AS_RIGHT :len==2&&op[1]=='*'?AS_RIGHT :len==1?AS_LEFT :-1;
+ case '+': return len==2&&op[1]=='='?AS_RIGHT :len==1?AS_LEFT :-1;
+ case '-': return len==2&&op[1]=='='?AS_RIGHT :len==1?AS_LEFT :-1;
+ case '/': return len==3&&op[1]=='/'&&op[2]=='='?AS_RIGHT :len==2&&op[1]=='='?AS_RIGHT :len==2&&op[1]=='/'?AS_LEFT :len==1?AS_LEFT :-1;
+ case '<': return len==2&&op[1]=='='?AS_NONASSOC:len==1?AS_NONASSOC:-1;
+ case '=': return len==2&&op[1]=='='?AS_NONASSOC:len==1?AS_RIGHT :-1;
+ case '>': return len==2&&op[1]=='='?AS_NONASSOC:len==1?AS_NONASSOC:-1;
+ case '^': return len==2&&op[1]=='='?AS_RIGHT :len==2&&op[1]=='^'?AS_LEFT :len==1?AS_LEFT :-1;
+ case '|': return len==2&&op[1]=='='?AS_RIGHT :len==2&&op[1]=='|'?AS_LEFT :len==1?AS_LEFT :-1;
+ case '~': return len==1?AS_PREFIX :-1;
+ default: return -1;
+ }
+}
+
+int parseoplength(const char *op){
+ switch(op[0]){
+ case '!': return op[1]=='='?2:1;
+ case '%': return op[1]=='='?2:1;
+ case '&': return op[1]=='='?2:op[1]=='&'?2:1;
+ case '(': return op[1]=='-'&&op[2]==')'?3:op[1]=='!'&&op[2]==')'?3:-1;
+ case '*': return op[1]=='*'&&op[2]=='='?3:op[1]=='='?2:op[1]=='*'?2:1;
+ case '+': return op[1]=='='?2:1;
+ case '-': return op[1]=='='?2:1;
+ case '/': return op[1]=='/'&&op[2]=='='?3:op[1]=='='?2:op[1]=='/'?2:1;
+ case '<': return op[1]=='='?2:1;
+ case '=': return op[1]=='='?2:1;
+ case '>': return op[1]=='='?2:1;
+ case '^': return op[1]=='='?2:op[1]=='^'?2:1;
+ case '|': return op[1]=='='?2:op[1]=='|'?2:1;
+ case '~': return 1;
+ default: return -1;
+ }
+}
+
+const char* opconststring_len(const char *op,const int len){
+ if(len<=0)return NULL;
+ switch(op[0]){
+ case '!': return len==2&&op[1]=='='?"!=":len==1?"!":NULL;
+ case '%': return len==2&&op[1]=='='?"%=":len==1?"%":NULL;
+ case '&': return len==2&&op[1]=='='?"&=":len==2&&op[1]=='&'?"&&":len==1?"&":NULL;
+ case '(': return len==3&&op[1]=='-'&&op[2]==')'?"(-)":len==3&&op[1]=='!'&&op[2]==')'?"(!)":NULL;
+ case '*': return len==3&&op[1]=='*'&&op[2]=='='?"**=":len==2&&op[1]=='='?"*=":len==2&&op[1]=='*'?"**":len==1?"*":NULL;
+ case '+': return len==2&&op[1]=='='?"+=":len==1?"+":NULL;
+ case '-': return len==2&&op[1]=='='?"-=":len==1?"-":NULL;
+ case '/': return len==3&&op[1]=='/'&&op[2]=='='?"//=":len==2&&op[1]=='='?"/=":len==2&&op[1]=='/'?"//":len==1?"/":NULL;
+ case '<': return len==2&&op[1]=='='?"<=":len==1?"<":NULL;
+ case '=': return len==2&&op[1]=='='?"==":len==1?"=":NULL;
+ case '>': return len==2&&op[1]=='='?">=":len==1?">":NULL;
+ case '^': return len==2&&op[1]=='='?"^=":len==2&&op[1]=='^'?"^^":len==1?"^":NULL;
+ case '|': return len==2&&op[1]=='='?"|=":len==2&&op[1]=='|'?"||":len==1?"|":NULL;
+ case '~': return len==1?"~":NULL;
+ default: return NULL;
+ }
+}
diff --git a/opfuncs.h b/opfuncs.h
new file mode 100644
index 0000000..a5e212c
--- /dev/null
+++ b/opfuncs.h
@@ -0,0 +1,8 @@
+#pragma once
+
+int precedence(const char *op);
+int precedence_len(const char *op,const int len);
+int associativity(const char *op);
+int associativity_len(const char *op,const int len);
+int parseoplength(const char *op);
+const char* opconststring_len(const char *op,const int len);
diff --git a/parser.c b/parser.c
index 96b9bf7..14fd47d 100644
--- a/parser.c
+++ b/parser.c
@@ -2,193 +2,515 @@
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
+#include <assert.h>
#include "memory.h"
+#include "opfuncs.h"
#include "parser.h"
+#define NOT_IMPLEMENTED false
+
+
+static bool ishexdigit(char c){ //True iff c is one of 0-9, a-f or A-F
+ return !(c<'0'||(c>'9'&&c<'A')||(c>'F'&&c<'a')||c>'f'); //reject the four gaps around the three digit ranges
+}
+
+static int hexnumber(char c){ //Numeric value of hex digit c; the caller must pass a valid hex digit
+ return c>'9'?10+(c&~('a'-'A'))-'A':c-'0'; //letters: clear the case bit, then offset from 'A'
+}
+
+static char hexencode(int n){ //Lowercase hex digit for the low four bits of n
+ return "0123456789abcdef"[n&15]; //table lookup; adding 'a' directly to n would map 10..15 to 'k'..'p'
+}
+
+
typedef enum Tokentype{
TT_NUM,
TT_STR,
TT_WORD,
- TT_SYM
+ TT_OP,
+ TT_SYM, //all symbols that are not operators
+ TT_ENDSTMT,
+ TT_EOF,
+
+ TT_ERR=-1
} Tokentype;
typedef struct Token{
- const char *str;
+ Tokentype type;
+ const char *str; //Part of another string; not null-terminated, and do not free
int len;
} Token;
-Token nexttoken(const char **sourcep){
+
+static bool parsecomment(const char **sourcep){ //Skips one comment at *sourcep; returns true and advances only if one was consumed
+ const char *source=*sourcep;
+ if(*source!='#')return false;
+ if(source[1]=='#'&&source[2]=='#'){ //block comment: ### ... ###
+ source+=3;
+ while(*source&&
+ (*source!='#'||source[1]!='#'||source[2]!='#')){
+ source++;
+ }
+ if(!*source)return false; //unclosed block comment
+ source+=3; //step over the whole closing "###"; skipping only two left a '#' that began a bogus line comment
+ } else { //line comment: runs up to and including the newline (or to end of input)
+ while(*source&&*source!='\n')source++;
+ if(*source)source++;
+ }
+ *sourcep=source;
+ return true;
+}
+
+static void skipintermediate(const char **sourcep){ //Advances *sourcep past any run of whitespace and comments
+ const char *source=*sourcep;
+ bool acted;
+ do {
+ acted=false;
+ while(isspace((unsigned char)*source)){ //cast: isspace on a negative char value is undefined behaviour
+ source++;
+ acted=true;
+ }
+ if(parsecomment(&source)){
+ acted=true;
+ }
+ } while(acted); //repeat until a pass consumes neither whitespace nor a comment
+ *sourcep=source;
+}
+
+static Token nexttoken(const char **sourcep){
+ skipintermediate(sourcep);
const char *source=*sourcep;
- while(isspace(*source))source++;
+ if(*source=='\0'){
+ Token tok={TT_EOF,NULL,-1};
+ return tok;
+ }
+ if(*source==';'){
+ Token tok={TT_ENDSTMT,source,1};
+ (*sourcep)++;
+ return tok;
+ }
if(isdigit(*source)||(*source=='-'&&isdigit(source[1]))){
char *endp;
strtod(source,&endp);
assert(endp!=source);
- Token tok={source,endp-source};
+ Token tok={TT_NUM,source,endp-source};
+ *sourcep=endp;
return tok;
}
-}
-
-
-int precedence(const char *op){
- switch(op[0]){
- case '!': return op[1]=='\0' ? 12 : op[1]=='='&&op[2]=='\0' ? 5 : -1;
- case '%': return op[1]=='\0' ? 11 : -1;
- case '&': return op[1]=='\0' ? 9 : op[1]=='&'&&op[2]=='\0' ? 4 : -1;
- case '*': return op[1]=='\0' ? 11 : op[1]=='*'&&op[2]=='\0' ? 14 : -1;
- case '+': return op[1]=='\0' ? 10 : -1;
- case '-': return op[1]=='\0' ? 10 : -1;
- case '/': return op[1]=='\0' ? 11 : op[1]=='/'&&op[2]=='\0' ? 11 : -1;
- case '<': return op[1]=='\0' ? 6 : op[1]=='='&&op[2]=='\0' ? 6 : -1;
- case '=': return op[1]=='\0' ? 1 : op[1]=='='&&op[2]=='\0' ? 5 : -1;
- case '>': return op[1]=='\0' ? 6 : op[1]=='='&&op[2]=='\0' ? 6 : -1;
- case '^': return op[1]=='\0' ? 8 : op[1]=='^'&&op[2]=='\0' ? 3 : -1;
- case '|': return op[1]=='\0' ? 7 : op[1]=='|'&&op[2]=='\0' ? 2 : -1;
- case '~': return op[1]=='\0' ? 12 : -1;
- default: return -1;
+ if(*source=='"'){
+ int i;
+ for(i=1;source[i]&&source[i]!='"';i++){
+ if(source[i]=='\\')i++;
+ }
+ if(!source[i]){
+ Token tok={TT_ERR,"Non-terminated string",21};
+ return tok;
+ }
+ *sourcep+=i+1;
+ Token tok={TT_STR,source,i+1};
+ return tok;
}
-}
-
-int associativity(const char *op){
- switch(op[0]){
- case '!': return op[1]=='\0' ? AS_PREFIX : op[1]=='='&&op[2]=='\0' ? AS_NONASSOC : -1;
- case '%': return op[1]=='\0' ? AS_LEFT : -1;
- case '&': return op[1]=='\0' ? AS_LEFT : op[1]=='&'&&op[2]=='\0' ? AS_LEFT : -1;
- case '*': return op[1]=='\0' ? AS_LEFT : op[1]=='*'&&op[2]=='\0' ? AS_RIGHT : -1;
- case '+': return op[1]=='\0' ? AS_LEFT : -1;
- case '-': return op[1]=='\0' ? AS_LEFT : -1;
- case '/': return op[1]=='\0' ? AS_LEFT : op[1]=='/'&&op[2]=='\0' ? AS_LEFT : -1;
- case '<': return op[1]=='\0' ? AS_NONASSOC : op[1]=='='&&op[2]=='\0' ? AS_NONASSOC : -1;
- case '=': return op[1]=='\0' ? AS_RIGHT : op[1]=='='&&op[2]=='\0' ? AS_NONASSOC : -1;
- case '>': return op[1]=='\0' ? AS_NONASSOC : op[1]=='='&&op[2]=='\0' ? AS_NONASSOC : -1;
- case '^': return op[1]=='\0' ? AS_LEFT : op[1]=='^'&&op[2]=='\0' ? AS_LEFT : -1;
- case '|': return op[1]=='\0' ? AS_LEFT : op[1]=='|'&&op[2]=='\0' ? AS_LEFT : -1;
- case '~': return op[1]=='\0' ? AS_PREFIX : -1;
- default: return -1;
+ int oplen=parseoplength(source);
+ if(oplen!=-1){
+ Token tok={TT_OP,source,oplen};
+ *sourcep+=oplen;
+ return tok;
+ }
+ if(strchr("(){}",*source)!=NULL){
+ Token tok={TT_SYM,source,1};
+ (*sourcep)++;
+ return tok;
+ }
+ if(isalpha(*source)||*source=='_'){
+ int i;
+ for(i=1;source[i];i++){
+ if(!isalpha(source[i])&&!isdigit(source[i])&&source[i]!='_')break;
+ }
+ Token tok={TT_WORD,source,i};
+ *sourcep+=i;
+ return tok;
}
+ Token tok={TT_ERR,"Unrecognised token",18};
+ return tok;
}
-static bool parsecomment(const char *source,int *reslen){
- int cursor=0;
- if(source[cursor]!='#')return false;
- if(source[cursor+1]=='#'&&source[cursor+2]=='#'){
- cursor+=3;
- while(source[cursor]&&
- (source[cursor]!='#'||source[cursor+1]!='#'||source[cursor+2]!='#')){
- cursor++;
- }
- if(!source[cursor])return false; //unclosed block comment
- cursor+=2;
+static void printtoken(FILE *stream,Token tok,const char *msg){
+ const char *type;
+ switch(tok.type){
+ case TT_NUM: type="TT_NUM"; break;
+ case TT_STR: type="TT_STR"; break;
+ case TT_WORD: type="TT_WORD"; break;
+ case TT_OP: type="TT_OP"; break;
+ case TT_SYM: type="TT_SYM"; break;
+ case TT_ENDSTMT: type="TT_ENDSTMT"; break;
+ case TT_EOF: type="TT_EOF"; break;
+ case TT_ERR: type="TT_ERR"; break;
+ default: type="TT_(??\?)"; break; //TRIGRAPHS ._.
+ }
+ if(tok.len!=-1){
+ char buf[tok.len+1];
+ memcpy(buf,tok.str,tok.len);
+ buf[tok.len]='\0';
+ fprintf(stream,"(%s) Token: %s '%s'\n",msg,type,buf);
} else {
- while(source[cursor]&&source[cursor]!='\n')cursor++;
- if(source[cursor])cursor++;
+ fprintf(stream,"(%s) Token: %s (null)\n",msg,type);
}
- *reslen=cursor;
- return true;
}
-static void parseintermediate(const char *source,int *reslen){
- int cursor=0;
- bool acted;
- do {
- acted=false;
- while(source[cursor]&&isspace(source[cursor])){
- cursor++;
- acted=true;
+
+static AST* parseterm(const char *source,int *reslen){ //Parses one term at source; returns its node and stores the consumed length in *reslen, or returns NULL on error
+ const char *origsource=source;
+ const Token tok=nexttoken(&source);
+ printtoken(stderr,tok,"parseterm");
+ AST *node=NULL; //initialised so a not-yet-implemented case yields NULL rather than garbage when asserts are disabled
+ switch(tok.type){
+ case TT_NUM:{
+ node=malloc(sizeof(AST));
+ if(!node)outofmem();
+ node->type=AST_NUM;
+ char *endp;
+ long long intv=strtoll(tok.str,&endp,0); //long long: an int would truncate large literals
+ node->n.isint=endp-tok.str==tok.len; //integer only if the integer parse consumed the whole token
+ if(node->n.isint)node->n.i=intv;
+ else node->n.d=strtod(tok.str,NULL);
+ break;
+ }
- int partlen;
- if(parsecomment(source+cursor,&partlen)){
- cursor+=partlen;
- acted=true;
+
+ case TT_STR:{
+ int slen=0; //first pass: count decoded characters and validate \x escapes
+ for(int i=1;i<tok.len-1;i++){
+ slen++;
+ if(tok.str[i]!='\\')continue;
+ i++;
+ if(tok.str[i]=='x'){
+ if(i+2>=tok.len-1||!ishexdigit(tok.str[i+1])||!ishexdigit(tok.str[i+2])){
+ return NULL;
+ }
+ i+=2;
+ } else {
+ //single-character escape: already consumed by the i++ above; advancing again would skip a character and undercount slen
+ }
+ }
+ node=malloc(sizeof(AST));
+ if(!node)outofmem();
+ node->type=AST_STR;
+ node->s.str=malloc(slen+1);
+ if(!node->s.str)outofmem();
+ int j=0; //second pass: decode escapes into the buffer
+ for(int i=1;i<tok.len-1;i++){
+ if(tok.str[i]!='\\'){
+ node->s.str[j++]=tok.str[i];
+ continue;
+ }
+ i++;
+ switch(tok.str[i]){
+ case 'n': node->s.str[j++]='\n'; break;
+ case 'r': node->s.str[j++]='\r'; break;
+ case 't': node->s.str[j++]='\t'; break;
+ case 'b': node->s.str[j++]='\b'; break;
+ case 'a': node->s.str[j++]='\a'; break;
+ case 'x':
+ node->s.str[j++]=16*hexnumber(tok.str[i+1])+hexnumber(tok.str[i+2]);
+ i+=2;
+ break;
+ default:
+ node->s.str[j++]=tok.str[i];
+ break;
+ }
+ }
+ node->s.str[j]='\0'; node->s.len=j; //record the length: the string may contain \x00, and ast_debug_ reads s.len
+ break;
+ }
- } while(acted);
- *reslen=cursor;
+
+ case TT_WORD:{
+ if(tok.len==2&&memcmp(tok.str,"if",2)==0)assert(NOT_IMPLEMENTED);
+ if(tok.len==5&&memcmp(tok.str,"while",5)==0)assert(NOT_IMPLEMENTED); //compare all 5 bytes, else any 5-letter word starting with "wh" matches
+ const char *tempsource=source;
+ Token next=nexttoken(&source); //peek only: a following '(' would make this a call
+ if(next.len==1&&next.str[0]=='(')assert(NOT_IMPLEMENTED);
+ source=tempsource;
+ node=malloc(sizeof(AST));
+ if(!node)outofmem();
+ node->type=AST_VAR;
+ node->v.name=malloc(tok.len+1);
+ if(!node->v.name)outofmem();
+ memcpy(node->v.name,tok.str,tok.len);
+ node->v.name[tok.len]='\0';
+ break;
+ }
+
+ case TT_SYM:
+ assert(NOT_IMPLEMENTED);
+ break;
+
+ case TT_OP:{
+ char buf[tok.len+3]; //the operator wrapped in parentheses, e.g. "(-)", which is how unary operators are keyed
+ buf[0]='(';
+ memcpy(buf+1,tok.str,tok.len);
+ buf[tok.len+1]=')';
+ buf[tok.len+2]='\0';
+ if(associativity(buf)==AS_PREFIX){
+ node=malloc(sizeof(AST));
+ if(!node)outofmem();
+ node->type=AST_OP;
+ node->o.op=opconststring_len(buf,tok.len+2);
+ node->o.left=NULL;
+ int len;
+ node->o.right=parseterm(source,&len);
+ if(!node->o.right){
+ free(node);
+ return NULL;
+ }
+ source+=len;
+ } else return NULL;
+ break;
+ }
+
+ case TT_ENDSTMT:
+ case TT_EOF:
+ case TT_ERR:
+ return NULL;
+ }
+ *reslen=source-origsource;
+ return node;
}
+//Uses precedence climbing
static AST* parseexpr(const char *source,int *reslen,int minprec){
- ;
+ const char *origsource=source;
+ int len;
+ AST *tree=parseterm(source,&len);
+ if(!tree)return NULL;
+ source+=len;
+ while(true){
+ const char *beforeop=source;
+ Token tok=nexttoken(&source);
+ printtoken(stderr,tok,"parseEXPR");
+ if(tok.type==TT_ENDSTMT){
+ source=beforeop;
+ break;
+ }
+ if(tok.type!=TT_OP){
+ ast_free(tree);
+ return NULL;
+ }
+ int prec=precedence_len(tok.str,tok.len);
+ if(prec<minprec){
+ source=beforeop;
+ break;
+ }
+ Associativity assoc=associativity_len(tok.str,tok.len);
+ int q;
+ switch(assoc){
+ case AS_PREFIX: case AS_SUFFIX:
+ ast_free(tree);
+ return NULL;
+
+ case AS_LEFT: q=prec+1; break;
+ case AS_RIGHT: q=prec; break;
+ case AS_NONASSOC: q=prec+1; minprec=prec+1; break;
+
+ default: assert(false);
+ }
+ AST *right=parseexpr(source,&len,q);
+ if(!right){
+ ast_free(tree);
+ return NULL;
+ }
+ source+=len;
+ AST *opnode=malloc(sizeof(AST));
+ if(!opnode)outofmem();
+ opnode->type=AST_OP;
+ opnode->o.op=opconststring_len(tok.str,tok.len);
+ if(!opnode->o.op)outofmem();
+ opnode->o.left=tree;
+ opnode->o.right=right;
+ tree=opnode;
+ }
+ *reslen=source-origsource;
+ return tree;
}
static AST* parsestmt(const char *source,int *reslen){
return parseexpr(source,reslen,0);
}
-ASTblock* parse(const char *source){
- ASTblock *bl=malloc(sizeof(ASTblock));
+AST* parse(const char *source){
+ AST *bl=malloc(sizeof(AST));
+ if(!bl)outofmem();
+ bl->type=AST_BLOCK;
int sz=32;
- bl->len=0;
- bl->exprs=calloc(sz,sizeof(AST*));
- if(!bl->exprs)outofmem();
+ bl->b.len=0;
+ bl->b.exprs=calloc(sz,sizeof(AST*));
+ if(!bl->b.exprs)outofmem();
int reslen;
int cursor=0;
while(true){
- if(bl->len==sz){
+ if(bl->b.len==sz){
sz*=2;
- bl->exprs=realloc(bl->exprs,sz*sizeof(AST*));
- if(!bl->exprs)outofmem();
+ bl->b.exprs=realloc(bl->b.exprs,sz*sizeof(AST*));
+ if(!bl->b.exprs)outofmem();
}
- parseintermediate(source+cursor,&reslen);
- if(!source[cursor])break;
AST *node=parsestmt(source+cursor,&reslen);
if(!node){
- ast_free((AST*)bl);
+ ast_free(bl);
return NULL;
}
- bl->exprs[bl->len++]=node;
+ bl->b.exprs[bl->b.len++]=node;
cursor+=reslen;
+ const char *src=source+cursor;
+ Token tok=nexttoken(&src);
+ if(tok.type!=TT_ENDSTMT){
+ ast_free(bl);
+ return NULL;
+ }
+ cursor=src-source;
+ src=source+cursor;
+ tok=nexttoken(&src);
+ if(tok.type==TT_EOF)break;
}
return bl;
}
-void ast_free(AST *ast_){
- switch(ast_->type){
- case AST_BLOCK:{ ASTblock *ast=(ASTblock*)ast_;
- for(int i=0;i<ast->len;i++)if(ast->exprs[i])ast_free(ast->exprs[i]);
- free(ast->exprs);
+static const char* charblock(char c,int n){ //Returns a NUL-terminated run of n copies of c
+ static char *block=NULL; //one buffer reused across calls, so each call invalidates the previous result
+ char *grown=block?realloc(block,n+1):malloc(n+1);
+ block=grown;
+ if(block==NULL)outofmem();
+ memset(block,c,n);
+ block[n]='\0';
+ return block;
+}
+
+#define TABW (4)
+#define INDENT fprintf(stream,"%s",charblock(' ',TABW*indent));
+static void ast_debug_(FILE *stream,const AST *ast,int indent){ //Writes a source-like rendering of ast to stream; indent is the current block nesting depth
+ switch(ast->type){
+ case AST_BLOCK:
+ if(ast->b.len==0){
+ fprintf(stream,"{}");
+ break;
+ }
+ fprintf(stream,"{\n");
+ indent++;
+ for(int i=0;i<ast->b.len;i++){
+ INDENT
+ ast_debug_(stream,ast->b.exprs[i],indent);
+ fputc('\n',stream);
+ }
+ indent--;
+ INDENT
+ fprintf(stream,"}");
+ break;
+
+ case AST_OP:{
+ bool leftp=ast->o.left&&ast->o.left->type==AST_OP&&precedence(ast->o.left->o.op)<=precedence(ast->o.op); //parenthesise an operand whose operator does not bind tighter than this one
+ bool rightp=ast->o.right&&ast->o.right->type==AST_OP&&precedence(ast->o.right->o.op)<=precedence(ast->o.op);
+ //fprintf(stderr,"[[op='%s' p=%d lp=%d rp=%d]]",ast->o.op,precedence(ast->o.op),leftp,rightp);
+ if(leftp)fputc('(',stream);
+ if(ast->o.left)ast_debug_(stream,ast->o.left,indent);
+ fprintf(stream,"%s%s%s",leftp?")":"",ast->o.op,rightp?"(":"");
+ if(ast->o.right)ast_debug_(stream,ast->o.right,indent);
+ if(rightp)fputc(')',stream);
+ break;
+ }
+
+ case AST_NUM:
+ if(ast->n.isint)fprintf(stream,"%lld",(long long)ast->n.i); //cast so the argument always matches %lld
+ else fprintf(stream,"%g",ast->n.d);
+ break;
+
+ case AST_STR:
+ fputc('"',stream);
+ for(int i=0;i<ast->s.len;i++){
+ if((unsigned char)ast->s.str[i]<32||(unsigned char)ast->s.str[i]>126){ //non-printable bytes are shown as \xHH
+ fprintf(stream,"\\x%c%c",hexencode((unsigned char)ast->s.str[i]/16),hexencode((unsigned char)ast->s.str[i]%16)); //unsigned: a negative char would give wrong nibbles
+ } else fputc(ast->s.str[i],stream);
+ }
+ fputc('"',stream);
+ break;
+
+ case AST_VAR:
+ fprintf(stream,"%s",ast->v.name);
+ break;
+
+ case AST_CALL:
+ fprintf(stream,"%s(",ast->c.func);
+ for(int i=0;i<ast->c.nargs;i++){
+ if(i!=0)fputc(',',stream);
+ ast_debug_(stream,ast->c.args[i],indent);
+ }
+ fputc(')',stream);
+ break;
+
+ case AST_IF:
+ assert(NOT_IMPLEMENTED);
+ break;
+
+ case AST_WHILE:
+ assert(NOT_IMPLEMENTED);
+ break;
+
+ default:
+ fprintf(stream,"AST_(??\?)"); //the \? keeps "??)" from being read as a trigraph
+ break;
+ }
+}
+
+void ast_debug(FILE *stream,const AST *ast){
+ ast_debug_(stream,ast,0);
+ fputc('\n',stream);
+}
+
+void ast_free(AST *ast){
+ switch(ast->type){
+ case AST_BLOCK:{
+ for(int i=0;i<ast->b.len;i++)if(ast->b.exprs[i])ast_free(ast->b.exprs[i]);
+ free(ast->b.exprs);
break;
}
- case AST_OP:{ ASTop *ast=(ASTop*)ast_;
- if(ast->left)ast_free(ast->left);
- if(ast->right)ast_free(ast->right);
+ case AST_OP:{
+ if(ast->o.left)ast_free(ast->o.left);
+ if(ast->o.right)ast_free(ast->o.right);
break;
}
case AST_NUM:
break;
- case AST_STR:{ ASTstr *ast=(ASTstr*)ast_;
- if(ast->str)free(ast->str);
+ case AST_STR:{
+ if(ast->s.str)free(ast->s.str);
break;
}
- case AST_VAR:{ ASTvar *ast=(ASTvar*)ast_;
- if(ast->name)free(ast->name);
+ case AST_VAR:{
+ if(ast->v.name)free(ast->v.name);
break;
}
- case AST_CALL:{ ASTcall *ast=(ASTcall*)ast_;
- if(ast->func)free(ast->func);
- for(int i=0;i<ast->nargs;i++)if(ast->args[i])ast_free(ast->args[i]);
- free(ast->args);
+ case AST_CALL:{
+ if(ast->c.func)free(ast->c.func);
+ for(int i=0;i<ast->c.nargs;i++)if(ast->c.args[i])ast_free(ast->c.args[i]);
+ free(ast->c.args);
break;
}
- case AST_IF:{ ASTif *ast=(ASTif*)ast_;
- if(ast->cond)free(ast->cond);
- if(ast->thenb)free(ast->thenb);
- if(ast->elseb)free(ast->elseb);
+ case AST_IF:{
+ if(ast->i.cond)free(ast->i.cond);
+ if(ast->i.thenb)free(ast->i.thenb);
+ if(ast->i.elseb)free(ast->i.elseb);
break;
}
- case AST_WHILE:{ ASTwhile *ast=(ASTwhile*)ast_;
- if(ast->cond)free(ast->cond);
- if(ast->body)free(ast->body);
+ case AST_WHILE:{
+ if(ast->w.cond)free(ast->w.cond);
+ if(ast->w.body)free(ast->w.body);
break;
}
}
- free(ast_);
+ free(ast);
}
diff --git a/parser.h b/parser.h
index 7d9b347..61d6dfe 100644
--- a/parser.h
+++ b/parser.h
@@ -1,5 +1,6 @@
#pragma once
+#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>
@@ -86,5 +87,6 @@ typedef enum Associativity{
} Associativity;
-ASTblock* parse(const char *source);
+AST* parse(const char *source);
+void ast_debug(FILE *stream,const AST *ast);
void ast_free(AST *ast);