From f67988fbfde6ad8a91466ef5d4227dcf9e5db6ce Mon Sep 17 00:00:00 2001 From: tomsmeding Date: Fri, 5 Aug 2016 20:26:05 +0200 Subject: Working preliminary version of parser --- LANGUAGE.txt | 31 ++-- Makefile | 5 +- code.txt | 1 + genops.js | 94 +++++++++-- main.c | 8 +- opfuncs.c | 127 ++++++++++++++ opfuncs.h | 8 + parser.c | 538 +++++++++++++++++++++++++++++++++++++++++++++++------------ parser.h | 4 +- 9 files changed, 677 insertions(+), 139 deletions(-) create mode 100644 opfuncs.c create mode 100644 opfuncs.h diff --git a/LANGUAGE.txt b/LANGUAGE.txt index 0e7066b..3259182 100644 --- a/LANGUAGE.txt +++ b/LANGUAGE.txt @@ -2,20 +2,23 @@ Statements are terminated by ';'. The usual infix expression rules apply, with the following precedence table: (higher precedence number means tighter binding) - Operators Precedence Associativity - ** 14 Right - - ! ~ 12 Prefix (unary) - * / // % 11 Left - + - 10 Left - & 9 Left - ^ 8 Left - | 7 Left - < > <= >= 6 Nonassociative - == != 5 Nonassociative - && 4 Left (short-circuiting) - ^^ 3 Left - || 2 Left (short-circuiting) - = 1 Right (also += -= *= /= %= **= &= ^= |=) + Operators Precedence Associativity + = += -= *= 1 Right + /= //= %= **= 1 Right + &= ^= |= 1 Right + || 2 Left (short-circuiting) + ^^ 3 Left + && 4 Left (short-circuiting) + == != 5 Nonassociative + < > <= >= 6 Nonassociative + | 7 Left + ^ 8 Left + & 9 Left + + - 10 Left + * / // % 11 Left + (-) ! ~ 12 Prefix (unary) + (!) 13 Suffix (unary) + ** 14 Right break and continue get parsed to calls to the __break() and __continue() diff --git a/Makefile b/Makefile index 55b2ef8..68912f8 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ CC = gcc -CFLAGS = -Wall -Wextra -std=c11 -O2 -fwrapv +CFLAGS = -Wall -Wextra -std=c11 -g -fwrapv BIN = main .PHONY: all clean remake @@ -7,7 +7,8 @@ BIN = main all: $(BIN) clean: - rm -f $(BIN) *.o *.dSYM + rm -f $(BIN) *.o + rm -rf *.dSYM remake: clean all diff --git a/code.txt b/code.txt index 471fb1b..3b04c0f 100644 --- a/code.txt +++ b/code.txt @@ -1,2 +1,3 @@ a = 1; b = 2; +c = 1 + -x - 3 > -1; diff --git a/genops.js b/genops.js index 33b688d..9b88cbc 100755 --- a/genops.js +++ b/genops.js @@ -3,7 +3,6 @@ const fs=require("fs"); function print(/*arguments*/){ - //console.log.apply(console,arguments); process.stdout.write.apply(process.stdout,arguments); } @@ -45,8 +44,12 @@ function readopmap(fname){ return opmap; } -function outputfunc(opmap,name,gen,padw){ - print("int "+name+"(const char *op){\n"); +function outputfunc(_ /*opmap,name,gen,padw,dolen,rettype,defval,checkend*/){ + let opmap=_.opmap, name=_.name, gen=_.gen, + padw=_.padw, dolen=_.dolen, rettype=_.rettype, + defval=_.defval, checkend=_.checkend; + print("\n"+rettype+" "+name+"(const char *op"+(dolen?",const int len":"")+"){\n"); + if(dolen)print("\tif(len<=0)return "+defval+";\n"); print("\tswitch(op[0]){\n"); let firstchars={}; @@ -56,33 +59,98 @@ function outputfunc(opmap,name,gen,padw){ } let arr=[]; for(k in firstchars){ - arr.push([k,firstchars[k].sort()]); + arr.push([k,firstchars[k].sort(function(a,b){ + return b.length-a.length; + })]); } arr=arr.sort(); for(let tup of arr){ let k=tup[0],ops=tup[1]; print("\t\tcase '"+k+"': return "); + let expr=""; for(let op of ops){ - let cond=""; - for(j=1;jo.prec,2); -print("\n"); const assocenum={ "Prefix": "AS_PREFIX", "Suffix": "AS_SUFFIX", "Left": "AS_LEFT", "Right": "AS_RIGHT", "Nonassociative": "AS_NONASSOC" }; -outputfunc(opmap,"associativity",o=>assocenum[o.assoc],-11); + +print("#include \n\n"); +print("#include \"opfuncs.h\"\n"); +print("#include \"parser.h\"\n"); + +function alsolen(_){ + outputfunc(_); + _.name+="_len"; + _.dolen=true; + outputfunc(_); +} + +alsolen({ + opmap:opmap, + name:"precedence", + gen:(op,o)=>o.prec, + padw:2, + dolen:false, + rettype:"int", + defval:"-1", + checkend:true +}); + +alsolen({ + opmap:opmap, + name:"associativity", + gen:(op,o)=>assocenum[o.assoc], + padw:-11, + dolen:false, + rettype:"int", + defval:"-1", + checkend:true +}); + +outputfunc({ + opmap:opmap, + name:"parseoplength", + gen:(op,o)=>op.length, + padw:1, + dolen:false, + rettype:"int", + defval:"-1", + checkend:false +}); + +outputfunc({ + opmap:opmap, + name:"opconststring_len", + gen:(op,o)=>'"'+op+'"', + padw:3, + dolen:true, + rettype:"const char*", + defval:"NULL", + checkend:true +}); diff --git a/main.c b/main.c index 6945950..b2f31dd 100644 --- a/main.c +++ b/main.c @@ -77,5 +77,11 @@ int main(int argc,char **argv){ return 1; } - ; + AST *ast=parse(source); + if(ast==NULL){ + fprintf(stderr,"Parsing error!\n"); + return 1; + } + ast_debug(stderr,ast); + ast_free(ast); } diff --git a/opfuncs.c b/opfuncs.c new file mode 100644 index 0000000..a5607ae --- /dev/null +++ b/opfuncs.c @@ -0,0 +1,127 @@ +#include + +#include "opfuncs.h" +#include "parser.h" + +int precedence(const char *op){ + switch(op[0]){ + case '!': return op[1]=='='&&!op[2]? 5:!op[1]?12:-1; + case '%': return op[1]=='='&&!op[2]? 1:!op[1]?11:-1; + case '&': return op[1]=='='&&!op[2]? 1:op[1]=='&'&&!op[2]? 4:!op[1]? 9:-1; + case '(': return op[1]=='-'&&op[2]==')'&&!op[3]?12:op[1]=='!'&&op[2]==')'&&!op[3]?13:-1; + case '*': return op[1]=='*'&&op[2]=='='&&!op[3]? 1:op[1]=='='&&!op[2]? 1:op[1]=='*'&&!op[2]?14:!op[1]?11:-1; + case '+': return op[1]=='='&&!op[2]? 1:!op[1]?10:-1; + case '-': return op[1]=='='&&!op[2]? 1:!op[1]?10:-1; + case '/': return op[1]=='/'&&op[2]=='='&&!op[3]? 1:op[1]=='='&&!op[2]? 1:op[1]=='/'&&!op[2]?11:!op[1]?11:-1; + case '<': return op[1]=='='&&!op[2]? 6:!op[1]? 6:-1; + case '=': return op[1]=='='&&!op[2]? 5:!op[1]? 1:-1; + case '>': return op[1]=='='&&!op[2]? 6:!op[1]? 6:-1; + case '^': return op[1]=='='&&!op[2]? 1:op[1]=='^'&&!op[2]? 3:!op[1]? 8:-1; + case '|': return op[1]=='='&&!op[2]? 1:op[1]=='|'&&!op[2]? 2:!op[1]? 7:-1; + case '~': return !op[1]?12:-1; + default: return -1; + } +} + +int precedence_len(const char *op,const int len){ + if(len<=0)return -1; + switch(op[0]){ + case '!': return len==2&&op[1]=='='? 5:len==1?12:-1; + case '%': return len==2&&op[1]=='='? 1:len==1?11:-1; + case '&': return len==2&&op[1]=='='? 1:len==2&&op[1]=='&'? 4:len==1? 9:-1; + case '(': return len==3&&op[1]=='-'&&op[2]==')'?12:len==3&&op[1]=='!'&&op[2]==')'?13:-1; + case '*': return len==3&&op[1]=='*'&&op[2]=='='? 1:len==2&&op[1]=='='? 1:len==2&&op[1]=='*'?14:len==1?11:-1; + case '+': return len==2&&op[1]=='='? 1:len==1?10:-1; + case '-': return len==2&&op[1]=='='? 1:len==1?10:-1; + case '/': return len==3&&op[1]=='/'&&op[2]=='='? 1:len==2&&op[1]=='='? 1:len==2&&op[1]=='/'?11:len==1?11:-1; + case '<': return len==2&&op[1]=='='? 6:len==1? 6:-1; + case '=': return len==2&&op[1]=='='? 5:len==1? 1:-1; + case '>': return len==2&&op[1]=='='? 6:len==1? 6:-1; + case '^': return len==2&&op[1]=='='? 1:len==2&&op[1]=='^'? 3:len==1? 8:-1; + case '|': return len==2&&op[1]=='='? 1:len==2&&op[1]=='|'? 2:len==1? 7:-1; + case '~': return len==1?12:-1; + default: return -1; + } +} + +int associativity(const char *op){ + switch(op[0]){ + case '!': return op[1]=='='&&!op[2]?AS_NONASSOC:!op[1]?AS_PREFIX :-1; + case '%': return op[1]=='='&&!op[2]?AS_RIGHT :!op[1]?AS_LEFT :-1; + case '&': return op[1]=='='&&!op[2]?AS_RIGHT :op[1]=='&'&&!op[2]?AS_LEFT :!op[1]?AS_LEFT :-1; + case '(': return op[1]=='-'&&op[2]==')'&&!op[3]?AS_PREFIX :op[1]=='!'&&op[2]==')'&&!op[3]?AS_SUFFIX :-1; + case '*': return op[1]=='*'&&op[2]=='='&&!op[3]?AS_RIGHT :op[1]=='='&&!op[2]?AS_RIGHT :op[1]=='*'&&!op[2]?AS_RIGHT :!op[1]?AS_LEFT :-1; + case '+': return op[1]=='='&&!op[2]?AS_RIGHT :!op[1]?AS_LEFT :-1; + case '-': return op[1]=='='&&!op[2]?AS_RIGHT :!op[1]?AS_LEFT :-1; + case '/': return op[1]=='/'&&op[2]=='='&&!op[3]?AS_RIGHT :op[1]=='='&&!op[2]?AS_RIGHT :op[1]=='/'&&!op[2]?AS_LEFT :!op[1]?AS_LEFT :-1; + case '<': return op[1]=='='&&!op[2]?AS_NONASSOC:!op[1]?AS_NONASSOC:-1; + case '=': return op[1]=='='&&!op[2]?AS_NONASSOC:!op[1]?AS_RIGHT :-1; + case '>': return op[1]=='='&&!op[2]?AS_NONASSOC:!op[1]?AS_NONASSOC:-1; + case '^': return op[1]=='='&&!op[2]?AS_RIGHT :op[1]=='^'&&!op[2]?AS_LEFT :!op[1]?AS_LEFT :-1; + case '|': return op[1]=='='&&!op[2]?AS_RIGHT :op[1]=='|'&&!op[2]?AS_LEFT :!op[1]?AS_LEFT :-1; + case '~': return !op[1]?AS_PREFIX :-1; + default: return -1; + } +} + +int associativity_len(const char *op,const int len){ + if(len<=0)return -1; + switch(op[0]){ + case '!': return len==2&&op[1]=='='?AS_NONASSOC:len==1?AS_PREFIX :-1; + case '%': return len==2&&op[1]=='='?AS_RIGHT :len==1?AS_LEFT :-1; + case '&': return len==2&&op[1]=='='?AS_RIGHT :len==2&&op[1]=='&'?AS_LEFT :len==1?AS_LEFT :-1; + case '(': return len==3&&op[1]=='-'&&op[2]==')'?AS_PREFIX :len==3&&op[1]=='!'&&op[2]==')'?AS_SUFFIX :-1; + case '*': return len==3&&op[1]=='*'&&op[2]=='='?AS_RIGHT :len==2&&op[1]=='='?AS_RIGHT :len==2&&op[1]=='*'?AS_RIGHT :len==1?AS_LEFT :-1; + case '+': return len==2&&op[1]=='='?AS_RIGHT :len==1?AS_LEFT :-1; + case '-': return len==2&&op[1]=='='?AS_RIGHT :len==1?AS_LEFT :-1; + case '/': return len==3&&op[1]=='/'&&op[2]=='='?AS_RIGHT :len==2&&op[1]=='='?AS_RIGHT :len==2&&op[1]=='/'?AS_LEFT :len==1?AS_LEFT :-1; + case '<': return len==2&&op[1]=='='?AS_NONASSOC:len==1?AS_NONASSOC:-1; + case '=': return len==2&&op[1]=='='?AS_NONASSOC:len==1?AS_RIGHT :-1; + case '>': return len==2&&op[1]=='='?AS_NONASSOC:len==1?AS_NONASSOC:-1; + case '^': return len==2&&op[1]=='='?AS_RIGHT :len==2&&op[1]=='^'?AS_LEFT :len==1?AS_LEFT :-1; + case '|': return len==2&&op[1]=='='?AS_RIGHT :len==2&&op[1]=='|'?AS_LEFT :len==1?AS_LEFT :-1; + case '~': return len==1?AS_PREFIX :-1; + default: return -1; + } +} + +int parseoplength(const char *op){ + switch(op[0]){ + case '!': return op[1]=='='?2:1; + case '%': return op[1]=='='?2:1; + case '&': return op[1]=='='?2:op[1]=='&'?2:1; + case '(': return op[1]=='-'&&op[2]==')'?3:op[1]=='!'&&op[2]==')'?3:-1; + case '*': return op[1]=='*'&&op[2]=='='?3:op[1]=='='?2:op[1]=='*'?2:1; + case '+': return op[1]=='='?2:1; + case '-': return op[1]=='='?2:1; + case '/': return op[1]=='/'&&op[2]=='='?3:op[1]=='='?2:op[1]=='/'?2:1; + case '<': return op[1]=='='?2:1; + case '=': return op[1]=='='?2:1; + case '>': return op[1]=='='?2:1; + case '^': return op[1]=='='?2:op[1]=='^'?2:1; + case '|': return op[1]=='='?2:op[1]=='|'?2:1; + case '~': return 1; + default: return -1; + } +} + +const char* opconststring_len(const char *op,const int len){ + if(len<=0)return NULL; + switch(op[0]){ + case '!': return len==2&&op[1]=='='?"!=":len==1?"!":NULL; + case '%': return len==2&&op[1]=='='?"%=":len==1?"%":NULL; + case '&': return len==2&&op[1]=='='?"&=":len==2&&op[1]=='&'?"&&":len==1?"&":NULL; + case '(': return len==3&&op[1]=='-'&&op[2]==')'?"(-)":len==3&&op[1]=='!'&&op[2]==')'?"(!)":NULL; + case '*': return len==3&&op[1]=='*'&&op[2]=='='?"**=":len==2&&op[1]=='='?"*=":len==2&&op[1]=='*'?"**":len==1?"*":NULL; + case '+': return len==2&&op[1]=='='?"+=":len==1?"+":NULL; + case '-': return len==2&&op[1]=='='?"-=":len==1?"-":NULL; + case '/': return len==3&&op[1]=='/'&&op[2]=='='?"//=":len==2&&op[1]=='='?"/=":len==2&&op[1]=='/'?"//":len==1?"/":NULL; + case '<': return len==2&&op[1]=='='?"<=":len==1?"<":NULL; + case '=': return len==2&&op[1]=='='?"==":len==1?"=":NULL; + case '>': return len==2&&op[1]=='='?">=":len==1?">":NULL; + case '^': return len==2&&op[1]=='='?"^=":len==2&&op[1]=='^'?"^^":len==1?"^":NULL; + case '|': return len==2&&op[1]=='='?"|=":len==2&&op[1]=='|'?"||":len==1?"|":NULL; + case '~': return len==1?"~":NULL; + default: return NULL; + } +} diff --git a/opfuncs.h b/opfuncs.h new file mode 100644 index 0000000..a5e212c --- /dev/null +++ b/opfuncs.h @@ -0,0 +1,8 @@ +#pragma once + +int precedence(const char *op); +int precedence_len(const char *op,const int len); +int associativity(const char *op); +int associativity_len(const char *op,const int len); +int parseoplength(const char *op); +const char* opconststring_len(const char *op,const int len); diff --git a/parser.c b/parser.c index 96b9bf7..14fd47d 100644 --- a/parser.c +++ b/parser.c @@ -2,193 +2,515 @@ #include #include #include +#include #include "memory.h" +#include "opfuncs.h" #include "parser.h" +#define NOT_IMPLEMENTED false + + +static bool ishexdigit(char c){ + return (c>='0'&&c<='9')||(c>='a'&&c<='f')||(c>='A'&&c<='F'); +} + +static int hexnumber(char c){ + return c<='9'?c-'0':(c&~('a'-'A'))-'A'+10; +} + +static char hexencode(int n){ + return n<10?n+'0':n+'a'; +} + + typedef enum Tokentype{ TT_NUM, TT_STR, TT_WORD, - TT_SYM + TT_OP, + TT_SYM, //all symbols that are not operators + TT_ENDSTMT, + TT_EOF, + + TT_ERR=-1 } Tokentype; typedef struct Token{ - const char *str; + Tokentype type; + const char *str; //Part of another string; not null-terminated, and do not free int len; } Token; -Token nexttoken(const char **sourcep){ + +static bool parsecomment(const char **sourcep){ + const char *source=*sourcep; + if(*source!='#')return false; + if(source[1]=='#'&&source[2]=='#'){ + source+=3; + while(*source&& + (*source!='#'||source[1]!='#'||source[2]!='#')){ + source++; + } + if(!*source)return false; //unclosed block comment + source+=2; + } else { + while(*source&&*source!='\n')source++; + if(*source)source++; + } + *sourcep=source; + return true; +} + +static void skipintermediate(const char **sourcep){ + const char *source=*sourcep; + bool acted; + do { + acted=false; + while(isspace(*source)){ + source++; + acted=true; + } + if(parsecomment(&source)){ + acted=true; + } + } while(acted); + *sourcep=source; +} + +static Token nexttoken(const char **sourcep){ + skipintermediate(sourcep); const char *source=*sourcep; - while(isspace(*source))source++; + if(*source=='\0'){ + Token tok={TT_EOF,NULL,-1}; + return tok; + } + if(*source==';'){ + Token tok={TT_ENDSTMT,source,1}; + (*sourcep)++; + return tok; + } if(isdigit(*source)||(*source=='-'&&isdigit(source[1]))){ char *endp; strtod(source,&endp); assert(endp!=source); - Token tok={source,endp-source}; + Token tok={TT_NUM,source,endp-source}; + *sourcep=endp; return tok; } -} - - -int precedence(const char *op){ - switch(op[0]){ - case '!': return op[1]=='\0' ? 12 : op[1]=='='&&op[2]=='\0' ? 5 : -1; - case '%': return op[1]=='\0' ? 11 : -1; - case '&': return op[1]=='\0' ? 9 : op[1]=='&'&&op[2]=='\0' ? 4 : -1; - case '*': return op[1]=='\0' ? 11 : op[1]=='*'&&op[2]=='\0' ? 14 : -1; - case '+': return op[1]=='\0' ? 10 : -1; - case '-': return op[1]=='\0' ? 10 : -1; - case '/': return op[1]=='\0' ? 11 : op[1]=='/'&&op[2]=='\0' ? 11 : -1; - case '<': return op[1]=='\0' ? 6 : op[1]=='='&&op[2]=='\0' ? 6 : -1; - case '=': return op[1]=='\0' ? 1 : op[1]=='='&&op[2]=='\0' ? 5 : -1; - case '>': return op[1]=='\0' ? 6 : op[1]=='='&&op[2]=='\0' ? 6 : -1; - case '^': return op[1]=='\0' ? 8 : op[1]=='^'&&op[2]=='\0' ? 3 : -1; - case '|': return op[1]=='\0' ? 7 : op[1]=='|'&&op[2]=='\0' ? 2 : -1; - case '~': return op[1]=='\0' ? 12 : -1; - default: return -1; + if(*source=='"'){ + int i; + for(i=1;source[i]&&source[i]!='"';i++){ + if(source[i]=='\\')i++; + } + if(!source[i]){ + Token tok={TT_ERR,"Non-terminated string",21}; + return tok; + } + *sourcep+=i+1; + Token tok={TT_STR,source,i+1}; + return tok; } -} - -int associativity(const char *op){ - switch(op[0]){ - case '!': return op[1]=='\0' ? AS_PREFIX : op[1]=='='&&op[2]=='\0' ? AS_NONASSOC : -1; - case '%': return op[1]=='\0' ? AS_LEFT : -1; - case '&': return op[1]=='\0' ? AS_LEFT : op[1]=='&'&&op[2]=='\0' ? AS_LEFT : -1; - case '*': return op[1]=='\0' ? AS_LEFT : op[1]=='*'&&op[2]=='\0' ? AS_RIGHT : -1; - case '+': return op[1]=='\0' ? AS_LEFT : -1; - case '-': return op[1]=='\0' ? AS_LEFT : -1; - case '/': return op[1]=='\0' ? AS_LEFT : op[1]=='/'&&op[2]=='\0' ? AS_LEFT : -1; - case '<': return op[1]=='\0' ? AS_NONASSOC : op[1]=='='&&op[2]=='\0' ? AS_NONASSOC : -1; - case '=': return op[1]=='\0' ? AS_RIGHT : op[1]=='='&&op[2]=='\0' ? AS_NONASSOC : -1; - case '>': return op[1]=='\0' ? AS_NONASSOC : op[1]=='='&&op[2]=='\0' ? AS_NONASSOC : -1; - case '^': return op[1]=='\0' ? AS_LEFT : op[1]=='^'&&op[2]=='\0' ? AS_LEFT : -1; - case '|': return op[1]=='\0' ? AS_LEFT : op[1]=='|'&&op[2]=='\0' ? AS_LEFT : -1; - case '~': return op[1]=='\0' ? AS_PREFIX : -1; - default: return -1; + int oplen=parseoplength(source); + if(oplen!=-1){ + Token tok={TT_OP,source,oplen}; + *sourcep+=oplen; + return tok; + } + if(strchr("(){}",*source)!=NULL){ + Token tok={TT_SYM,source,1}; + (*sourcep)++; + return tok; + } + if(isalpha(*source)||*source=='_'){ + int i; + for(i=1;source[i];i++){ + if(!isalpha(source[i])&&!isdigit(source[i])&&source[i]!='_')break; + } + Token tok={TT_WORD,source,i}; + *sourcep+=i; + return tok; } + Token tok={TT_ERR,"Unrecognised token",18}; + return tok; } -static bool parsecomment(const char *source,int *reslen){ - int cursor=0; - if(source[cursor]!='#')return false; - if(source[cursor+1]=='#'&&source[cursor+2]=='#'){ - cursor+=3; - while(source[cursor]&& - (source[cursor]!='#'||source[cursor+1]!='#'||source[cursor+2]!='#')){ - cursor++; - } - if(!source[cursor])return false; //unclosed block comment - cursor+=2; +static void printtoken(FILE *stream,Token tok,const char *msg){ + const char *type; + switch(tok.type){ + case TT_NUM: type="TT_NUM"; break; + case TT_STR: type="TT_STR"; break; + case TT_WORD: type="TT_WORD"; break; + case TT_OP: type="TT_OP"; break; + case TT_SYM: type="TT_SYM"; break; + case TT_ENDSTMT: type="TT_ENDSTMT"; break; + case TT_EOF: type="TT_EOF"; break; + case TT_ERR: type="TT_ERR"; break; + default: type="TT_(??\?)"; break; //TRIGRAPHS ._. + } + if(tok.len!=-1){ + char buf[tok.len+1]; + memcpy(buf,tok.str,tok.len); + buf[tok.len]='\0'; + fprintf(stream,"(%s) Token: %s '%s'\n",msg,type,buf); } else { - while(source[cursor]&&source[cursor]!='\n')cursor++; - if(source[cursor])cursor++; + fprintf(stream,"(%s) Token: %s (null)\n",msg,type); } - *reslen=cursor; - return true; } -static void parseintermediate(const char *source,int *reslen){ - int cursor=0; - bool acted; - do { - acted=false; - while(source[cursor]&&isspace(source[cursor])){ - cursor++; - acted=true; + +static AST* parseterm(const char *source,int *reslen){ + const char *origsource=source; + const Token tok=nexttoken(&source); + printtoken(stderr,tok,"parseterm"); + AST *node; + switch(tok.type){ + case TT_NUM:{ + node=malloc(sizeof(AST)); + if(!node)outofmem(); + node->type=AST_NUM; + char *endp; + int intv=strtol(tok.str,&endp,0); + node->n.isint=endp-tok.str==tok.len; + if(node->n.isint)node->n.i=intv; + else node->n.d=strtod(tok.str,NULL); + break; } - int partlen; - if(parsecomment(source+cursor,&partlen)){ - cursor+=partlen; - acted=true; + + case TT_STR:{ + int slen=0; + for(int i=1;i=tok.len-1||!ishexdigit(tok.str[i+1])||!ishexdigit(tok.str[i+2])){ + return NULL; + } + i+=2; + } else { + i++; + } + } + node=malloc(sizeof(AST)); + if(!node)outofmem(); + node->type=AST_STR; + node->s.str=malloc(slen+1); + if(!node->s.str)outofmem(); + int j=0; + for(int i=1;is.str[j++]=tok.str[i]; + continue; + } + i++; + switch(tok.str[i]){ + case 'n': node->s.str[j++]='\n'; break; + case 'r': node->s.str[j++]='\r'; break; + case 't': node->s.str[j++]='\t'; break; + case 'b': node->s.str[j++]='\b'; break; + case 'a': node->s.str[j++]='\a'; break; + case 'x': + node->s.str[j++]=16*hexnumber(tok.str[i+1])+hexnumber(tok.str[i+2]); + i+=2; + break; + default: + node->s.str[j++]=tok.str[i]; + break; + } + } + node->s.str[j]='\0'; + break; } - } while(acted); - *reslen=cursor; + + case TT_WORD:{ + if(tok.len==2&&memcmp(tok.str,"if",2)==0)assert(NOT_IMPLEMENTED); + if(tok.len==5&&memcmp(tok.str,"while",2)==0)assert(NOT_IMPLEMENTED); + const char *tempsource=source; + Token next=nexttoken(&source); + if(next.len==1&&next.str[0]=='(')assert(NOT_IMPLEMENTED); + source=tempsource; + node=malloc(sizeof(AST)); + if(!node)outofmem(); + node->type=AST_VAR; + node->v.name=malloc(tok.len+1); + if(!node->v.name)outofmem(); + memcpy(node->v.name,tok.str,tok.len); + node->v.name[tok.len]='\0'; + break; + } + + case TT_SYM: + assert(NOT_IMPLEMENTED); + break; + + case TT_OP:{ + char buf[tok.len+3]; + buf[0]='('; + memcpy(buf+1,tok.str,tok.len); + buf[tok.len+1]=')'; + buf[tok.len+2]='\0'; + if(associativity(buf)==AS_PREFIX){ + node=malloc(sizeof(AST)); + if(!node)outofmem(); + node->type=AST_OP; + node->o.op=opconststring_len(buf,tok.len+2); + node->o.left=NULL; + int len; + node->o.right=parseterm(source,&len); + if(!node->o.right){ + free(node); + return NULL; + } + source+=len; + } else return NULL; + break; + } + + case TT_ENDSTMT: + case TT_EOF: + case TT_ERR: + return NULL; + } + *reslen=source-origsource; + return node; } +//Uses precedence climbing static AST* parseexpr(const char *source,int *reslen,int minprec){ - ; + const char *origsource=source; + int len; + AST *tree=parseterm(source,&len); + if(!tree)return NULL; + source+=len; + while(true){ + const char *beforeop=source; + Token tok=nexttoken(&source); + printtoken(stderr,tok,"parseEXPR"); + if(tok.type==TT_ENDSTMT){ + source=beforeop; + break; + } + if(tok.type!=TT_OP){ + ast_free(tree); + return NULL; + } + int prec=precedence_len(tok.str,tok.len); + if(prectype=AST_OP; + opnode->o.op=opconststring_len(tok.str,tok.len); + if(!opnode->o.op)outofmem(); + opnode->o.left=tree; + opnode->o.right=right; + tree=opnode; + } + *reslen=source-origsource; + return tree; } static AST* parsestmt(const char *source,int *reslen){ return parseexpr(source,reslen,0); } -ASTblock* parse(const char *source){ - ASTblock *bl=malloc(sizeof(ASTblock)); +AST* parse(const char *source){ + AST *bl=malloc(sizeof(AST)); + if(!bl)outofmem(); + bl->type=AST_BLOCK; int sz=32; - bl->len=0; - bl->exprs=calloc(sz,sizeof(AST*)); - if(!bl->exprs)outofmem(); + bl->b.len=0; + bl->b.exprs=calloc(sz,sizeof(AST*)); + if(!bl->b.exprs)outofmem(); int reslen; int cursor=0; while(true){ - if(bl->len==sz){ + if(bl->b.len==sz){ sz*=2; - bl->exprs=realloc(bl->exprs,sz*sizeof(AST*)); - if(!bl->exprs)outofmem(); + bl->b.exprs=realloc(bl->b.exprs,sz*sizeof(AST*)); + if(!bl->b.exprs)outofmem(); } - parseintermediate(source+cursor,&reslen); - if(!source[cursor])break; AST *node=parsestmt(source+cursor,&reslen); if(!node){ - ast_free((AST*)bl); + ast_free(bl); return NULL; } - bl->exprs[bl->len++]=node; + bl->b.exprs[bl->b.len++]=node; cursor+=reslen; + const char *src=source+cursor; + Token tok=nexttoken(&src); + if(tok.type!=TT_ENDSTMT){ + ast_free(bl); + return NULL; + } + cursor=src-source; + src=source+cursor; + tok=nexttoken(&src); + if(tok.type==TT_EOF)break; } return bl; } -void ast_free(AST *ast_){ - switch(ast_->type){ - case AST_BLOCK:{ ASTblock *ast=(ASTblock*)ast_; - for(int i=0;ilen;i++)if(ast->exprs[i])ast_free(ast->exprs[i]); - free(ast->exprs); +static const char* charblock(char c,int n){ + static char *buf=NULL; + if(!buf)buf=malloc(n+1); + else buf=realloc(buf,n+1); + if(!buf)outofmem(); + memset(buf,c,n); + buf[n]='\0'; + return buf; +} + +#define TABW (4) +#define INDENT fprintf(stream,"%s",charblock(' ',TABW*indent)); +static void ast_debug_(FILE *stream,const AST *ast,int indent){ + switch(ast->type){ + case AST_BLOCK: + if(ast->b.len==0){ + fprintf(stream,"{}"); + break; + } + fprintf(stream,"{\n"); + indent++; + for(int i=0;ib.len;i++){ + INDENT + ast_debug_(stream,ast->b.exprs[i],indent); + fputc('\n',stream); + } + indent--; + INDENT + fprintf(stream,"}"); + break; + + case AST_OP:{ + bool leftp=ast->o.left&&ast->o.left->type==AST_OP&&precedence(ast->o.left->o.op)<=precedence(ast->o.op); + bool rightp=ast->o.right&&ast->o.right->type==AST_OP&&precedence(ast->o.right->o.op)<=precedence(ast->o.op); + //fprintf(stderr,"[[op='%s' p=%d lp=%d rp=%d]]",ast->o.op,precedence(ast->o.op),leftp,rightp); + if(leftp)fputc('(',stream); + if(ast->o.left)ast_debug_(stream,ast->o.left,indent); + fprintf(stream,"%s%s%s",leftp?")":"",ast->o.op,rightp?"(":""); + if(ast->o.right)ast_debug_(stream,ast->o.right,indent); + if(rightp)fputc(')',stream); + break; + } + + case AST_NUM: + if(ast->n.isint)fprintf(stream,"%lld",ast->n.i); + else fprintf(stream,"%g",ast->n.d); + break; + + case AST_STR: + fputc('"',stream); + for(int i=0;is.len;i++){ + if(ast->s.str[i]<32||ast->s.str[i]>126){ + fprintf(stream,"\\x%c%c",hexencode(ast->s.str[i]/16),hexencode(ast->s.str[i]%16)); + } else fputc(ast->s.str[i],stream); + } + fputc('"',stream); + break; + + case AST_VAR: + fprintf(stream,"%s",ast->v.name); + break; + + case AST_CALL: + fprintf(stream,"%s(",ast->c.func); + for(int i=0;ic.nargs;i++){ + if(i!=0)fputc(',',stream); + ast_debug_(stream,ast->c.args[i],indent); + } + fputc(')',stream); + break; + + case AST_IF: + assert(NOT_IMPLEMENTED); + break; + + case AST_WHILE: + assert(NOT_IMPLEMENTED); + break; + + default: + fprintf(stream,"AST_(??\?)"); + break; + } +} + +void ast_debug(FILE *stream,const AST *ast){ + ast_debug_(stream,ast,0); + fputc('\n',stream); +} + +void ast_free(AST *ast){ + switch(ast->type){ + case AST_BLOCK:{ + for(int i=0;ib.len;i++)if(ast->b.exprs[i])ast_free(ast->b.exprs[i]); + free(ast->b.exprs); break; } - case AST_OP:{ ASTop *ast=(ASTop*)ast_; - if(ast->left)ast_free(ast->left); - if(ast->right)ast_free(ast->right); + case AST_OP:{ + if(ast->o.left)ast_free(ast->o.left); + if(ast->o.right)ast_free(ast->o.right); break; } case AST_NUM: break; - case AST_STR:{ ASTstr *ast=(ASTstr*)ast_; - if(ast->str)free(ast->str); + case AST_STR:{ + if(ast->s.str)free(ast->s.str); break; } - case AST_VAR:{ ASTvar *ast=(ASTvar*)ast_; - if(ast->name)free(ast->name); + case AST_VAR:{ + if(ast->v.name)free(ast->v.name); break; } - case AST_CALL:{ ASTcall *ast=(ASTcall*)ast_; - if(ast->func)free(ast->func); - for(int i=0;inargs;i++)if(ast->args[i])ast_free(ast->args[i]); - free(ast->args); + case AST_CALL:{ + if(ast->c.func)free(ast->c.func); + for(int i=0;ic.nargs;i++)if(ast->c.args[i])ast_free(ast->c.args[i]); + free(ast->c.args); break; } - case AST_IF:{ ASTif *ast=(ASTif*)ast_; - if(ast->cond)free(ast->cond); - if(ast->thenb)free(ast->thenb); - if(ast->elseb)free(ast->elseb); + case AST_IF:{ + if(ast->i.cond)free(ast->i.cond); + if(ast->i.thenb)free(ast->i.thenb); + if(ast->i.elseb)free(ast->i.elseb); break; } - case AST_WHILE:{ ASTwhile *ast=(ASTwhile*)ast_; - if(ast->cond)free(ast->cond); - if(ast->body)free(ast->body); + case AST_WHILE:{ + if(ast->w.cond)free(ast->w.cond); + if(ast->w.body)free(ast->w.body); break; } } - free(ast_); + free(ast); } diff --git a/parser.h b/parser.h index 7d9b347..61d6dfe 100644 --- a/parser.h +++ b/parser.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -86,5 +87,6 @@ typedef enum Associativity{ } Associativity; -ASTblock* parse(const char *source); +AST* parse(const char *source); +void ast_debug(FILE *stream,const AST *ast); void ast_free(AST *ast); -- cgit v1.2.3-70-g09d2