diff options
author | tomsmeding <tom.smeding@gmail.com> | 2016-08-06 14:24:07 +0200 |
---|---|---|
committer | tomsmeding <tom.smeding@gmail.com> | 2016-08-06 14:25:13 +0200 |
commit | 54ddaaaf7692ccec56184cb2c6958a64019b4c6e (patch) | |
tree | 44642858155524ef1998670536195f1e8512fe81 | |
parent | 6eb5ea040f9ebc1a99953ff72c48ebe9f97f7f3c (diff) |
Working parser (no functions/blocks)
-rw-r--r-- | code.txt | 3 | ||||
-rw-r--r-- | main.c | 6 | ||||
-rw-r--r-- | parser.c | 107 | ||||
-rw-r--r-- | parser.h | 2 |
4 files changed, 90 insertions, 28 deletions
@@ -3,3 +3,6 @@ b = !2; c = 1 + (- a - 3 + b > -1); b += 1; b = 3*(b//3-1); + +x = 1 + 1 + 1 + 1; +y = 1 > (1 > 1) == 1; @@ -77,9 +77,11 @@ int main(int argc,char **argv){ return 1; } - AST *ast=parse(source); + char *errmsg; + AST *ast=parse(source,&errmsg); if(ast==NULL){ - fprintf(stderr,"Parsing error!\n"); + fprintf(stderr,"%s\n",errmsg); + free(errmsg); return 1; } ast_debug(stderr,ast); @@ -1,7 +1,9 @@ +#define _GNU_SOURCE //asprintf #include <stdio.h> #include <stdlib.h> #include <string.h> #include <ctype.h> +#include <limits.h> #include <assert.h> #include "memory.h" @@ -79,17 +81,20 @@ static void skipintermediate(const char **sourcep){ *sourcep=source; } +static Token lasttoken; +static const char *lasterrloc; + static Token nexttoken(const char **sourcep,bool expectop){ skipintermediate(sourcep); const char *source=*sourcep; if(*source=='\0'){ Token tok={TT_EOF,NULL,-1}; - return tok; + return lasttoken=tok; } if(*source==';'){ Token tok={TT_ENDSTMT,source,1}; (*sourcep)++; - return tok; + return lasttoken=tok; } if(isdigit(*source)||(!expectop&&*source=='-'&&isdigit(source[1]))){ char *endp; @@ -97,7 +102,7 @@ static Token nexttoken(const char **sourcep,bool expectop){ assert(endp!=source); Token tok={TT_NUM,source,endp-source}; *sourcep=endp; - return tok; + return lasttoken=tok; } if(*source=='"'){ int i; @@ -106,30 +111,30 @@ static Token nexttoken(const char **sourcep,bool expectop){ } if(!source[i]){ Token tok={TT_ERR,"Non-terminated string",21}; - return tok; + return lasttoken=tok; } *sourcep+=i+1; Token tok={TT_STR,source,i+1}; - return tok; + return lasttoken=tok; } int oplen=parseoplength(source); if(oplen!=-1){ Token tok={TT_OP,source,oplen}; *sourcep+=oplen; - return tok; + return lasttoken=tok; } if(!expectop){ char buf[4]={'(',*source,')','\0'}; if(precedence(buf)!=-1){ Token tok={TT_OP,source,1}; (*sourcep)++; - return tok; + return lasttoken=tok; } } if(strchr("(){}",*source)!=NULL){ Token tok={TT_SYM,source,1}; (*sourcep)++; - return tok; + return lasttoken=tok; } if(isalpha(*source)||*source=='_'){ int i; @@ -138,10 +143,11 @@ static Token nexttoken(const char **sourcep,bool expectop){ } Token tok={TT_WORD,source,i}; *sourcep+=i; - return tok; + return lasttoken=tok; } Token tok={TT_ERR,"Unrecognised token",18}; - return tok; + lasterrloc=source; + return lasttoken=tok; } @@ -169,12 +175,12 @@ static void printtoken(FILE *stream,Token tok,const char *msg){ } -static AST* parseexpr(const char *source,int *reslen,int minprec); +static AST* parseexpr(const char *source,int *reslen,int minprec,int maxprec); static AST* parseterm(const char *source,int *reslen){ const char *origsource=source; const Token tok=nexttoken(&source,false); - printtoken(stderr,tok,"parseterm"); + //printtoken(stderr,tok,"parseterm"); AST *node; switch(tok.type){ case TT_NUM:{ @@ -257,10 +263,11 @@ static AST* parseterm(const char *source,int *reslen){ assert(NOT_IMPLEMENTED); } else if(tok.len==1&&tok.str[0]=='('){ int len; - node=parseexpr(source,&len,0); + node=parseexpr(source,&len,0,INT_MAX); if(!node)return NULL; source+=len; Token aftertok=nexttoken(&source,false); + //printtoken(stderr,aftertok,"braceclose"); if(aftertok.type!=TT_SYM||aftertok.len!=1||aftertok.str[0]!=')'){ ast_free(node); return NULL; @@ -283,7 +290,7 @@ static AST* parseterm(const char *source,int *reslen){ node->o.op=opconststring_len(buf,tok.len+2); node->o.left=NULL; int len; - node->o.right=parseexpr(source,&len,precedence(buf)); + node->o.right=parseexpr(source,&len,precedence(buf),INT_MAX); if(!node->o.right){ free(node); return NULL; @@ -303,8 +310,8 @@ static AST* parseterm(const char *source,int *reslen){ return node; } -//Uses precedence climbing -static AST* parseexpr(const char *source,int *reslen,int minprec){ +//Uses precedence climbing; maxprec is INT_MAX unless dealing with nonassociative operators +static AST* parseexpr_(const char *source,int *reslen,int minprec,int maxprec){ const char *origsource=source; int len; AST *tree=parseterm(source,&len); @@ -313,14 +320,14 @@ static AST* parseexpr(const char *source,int *reslen,int minprec){ while(true){ const char *beforeop=source; Token tok=nexttoken(&source,true); - printtoken(stderr,tok,"parseEXPR"); + //printtoken(stderr,tok,"parseEXPR"); if(tok.type==TT_ENDSTMT){ - fprintf(stderr," (token undo)\n"); + //fprintf(stderr," (token undo)\n"); source=beforeop; break; } if(tok.type==TT_SYM&&tok.len==1&&tok.str[0]==')'){ - fprintf(stderr," (token undo)\n"); + //fprintf(stderr," (token undo)\n"); source=beforeop; break; } @@ -330,10 +337,14 @@ static AST* parseexpr(const char *source,int *reslen,int minprec){ } int prec=precedence_len(tok.str,tok.len); if(prec<minprec){ - fprintf(stderr," (token undo)\n"); + //fprintf(stderr," (token undo)\n"); source=beforeop; break; } + if(prec>maxprec){ + ast_free(tree); + return NULL; + } Associativity assoc=associativity_len(tok.str,tok.len); int q; switch(assoc){ @@ -343,11 +354,11 @@ static AST* parseexpr(const char *source,int *reslen,int minprec){ case AS_LEFT: q=prec+1; break; case AS_RIGHT: q=prec; break; - case AS_NONASSOC: q=prec+1; minprec=prec+1; break; + case AS_NONASSOC: q=prec+1; maxprec=prec-1; break; default: assert(false); } - AST *right=parseexpr(source,&len,q); + AST *right=parseexpr(source,&len,q,maxprec); if(!right){ ast_free(tree); return NULL; @@ -366,11 +377,55 @@ static AST* parseexpr(const char *source,int *reslen,int minprec){ return tree; } +static AST* parseexpr(const char *source,int *reslen,int minprec,int maxprec){ + static int depth=0; + //fprintf(stderr,"\x1B[32mEXPR ENTER >>> (%d)\x1B[0m\n",depth); + depth++; + AST *r=parseexpr_(source,reslen,minprec,maxprec); + depth--; + //fprintf(stderr,"\x1B[32mEXPR LEAVE <<< (%d)\x1B[0m\n",depth); + return r; +} + static AST* parsestmt(const char *source,int *reslen){ - return parseexpr(source,reslen,0); + return parseexpr(source,reslen,0,INT_MAX); +} + +static char* reportparseerror(const char *source){ + const char *loc=lasttoken.type==TT_ERR?lasterrloc:lasttoken.str; + if(loc==NULL){ + char *buf; + asprintf(&buf,"\x1B[1mParse error: unexpected end of file\x1B[0m"); + return buf; + } + assert(loc>=source); + int i; + for(i=0;i<100;i++){ + if(loc-i==source||loc[-i]=='\n')break; + } + bool cutoff=i==100; + const char *start=loc-i+(cutoff||loc[-i]=='\n'); + for(;i<150;i++){ + if(start[i]=='\0'||start[i]=='\n')break; + } + bool endcutoff=i==150; + int totallen=17+4*cutoff+i+7+6+4*endcutoff+4; + char *errstr=malloc(totallen+1); + if(!errstr)outofmem(); + int offset=0; + memcpy(errstr+offset,"\x1B[1mParse error: ",17); offset+=17; + if(cutoff){memcpy(errstr+offset,"... ",4); offset+=4;} + memcpy(errstr+offset,start,loc-start); offset+=loc-start; + memcpy(errstr+offset,"\x1B[31;4m",7); offset+=7; + memcpy(errstr+offset,loc,lasttoken.len); offset+=lasttoken.len; + memcpy(errstr+offset,"\x1B[0;1m",6); offset+=6; + memcpy(errstr+offset,loc+lasttoken.len,i-(loc-start)-lasttoken.len); offset+=i-(loc-start)-lasttoken.len; + memcpy(errstr+offset,"\x1B[0m",4); offset+=4; + errstr[offset]='\0'; + return errstr; } -AST* parse(const char *source){ +AST* parse(const char *source,char **errmsg){ AST *bl=malloc(sizeof(AST)); if(!bl)outofmem(); bl->type=AST_BLOCK; @@ -389,15 +444,17 @@ AST* parse(const char *source){ AST *node=parsestmt(source+cursor,&reslen); if(!node){ ast_free(bl); + *errmsg=reportparseerror(source); return NULL; } bl->b.exprs[bl->b.len++]=node; cursor+=reslen; const char *src=source+cursor; Token tok=nexttoken(&src,false); - printtoken(stderr,tok,"parse "); + //printtoken(stderr,tok,"parse "); if(tok.type!=TT_ENDSTMT){ ast_free(bl); + *errmsg=reportparseerror(source); return NULL; } cursor=src-source; @@ -87,6 +87,6 @@ typedef enum Associativity{ } Associativity; -AST* parse(const char *source); +AST* parse(const char *source,char **errmsg); void ast_debug(FILE *stream,const AST *ast); void ast_free(AST *ast); |