summary refs log tree commit diff
path: root/parser.c
diff options
context:
space:
mode:
author tomsmeding <tom.smeding@gmail.com> 2016-08-06 14:24:07 +0200
committer tomsmeding <tom.smeding@gmail.com> 2016-08-06 14:25:13 +0200
commit 54ddaaaf7692ccec56184cb2c6958a64019b4c6e (patch)
tree 44642858155524ef1998670536195f1e8512fe81 /parser.c
parent 6eb5ea040f9ebc1a99953ff72c48ebe9f97f7f3c (diff)
Working parser (no functions/blocks)
Diffstat (limited to 'parser.c')
-rw-r--r-- parser.c 107
1 files changed, 82 insertions, 25 deletions
diff --git a/parser.c b/parser.c
index b2bd68b..56a014e 100644
--- a/parser.c
+++ b/parser.c
@@ -1,7 +1,9 @@
+#define _GNU_SOURCE //asprintf
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
+#include <limits.h>
#include <assert.h>
#include "memory.h"
@@ -79,17 +81,20 @@ static void skipintermediate(const char **sourcep){
*sourcep=source;
}
+static Token lasttoken;
+static const char *lasterrloc;
+
static Token nexttoken(const char **sourcep,bool expectop){
skipintermediate(sourcep);
const char *source=*sourcep;
if(*source=='\0'){
Token tok={TT_EOF,NULL,-1};
- return tok;
+ return lasttoken=tok;
}
if(*source==';'){
Token tok={TT_ENDSTMT,source,1};
(*sourcep)++;
- return tok;
+ return lasttoken=tok;
}
if(isdigit(*source)||(!expectop&&*source=='-'&&isdigit(source[1]))){
char *endp;
@@ -97,7 +102,7 @@ static Token nexttoken(const char **sourcep,bool expectop){
assert(endp!=source);
Token tok={TT_NUM,source,endp-source};
*sourcep=endp;
- return tok;
+ return lasttoken=tok;
}
if(*source=='"'){
int i;
@@ -106,30 +111,30 @@ static Token nexttoken(const char **sourcep,bool expectop){
}
if(!source[i]){
Token tok={TT_ERR,"Non-terminated string",21};
- return tok;
+ return lasttoken=tok;
}
*sourcep+=i+1;
Token tok={TT_STR,source,i+1};
- return tok;
+ return lasttoken=tok;
}
int oplen=parseoplength(source);
if(oplen!=-1){
Token tok={TT_OP,source,oplen};
*sourcep+=oplen;
- return tok;
+ return lasttoken=tok;
}
if(!expectop){
char buf[4]={'(',*source,')','\0'};
if(precedence(buf)!=-1){
Token tok={TT_OP,source,1};
(*sourcep)++;
- return tok;
+ return lasttoken=tok;
}
}
if(strchr("(){}",*source)!=NULL){
Token tok={TT_SYM,source,1};
(*sourcep)++;
- return tok;
+ return lasttoken=tok;
}
if(isalpha(*source)||*source=='_'){
int i;
@@ -138,10 +143,11 @@ static Token nexttoken(const char **sourcep,bool expectop){
}
Token tok={TT_WORD,source,i};
*sourcep+=i;
- return tok;
+ return lasttoken=tok;
}
Token tok={TT_ERR,"Unrecognised token",18};
- return tok;
+ lasterrloc=source;
+ return lasttoken=tok;
}
@@ -169,12 +175,12 @@ static void printtoken(FILE *stream,Token tok,const char *msg){
}
-static AST* parseexpr(const char *source,int *reslen,int minprec);
+static AST* parseexpr(const char *source,int *reslen,int minprec,int maxprec);
static AST* parseterm(const char *source,int *reslen){
const char *origsource=source;
const Token tok=nexttoken(&source,false);
- printtoken(stderr,tok,"parseterm");
+ //printtoken(stderr,tok,"parseterm");
AST *node;
switch(tok.type){
case TT_NUM:{
@@ -257,10 +263,11 @@ static AST* parseterm(const char *source,int *reslen){
assert(NOT_IMPLEMENTED);
} else if(tok.len==1&&tok.str[0]=='('){
int len;
- node=parseexpr(source,&len,0);
+ node=parseexpr(source,&len,0,INT_MAX);
if(!node)return NULL;
source+=len;
Token aftertok=nexttoken(&source,false);
+ //printtoken(stderr,aftertok,"braceclose");
if(aftertok.type!=TT_SYM||aftertok.len!=1||aftertok.str[0]!=')'){
ast_free(node);
return NULL;
@@ -283,7 +290,7 @@ static AST* parseterm(const char *source,int *reslen){
node->o.op=opconststring_len(buf,tok.len+2);
node->o.left=NULL;
int len;
- node->o.right=parseexpr(source,&len,precedence(buf));
+ node->o.right=parseexpr(source,&len,precedence(buf),INT_MAX);
if(!node->o.right){
free(node);
return NULL;
@@ -303,8 +310,8 @@ static AST* parseterm(const char *source,int *reslen){
return node;
}
-//Uses precedence climbing
-static AST* parseexpr(const char *source,int *reslen,int minprec){
+//Uses precedence climbing; maxprec is INT_MAX unless dealing with nonassociative operators
+static AST* parseexpr_(const char *source,int *reslen,int minprec,int maxprec){
const char *origsource=source;
int len;
AST *tree=parseterm(source,&len);
@@ -313,14 +320,14 @@ static AST* parseexpr(const char *source,int *reslen,int minprec){
while(true){
const char *beforeop=source;
Token tok=nexttoken(&source,true);
- printtoken(stderr,tok,"parseEXPR");
+ //printtoken(stderr,tok,"parseEXPR");
if(tok.type==TT_ENDSTMT){
- fprintf(stderr," (token undo)\n");
+ //fprintf(stderr," (token undo)\n");
source=beforeop;
break;
}
if(tok.type==TT_SYM&&tok.len==1&&tok.str[0]==')'){
- fprintf(stderr," (token undo)\n");
+ //fprintf(stderr," (token undo)\n");
source=beforeop;
break;
}
@@ -330,10 +337,14 @@ static AST* parseexpr(const char *source,int *reslen,int minprec){
}
int prec=precedence_len(tok.str,tok.len);
if(prec<minprec){
- fprintf(stderr," (token undo)\n");
+ //fprintf(stderr," (token undo)\n");
source=beforeop;
break;
}
+ if(prec>maxprec){
+ ast_free(tree);
+ return NULL;
+ }
Associativity assoc=associativity_len(tok.str,tok.len);
int q;
switch(assoc){
@@ -343,11 +354,11 @@ static AST* parseexpr(const char *source,int *reslen,int minprec){
case AS_LEFT: q=prec+1; break;
case AS_RIGHT: q=prec; break;
- case AS_NONASSOC: q=prec+1; minprec=prec+1; break;
+ case AS_NONASSOC: q=prec+1; maxprec=prec-1; break;
default: assert(false);
}
- AST *right=parseexpr(source,&len,q);
+ AST *right=parseexpr(source,&len,q,maxprec);
if(!right){
ast_free(tree);
return NULL;
@@ -366,11 +377,55 @@ static AST* parseexpr(const char *source,int *reslen,int minprec){
return tree;
}
+static AST* parseexpr(const char *source,int *reslen,int minprec,int maxprec){
+ static int depth=0;
+ //fprintf(stderr,"\x1B[32mEXPR ENTER >>> (%d)\x1B[0m\n",depth);
+ depth++;
+ AST *r=parseexpr_(source,reslen,minprec,maxprec);
+ depth--;
+ //fprintf(stderr,"\x1B[32mEXPR LEAVE <<< (%d)\x1B[0m\n",depth);
+ return r;
+}
+
static AST* parsestmt(const char *source,int *reslen){
- return parseexpr(source,reslen,0);
+ return parseexpr(source,reslen,0,INT_MAX);
+}
+
+static char* reportparseerror(const char *source){
+ const char *loc=lasttoken.type==TT_ERR?lasterrloc:lasttoken.str;
+ if(loc==NULL){
+ char *buf;
+ asprintf(&buf,"\x1B[1mParse error: unexpected end of file\x1B[0m");
+ return buf;
+ }
+ assert(loc>=source);
+ int i;
+ for(i=0;i<100;i++){
+ if(loc-i==source||loc[-i]=='\n')break;
+ }
+ bool cutoff=i==100;
+ const char *start=loc-i+(cutoff||loc[-i]=='\n');
+ for(;i<150;i++){
+ if(start[i]=='\0'||start[i]=='\n')break;
+ }
+ bool endcutoff=i==150;
+ int totallen=17+4*cutoff+i+7+6+4*endcutoff+4;
+ char *errstr=malloc(totallen+1);
+ if(!errstr)outofmem();
+ int offset=0;
+ memcpy(errstr+offset,"\x1B[1mParse error: ",17); offset+=17;
+ if(cutoff){memcpy(errstr+offset,"... ",4); offset+=4;}
+ memcpy(errstr+offset,start,loc-start); offset+=loc-start;
+ memcpy(errstr+offset,"\x1B[31;4m",7); offset+=7;
+ memcpy(errstr+offset,loc,lasttoken.len); offset+=lasttoken.len;
+ memcpy(errstr+offset,"\x1B[0;1m",6); offset+=6;
+ memcpy(errstr+offset,loc+lasttoken.len,i-(loc-start)-lasttoken.len); offset+=i-(loc-start)-lasttoken.len;
+ memcpy(errstr+offset,"\x1B[0m",4); offset+=4;
+ errstr[offset]='\0';
+ return errstr;
}
-AST* parse(const char *source){
+AST* parse(const char *source,char **errmsg){
AST *bl=malloc(sizeof(AST));
if(!bl)outofmem();
bl->type=AST_BLOCK;
@@ -389,15 +444,17 @@ AST* parse(const char *source){
AST *node=parsestmt(source+cursor,&reslen);
if(!node){
ast_free(bl);
+ *errmsg=reportparseerror(source);
return NULL;
}
bl->b.exprs[bl->b.len++]=node;
cursor+=reslen;
const char *src=source+cursor;
Token tok=nexttoken(&src,false);
- printtoken(stderr,tok,"parse ");
+ //printtoken(stderr,tok,"parse ");
if(tok.type!=TT_ENDSTMT){
ast_free(bl);
+ *errmsg=reportparseerror(source);
return NULL;
}
cursor=src-source;