#define _GNU_SOURCE //asprintf #include #include #include #include #include #include #include "memory.h" #include "opfuncs.h" #include "parser.h" #define DEBUG #ifdef DEBUG #define DBG(...) __VA_ARGS__ #else #define DBG(...) #endif #define DBGF(...) DBG(fprintf(stderr,__VA_ARGS__)) #define NOT_IMPLEMENTED false static bool ishexdigit(char c){ return (c>='0'&&c<='9')||(c>='a'&&c<='f')||(c>='A'&&c<='F'); } static int hexnumber(char c){ return c<='9'?c-'0':(c&~('a'-'A'))-'A'+10; } static char hexencode(int n){ return n<10?n+'0':n+'a'; } typedef enum Tokentype{ TT_NUM, TT_STR, TT_WORD, TT_OP, TT_SYM, //all symbols that are not operators TT_ENDSTMT, TT_EOF, TT_ERR=-1 } Tokentype; typedef struct Token{ Tokentype type; const char *str; //Pointer into source string, or NULL if TT_EOF int len; } Token; static bool parsecomment(const char **sourcep){ const char *source=*sourcep; if(*source!='#')return false; if(source[1]=='#'&&source[2]=='#'){ source+=3; while(*source&& (*source!='#'||source[1]!='#'||source[2]!='#')){ source++; } if(!*source)return false; //unclosed block comment source+=2; } else { while(*source&&*source!='\n')source++; if(*source)source++; } *sourcep=source; return true; } static void skipintermediate(const char **sourcep){ const char *source=*sourcep; bool acted; do { acted=false; while(isspace(*source)){ source++; acted=true; } if(parsecomment(&source)){ acted=true; } } while(acted); *sourcep=source; } static Token lasttoken; static const char *lasterrloc; static Token nexttoken(const char **sourcep,bool expectop){ skipintermediate(sourcep); const char *source=*sourcep; if(*source=='\0'){ Token tok={TT_EOF,NULL,-1}; return lasttoken=tok; } if(*source==';'){ Token tok={TT_ENDSTMT,source,1}; (*sourcep)++; return lasttoken=tok; } if(isdigit(*source)||(!expectop&&*source=='-'&&isdigit(source[1]))){ char *endp; strtod(source,&endp); assert(endp!=source); Token tok={TT_NUM,source,endp-source}; *sourcep=endp; return lasttoken=tok; } if(*source=='"'){ int i; for(i=1;source[i]&&source[i]!='"';i++){ if(source[i]=='\\')i++; } if(!source[i]){ Token tok={TT_ERR,"Non-terminated string",21}; return lasttoken=tok; } *sourcep+=i+1; Token tok={TT_STR,source,i+1}; return lasttoken=tok; } int oplen=parseoplength(source); if(oplen!=-1){ Token tok={TT_OP,source,oplen}; *sourcep+=oplen; return lasttoken=tok; } if(!expectop){ char buf[4]={'(',*source,')','\0'}; if(precedence(buf)!=-1){ Token tok={TT_OP,source,1}; (*sourcep)++; return lasttoken=tok; } } if(strchr("(){},",*source)!=NULL){ Token tok={TT_SYM,source,1}; (*sourcep)++; return lasttoken=tok; } if(isalpha(*source)||*source=='_'){ int i; for(i=1;source[i];i++){ if(!isalpha(source[i])&&!isdigit(source[i])&&source[i]!='_')break; } Token tok={TT_WORD,source,i}; *sourcep+=i; return lasttoken=tok; } Token tok={TT_ERR,"Unrecognised token",18}; lasterrloc=source; return lasttoken=tok; } #ifdef DEBUG static void printtoken(FILE *stream,Token tok,const char *msg){ const char *type; switch(tok.type){ case TT_NUM: type="TT_NUM"; break; case TT_STR: type="TT_STR"; break; case TT_WORD: type="TT_WORD"; break; case TT_OP: type="TT_OP"; break; case TT_SYM: type="TT_SYM"; break; case TT_ENDSTMT: type="TT_ENDSTMT"; break; case TT_EOF: type="TT_EOF"; break; case TT_ERR: type="TT_ERR"; break; default: type="TT_(??\?)"; break; //TRIGRAPHS ._. } if(tok.len!=-1){ char buf[tok.len+1]; memcpy(buf,tok.str,tok.len); buf[tok.len]='\0'; fprintf(stream,"(%s) Token: %s '%s'\n",msg,type,buf); } else { fprintf(stream,"(%s) Token: %s (null)\n",msg,type); } } #endif static AST* parseexpr(const char *source,int *reslen,int minprec,int maxprec); static AST* parseterm(const char *source,int *reslen){ const char *origsource=source; const Token tok=nexttoken(&source,false); DBG(printtoken(stderr,tok,"parseterm")); AST *node; switch(tok.type){ case TT_NUM:{ node=malloc(sizeof(AST)); if(!node)outofmem(); node->type=AST_NUM; char *endp; int intv=strtol(tok.str,&endp,0); node->n.isint=endp-tok.str==tok.len; if(node->n.isint)node->n.i=intv; else node->n.d=strtod(tok.str,NULL); break; } case TT_STR:{ int slen=0; for(int i=1;i=tok.len-1||!ishexdigit(tok.str[i+1])||!ishexdigit(tok.str[i+2])){ return NULL; } i+=2; } } node=malloc(sizeof(AST)); if(!node)outofmem(); node->type=AST_STR; node->s.len=slen; node->s.str=malloc(slen+1); if(!node->s.str)outofmem(); int j=0; for(int i=1;is.str[j++]=tok.str[i]; continue; } i++; switch(tok.str[i]){ case 'n': node->s.str[j++]='\n'; break; case 'r': node->s.str[j++]='\r'; break; case 't': node->s.str[j++]='\t'; break; case 'b': node->s.str[j++]='\b'; break; case 'a': node->s.str[j++]='\a'; break; case 'x': node->s.str[j++]=16*hexnumber(tok.str[i+1])+hexnumber(tok.str[i+2]); i+=2; break; default: node->s.str[j++]=tok.str[i]; break; } } node->s.str[j]='\0'; break; } case TT_WORD:{ if(tok.len==2&&memcmp(tok.str,"if",2)==0){ int len; AST *cond=parseexpr(source,&len,0,INT_MAX); if(!cond)return NULL; source+=len; AST *thenbody=parseexpr(source,&len,0,INT_MAX); if(!thenbody){ ast_free(cond); return NULL; } source+=len; AST *elsebody=NULL; const char *src=source; Token elsetok=nexttoken(&src,false); if(elsetok.type==TT_WORD&&elsetok.len==4&&memcmp(elsetok.str,"else",4)==0){ DBG(printtoken(stderr,elsetok,"if else ")); source=src; elsebody=parseexpr(source,&len,0,INT_MAX); if(!elsebody){ ast_free(cond); ast_free(thenbody); return NULL; } source+=len; } node=malloc(sizeof(AST)); if(!node)outofmem(); node->type=AST_IF; node->i.cond=cond; node->i.thenb=thenbody; node->i.elseb=elsebody; break; } if(tok.len==5&&memcmp(tok.str,"while",2)==0)assert(NOT_IMPLEMENTED); const char *tempsource=source; Token next=nexttoken(&source,false); if(next.len==1&&next.str[0]=='('){ DBG(printtoken(stderr,next,"function ")); AST **args=malloc(sizeof(AST*)); int nargs=0; int len; #define FREEARGSRETNULL do {for(int i=0;itype=AST_CALL; node->c.func=malloc(tok.len+1); if(!node->c.func)outofmem(); memcpy(node->c.func,tok.str,tok.len); node->c.func[tok.len]='\0'; node->c.nargs=nargs; node->c.args=args; #undef FREEARGSRETNULL } else { source=tempsource; node=malloc(sizeof(AST)); if(!node)outofmem(); node->type=AST_VAR; node->v.name=malloc(tok.len+1); if(!node->v.name)outofmem(); memcpy(node->v.name,tok.str,tok.len); node->v.name[tok.len]='\0'; } break; } case TT_SYM: if(tok.len==1&&tok.str[0]=='{'){ node=malloc(sizeof(AST)); if(!node)outofmem(); node->type=AST_BLOCK; int sz=32; node->b.len=0; node->b.exprs=calloc(sz,sizeof(AST*)); if(!node->b.exprs)outofmem(); int len; int cursor=0; while(true){ if(node->b.len==sz){ sz*=2; node->b.exprs=realloc(node->b.exprs,sz*sizeof(AST*)); if(!node->b.exprs)outofmem(); } AST *n=parseexpr(source+cursor,&len,0,INT_MAX); if(!n){ ast_free(node); return NULL; } node->b.exprs[node->b.len++]=n; cursor+=len; const char *src=source+cursor; Token tok=nexttoken(&src,false); DBG(printtoken(stderr,tok,"block ; ")); if(tok.type!=TT_ENDSTMT){ ast_free(node); return NULL; } cursor=src-source; src=source+cursor; tok=nexttoken(&src,false); if(tok.type==TT_SYM&&tok.len==1&&tok.str[0]=='}'){ source=src; DBG(printtoken(stderr,tok,"block } ")); break; } } } else if(tok.len==1&&tok.str[0]=='('){ int len; node=parseexpr(source,&len,0,INT_MAX); if(!node)return NULL; source+=len; Token aftertok=nexttoken(&source,false); DBG(printtoken(stderr,aftertok,"parenclose")); if(aftertok.type!=TT_SYM||aftertok.len!=1||aftertok.str[0]!=')'){ ast_free(node); return NULL; } } else { return NULL; } break; case TT_OP:{ char buf[tok.len+3]; buf[0]='('; memcpy(buf+1,tok.str,tok.len); buf[tok.len+1]=')'; buf[tok.len+2]='\0'; if(associativity(buf)==AS_PREFIX){ node=malloc(sizeof(AST)); if(!node)outofmem(); node->type=AST_OP; node->o.op=opconststring_len(buf,tok.len+2); node->o.left=NULL; int len; node->o.right=parseexpr(source,&len,precedence(buf),INT_MAX); if(!node->o.right){ free(node); return NULL; } source+=len; } else return NULL; break; } case TT_ENDSTMT: case TT_EOF: case TT_ERR: return NULL; } *reslen=source-origsource; return node; } //Uses precedence climbing; maxprec is INT_MAX unless dealing with nonassociative operators static AST* parseexpr_(const char *source,int *reslen,int minprec,int maxprec){ const char *origsource=source; int len; AST *tree=parseterm(source,&len); if(!tree)return NULL; source+=len; while(true){ const char *beforeop=source; Token tok=nexttoken(&source,true); DBG(printtoken(stderr,tok,"parseEXPR")); if(tok.type==TT_ENDSTMT){ DBGF(" (token undo)\n"); source=beforeop; break; } if(tok.type==TT_SYM&&tok.len==1&&(tok.str[0]==')'||tok.str[0]==',')){ DBGF(" (token undo)\n"); source=beforeop; break; } if(tok.type!=TT_OP){ /*ast_free(tree); return NULL;*/ DBGF(" (token undo)\n"); source=beforeop; break; } int prec=precedence_len(tok.str,tok.len); if(precmaxprec){ ast_free(tree); return NULL; } Associativity assoc=associativity_len(tok.str,tok.len); int q; switch(assoc){ case AS_PREFIX: case AS_SUFFIX: ast_free(tree); return NULL; case AS_LEFT: q=prec+1; break; case AS_RIGHT: q=prec; break; case AS_NONASSOC: q=prec+1; maxprec=prec-1; break; default: assert(false); } AST *right=parseexpr(source,&len,q,maxprec); if(!right){ ast_free(tree); return NULL; } source+=len; AST *opnode=malloc(sizeof(AST)); if(!opnode)outofmem(); opnode->type=AST_OP; opnode->o.op=opconststring_len(tok.str,tok.len); if(!opnode->o.op)outofmem(); opnode->o.left=tree; opnode->o.right=right; tree=opnode; } *reslen=source-origsource; return tree; } static AST* parseexpr(const char *source,int *reslen,int minprec,int maxprec){ static int depth=0; DBGF("\x1B[32mEXPR ENTER >>> (%d)\x1B[0m\n",depth); depth++; AST *r=parseexpr_(source,reslen,minprec,maxprec); depth--; DBGF("\x1B[32mEXPR LEAVE <<< (%d)\x1B[0m\n",depth); return r; } static AST* parsestmt(const char *source,int *reslen){ return parseexpr(source,reslen,0,INT_MAX); } static char* reportparseerror(const char *source){ const char *loc=lasttoken.type==TT_ERR?lasterrloc:lasttoken.str; if(loc==NULL){ char *buf; asprintf(&buf,"\x1B[1mParse error: unexpected end of file\x1B[0m"); return buf; } assert(loc>=source); int i; for(i=0;i<100;i++){ if(loc-i==source||loc[-i]=='\n')break; } bool cutoff=i==100; const char *start=loc-i+(cutoff||loc[-i]=='\n'); for(;i<150;i++){ if(start[i]=='\0'||start[i]=='\n')break; } bool endcutoff=i==150; int totallen=17+4*cutoff+i+7+6+4*endcutoff+4; char *errstr=malloc(totallen+1); if(!errstr)outofmem(); int offset=0; memcpy(errstr+offset,"\x1B[1mParse error: ",17); offset+=17; if(cutoff){memcpy(errstr+offset,"... ",4); offset+=4;} memcpy(errstr+offset,start,loc-start); offset+=loc-start; memcpy(errstr+offset,"\x1B[31;4m",7); offset+=7; memcpy(errstr+offset,loc,lasttoken.len); offset+=lasttoken.len; memcpy(errstr+offset,"\x1B[0;1m",6); offset+=6; memcpy(errstr+offset,loc+lasttoken.len,i-(loc-start)-lasttoken.len); offset+=i-(loc-start)-lasttoken.len; memcpy(errstr+offset,"\x1B[0m",4); offset+=4; errstr[offset]='\0'; return errstr; } AST* parse(const char *source,char **errmsg){ AST *bl=malloc(sizeof(AST)); if(!bl)outofmem(); bl->type=AST_BLOCK; int sz=32; bl->b.len=0; bl->b.exprs=calloc(sz,sizeof(AST*)); if(!bl->b.exprs)outofmem(); int reslen; int cursor=0; while(true){ if(bl->b.len==sz){ sz*=2; bl->b.exprs=realloc(bl->b.exprs,sz*sizeof(AST*)); if(!bl->b.exprs)outofmem(); } AST *node=parsestmt(source+cursor,&reslen); if(!node){ ast_free(bl); *errmsg=reportparseerror(source); return NULL; } bl->b.exprs[bl->b.len++]=node; cursor+=reslen; const char *src=source+cursor; Token tok=nexttoken(&src,false); DBG(printtoken(stderr,tok,"parse ")); if(tok.type!=TT_ENDSTMT){ ast_free(bl); *errmsg=reportparseerror(source); return NULL; } cursor=src-source; src=source+cursor; tok=nexttoken(&src,false); if(tok.type==TT_EOF)break; } return bl; } static const char* charblock(char c,int n){ static char *buf=NULL; if(!buf)buf=malloc(n+1); else buf=realloc(buf,n+1); if(!buf)outofmem(); memset(buf,c,n); buf[n]='\0'; return buf; } #define TABW (4) #define INDENT fprintf(stream,"%s",charblock(' ',TABW*indent)); static void ast_debug_(FILE *stream,const AST *ast,int indent){ switch(ast->type){ case AST_BLOCK: if(ast->b.len==0){ fprintf(stream,"{}"); break; } fprintf(stream,"{\n"); indent++; for(int i=0;ib.len;i++){ INDENT ast_debug_(stream,ast->b.exprs[i],indent); fprintf(stream,";\n"); } indent--; INDENT fprintf(stream,"}"); break; case AST_OP:{ bool leftp=ast->o.left&&ast->o.left->type==AST_OP&&precedence(ast->o.left->o.op)<=precedence(ast->o.op); bool rightp=ast->o.right&&ast->o.right->type==AST_OP&&precedence(ast->o.right->o.op)<=precedence(ast->o.op); bool spaces=ast->o.left&&ast->o.right; //fprintf(stderr,"[[op='%s' p=%d lp=%d rp=%d]]",ast->o.op,precedence(ast->o.op),leftp,rightp); if(leftp)fputc('(',stream); if(ast->o.left)ast_debug_(stream,ast->o.left,indent); fprintf(stream,"%s%s%s%s%s",leftp?")":"",spaces?" ":"",ast->o.op,spaces?" ":"",rightp?"(":""); if(ast->o.right)ast_debug_(stream,ast->o.right,indent); if(rightp)fputc(')',stream); break; } case AST_NUM: if(ast->n.isint)fprintf(stream,"%lld",ast->n.i); else fprintf(stream,"%g",ast->n.d); break; case AST_STR: fputc('"',stream); for(int i=0;is.len;i++){ if(ast->s.str[i]<32||ast->s.str[i]>126){ fprintf(stream,"\\x%c%c",hexencode((unsigned char)ast->s.str[i]/16),hexencode((unsigned char)ast->s.str[i]%16)); } else fputc(ast->s.str[i],stream); } fputc('"',stream); break; case AST_VAR: fprintf(stream,"%s",ast->v.name); break; case AST_CALL: fprintf(stream,"%s(",ast->c.func); for(int i=0;ic.nargs;i++){ if(i!=0)fprintf(stream,", "); ast_debug_(stream,ast->c.args[i],indent); } fputc(')',stream); break; case AST_IF: fprintf(stream,"if "); ast_debug_(stream,ast->i.cond,indent); fputc('\n',stream); indent++; INDENT ast_debug_(stream,ast->i.thenb,indent); indent--; if(ast->i.elseb){ fputc('\n',stream); INDENT fprintf(stream,"else\n"); indent++; INDENT ast_debug_(stream,ast->i.elseb,indent); indent--; } break; case AST_WHILE: assert(NOT_IMPLEMENTED); break; default: fprintf(stream,"AST_(??\?)"); break; } } void ast_debug(FILE *stream,const AST *ast){ ast_debug_(stream,ast,0); fputc('\n',stream); } void ast_free(AST *ast){ switch(ast->type){ case AST_BLOCK:{ for(int i=0;ib.len;i++)if(ast->b.exprs[i])ast_free(ast->b.exprs[i]); free(ast->b.exprs); break; } case AST_OP:{ if(ast->o.left)ast_free(ast->o.left); if(ast->o.right)ast_free(ast->o.right); break; } case AST_NUM: break; case AST_STR:{ if(ast->s.str)free(ast->s.str); break; } case AST_VAR:{ if(ast->v.name)free(ast->v.name); break; } case AST_CALL:{ if(ast->c.func)free(ast->c.func); for(int i=0;ic.nargs;i++)if(ast->c.args[i])ast_free(ast->c.args[i]); free(ast->c.args); break; } case AST_IF:{ if(ast->i.cond)free(ast->i.cond); if(ast->i.thenb)free(ast->i.thenb); if(ast->i.elseb)free(ast->i.elseb); break; } case AST_WHILE:{ if(ast->w.cond)free(ast->w.cond); if(ast->w.body)free(ast->w.body); break; } } free(ast); }