#include #include #include #include #include "parser.h" #include "util.h" typedef struct Cursor{ const char *s; int l; } Cursor; typedef enum Tokentype{ TT_EOF, //str=NULL, len=-1 TT_SYMBOL, TT_WORD, TT_QUOTEDWORD, TT_NUMBER, TT_STRING, TT_ERR=-1 //str is an error description, len=-1 } Tokentype; typedef struct Token{ Tokentype type; const char *str; //pointer into source int len; } Token; #define SYMBOLCHARS "'()[]" static Token tt_make(Tokentype type,const char *str,int len){ Token tok={type,str,len}; return tok; } static Token tt_eof(void){ return tt_make(TT_EOF,NULL,-1); } static Token tt_err(const char *errstr){ return tt_make(TT_ERR,errstr,-1); } static void advance(Cursor *cursor,int n){ assert(cursor->l>=n); cursor->s+=n; cursor->l-=n; } static bool iswordchar(char c){ return strchr(SYMBOLCHARS,c)==NULL&&c>=33&&c<=126; } static Token nexttoken(Cursor *cursor){ while(cursor->l>=1&&isspace(*cursor->s))advance(cursor,1); if(cursor->l==0)return tt_eof(); bool acted; do { acted=false; if(*cursor->s==';'){ acted=true; int i; for(i=1;il;i++){ if(cursor->s[i]=='\n')break; } if(i>=cursor->l-1){ advance(cursor,cursor->l); return tt_eof(); } advance(cursor,i+1); } if(cursor->l>=4&&cursor->s[0]=='#'&&cursor->s[1]=='|'){ acted=true; int i; for(i=3;il;i++){ if(cursor->s[i-1]=='|'&&cursor->s[i]=='#')break; } if(i>=cursor->l-1){ advance(cursor,cursor->l); return tt_eof(); } advance(cursor,i+1); } } while(acted); while(cursor->l>=1&&isspace(*cursor->s))advance(cursor,1); if(cursor->l==0)return tt_eof(); if(strchr(SYMBOLCHARS,*cursor->s)!=NULL){ advance(cursor,1); return tt_make(TT_SYMBOL,cursor->s-1,1); } if(isdigit(*cursor->s)||(cursor->l>=2&&cursor->s[0]=='-'&&isdigit(cursor->s[1]))){ char *endp; strtod(cursor->s,&endp); assert(endp>cursor->s); int len=endp-cursor->s; advance(cursor,len); return tt_make(TT_NUMBER,cursor->s-len,len); } if(*cursor->s=='"'){ int i; for(i=1;il;i++){ if(cursor->s[i]=='"')break; if(cursor->s[i]=='\\')i++; } if(i==cursor->l){ return tt_err("Unclosed string in source"); } i++; advance(cursor,i); return tt_make(TT_STRING,cursor->s-i,i); } bool isquoted=false; if(*cursor->s=='\''){ isquoted=true; advance(cursor,1); if(cursor->l==0||!iswordchar(*cursor->s)){ return tt_err("Lone single quote in source"); } } int i; for(i=0;il;i++){ if(!iswordchar(cursor->s[i]))break; } if(i==0){ return tt_err("Unrecognised character while looking for next token"); } advance(cursor,i); return tt_make(isquoted?TT_QUOTEDWORD:TT_WORD,cursor->s-i,i); } static bool ishexdigit(char c){ return (c>='0'&&c<='9')||(c>='a'&&c<='f')||(c>='A'&&c<='F'); } static int hexnum(char c){ return c<='9'?c-'0':(c&~32)-'A'; } static ParseRet pr_ast(AST *ast){ ParseRet pr={ast,NULL}; return pr; } static ParseRet pr_err(char *errstr){ ParseRet pr={NULL,errstr}; return pr; } static ParseRet pr_err_c(const char *errstr){ return pr_err(copystring(errstr)); } static ParseRet parse_(Cursor *cursor){ Token tok=nexttoken(cursor); switch(tok.type){ case TT_EOF: return pr_err_c("Unexpected end-of-file"); case TT_SYMBOL:{ char closing; if(tok.len!=1)assert(false); if(tok.str[0]=='\''){ ParseRet pr=parse_(cursor); if(pr.errstr)return pr; if(pr.ast->type==AST_WORD){ char *word=pr.ast->wo.word; pr.ast->type=AST_SYMBOL; pr.ast->sy.name=word; pr.ast->sy.symid=-1; return pr; } return pr_ast(ast_quoted(pr.ast)); } if(tok.str[0]=='(')closing=')'; else if(tok.str[0]=='[')closing=']'; else if(tok.str[0]==')'||tok.str[0]==']'){ return pr_err_c("Unexpected closing paren in source"); } else assert(false); int sz=2,len=0; AST **nodes=malloc(sz,AST*); while(true){ Cursor cur2v=*cursor,*cur2=&cur2v; Token t=nexttoken(cur2); if(t.type==TT_SYMBOL&&t.len==1&&t.str[0]==closing){ *cursor=cur2v; break; } if(t.type==TT_EOF){ for(int i=0;i=2&&tok.str[0]=='"'&&tok.str[tok.len-1]=='"'); int len=0; for(int i=1;i=tok.len-3||!ishexdigit(tok.str[i+1])||!ishexdigit(tok.str[i+2])){ return pr_err_c("\"\\x\" in string needs two hexadecimal digits"); } i+=2; } } len++; } char *buf=malloc(len==0?1:len,char); int j=0; for(int i=1;i