#include <iostream>
#include <string>
#include <vector>
#include <stdexcept>
#include <cassert>
#include <cstdlib>
#include "parser.h"

using namespace std;

#define DEBUG cerr<<'['<<__FILE__<<':'<<__LINE__<<"] "

template <typename T>
static ostream& operator<<(ostream &os,const vector<T> &v){
	os<<'{';
	bool first=true;
	for(const T &t : v){
		if(!first)os<<", ";
		else first=false;
		os<<t;
	}
	os<<'}';
	return os;
}

static const vector<string> tok_symbols={
	"==", "!=", ">", "<", ">=", "<=",
	":=", "=",
	"+", "-", "*", "/", "%",
	"(", ")", ",", "{", "}",
	"?", "??",
};

static bool isSymbolPrefix(const string &s){
	for(const string &sym : tok_symbols){
		if(s.size()<=sym.size()&&sym.substr(0,s.size())==s)return true;
	}
	return false;
}

template <typename T>
static bool contains(const vector<T> &v,const T &target){
	for(const T &t : v){
		if(t==target)return true;
	}
	return false;
}

class Token{
public:
	enum class Type{
		word,
		number,
		string,
		symbol,
		terminator,
	};

	Type type;
	string str;
	Site site;

	Token(Type type,const string &str,const Site &site)
		:type(type),str(str),site(site){}
};

class Tokeniser{
	const string &source;
	const string &filename;
	i64 idx,nextidx;
	i64 lnum,linex;
	Token::Type ttype;

	/*struct State{
		i64 idx,nextidx;
		i64 lnum,lineidx;
		Token::Type ttype;
	};
	stack<State> statestack;*/

	bool eof(i64 at){
		return at>=(i64)source.size();
	}

	string get_() const {
		if(eof())throw runtime_error("Tokeniser::get() on eof");
		if(nextidx==-1)throw runtime_error("Tokeniser::get() before advance");
		if(nextidx==-2)throw runtime_error("Tokeniser::get() after eof");
		assert(nextidx>=0);
		return source.substr(idx,nextidx-idx);
	}

public:
	Tokeniser(const string &source,const string &filename)
		:source(source),filename(filename),
		 idx(0),nextidx(-1),
		 lnum(1),linex(1){}

	Tokeniser& operator=(const Tokeniser &other){
		if(&source!=&other.source||&filename!=&other.filename){
			throw runtime_error("Tokeniser::operator= on incompatible Tokeniser");
		}
		idx=other.idx;
		nextidx=other.nextidx;
		lnum=other.lnum;
		linex=other.linex;
		ttype=other.ttype;
		return *this;
	}

	Site site() const {
		return Site(filename,lnum,linex);
	}

	/*void save(){
		statestack.push({idx,nextidx,lnum,lineidx,ttype});
	}
	void restore(){
		if(statestack.size()==0)throw runtime_error("Tokeniser::restore() on empty stack");
		const State &st=statestack.top();
		idx=st.idx;
		nextidx=st.nextidx;
		lnum=st.lnum;
		linex=st.linex;
		ttype=st.ttype;
		statestack.pop();
	}
	void discardstate(){
		if(statestack.size()==0)throw runtime_error("Tokeniser::discardstate() on empty stack");
		statestack.pop();
	}*/

	bool eof() const {
		return idx>=(i64)source.size();
	}

	Token get(){
		return Token(ttype,get_(),site());
	}

	// Returns whether there are more tokens
	bool advance(){
		if(eof())return false;
		// Let nextidx catch up with idx
		while(idx

optable={
	{"*", {"*", 6,Associativity::left}},
	{"/", {"/", 6,Associativity::left}},
	{"%", {"%", 6,Associativity::left}},
	{"+", {"+", 5,Associativity::left}},
	{"-", {"-", 5,Associativity::left}},
	{"==",{"==",3,Associativity::left}},
	{"!=",{"!=",3,Associativity::left}},
	{">", {">", 3,Associativity::left}},
	{"<", {"<", 3,Associativity::left}},
	{">=",{">=",3,Associativity::left}},
	{"<=",{"<=",3,Associativity::left}},
};

static char unhexchar(char c){
	if(c>='0'&&c<='9')return c-'0';
	if(c>='a'&&c<='f')return c-'a'+10;
	if(c>='A'&&c<='F')return c-'A'+10;
	return (char)-1;
}

static string parseString(const string &repr,Site site){
	if(repr.size()<2||repr[0]!='"'||repr.back()!='"')throw runtime_error("String not surrounded with quotes");
	string res;
	res.reserve(repr.size()+3);
	for(i64 i=1;i<(i64)repr.size()-1;i++){
		if(repr[i]=='\\'){
			switch(repr[i+1]){
				case 'n': res+='\n'; i++; break;
				case 'r': res+='\r'; i++; break;
				case 't': res+='\t'; i++; break;
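				// Remaining escapes: \" keeps a literal double quote; \xNN inserts
				// the byte whose hexadecimal value is NN.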
case '"': res+='"'; i++; break; case 'x':{ if(i+3>=(i64)repr.size()-1)throw ParseError(site.addX(i),"Invalid hexadecimal escape"); char c1=unhexchar(repr[i+2]); char c2=unhexchar(repr[i+3]); if(c1==(char)-1||c2==(char)-1)throw ParseError(site.addX(i),"Invalid hexadecimal escape"); res+=(char)(16*c1+c2); i+=3; break; } default: throw ParseError(site.addX(i),"Invalid hexadecimal escape"); } } else { res+=repr[i]; } } return res; } static Expression parseExpression(Tokeniser &tokeniser,int minprec=-1); static StatementList parseScopeDef(Tokeniser &tokeniser); static vector parseArgumentList(Tokeniser &tokeniser){ if(tokeniser.eof())throw ParseError(tokeniser.site(),"Expected argument list but found EOF"); Token tok=tokeniser.get(); if(tok.type!=Token::Type::symbol||tok.str!="("){ throw ParseError(tok.site,"Expected argument list but found '"+tok.str+"'"); } tokeniser.advance(); vector args; while(true){ Expression expr=parseExpression(tokeniser); if(tokeniser.eof()){ throw ParseError(tokeniser.site(),"Expected ')' or ',' after argument but found EOF"); } tok=tokeniser.get(); if(tok.type!=Token::Type::symbol||(tok.str!=")"&&tok.str!=",")){ throw ParseError(tok.site,"Expected ')' or ',' after argument but found something else"); } tokeniser.advance(); args.push_back(expr); if(tok.str==")")break; } return args; } static vector parseArgumentNameList(Tokeniser &tokeniser){ if(tokeniser.eof())throw ParseError(tokeniser.site(),"Expected argument name list but found EOF"); Token tok=tokeniser.get(); if(tok.type!=Token::Type::symbol||tok.str!="("){ throw ParseError(tok.site,"Expected argument name list but found '"+tok.str+"'"); } tokeniser.advance(); vector args; while(true){ tok=tokeniser.get(); if(tok.type!=Token::Type::word){ throw ParseError(tok.site,"Expected argument name but found something else"); } Name name=tok.str; tokeniser.advance(); if(tokeniser.eof()){ throw ParseError(tokeniser.site(),"Expected ')' or ',' after argument name but found EOF"); } tok=tokeniser.get(); if(tok.type!=Token::Type::symbol||(tok.str!=")"&&tok.str!=",")){ throw ParseError(tok.site,"Expected ')' or ',' after argument name but found something else"); } tokeniser.advance(); args.push_back(name); if(tok.str==")")break; } return args; } static Expression parseAtom(Tokeniser &tokeniser){ if(tokeniser.eof())throw ParseError(tokeniser.site(),"Expected atom but found EOF"); Token tok=tokeniser.get(); switch(tok.type){ case Token::Type::word:{ tokeniser.advance(); if(tokeniser.eof()){ if(tok.str=="if")throw ParseError(tok.site,"Expected expressions after 'if' but found EOF"); Expression expr=Expression(Expression::Type::call,tok.str,vector()); expr.site=tok.site; return expr; } if(tok.str=="if"){ Expression cond=parseExpression(tokeniser); if(tokeniser.eof())throw ParseError(tokeniser.site(),"Expected 'then' but found EOF"); Token tok2=tokeniser.get(); if(tok2.type!=Token::Type::word||tok2.str!="then"){ throw ParseError(tok2.site,"Expected 'then' but got '"+tok2.str+"'"); } tokeniser.advance(); Expression ex1=parseExpression(tokeniser); if(tokeniser.eof())throw ParseError(tokeniser.site(),"Expected 'else' but found EOF"); tok2=tokeniser.get(); if(tok2.type!=Token::Type::word||tok2.str!="else"){ throw ParseError(tok2.site,"Expected 'else' but got '"+tok2.str+"'"); } tokeniser.advance(); Expression ex2=parseExpression(tokeniser); return Expression(Expression::Type::cond,{cond,ex1,ex2}); } Token tok2=tokeniser.get(); if(tok2.type==Token::Type::symbol&&tok2.str=="("){ vector args=parseArgumentList(tokeniser); bool done=false; 
				if(tokeniser.eof())done=true;
				else {
					tok2=tokeniser.get();
					if(tok2.type!=Token::Type::symbol||tok2.str!="{")done=true;
				}
				if(done){
					Expression expr=Expression(Expression::Type::call,tok.str,args);
					expr.site=tok.site;
					return expr;
				}
				return Expression(Expression::Type::dive,tok.str,args,
					ScopeDef(ScopeDef::Type::direct,parseScopeDef(tokeniser),{}));
			} else if(tok2.type==Token::Type::symbol&&tok2.str=="{"){
				return Expression(Expression::Type::dive,tok.str,{},
					ScopeDef(ScopeDef::Type::direct,parseScopeDef(tokeniser),{}));
			} else {
				Expression expr=Expression(Expression::Type::call,tok.str,vector<Expression>());
				expr.site=tok.site;
				return expr;
			}
		}

		case Token::Type::number:{
			tokeniser.advance();
			Expression expr=Expression(Expression::Type::number,strtod(tok.str.data(),nullptr));
			expr.site=tok.site;
			return expr;
		}

		case Token::Type::string:{
			tokeniser.advance();
			Expression expr=Expression(Expression::Type::string,parseString(tok.str,tok.site));
			expr.site=tok.site;
			return expr;
		}

		case Token::Type::symbol:{
			if(tok.str=="("){
				tokeniser.advance();
				Expression expr=parseExpression(tokeniser);
				if(tokeniser.eof())throw ParseError(tokeniser.site(),"Expected ')' but found EOF");
				Token tok2=tokeniser.get();
				if(tok2.type!=Token::Type::symbol||tok2.str!=")"){
					throw ParseError(tok2.site,"Expected ')' but found something else");
				}
				tokeniser.advance();
				return expr;
			}
			ScopeDef::Type sctype;
			if(tok.str=="?")sctype=ScopeDef::Type::lazy;
			else if(tok.str=="??")sctype=ScopeDef::Type::function;
			else if(tok.str!="{"){
				throw ParseError(tok.site,"Unexpected token '"+tok.str+"' in expression atom position");
			} else sctype=ScopeDef::Type::direct;
			vector<Name> args;
			if(sctype!=ScopeDef::Type::direct){
				tokeniser.advance();
				if(tokeniser.eof()){
					throw ParseError(tokeniser.site(),"Expected scope after '"+tok.str+"' but found EOF");
				}
				Token tok2=tokeniser.get();
				if(tok2.type!=Token::Type::symbol){
					throw ParseError(tok2.site,"Expected '(' or '{' after '"+tok.str+"'");
				}
				if(tok2.type==Token::Type::symbol&&tok2.str=="("){
					args=parseArgumentNameList(tokeniser);
				}
			}
			// DEBUG<<"args: "<

second; if(op.prec
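/*
Illustrative sketch only, not part of the original source: the file breaks off
inside parseExpression, and the surviving "op.prec" fragment points at a
precedence-climbing loop over a table shaped like optable above. A minimal
version of that technique, with hypothetical names (OpInfo, makeBinary) in
place of this file's real ones, could look like this:

	struct OpInfo{ string name; int prec; Associativity assoc; };

	static Expression parseExpression(Tokeniser &tokeniser,int minprec){
		Expression lhs=parseAtom(tokeniser);              // operand first
		while(!tokeniser.eof()){
			Token tok=tokeniser.get();
			auto it=optable.find(tok.str);
			if(it==optable.end())break;                   // not a binary operator
			const OpInfo &op=it->second;
			if(op.prec<minprec)break;                     // binds too loosely here
			tokeniser.advance();
			// Left-associative operators recurse with prec+1 so that operators of
			// equal precedence stay left-nested; right-associative ones reuse prec.
			int sub=op.prec+(op.assoc==Associativity::left?1:0);
			Expression rhs=parseExpression(tokeniser,sub);
			lhs=makeBinary(op.name,lhs,rhs);              // hypothetical combiner
		}
		return lhs;
	}
*/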