diff options
Diffstat (limited to 'ast.cpp')
-rw-r--r-- | ast.cpp | 244 |
1 files changed, 244 insertions, 0 deletions
@@ -0,0 +1,244 @@ +#include <stdexcept> +#include <cctype> +#include "ast.h" + +using namespace std; + + +ParseError::ParseError(const string &what_arg) + :runtime_error(what_arg){} +ParseError::ParseError(const char *what_arg) + :runtime_error(what_arg){} + + +Lambda::Lambda(){} +Lambda::Lambda(const Name &arg,const AST &ast) + :arg(arg),body(ast){} + + +AST::AST() + :type(Type::tuple){} + +AST AST::makeNumber(Number numval){ + AST ast; ast.type=AST::Type::number; ast.numval=numval; return ast; +} +AST AST::makeString(String strval){ + AST ast; ast.type=AST::Type::string; ast.strval=strval; return ast; +} +AST AST::makeName(Name nameval){ + AST ast; ast.type=AST::Type::name; ast.nameval=nameval; return ast; +} +AST AST::makeIndex(Index indexval){ + AST ast; ast.type=AST::Type::index; ast.indexval=indexval; return ast; +} +AST AST::makeTuple(Terms terms){ + AST ast; ast.type=AST::Type::tuple; ast.terms=terms; return ast; +} +AST AST::makeLambda(Lambda lambdaval){ + AST ast; ast.type=AST::Type::lambda; ast.lambdaval=lambdaval; return ast; +} +AST AST::makeLambda(const Name &arg,const AST &body){ + AST ast; ast.type=AST::Type::lambda; ast.lambdaval.arg=arg; ast.lambdaval.body=body; return ast; +} +AST AST::makeNative(const Native &native){ + AST ast; ast.type=AST::Type::native; ast.nativeval=native; return ast; +} + + +bool iswordchar(char c){ + return isalpha(c)||isdigit(c)||strchr("!$%&*+,-./:;<=>?@^_|~",c)!=nullptr; +} + +class AST::Tokeniser{ + const string source; + const i64 size; + i64 index=0,nextat=0; + +public: + Tokeniser(const string &source) + :source(source),size(source.size()){ + // cerr<<"Tokeniser: initialised with <<"<<source<<">>"<<endl; + advance(); + } + + bool eof() const { + return index==-1; + } + + string get(){ + if(index==-1)throw logic_error("Tokeniser::get() after last token"); + if(nextat==index)throw logic_error("Tokeniser::get() before advance()"); + // cerr<<"Tokeniser: got <<"<<source.substr(index,nextat-index)<<">>"<<endl; + // cerr<<" left: <<"<<source.substr(nextat)<<">>"<<endl; + return source.substr(index,nextat-index); + } + + bool advance(){ + index=nextat; + // cerr<<"advance: reading from <<"<<source.substr(index)<<">>"<<endl; + + do { + while(index<size&&isspace(source[index])){ + index++; + } + if(index==size){ + index=nextat=-1; + return false; + } + if(source[index]!='#')break; + while(index<size&&source[index]!='\n'){ + index++; + } + if(index==size){ + index=nextat=-1; + return false; + } + } while(isspace(source[index])||source[index]=='#'); + + if(strchr("'()\\",source[index])!=nullptr){ + nextat=index+1; + return true; + } else if(index<size-1&&source[index]=="λ"[0]&&source[index+1]=="λ"[1]){ + nextat=index+2; + return true; + } else if(isdigit(source[index])||(index<size-1&&source[index]=='-'&&isdigit(source[index+1]))){ + nextat=index+1; + while(nextat<size&&isdigit(source[nextat])){ + nextat++; + } + return true; + } else if(iswordchar(source[index])){ + nextat=index+1; + while(nextat<size&&iswordchar(source[nextat])){ + nextat++; + } + return true; + } else { + throw ParseError(string("Invalid token starting with '")+source[index]+"'"); + } + } +}; + +AST& AST::parse(AST::Tokeniser &tokeniser){ + if(tokeniser.eof()){ + throw ParseError("No tokens to parse"); + } + const string &token=tokeniser.get(); + if(token.size()==0){ + throw logic_error("Unexpected empty token from tokeniser"); + } + if(isdigit(token[0])||(token.size()>1&&token[0]=='-'&&isdigit(token[1]))){ + // cerr<<"AST::parse: type = number"<<endl; + type=Type::number; + numval=strtol(token.data(),NULL,10); + } else if(iswordchar(token[0])){ + // cerr<<"AST::parse: type = name"<<endl; + type=Type::name; + nameval=token; + } else if(token=="("){ + // cerr<<"AST::parse: type = tuple"<<endl; + type=Type::tuple; + while(true){ + if(!tokeniser.advance()){ + throw ParseError("List opened with '(' not closed"); + } + if(tokeniser.get()==")")break; + terms.push_back(AST().parse(tokeniser)); + } + } else if(token=="'"){ + if(!tokeniser.advance()){ + throw ParseError("Unexpected EOF after quote"); + } + parse(tokeniser); + quoted=true; + } else if(token=="\\"||token=="λ"){ + type=Type::lambda; + if(!tokeniser.advance()){ + throw ParseError("Unexpected EOF after '\\'"); + } + const string &arg=tokeniser.get(); + if(!iswordchar(arg[0])){ + throw ParseError("Name expected after '\\'"); + } + lambdaval.arg=arg; + if(!tokeniser.advance()){ + throw ParseError("Body expected after argument of '\\'"); + } + lambdaval.body=AST().parse(tokeniser); + } else if(token==")"){ + throw ParseError("Unexpected token '"+token+"'"); + } else { + throw logic_error("Unrecognised token from tokeniser: '"+token+"'"); + } + return *this; +} + +AST::AST(const string &source) + :AST(source.data()){}; + +AST::AST(const char *source){ + Tokeniser tokeniser(source); + parse(tokeniser); + if(tokeniser.advance()){ + throw ParseError("Trailing content in source"); + } +} + + +ostream& operator<<(ostream &os,const AST &ast){ + if(ast.quoted){ + os<<"'"; + } + + switch(ast.type){ + case AST::Type::number: + os<<ast.numval; + break; + + case AST::Type::string: + os<<'"'; + for(char c : ast.strval){ + switch(c){ + case 9: os<<"\\t"; break; + case 10: os<<"\\n"; break; + case 13: os<<"\\r"; break; + default: + if(c>=32&&c<=126){ + os<<c; + } else { + os<<"\\x" + <<"0123456789abcdef"[(unsigned char)c/16] + <<"0123456789abcdef"[(unsigned char)c%16]; + } + } + } + os<<'"'; + break; + + case AST::Type::name: + os<<ast.nameval; + break; + + case AST::Type::index: + os<<'['<<ast.indexval<<']'; + break; + + case AST::Type::tuple: + os<<'('; + for(i64 i=0;i<(i64)ast.terms.size();i++){ + if(i!=0)os<<' '; + os<<ast.terms[i]; + } + os<<')'; + break; + + case AST::Type::lambda: + os<<"\\"<<ast.lambdaval.arg<<' '<<*ast.lambdaval.body; + break; + + case AST::Type::native: + os<<"\\ [native]"; + break; + } + return os; +} |