summaryrefslogtreecommitdiff
path: root/ast.cpp
diff options
context:
space:
mode:
authortomsmeding <tom.smeding@gmail.com>2016-11-20 11:27:07 +0100
committertomsmeding <tom.smeding@gmail.com>2016-11-20 11:27:07 +0100
commit3b390967e7c2ee4ac6d1a67c77f40ed43005e012 (patch)
tree4be72c3ed32277329c472c1dc72793577ea29195 /ast.cpp
Initial
Diffstat (limited to 'ast.cpp')
-rw-r--r--ast.cpp244
1 files changed, 244 insertions, 0 deletions
diff --git a/ast.cpp b/ast.cpp
new file mode 100644
index 0000000..88d716a
--- /dev/null
+++ b/ast.cpp
@@ -0,0 +1,244 @@
+#include <stdexcept>
+#include <cctype>
+#include "ast.h"
+
+using namespace std;
+
+
+ParseError::ParseError(const string &what_arg)
+ :runtime_error(what_arg){}
+ParseError::ParseError(const char *what_arg)
+ :runtime_error(what_arg){}
+
+
+Lambda::Lambda(){}
+Lambda::Lambda(const Name &arg,const AST &ast)
+ :arg(arg),body(ast){}
+
+
+AST::AST()
+ :type(Type::tuple){}
+
+AST AST::makeNumber(Number numval){
+ AST ast; ast.type=AST::Type::number; ast.numval=numval; return ast;
+}
+AST AST::makeString(String strval){
+ AST ast; ast.type=AST::Type::string; ast.strval=strval; return ast;
+}
+AST AST::makeName(Name nameval){
+ AST ast; ast.type=AST::Type::name; ast.nameval=nameval; return ast;
+}
+AST AST::makeIndex(Index indexval){
+ AST ast; ast.type=AST::Type::index; ast.indexval=indexval; return ast;
+}
+AST AST::makeTuple(Terms terms){
+ AST ast; ast.type=AST::Type::tuple; ast.terms=terms; return ast;
+}
+AST AST::makeLambda(Lambda lambdaval){
+ AST ast; ast.type=AST::Type::lambda; ast.lambdaval=lambdaval; return ast;
+}
+AST AST::makeLambda(const Name &arg,const AST &body){
+ AST ast; ast.type=AST::Type::lambda; ast.lambdaval.arg=arg; ast.lambdaval.body=body; return ast;
+}
+AST AST::makeNative(const Native &native){
+ AST ast; ast.type=AST::Type::native; ast.nativeval=native; return ast;
+}
+
+
+bool iswordchar(char c){
+ return isalpha(c)||isdigit(c)||strchr("!$%&*+,-./:;<=>?@^_|~",c)!=nullptr;
+}
+
+class AST::Tokeniser{
+ const string source;
+ const i64 size;
+ i64 index=0,nextat=0;
+
+public:
+ Tokeniser(const string &source)
+ :source(source),size(source.size()){
+ // cerr<<"Tokeniser: initialised with <<"<<source<<">>"<<endl;
+ advance();
+ }
+
+ bool eof() const {
+ return index==-1;
+ }
+
+ string get(){
+ if(index==-1)throw logic_error("Tokeniser::get() after last token");
+ if(nextat==index)throw logic_error("Tokeniser::get() before advance()");
+ // cerr<<"Tokeniser: got <<"<<source.substr(index,nextat-index)<<">>"<<endl;
+ // cerr<<" left: <<"<<source.substr(nextat)<<">>"<<endl;
+ return source.substr(index,nextat-index);
+ }
+
+ bool advance(){
+ index=nextat;
+ // cerr<<"advance: reading from <<"<<source.substr(index)<<">>"<<endl;
+
+ do {
+ while(index<size&&isspace(source[index])){
+ index++;
+ }
+ if(index==size){
+ index=nextat=-1;
+ return false;
+ }
+ if(source[index]!='#')break;
+ while(index<size&&source[index]!='\n'){
+ index++;
+ }
+ if(index==size){
+ index=nextat=-1;
+ return false;
+ }
+ } while(isspace(source[index])||source[index]=='#');
+
+ if(strchr("'()\\",source[index])!=nullptr){
+ nextat=index+1;
+ return true;
+ } else if(index<size-1&&source[index]=="λ"[0]&&source[index+1]=="λ"[1]){
+ nextat=index+2;
+ return true;
+ } else if(isdigit(source[index])||(index<size-1&&source[index]=='-'&&isdigit(source[index+1]))){
+ nextat=index+1;
+ while(nextat<size&&isdigit(source[nextat])){
+ nextat++;
+ }
+ return true;
+ } else if(iswordchar(source[index])){
+ nextat=index+1;
+ while(nextat<size&&iswordchar(source[nextat])){
+ nextat++;
+ }
+ return true;
+ } else {
+ throw ParseError(string("Invalid token starting with '")+source[index]+"'");
+ }
+ }
+};
+
+AST& AST::parse(AST::Tokeniser &tokeniser){
+ if(tokeniser.eof()){
+ throw ParseError("No tokens to parse");
+ }
+ const string &token=tokeniser.get();
+ if(token.size()==0){
+ throw logic_error("Unexpected empty token from tokeniser");
+ }
+ if(isdigit(token[0])||(token.size()>1&&token[0]=='-'&&isdigit(token[1]))){
+ // cerr<<"AST::parse: type = number"<<endl;
+ type=Type::number;
+ numval=strtol(token.data(),NULL,10);
+ } else if(iswordchar(token[0])){
+ // cerr<<"AST::parse: type = name"<<endl;
+ type=Type::name;
+ nameval=token;
+ } else if(token=="("){
+ // cerr<<"AST::parse: type = tuple"<<endl;
+ type=Type::tuple;
+ while(true){
+ if(!tokeniser.advance()){
+ throw ParseError("List opened with '(' not closed");
+ }
+ if(tokeniser.get()==")")break;
+ terms.push_back(AST().parse(tokeniser));
+ }
+ } else if(token=="'"){
+ if(!tokeniser.advance()){
+ throw ParseError("Unexpected EOF after quote");
+ }
+ parse(tokeniser);
+ quoted=true;
+ } else if(token=="\\"||token=="λ"){
+ type=Type::lambda;
+ if(!tokeniser.advance()){
+ throw ParseError("Unexpected EOF after '\\'");
+ }
+ const string &arg=tokeniser.get();
+ if(!iswordchar(arg[0])){
+ throw ParseError("Name expected after '\\'");
+ }
+ lambdaval.arg=arg;
+ if(!tokeniser.advance()){
+ throw ParseError("Body expected after argument of '\\'");
+ }
+ lambdaval.body=AST().parse(tokeniser);
+ } else if(token==")"){
+ throw ParseError("Unexpected token '"+token+"'");
+ } else {
+ throw logic_error("Unrecognised token from tokeniser: '"+token+"'");
+ }
+ return *this;
+}
+
+AST::AST(const string &source)
+ :AST(source.data()){};
+
+AST::AST(const char *source){
+ Tokeniser tokeniser(source);
+ parse(tokeniser);
+ if(tokeniser.advance()){
+ throw ParseError("Trailing content in source");
+ }
+}
+
+
+ostream& operator<<(ostream &os,const AST &ast){
+ if(ast.quoted){
+ os<<"'";
+ }
+
+ switch(ast.type){
+ case AST::Type::number:
+ os<<ast.numval;
+ break;
+
+ case AST::Type::string:
+ os<<'"';
+ for(char c : ast.strval){
+ switch(c){
+ case 9: os<<"\\t"; break;
+ case 10: os<<"\\n"; break;
+ case 13: os<<"\\r"; break;
+ default:
+ if(c>=32&&c<=126){
+ os<<c;
+ } else {
+ os<<"\\x"
+ <<"0123456789abcdef"[(unsigned char)c/16]
+ <<"0123456789abcdef"[(unsigned char)c%16];
+ }
+ }
+ }
+ os<<'"';
+ break;
+
+ case AST::Type::name:
+ os<<ast.nameval;
+ break;
+
+ case AST::Type::index:
+ os<<'['<<ast.indexval<<']';
+ break;
+
+ case AST::Type::tuple:
+ os<<'(';
+ for(i64 i=0;i<(i64)ast.terms.size();i++){
+ if(i!=0)os<<' ';
+ os<<ast.terms[i];
+ }
+ os<<')';
+ break;
+
+ case AST::Type::lambda:
+ os<<"\\"<<ast.lambdaval.arg<<' '<<*ast.lambdaval.body;
+ break;
+
+ case AST::Type::native:
+ os<<"\\ [native]";
+ break;
+ }
+ return os;
+}