From 76500bc57fa7d27c73905739a12520706f817534 Mon Sep 17 00:00:00 2001 From: tomsmeding Date: Sat, 4 Feb 2017 23:26:05 +0100 Subject: Parses list.squig --- ast.cpp | 207 +++++++++++++++++++++++++++++++++++++ ast.h | 31 ++++-- list.squig | 62 +++++------ main.cpp | 41 +++++++- parser.cpp | 339 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++---- parser.h | 1 + 6 files changed, 618 insertions(+), 63 deletions(-) diff --git a/ast.cpp b/ast.cpp index e69de29..d1742e2 100644 --- a/ast.cpp +++ b/ast.cpp @@ -0,0 +1,207 @@ +#include "ast.h" + +using namespace std; + + +static int indent_level_index=ios_base::xalloc(); + + +namespace std{ + string to_string(Expression::Type type){ + switch(type){ + case Expression::Type::binop: return "binop"; + case Expression::Type::unop: return "unop"; + case Expression::Type::call: return "call"; + case Expression::Type::dive: return "dive"; + case Expression::Type::number: return "number"; + case Expression::Type::string: return "string"; + case Expression::Type::scope: return "scope"; + default: return to_string((int)type); + } + } + + string to_string(Statement::Type type){ + switch(type){ + case Statement::Type::create: return "create"; + case Statement::Type::assign: return "assign"; + case Statement::Type::expression: return "expression"; + default: return to_string((int)type); + } + } +} + + +Site::Site():lnum(-1),linex(-1){} +Site::Site(const string &filename,i64 lnum,i64 linex) + :filename(filename),lnum(lnum),linex(linex){} + +Site Site::addX(i64 dx){ + return Site(filename,lnum,linex+dx); +} + +ostream& operator<<(ostream &os,Site site){ + return os< &args) + :type(type),body(body),args(args){} +/*Scope::Scope(Type type,const StatementList &body,const vector &nameargs) + :type(type),body(body){ + for(const Name &n : nameargs){ + args.emplace_back(Expression::Type::call,n,vector()); + } +}*/ + +string indent(int amount){ + return string(amount*4,' '); +} + +ostream& operator<<(ostream &os,const Scope &scope){ + switch(scope.type){ + case Scope::Type::direct: break; + case Scope::Type::lazy: os<<'?'; break; + case Scope::Type::function: os<<"??"; break; + } + if(scope.args.size()!=0){ + os<<'('; + bool first=true; + for(const Expression &ex : scope.args){ + if(!first)os<<", "; + else first=false; + os< &args) + :type(type),name(name),args(args){ + if(type!=Type::binop&&type!=Type::call){ + throw runtime_error("Expression(type,name,args) called with invalid type "+to_string(type)); + } +} +Expression::Expression(Type type,const Name &name,const Expression &arg) + :type(type),name(name),args({arg}){ + if(type!=Type::unop){ + throw runtime_error("Expression(type,name,arg) called with invalid type "+to_string(type)); + } +} +Expression::Expression(Type type,const Name &name,const vector &args,const Scope &scope) + :type(type),name(name),args(args),scope(scope){ + if(type!=Type::dive){ + throw runtime_error("Expression(type,name,args,scope) called with invalid type "+to_string(type)); + } +} +Expression::Expression(Type type,double numval) + :type(type),numval(numval){ + if(type!=Type::number){ + throw runtime_error("Expression(type,numval) called with invalid type "+to_string(type)); + } +} +Expression::Expression(Type type,const string &strval) + :type(type),strval(strval){ + if(type!=Type::string){ + throw runtime_error("Expression(type,strval) called with invalid type "+to_string(type)); + } +} +Expression::Expression(Type type,const Scope &scope) + :type(type),scope(scope){ + if(type!=Type::scope){ + throw runtime_error("Expression(type,scope) called with invalid type "+to_string(type)); + } +} +Expression::Expression(Type type,const vector &args) + :type(type),args(args){ + if(type!=Type::cond){ + throw runtime_error("Expression(type,args) called with invalid type "+to_string(type)); + } +} + +ostream& operator<<(ostream &os,const Expression &expr){ + switch(expr.type){ + case Expression::Type::binop: + return os<<'('< #include #include #include "global.h" @@ -8,6 +9,7 @@ using namespace std; class Statement; +class Expression; using Name = string; using StatementList = vector; @@ -18,37 +20,45 @@ public: Site(); Site(const string &filename,i64 lnum,i64 linex); + + Site addX(i64 dx); }; +ostream& operator<<(ostream &os,Site site); + class Scope{ public: enum class Type{ direct, lazy, function, - async, + // async, }; Type type; StatementList body; - vector args; + vector args; Site site; Scope(); - Scope(Type type,const StatementList &body,const vector &args); + Scope(Type type,const StatementList &body,const vector &args); + // Scope(Type type,const StatementList &body,const vector &args); }; +ostream& operator<<(ostream &os,const Scope &scope); + class Expression{ public: enum class Type{ binop, // name, args[0], args[1] unop, // name, args[0] - call, // name, args - dive, // name, args, body + call, // name, args -- variable reference when args={} + dive, // name, args, scope -- variable dive when args={} number, // numval string, // strval scope, // scope + cond, // args[0] (cond), args[1] (then), args[2] (else) }; Type type; @@ -57,19 +67,21 @@ public: double numval; string strval; Scope scope; - StatementList body; Site site; - Expression(); + // Expression(); Expression(Type type,const Name &name,const vector &args); // binop, call Expression(Type type,const Name &name,const Expression &arg); // unop - Expression(Type type,const Name &name,const vector &args,const StatementList &body); // dive + Expression(Type type,const Name &name,const vector &args,const Scope &scope); // dive Expression(Type type,double numval); // number Expression(Type type,const string &strval); // string Expression(Type type,const Scope &scope); // scope + Expression(Type type,const vector &args); // cond }; +ostream& operator<<(ostream &os,const Expression &expr); + class Statement{ public: enum class Type{ @@ -88,3 +100,6 @@ public: Statement(Type type,const Name &dstvar,const Expression &expr); // create, assign Statement(Type type,const Expression &expr); // expr }; + +ostream& operator<<(ostream &os,const Statement &st); +ostream& operator<<(ostream &os,const StatementList &stl); diff --git a/list.squig b/list.squig index ea0d333..b08dafd 100644 --- a/list.squig +++ b/list.squig @@ -1,11 +1,13 @@ -new_list := ??{ - front := nil - back := nil +list_new := ??{ + x := { + front := nil + back := nil + } } list_push_front := ??(list, item){ list { - if front == nil { + if front == nil then { front = { value := item next := nil @@ -24,7 +26,7 @@ list_push_front := ??(list, item){ list_push_back := ??(list, item){ list { - if back == nil { + if back == nil then { front = { value := item next := nil @@ -44,67 +46,59 @@ list_push_back := ??(list, item){ list_pop_front := ??(list){ x := nil list { - if front == nil { + if front == nil then { throw_error("Call to 'list_pop_front' on empty list") - } + } else {} front { x = value front = next } - if front == nil { + if front == nil then { back = nil - } + } else {} } } list_pop_back := ??(list){ x := nil list { - if back == nil { + if back == nil then { throw_error("Call to 'list_pop_back' on empty list") - } + } else {} back { x = value back = prev } - if back == nil { + if back == nil then { front = nil - } + } else {} } } list_get := nil { get_helper := ??(front, idx){ - if front == nil { + if front == nil then { throw_error("Index past end of list in 'list_get'") - } + } else {} x := nil - if idx == 0 { + if idx == 0 then { front { x = value } } else { front { - found := nil - get_helper(next, idx - 1){ - found = x - } - x = found + x = get_helper(next, idx - 1) } } } list_get = ??(list, idx){ - if idx < 0 { + if idx < 0 then { throw_error("Negative index in 'list_get'") - } + } else {} x := nil list { - found := nil - get_helper(front, idx){ - found = x - } - x = found + x = get_helper(front, idx) } } } @@ -112,23 +106,23 @@ list_get := nil list_set := nil { set_helper := ??(front, idx, val){ - if front == nil { + if front == nil then { throw_error("Index past end of list in 'list_set'") - } - if idx == 0 { + } else {} + if idx == 0 then { front { value = val } } else { front { - set_helper(next, idx - 1, val){} + set_helper(next, idx - 1, val) } } } list_set = ??(list, idx, val){ - if idx < 0 { + if idx < 0 then { throw_error("Negative index in 'list_set'") - } + } else {} list { set_helper(front, idx, val) } diff --git a/main.cpp b/main.cpp index b28e446..f8b9681 100644 --- a/main.cpp +++ b/main.cpp @@ -1,11 +1,48 @@ #include +#include +#include #include "parser.h" using namespace std; +string readstream(istream &in){ + string res; + char buf[4096]; + while(true){ + in.read(buf,sizeof(buf)); + if(in.gcount()==0)break; + res.append(buf,in.gcount()); + } + return res; +} + int main(int argc,char **argv){ - (void)argc; - (void)argv; + string source,filename; + bool fromstdin=false; + if(argc==1)fromstdin=true; + else if(argc==2){ + if(strcmp(argv[1],"-")==0)fromstdin=true; + else filename=argv[1]; + } else { + cerr<<"Pass source on stdin, or filename as argument (or '-' for stdin)"< #include +#include #include #include #include @@ -8,10 +9,27 @@ using namespace std; +#define DEBUG cerr<<'['<<__FILE__<<':'<<__LINE__<<"] " + +template +static ostream& operator<<(ostream &os,const vector &v){ + os<<'{'; + bool first=true; + for(const T &t : v){ + if(!first)os<<", "; + else first=false; + os< tok_symbols={ ":=", "=", "+", "-", "*", "/", "%", "(", ")", ",", - "{", "}", "?{", "??{", "}&", + "{", "}", "?", "??", }; static bool isSymbolPrefix(const string &s){ @@ -108,6 +126,10 @@ public: return *this; } + Site site() const { + return Site(filename,lnum,linex); + } + /*void save(){ statestack.push({idx,nextidx,lnum,lineidx,ttype}); } @@ -131,7 +153,7 @@ public: } Token get(){ - return Token(ttype,get_(),Site(filename,lnum,linex)); + return Token(ttype,get_(),site()); } // Returns whether there are more tokens @@ -144,7 +166,7 @@ public: lnum++; linex=1; } else { - linex++; + linex+=1+3*(source[idx]=='\t'); } idx++; } @@ -158,7 +180,7 @@ public: ttype=Token::Type::terminator; return true; } - linex++; + linex+=1+3*(source[idx]=='\t'); idx++; } if(eof())return false; @@ -200,7 +222,7 @@ public: if(source[nextidx]=='.'){ nextidx++; if(eof(nextidx)||!isdigit(source[nextidx])){ - throw ParseError("Incomplete floating point literal at EOF"); + throw ParseError(site(),"Incomplete floating point literal at EOF"); } while(!eof(nextidx)&&isdigit(source[nextidx]))nextidx++; if(eof(nextidx))return true; @@ -208,11 +230,11 @@ public: if(strchr("eE",source[nextidx])!=NULL){ nextidx++; if(eof(nextidx)||strchr("+-0123456789",source[nextidx])==NULL){ - throw ParseError("Incomplete floating point literal at EOF"); + throw ParseError(site(),"Incomplete floating point literal at EOF"); } if(strchr("+-",source[nextidx])!=NULL){ nextidx++; - if(eof(nextidx))throw ParseError("Incomplete floating point literal at EOF"); + if(eof(nextidx))throw ParseError(site(),"Incomplete floating point literal at EOF"); } while(!eof(nextidx)&&isdigit(source[nextidx]))nextidx++; } @@ -225,8 +247,9 @@ public: nextidx++; while(!eof(nextidx)&&source[nextidx]!='"'){ if(source[nextidx]=='\\')nextidx++; + nextidx++; } - if(eof(nextidx))throw ParseError("Incomplete string literal at EOF"); + if(eof(nextidx))throw ParseError(site(),"Incomplete string literal at EOF"); nextidx++; return true; } @@ -236,23 +259,266 @@ public: ttype=Token::Type::symbol; nextidx++; while(!eof(nextidx)){ - if(!isSymbolPrefix(get_()))return true; + if(!isSymbolPrefix(get_())){ + nextidx--; + return true; + } nextidx++; } + nextidx--; if(contains(tok_symbols,get_()))return true; - else throw ParseError("Unknown symbol at EOF"); + else throw ParseError(site(),"Unknown symbol at EOF"); } - throw ParseError("Unknown token starting at '"+source.substr(idx,5)+"'"); + throw ParseError(site(),"Unknown token starting at '"+source.substr(idx,5)+"'"); } }; -static Expression parseExpression(Tokeniser &tokeniser){ - ; +enum class Associativity{ + left, + right, +}; + +struct OpInfo{ + string name; + int prec; //higher is tighter-binding + Associativity assoc; +}; + +unordered_map optable={ + {"*", {"*", 6,Associativity::left}}, + {"/", {"/", 6,Associativity::left}}, + {"%", {"%", 6,Associativity::left}}, + + {"+", {"+", 5,Associativity::left}}, + {"-", {"-", 5,Associativity::left}}, + + {"==",{"==",3,Associativity::left}}, + {"!=",{"!=",3,Associativity::left}}, + {">", {">", 3,Associativity::left}}, + {"<", {"<", 3,Associativity::left}}, + {">=",{">=",3,Associativity::left}}, + {"<=",{"<=",3,Associativity::left}}, +}; + +static char unhexchar(char c){ + if(c>='0'&&c<='9')return c-'0'; + if(c>='a'&&c<='f')return c-'a'+10; + if(c>='A'&&c<='F')return c-'A'+10; + return (char)-1; +} + +static string parseString(const string &repr,Site site){ + if(repr.size()<2||repr[0]!='"'||repr.back()!='"')throw runtime_error("String not surrounded with quotes"); + string res; + res.reserve(repr.size()+3); + for(i64 i=1;i<(i64)repr.size()-1;i++){ + if(repr[i]=='\\'){ + switch(repr[i+1]){ + case 'n': res+='\n'; i++; break; + case 'r': res+='\r'; i++; break; + case 't': res+='\t'; i++; break; + case '"': res+='"'; i++; break; + case 'x':{ + if(i+3>=(i64)repr.size()-1)throw ParseError(site.addX(i),"Invalid hexadecimal escape"); + char c1=unhexchar(repr[i+2]); + char c2=unhexchar(repr[i+3]); + if(c1==(char)-1||c2==(char)-1)throw ParseError(site.addX(i),"Invalid hexadecimal escape"); + res+=(char)(16*c1+c2); + i+=3; + break; + } + default: + throw ParseError(site.addX(i),"Invalid hexadecimal escape"); + } + } else { + res+=repr[i]; + } + } + return res; +} + +static Expression parseExpression(Tokeniser &tokeniser,int minprec=-1); +static StatementList parseScope(Tokeniser &tokeniser); + +static vector parseArgumentList(Tokeniser &tokeniser){ + if(tokeniser.eof())throw ParseError(tokeniser.site(),"Expected argument list but found EOF"); + Token tok=tokeniser.get(); + if(tok.type!=Token::Type::symbol||tok.str!="("){ + throw ParseError(tok.site,"Expected argument list but found '"+tok.str+"'"); + } + tokeniser.advance(); + vector args; + while(true){ + Expression expr=parseExpression(tokeniser); + if(tokeniser.eof()){ + throw ParseError(tokeniser.site(),"Expected ')' or ',' after argument but found EOF"); + } + tok=tokeniser.get(); + if(tok.type!=Token::Type::symbol||(tok.str!=")"&&tok.str!=",")){ + throw ParseError(tok.site,"Expected ')' or ',' after argument but found something else"); + } + tokeniser.advance(); + args.push_back(expr); + if(tok.str==")")break; + } + return args; +} + +static Expression parseAtom(Tokeniser &tokeniser){ + if(tokeniser.eof())throw ParseError(tokeniser.site(),"Expected atom but found EOF"); + Token tok=tokeniser.get(); + switch(tok.type){ + case Token::Type::word:{ + tokeniser.advance(); + if(tokeniser.eof()){ + if(tok.str=="if")throw ParseError(tok.site,"Expected expressions after 'if' but found EOF"); + Expression expr=Expression(Expression::Type::call,tok.str,vector()); + expr.site=tok.site; + return expr; + } + if(tok.str=="if"){ + Expression cond=parseExpression(tokeniser); + if(tokeniser.eof())throw ParseError(tokeniser.site(),"Expected 'then' but found EOF"); + Token tok2=tokeniser.get(); + if(tok2.type!=Token::Type::word||tok2.str!="then"){ + throw ParseError(tok2.site,"Expected 'then' but got '"+tok2.str+"'"); + } + Expression ex1=parseExpression(tokeniser); + if(tokeniser.eof())throw ParseError(tokeniser.site(),"Expected 'else' but found EOF"); + tok2=tokeniser.get(); + if(tok2.type!=Token::Type::word||tok2.str!="else"){ + throw ParseError(tok2.site,"Expected 'else' but got '"+tok2.str+"'"); + } + Expression ex2=parseExpression(tokeniser); + + return Expression(Expression::Type::cond,{cond,ex1,ex2}); + } + + Token tok2=tokeniser.get(); + if(tok2.type==Token::Type::symbol&&tok2.str=="("){ + vector args=parseArgumentList(tokeniser); + bool done=false; + if(tokeniser.eof())done=true; + else { + tok2=tokeniser.get(); + if(tok2.type!=Token::Type::symbol||tok2.str!="{")done=true; + } + if(done){ + Expression expr=Expression(Expression::Type::call,tok.str,args); + expr.site=tok.site; + return expr; + } + return Expression(Expression::Type::dive,tok.str,args, + Scope(Scope::Type::direct,parseScope(tokeniser),{})); + } else if(tok2.type==Token::Type::symbol&&tok2.str=="{"){ + return Expression(Expression::Type::dive,tok.str,{}, + Scope(Scope::Type::direct,parseScope(tokeniser),{})); + } else { + Expression expr=Expression(Expression::Type::call,tok.str,vector()); + expr.site=tok.site; + return expr; + } + } + + case Token::Type::number:{ + tokeniser.advance(); + Expression expr=Expression(Expression::Type::number,strtod(tok.str.data(),nullptr)); + expr.site=tok.site; + return expr; + } + + case Token::Type::string:{ + tokeniser.advance(); + Expression expr=Expression(Expression::Type::string,parseString(tok.str,tok.site)); + expr.site=tok.site; + return expr; + } + + case Token::Type::symbol:{ + if(tok.str=="("){ + tokeniser.advance(); + Expression expr=parseExpression(tokeniser); + if(tokeniser.eof())throw ParseError(tokeniser.site(),"Expected ')' but found EOF"); + Token tok2=tokeniser.get(); + if(tok2.type!=Token::Type::symbol||tok2.str!=")"){ + throw ParseError(tok2.site,"Expected ')' but found something else"); + } + tokeniser.advance(); + return expr; + } + Scope::Type sctype; + if(tok.str=="?")sctype=Scope::Type::lazy; + else if(tok.str=="??")sctype=Scope::Type::function; + else if(tok.str!="{"){ + throw ParseError(tok.site,"Unexpected token '"+tok.str+"' in expression atom position"); + } else sctype=Scope::Type::direct; + vector args; + if(sctype!=Scope::Type::direct){ + tokeniser.advance(); + if(tokeniser.eof()){ + throw ParseError(tokeniser.site(),"Expected scope after '"+tok.str+"' but found EOF"); + } + Token tok2=tokeniser.get(); + if(tok2.type!=Token::Type::symbol){ + throw ParseError(tok2.site,"Expected '(' or '{' after '"+tok.str+"'"); + } + if(tok2.type==Token::Type::symbol&&tok2.str=="("){ + args=parseArgumentList(tokeniser); + } + } + // DEBUG<<"args: "<second; + + if(op.prec