diff options
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | Makefile | 20 | ||||
-rw-r--r-- | ast.cpp | 0 | ||||
-rw-r--r-- | ast.h | 90 | ||||
-rw-r--r-- | global.h | 8 | ||||
-rw-r--r-- | list.squig | 136 | ||||
-rw-r--r-- | main.cpp | 11 | ||||
-rw-r--r-- | parser.cpp | 225 | ||||
-rw-r--r-- | parser.h | 18 |
9 files changed, 510 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..19e39f8
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+*.o
+squig
diff --git a/Makefile b/Makefile
new file mode 100644
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,20 @@
+CXX = g++
+CXXFLAGS = -Wall -Wextra -std=c++11 -g -fwrapv
+TARGET = squig
+
+.PHONY: all clean remake
+
+all: $(TARGET)
+
+clean:
+	rm -f $(TARGET) *.o
+
+remake: clean
+	$(MAKE) all
+
+# Link every object ($^); $< would pass only the first prerequisite
+$(TARGET): $(patsubst %.cpp,%.o,$(wildcard *.cpp))
+	$(CXX) -o $@ $^
+
+%.o: %.cpp $(wildcard *.h)
+	$(CXX) $(CXXFLAGS) -c -o $@ $<
diff --git a/ast.cpp b/ast.cpp
new file mode 100644
index 0000000..e69de29
diff --git a/ast.h b/ast.h
new file mode 100644
--- /dev/null
+++ b/ast.h
@@ -0,0 +1,90 @@
+#pragma once
+
+#include <string>
+#include <vector>
+#include "global.h"
+
+using namespace std;
+
+
+class Statement;
+using Name = string;
+using StatementList = vector<Statement>;
+
+class Site{
+public:
+	string filename;
+	i64 lnum,linex;
+
+	Site();
+	Site(const string &filename,i64 lnum,i64 linex);
+};
+
+class Scope{
+public:
+	enum class Type{
+		direct,
+		lazy,
+		function,
+		async,
+	};
+
+	Type type;
+	StatementList body;
+	vector<Name> args;
+
+	Site site;
+
+	Scope();
+	Scope(Type type,const StatementList &body,const vector<Name> &args);
+};
+
+class Expression{
+public:
+	enum class Type{
+		binop, // name, args[0], args[1]
+		unop, // name, args[0]
+		call, // name, args
+		dive, // name, args, body
+		number, // numval
+		string, // strval
+		scope, // scope
+	};
+
+	Type type;
+	Name name;
+	vector<Expression> args;
+	double numval;
+	string strval;
+	Scope scope;
+	StatementList body;
+
+	Site site;
+
+	Expression();
+	Expression(Type type,const Name &name,const vector<Expression> &args); // binop, call
+	Expression(Type type,const Name &name,const Expression &arg); // unop
+	Expression(Type type,const Name &name,const vector<Expression> &args,const StatementList &body); // dive
+	Expression(Type type,double numval); // number
+	Expression(Type type,const string &strval); // string
+	Expression(Type type,const Scope &scope); // scope
+};
+
+class Statement{
+public:
+	enum class Type{
+		create, // dstvar, expr
+		assign, // dstvar, expr
+		expression, // expr
+	};
+
+	Type type;
+	Name dstvar;
+	Expression expr;
+
+	Site site;
+
+	Statement();
+	Statement(Type type,const Name &dstvar,const Expression &expr); // create, assign
+	Statement(Type type,const Expression &expr); // expr
+};
diff --git a/global.h b/global.h
new file mode 100644
index 0000000..4abd67a
--- /dev/null
+++ b/global.h
@@ -0,0 +1,8 @@
+#pragma once
+
+#include <cstdint>
+
+using namespace std;
+
+using i64 = int64_t;
+using u64 = uint64_t;
diff --git a/list.squig b/list.squig
new file mode 100644
index 0000000..ea0d333
--- /dev/null
+++ b/list.squig
@@ -0,0 +1,136 @@
+new_list := ??{
+	front := nil
+	back := nil
+}
+
+list_push_front := ??(list, item){
+	list {
+		if front == nil {
+			front = {
+				value := item
+				next := nil
+				prev := nil
+			}
+			back = front
+		} else {
+			front = {
+				value := item
+				next := front
+				prev := nil
+			}
+		}
+	}
+}
+
+list_push_back := ??(list, item){
+	list {
+		if back == nil {
+			front = {
+				value := item
+				next := nil
+				prev := nil
+			}
+			back = front
+		} else {
+			back = {
+				value := item
+				next := nil
+				prev := back
+			}
+		}
+	}
+}
+
+list_pop_front := ??(list){
+	x := nil
+	list {
+		if front == nil {
+			throw_error("Call to 'list_pop_front' on empty list")
+		}
+		front {
+			x = value
+			front = next
+		}
+		if front == nil {
+			back = nil
+		}
+	}
+}
+
+list_pop_back := ??(list){
+	x := nil
+	list {
+		if back == nil {
+			throw_error("Call to 'list_pop_back' on empty list")
+		}
+		back {
+			x = value
+			back = prev
+		}
+		if back == nil {
+			front = nil
+		}
+	}
+}
+
+list_get := nil
+{
+	get_helper := ??(front, idx){
+		if front == nil {
+			throw_error("Index past end of list in 'list_get'")
+		}
+		x := nil
+		if idx == 0 {
+			front {
+				x = value
+			}
+		} else {
+			front {
+				found := nil
+				get_helper(next, idx - 1){
+					found = x
+				}
+				x = found
+			}
+		}
+	}
+	list_get = ??(list, idx){
+		if idx < 0 {
+			throw_error("Negative index in 'list_get'")
+		}
+		x := nil
+		list {
+			found := nil
+			get_helper(front, idx){
+				found = x
+			}
+			x = found
+		}
+	}
+}
+
+list_set := nil
+{
+	set_helper := ??(front, idx, val){
+		if front == nil {
+			throw_error("Index past end of list in 'list_set'")
+		}
+		if idx == 0 {
+			front {
+				value = val
+			}
+		} else {
+			front {
+				set_helper(next, idx - 1, val){}
+			}
+		}
+	}
+	list_set = ??(list, idx, val){
+		if idx < 0 {
+			throw_error("Negative index in 'list_set'")
+		}
+		list {
+			set_helper(front, idx, val)
+		}
+	}
+}
diff --git a/main.cpp b/main.cpp
new file mode 100644
index 0000000..b28e446
--- /dev/null
+++ b/main.cpp
@@ -0,0 +1,11 @@
+#include <iostream>
+#include "parser.h"
+
+using namespace std;
+
+
+int main(int argc,char **argv){
+	(void)argc;
+	(void)argv;
+	return 0;
+}
diff --git a/parser.cpp b/parser.cpp
new file mode 100644
--- /dev/null
+++ b/parser.cpp
@@ -0,0 +1,225 @@
+#include <stdexcept>
+#include <cstring>
+#include <cctype>
+#include <cassert>
+#include "parser.h"
+
+using namespace std;
+
+
+ParseError::ParseError(const string &what_arg)
+	:runtime_error(what_arg){}
+ParseError::ParseError(const char *what_arg)
+	:runtime_error(what_arg){}
+
+
+static bool isinitwordchar(char c){
+	return isalpha(c)||c=='_';
+}
+
+static bool iswordchar(char c){
+	return isalpha(c)||isdigit(c)||c=='_';
+}
+
+// Every symbol token the tokeniser accepts
+static const vector<string> tok_symbols={
+	"==", "!=", ">", "<", ">=", "<=",
+	":=", "=",
+	"+", "-", "*", "/", "%",
+	"(", ")", ",",
+	"{", "}", "?{", "??{", "}&",
+};
+
+// Returns whether s is a prefix of (or equal to) some entry of tok_symbols
+static bool isSymbolPrefix(const string &s){
+	for(const string &sym : tok_symbols){
+		if(s.size()<=sym.size()&&sym.substr(0,s.size())==s)return true;
+	}
+	return false;
+}
+
+template <typename T>
+static bool contains(const vector<T> &v,const T &target){
+	for(const T &t : v){
+		if(t==target)return true;
+	}
+	return false;
+}
+
+
+// One lexical token: its kind, its source text and where it starts
+class Token{
+public:
+	enum class Type{
+		word,
+		number,
+		string,
+		symbol,
+	};
+
+	Type type;
+	string str;
+	Site site;
+
+	Token(Type type,const string &str,const Site &site)
+		:type(type),str(str),site(site){}
+};
+
+
+// Splits source into Tokens on demand: call advance(), and while it returns
+// true, get() yields the current token. source and filename are held by
+// reference, so both must outlive the Tokeniser.
+class Tokeniser{
+	const string &source;
+	const string &filename;
+	i64 idx,nextidx; // current token is source[idx,nextidx); nextidx is -1 before the first advance()
+	i64 lnum,linex; // 1-based line and column of idx
+	Token::Type ttype;
+
+	bool eof(i64 at) const {
+		return at>=(i64)source.size();
+	}
+
+	// Text of the current token; throws if there is no current token
+	string get_() const {
+		if(eof())throw runtime_error("Tokeniser::get() on eof");
+		if(nextidx==-1)throw runtime_error("Tokeniser::get() before advance");
+		if(nextidx==-2)throw runtime_error("Tokeniser::get() after eof");
+		assert(nextidx>=0);
+		return source.substr(idx,nextidx-idx);
+	}
+
+public:
+	Tokeniser(const string &source,const string &filename)
+		:source(source),filename(filename),
+		idx(0),nextidx(-1),
+		lnum(1),linex(1),
+		ttype(Token::Type::word){} // placeholder until advance() sets it
+
+	bool eof() const {
+		return idx>=(i64)source.size();
+	}
+
+	Token get() const {
+		return Token(ttype,get_(),Site(filename,lnum,linex));
+	}
+
+	// Returns whether there are more tokens
+	bool advance(){
+		if(eof())return false;
+
+		// Step over the previous token, keeping lnum/linex in sync
+		while(idx<nextidx){
+			if(source[idx]=='\n'){
+				lnum++;
+				linex=1;
+			} else {
+				linex++;
+			}
+			idx++;
+		}
+
+		// Skip any run of whitespace and '#' line comments
+		while(true){
+			i64 origidx=idx;
+			while(!eof()&&isspace(source[idx])){
+				if(source[idx]=='\n'){
+					lnum++;
+					linex=1;
+				} else {
+					linex++;
+				}
+				idx++;
+			}
+			if(eof())return false;
+
+			if(source[idx]=='#'){
+				while(!eof()&&source[idx]!='\n')idx++;
+				// A comment on the last line may end without a newline
+				if(eof())return false;
+				idx++;
+				lnum++;
+				linex=1;
+				if(eof())return false;
+			}
+
+			if(idx==origidx)break;
+		}
+
+		nextidx=idx;
+
+		// Word
+		if(isinitwordchar(source[nextidx])){
+			ttype=Token::Type::word;
+			do nextidx++;
+			while(!eof(nextidx)&&iswordchar(source[nextidx]));
+			return true;
+		}
+
+		// Number literal
+		if(isdigit(source[nextidx])||(!eof(nextidx+1)&&source[nextidx]=='-'&&isdigit(source[nextidx+1]))){
+			ttype=Token::Type::number;
+			if(source[nextidx]=='-')nextidx++;
+			while(!eof(nextidx)&&isdigit(source[nextidx]))nextidx++;
+			if(eof(nextidx))return true;
+			if(source[nextidx]=='.'){
+				nextidx++;
+				if(eof(nextidx)||!isdigit(source[nextidx])){
+					throw ParseError("Incomplete floating point literal at EOF");
+				}
+				while(!eof(nextidx)&&isdigit(source[nextidx]))nextidx++;
+				if(eof(nextidx))return true;
+			}
+			if(strchr("eE",source[nextidx])!=NULL){
+				nextidx++;
+				if(eof(nextidx)||strchr("+-0123456789",source[nextidx])==NULL){
+					throw ParseError("Incomplete floating point literal at EOF");
+				}
+				if(strchr("+-",source[nextidx])!=NULL){
+					nextidx++;
+					if(eof(nextidx))throw ParseError("Incomplete floating point literal at EOF");
+				}
+				while(!eof(nextidx)&&isdigit(source[nextidx]))nextidx++;
+			}
+			return true;
+		}
+
+		// String literal
+		if(source[nextidx]=='"'){
+			ttype=Token::Type::string;
+			nextidx++;
+			while(!eof(nextidx)&&source[nextidx]!='"'){
+				// A backslash escapes the next character, so step over both
+				if(source[nextidx]=='\\')nextidx++;
+				nextidx++;
+			}
+			if(eof(nextidx))throw ParseError("Incomplete string literal at EOF");
+			nextidx++;
+			return true;
+		}
+
+		// Symbol
+		if(isSymbolPrefix({source[idx]})){
+			ttype=Token::Type::symbol;
+			nextidx++;
+			// Maximal munch: extend only while the longer text still starts a symbol
+			while(!eof(nextidx)&&isSymbolPrefix(source.substr(idx,nextidx-idx+1)))nextidx++;
+			// A bare prefix such as "?" or "!" is not a symbol; back off to the longest complete one
+			while(nextidx>idx&&!contains(tok_symbols,get_()))nextidx--;
+			if(nextidx>idx)return true;
+			throw ParseError("Unknown symbol starting at '"+source.substr(idx,5)+"'");
+		}
+
+		throw ParseError("Unknown token starting at '"+source.substr(idx,5)+"'");
+	}
+};
+
+StatementList parse(const string &source,const string &filename){
+	Tokeniser tokeniser(source,filename);
+	StatementList stl;
+	while(tokeniser.advance()){
+		Token tok=tokeniser.get();
+
+	}
+	return stl;
+}
diff --git a/parser.h b/parser.h
new file mode 100644
--- /dev/null
+++ b/parser.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include <stdexcept>
+#include "ast.h"
+
+using namespace std;
+
+
+// Thrown by the tokeniser in parser.cpp on malformed source text
+class ParseError : public runtime_error{
+public:
+	explicit ParseError(const string &what_arg);
+	explicit ParseError(const char *what_arg);
+};
+
+// Parses source into statements; filename is recorded in token Sites.
+// NOTE: the implementation currently only tokenises and returns an empty list.
+StatementList parse(const string &source,const string &filename);