From 897fb17dd6a045a7056e6d6babbbb24748f698f6 Mon Sep 17 00:00:00 2001 From: tomsmeding Date: Sat, 9 Dec 2017 10:48:58 +0100 Subject: Initial --- .gitignore | 2 + Makefile | 17 ++++ ast.hs | 46 +++++++++ closuretest.lisp | 1 + compiler.hs | 278 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ f.lisp | 7 ++ fibo.lisp | 156 +++++++++++++++++++++++++++++++ fiboY.lisp | 39 ++++++++ intermediate.hs | 95 +++++++++++++++++++ interpreter.hs | 234 ++++++++++++++++++++++++++++++++++++++++++++++ main.hs | 84 +++++++++++++++++ match.lisp | 15 +++ optimiser.hs | 24 +++++ parser.hs | 77 +++++++++++++++ stdlib.hs | 16 ++++ test.hs | 16 ++++ vm.hs | 99 ++++++++++++++++++++ 17 files changed, 1206 insertions(+) create mode 100644 .gitignore create mode 100644 Makefile create mode 100644 ast.hs create mode 100644 closuretest.lisp create mode 100644 compiler.hs create mode 100644 f.lisp create mode 100644 fibo.lisp create mode 100644 fiboY.lisp create mode 100644 intermediate.hs create mode 100644 interpreter.hs create mode 100644 main.hs create mode 100644 match.lisp create mode 100644 optimiser.hs create mode 100644 parser.hs create mode 100644 stdlib.hs create mode 100644 test.hs create mode 100644 vm.hs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a4ff550 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +obj/ +lisp diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..f053d5f --- /dev/null +++ b/Makefile @@ -0,0 +1,17 @@ +GHC = ghc +GHCFLAGS = -O3 -Wall +TARGET = lisp + +OBJDIR = obj + +.PHONY: all clean + +all: $(TARGET) + +clean: + rm -f $(TARGET) + rm -rf obj + +$(TARGET): $(wildcard *.hs) + @mkdir -p $(OBJDIR) + $(GHC) -o $@ $(GHCFLAGS) $^ -hidir $(OBJDIR) -odir $(OBJDIR) diff --git a/ast.hs b/ast.hs new file mode 100644 index 0000000..eae5af8 --- /dev/null +++ b/ast.hs @@ -0,0 +1,46 @@ +module AST where + +import Data.List + + +data Program = Program [Value] + +type Name = String + +data Value + = VList [Value] + | VNum Int + | VString String + | VName Name + | VQuoted Value + | VLambda [Name] Value + | VBuiltin String + | VEllipsis + deriving (Eq) + + +instance Show Program where + show (Program l) = intercalate "\n" $ map show l + +instance Show Value where + show (VList es) = '(' : intercalate " " (map show es) ++ ")" + show (VNum i) = show i + show (VString s) = show s + show (VName n) = n + show (VQuoted e) = '\'' : show e + show (VLambda as v) = "(lambda (" ++ intercalate " " as ++ ") " ++ show v ++ ")" + show (VBuiltin str) = "[[builtin " ++ str ++ "]]" + show VEllipsis = "..." + + +fromVName :: Value -> Maybe Name +fromVName (VName s) = Just s +fromVName _ = Nothing + +fromVNum :: Value -> Maybe Int +fromVNum (VNum i) = Just i +fromVNum _ = Nothing + +fromVString :: Value -> Maybe String +fromVString (VString s) = Just s +fromVString _ = Nothing diff --git a/closuretest.lisp b/closuretest.lisp new file mode 100644 index 0000000..9827eb1 --- /dev/null +++ b/closuretest.lisp @@ -0,0 +1 @@ +(((lambda (x) (lambda (y) (+ x y))) 1) 2) diff --git a/compiler.hs b/compiler.hs new file mode 100644 index 0000000..d63f7bf --- /dev/null +++ b/compiler.hs @@ -0,0 +1,278 @@ +{-# LANGUAGE DeriveFunctor, GeneralizedNewtypeDeriving #-} +module Compiler(IRProgram, compileProgram) where + +import Control.Monad.Except +import Control.Monad.State.Strict +import Data.List +import Data.Maybe +import qualified Data.Map.Strict as Map +import qualified Data.Set as Set +import Debug.Trace + +import AST +import Intermediate + + +data TaggedValue + = TVList [TaggedValue] + | TVNum Int + | TVString String + | TVName Name (Maybe Int) -- Nothing: unknown, Just n: defined n lambdas up (0 = current lambda arg) + | TVQuoted Value + | TVDefine Name TaggedValue + | TVLambda [Name] TaggedValue [Name] -- (args) (body) (closure slot names) + | TVEllipsis + deriving Show + +-- also does some preprocessing, like parsing lambda's and defines +analyseValue :: Value -> TaggedValue +analyseValue = go [] + where + go :: [Set.Set Name] -> Value -> TaggedValue + go scopes (VList [VName "define", VName name, VList args, body]) + | Just names <- mapM fromVName args = go scopes (VList [VName "define", VName name, VLambda names body]) + | otherwise = error "Invalid 'define' shorthand syntax: Invalid argument list" + go scopes (VList [VName "define", VName name, value]) = TVDefine name (go scopes value) + go scopes (VList [VName "lambda", VList args, body]) + | Just names <- mapM fromVName args = go scopes (VLambda names body) + | otherwise = error "Invalid 'lambda' syntax: Invalid argument list" + go _ (VList (VName "lambda" : _)) = error "Invalid 'lambda' syntax" + go scopes (VList values) = TVList (map (go scopes) values) + go _ (VNum n) = TVNum n + go _ (VString s) = TVString s + go scopes (VName name) = TVName name (findIndex id (map (Set.member name) scopes)) + go _ (VQuoted value) = TVQuoted value + go scopes (VLambda args body) = + let t = go (Set.fromList args : scopes) body + in TVLambda args t (Set.toList (collectEscapes 0 t)) + go _ (VBuiltin _) = undefined + go _ VEllipsis = TVEllipsis + + collectEscapes :: Int -> TaggedValue -> Set.Set Name + collectEscapes limit (TVList values) = Set.unions (map (collectEscapes limit) values) + collectEscapes limit (TVName name (Just n)) | n > limit = Set.singleton name + collectEscapes limit (TVLambda _ body _) = collectEscapes (limit + 1) body + collectEscapes _ _ = Set.empty + + +data CompState = CompState + { csNextId :: Int + , csBlocks :: Map.Map Int BB + , csCurrent :: Int + , csScopes :: [Map.Map Name ScopeItem] + , csDefines :: Map.Map Name Ref + , csBuiltins :: Map.Map Name () + , csFunctions :: Map.Map Name GlobFuncDef + , csDatas :: [Value] } + deriving Show + +data ScopeItem = SIParam Int | SIClosure Int | SIGlobal + deriving Show + +newtype CM a = CM {unCM :: StateT CompState (Except String) a} + deriving (Functor, Applicative, Monad, MonadState CompState, MonadError String) + +builtinMap :: Map.Map Name () +builtinMap = Map.fromList [("+", ()), ("-", ()), ("<=", ()), ("print", ())] + +bbId :: BB -> Int +bbId (BB i _ _) = i + +initState :: CompState +initState = CompState 0 Map.empty undefined [] Map.empty builtinMap Map.empty [] + +runCM :: CM a -> Either String a +runCM act = runExcept $ evalStateT (unCM act) initState + +genId :: CM Int +genId = state $ \s -> (csNextId s, s {csNextId = csNextId s + 1}) + +genTemp :: CM Ref +genTemp = liftM RTemp genId + +newBlock :: CM Int +newBlock = do + i <- genId + modify $ \s -> s {csBlocks = Map.insert i (BB i [] IUnknown) (csBlocks s)} + return i + +switchBlock :: Int -> CM () +switchBlock i = modify $ \s -> s {csCurrent = i} + +newBlockSwitch :: CM Int +newBlockSwitch = do + i <- newBlock + switchBlock i + return i + +rememberBlock :: CM a -> CM a +rememberBlock act = do + b <- gets csCurrent + res <- act + switchBlock b + return res + +modifyBlock :: (BB -> BB) -> CM () +modifyBlock f = do + st <- get + let current = csCurrent st + Just bb = Map.lookup current (csBlocks st) + put $ st {csBlocks = Map.insert current (f bb) (csBlocks st)} + +addIns :: Instruction -> CM () +addIns ins = modifyBlock $ \(BB i inss term) -> BB i (inss ++ [ins]) term + +setTerm :: Terminator -> CM () +setTerm term = modifyBlock $ \(BB i inss _) -> BB i inss term + +lookupVar :: Name -> CM (Either ScopeItem Ref) +lookupVar name = gets csScopes >>= \scopes -> case msum (map (Map.lookup name) scopes) of + Just si -> return (Left si) + Nothing -> gets csDefines >>= \defines -> case Map.lookup name defines of + Just ref -> return (Right ref) + Nothing -> return (Left SIGlobal) + +dataTableAdd :: Value -> CM Int +dataTableAdd v = state $ \ctx -> (length (csDatas ctx), ctx {csDatas = csDatas ctx ++ [v]}) + +functionAdd :: Name -> GlobFuncDef -> CM () +functionAdd name gfd = modify $ \s -> s {csFunctions = Map.insert name gfd (csFunctions s)} + +defineAdd :: Name -> Ref -> CM () +defineAdd name ref = modify $ \s -> s {csDefines = Map.insert name ref (csDefines s)} + +withScope :: Map.Map Name ScopeItem -> CM a -> CM a +withScope sc act = do + modify $ \s -> s {csScopes = sc : csScopes s} + res <- act + modify $ \s -> s {csScopes = tail (csScopes s)} + return res + + +compileProgram :: Program -> Either String IRProgram +compileProgram (Program values) = runCM $ do + bstart <- newBlockSwitch + forM_ values $ \value -> do + bnext <- newBlock + ref <- genTValue (analyseValue value) bnext + switchBlock bnext + addIns (RNone, IDiscard ref) + setTerm IExit + ([firstbb], otherbbs) <- liftM (partition ((== bstart) . bbId) . Map.elems) (gets csBlocks) + funcs <- gets csFunctions + datas <- gets csDatas + return (IRProgram (firstbb : otherbbs) funcs datas) + +genTValue :: TaggedValue -> Int -> CM Ref +genTValue (TVList []) _ = throwError "Empty call" +genTValue (TVList (TVName "do" _ : stmts)) nextnext = do + forM_ (init stmts) $ \stmt -> do + b <- newBlock + r <- genTValue stmt b + switchBlock b + addIns (RNone, IDiscard r) + genTValue (last stmts) nextnext +genTValue (TVList [TVName "if" _, cond, val1, val2]) nextnext = do + b1 <- newBlock + bthen <- newBlock + belse <- newBlock + bthen' <- newBlock + belse' <- newBlock + + condref <- genTValue cond b1 + switchBlock b1 + setTerm $ IBr condref bthen belse + resref <- genTemp + + switchBlock bthen + thenref <- genTValue val1 bthen' + switchBlock bthen' + addIns (resref, IAssign thenref) + setTerm $ IJmp nextnext + + switchBlock belse + elseref <- genTValue val2 belse' + switchBlock belse' + addIns (resref, IAssign elseref) + setTerm $ IJmp nextnext + + return resref +genTValue (TVList (TVName "if" _ : _)) _ = throwError "Invalid 'if' syntax" +genTValue (TVList (funcexpr : args)) nextnext = do + refs <- forM args $ \value -> do + bnext <- newBlock + ref <- genTValue value bnext + switchBlock bnext + return ref + b <- newBlock + funcref <- genTValue funcexpr b + switchBlock b + resref <- genTemp + addIns (resref, ICallC funcref refs) + setTerm $ IJmp nextnext + return resref +genTValue (TVNum n) nextnext = do + setTerm $ IJmp nextnext + return (RConst n) +genTValue (TVString s) nextnext = do + i <- dataTableAdd (VString s) + r <- genTemp + addIns (r, IData i) + setTerm $ IJmp nextnext + return r +genTValue (TVDefine name value) nextnext = do + dref <- genTemp + defineAdd name dref + vref <- genTValue value nextnext + -- traceShowM ("tvdefine_refs", dref, vref, name, value) + addIns (dref, IAssign vref) + return RNone +genTValue (TVLambda args body closure) nextnext = do + startb <- rememberBlock $ + withScope (Map.fromList (zip args (map SIParam [0..]) ++ zip closure (map SIClosure [0..]))) $ do + b <- newBlockSwitch + b2 <- newBlock + ref <- genTValue body b2 + switchBlock b2 + setTerm $ IRet ref + return b + uid <- genId + let uname = show uid ++ "L" -- starts with digit, so cannot clash with user-defined name + functionAdd uname (GlobFuncDef startb (length args) closure) + resref <- case closure of + [] -> return (RSClo uname) + _ -> do + refs <- foldM (\refs' cname -> do + b <- newBlock + r <- genTValue (TVName cname undefined) b + switchBlock b + return (r : refs')) + [] closure + r <- genTemp + addIns (r, IAllocClo uname refs) + return r + setTerm $ IJmp nextnext + return resref +genTValue (TVName name _) nextnext = do + r <- genTemp + lookupVar name >>= \si -> case si of + Right ref -> addIns (r, IAssign ref) + Left (SIParam n) -> addIns (r, IParam n) + Left (SIClosure n) -> addIns (r, IClosure n) + Left SIGlobal -> do + funcs <- gets csFunctions + builtins <- gets csBuiltins + case (Map.lookup name funcs, Map.lookup name builtins) of + (Just (GlobFuncDef _ _ []), _) -> addIns (r, IAssign (RSClo name)) + (Just (GlobFuncDef _ _ cs), _) -> do + refs <- foldM (\refs' cname -> do + b <- newBlock + r' <- genTValue (TVName cname undefined) b + switchBlock b + return (r' : refs')) + [] cs + addIns (r, IAllocClo name refs) + (_, Just ()) -> addIns (r, IAssign (RSClo name)) + _ -> throwError $ "Use of undefined name \"" ++ name ++ "\"" + setTerm $ IJmp nextnext + return r diff --git a/f.lisp b/f.lisp new file mode 100644 index 0000000..5644138 --- /dev/null +++ b/f.lisp @@ -0,0 +1,7 @@ +(print 42) +(print "kaas") +(if 42 (print "ja") (print "nee")) +(define f (lambda (a) (print a))) +(f "iets") +(define f (lambda (a) (print a a))) +(f "iets") diff --git a/fibo.lisp b/fibo.lisp new file mode 100644 index 0000000..7ce4ea6 --- /dev/null +++ b/fibo.lisp @@ -0,0 +1,156 @@ +(define fibo1 (n) + (if (<= n 0) 0 + (if (<= n 2) 1 + (+ (fibo1 (- n 1)) (fibo1 (- n 2)))))) + +; == main lambda == +; Params: 1 +; B0: +; t0 <- param 0 +; t1 <- callf "<=" [t0 0] +; br t1 B1 B2 +; B1: +; t2 <- assign 0 +; jmp B5 +; B2: +; t3 <- param 0 +; t4 <- callf "<=" [t3 2] +; br t4 B3 B4 +; B3: +; t2 <- assign 1 +; jmp B5 +; B4: +; t5 <- param 0 +; t6 <- callf "-" [t5 1] +; t7 <- callf "fibo1" [t6] ; patched up (statically known address, afterwards) +; t8 <- param 0 +; t9 <- callf "-" [t8 2] +; t10 <- callf "fibo1" [t9] ; patched up +; t2 <- callf "+" [t7 t10] +; jmp B5 +; B5: +; return t2 + + +(define fibo2help (a b n) + (if (<= n 0) b + (fibo2help b (+ a b) (- n 1)))) + +(define fibo2 (n) + (if (<= n 0) 0 + (if (<= n 2) 1 + (fibo2help 1 1 (- n 2))))) + + +(define fibo3 (n) + ((lambda (helper) + (if (<= n 0) 0 + (if (<= n 2) 1 + (helper helper 1 1 (- n 2))))) + (lambda (recur a b n) + (if (<= n 0) b + (recur recur b (+ a b) (- n 1)))))) + +; == L1 == +; Params: 1 +; Closure slots: 1 +; B0: +; t0 <- closure 0 +; t1 <- callf "<=" [t0 0] +; br t1 B1 B2 +; B1: +; t2 <- assign 0 +; jmp B5 +; B2: +; t3 <- closure 0 +; t4 <- callf "<=" [t3 2] +; br t4 B3 B4 +; B3: +; t2 <- assign 1 +; jmp B5 +; B4: +; t5 <- param 0 +; t6 <- param 0 +; t7 <- closure 0 +; t8 <- callf "-" [t7 2] +; t2 <- callc t5 [t6 1 1 t8] +; jmp B5 +; B5: +; return t2 +; +; == L2 == +; Params: 4 +; B0: +; t0 <- param 3 +; t1 <- callf "<=" [t0 0] +; br t1 B1 B2 +; B1: +; t2 <- param 2 +; jmp B3 +; B2: +; t3 <- param 0 +; t4 <- param 0 +; t5 <- param 2 +; t6 <- param 1 +; t7 <- param 2 +; t8 <- callf "+" [t6 t7] +; t9 <- param 3 +; t10 <- callf "-" [t9 1] +; t2 <- callc t3 [t4 t5 t8 t10] +; jmp B3 +; B3: +; return t2 +; +; == main lambda == +; Params: 1 +; B0: +; t0 <- param 0 +; t1 <- alloc-closure L1 [t0] +; t2 <- callc t1 [C(L2)] +; free-closure t1 +; return t2 + + +(define for (start end f) + (if (<= start end) + (do (f start) (for (+ start 1) end f)) + 0)) ; TODO: '() + +; == main lambda == +; Params: 3 +; Data table: +; 0: () +; B0: +; t0 <- param 0 +; t1 <- param 1 +; t2 <- callf "<=" [t0 t1] +; br t2 B1 B2 +; B1: +; t3 <- param 2 +; t4 <- param 0 +; _ <- callc t3 [t4] +; t5 <- param 0 +; t6 <- callf "+" [t5 1] +; t7 <- param 1 +; t8 <- param 2 +; t9 <- callf "for" [t6 t7 t8] ; patched up +; jmp B3 +; B2: +; t9 <- data 0 +; jmp B3 +; B3: +; return t9 + +(for 1 25 (lambda (n) (print (fibo3 n)))) + +; == L1 == +; Params: 1 +; B0: +; t0 <- param 0 +; t1 <- callf "fibo3" [t0] +; t2 <- callf "print" [t1] +; return t2 +; +; == global code == +; B0: +; _ <- callf "for" [1 25 C(L1)] diff --git a/fiboY.lisp b/fiboY.lisp new file mode 100644 index 0000000..f46c00a --- /dev/null +++ b/fiboY.lisp @@ -0,0 +1,39 @@ +(define YY (recur) (lambda (f) (lambda (a) (f ((recur recur) f) a)))) +(define Y (YY YY)) + +(define forX (recur (low high func)) + (if (low <= high) + (do + (func low) + (recur (list (+ low 1) high func))) + '())) + +(define for (Y forX)) + +(define fibohelperX (recur (n a b)) + (if (n <= 0) a + (recur (list (- n 1) b (+ a b))))) + +(define fibohelper (Y fibohelperX)) + +(define fibo (n) (fibohelper (n 0 1))) + + + + + +; Fill in the dots with the lines below... +; If you're done, try to dump the whole expression into test.hs. + +; ((lambda (YY) ...) (lambda (recur) (lambda (f) (lambda (a) (f ((recur recur) f) a))))) +; +; ((lambda (Y) ...) (YY YY)) +; +; ((lambda (forX) ...) (lambda (recur args) ; args: (low high func) +; (if ((car args) <= (cadr args)) +; (do +; ((cadr (cdr args)) (car args)) +; (recur (list (+ low 1) high func))) +; '()))) + + diff --git a/intermediate.hs b/intermediate.hs new file mode 100644 index 0000000..c72e81c --- /dev/null +++ b/intermediate.hs @@ -0,0 +1,95 @@ +module Intermediate where + +import Data.List +import qualified Data.Map.Strict as Map + +import AST + + +data IRProgram = + IRProgram [BB] + (Map.Map Name GlobFuncDef) + [Value] -- data table + +data GlobFuncDef = + GlobFuncDef Int -- BB id of implementation + Int -- number of arguments + [Name] -- closure slots + +data BB = BB Int [Instruction] Terminator + +type Instruction = (Ref, InsCode) + +data Ref + = RConst Int + | RTemp Int + | RSClo Name -- static closure object of a function + | RNone + deriving Eq + +data InsCode + = IAssign Ref + | IParam Int + | IClosure Int + | IData Int + | ICallC Ref [Ref] + | IAllocClo Name [Ref] + | IDiscard Ref + deriving Eq + +data Terminator + = IBr Ref Int Int + | IJmp Int + | IRet Ref + | IExit + | IUnknown + deriving Eq + + +bidOf :: BB -> Int +bidOf (BB i _ _) = i + +termOf :: BB -> Terminator +termOf (BB _ _ t) = t + + +instance Show IRProgram where + show (IRProgram bbs gfds datas) = intercalate "\n" $ + ["IRPROGRAM", "Data Table:"] ++ map (("- " ++) . show) datas ++ + ["Global functions:"] ++ map (\(n, gfd) -> "- " ++ n ++ ": " ++ show gfd) (Map.assocs gfds) ++ + ["Blocks:"] ++ [intercalate "\n" (map show bbs)] + +instance Show GlobFuncDef where + show (GlobFuncDef bbid na []) = "BB " ++ show bbid ++ " (" ++ show na ++ ")" + show (GlobFuncDef bbid na cs) = + "BB " ++ show bbid ++ " (" ++ show na ++ ") (closure slots: " ++ intercalate ", " cs ++ ")" + +instance Show BB where + show (BB i inss term) = + "BB " ++ show i ++ + concatMap (\(r, ic) -> case r of + RNone -> "\n " ++ show ic + _ -> "\n " ++ show r ++ " <- " ++ show ic) inss ++ + "\n " ++ show term + +instance Show Ref where + show (RConst n) = show n + show (RTemp n) = "t" ++ show n + show (RSClo name) = "SC(\"" ++ name ++ "\")" + show RNone = "<>" + +instance Show InsCode where + show (IAssign r) = "assign " ++ show r + show (IParam n) = "param " ++ show n + show (IClosure n) = "closure " ++ show n + show (IData n) = "data " ++ show n + show (ICallC r as) = "callc " ++ show r ++ " " ++ show as + show (IAllocClo name vs) = "alloc-closure \"" ++ name ++ "\" " ++ show vs + show (IDiscard r) = "discard " ++ show r + +instance Show Terminator where + show (IBr r b1 b2) = "br " ++ show r ++ " " ++ show b1 ++ " " ++ show b2 + show (IJmp b) = "jmp " ++ show b + show (IRet r) = "ret " ++ show r + show IExit = "exit" + show IUnknown = "<>" diff --git a/interpreter.hs b/interpreter.hs new file mode 100644 index 0000000..4595035 --- /dev/null +++ b/interpreter.hs @@ -0,0 +1,234 @@ +{-# LANGUAGE TupleSections, DeriveFunctor, GeneralizedNewtypeDeriving #-} +module Interpreter(newContext, interpret, interpretProgram, Context) where + +import Control.Applicative +import Control.Monad +import Control.Monad.Except +import Control.Monad.State +import Data.List +import Data.Maybe +import qualified Data.Map.Strict as Map +-- import Debug.Trace + +import AST +import Parser + + +-- TODO: place bottom varmap separately for performance? (global define's in deeply nested contexts) +type VarMap = Map.Map Name Value +data Context = Context {varMapStk :: [VarMap]} + +newtype IM a = IM {unIM :: StateT Context (ExceptT String IO) a} + deriving (Functor, Applicative, Monad, MonadError String, MonadState Context, MonadIO) + +type Builtin = [Value] -> IM Value + +newContext :: Context +newContext = Context [Map.fromList [(k, VBuiltin k) | k <- Map.keys builtins]] + +lookupVar :: Name -> IM (Maybe Value) +lookupVar name = liftM (msum . map (Map.lookup name)) (gets varMapStk) + +withScopeMap :: VarMap -> IM a -> IM a +withScopeMap vm act = do + modify $ \ctx -> ctx {varMapStk = vm : varMapStk ctx} + x <- act + modify $ \ctx -> ctx {varMapStk = tail (varMapStk ctx)} + return x + +builtins :: Map.Map String Builtin +builtins = Map.fromList + [("read", readBuiltin), + ("eval", evalBuiltin), + ("print", printBuiltin), + ("getline", getlineBuiltin), + ("loop", loopBuiltin), + ("do", doBuiltin), + ("if", ifBuiltin), + ("define", defineBuiltin), + ("lambda", lambdaBuiltin), + ("match", matchBuiltin), + ("+", plusBuiltin), + ("-", arithBuiltin "-" (-) 0), ("*", arithBuiltin "*" (*) 1), + ("/", arithBuiltin "/" div 1), ("%", arithBuiltin "%" mod 1), + ("<", compareBuiltin "<" (<) (<)), ("<=", compareBuiltin "<=" (<=) (<=)), + (">", compareBuiltin ">" (>) (>)), (">=", compareBuiltin ">=" (>=) (>=)), + ("=", compareBuiltin "=" (==) (==)), ("!=", neqBuiltin)] + +nArguments :: String -> Int -> Bool -> Builtin -> Builtin +nArguments name n doeval f args + | length args /= n = + throwError $ "Function '" ++ name ++ "' expects " ++ naStr n ++ " but got " ++ naStr (length args) + | doeval = mapM evalValue args >>= f + | otherwise = f args + where + naStr 0 = "no arguments" + naStr 1 = "1 argument" + naStr num = show num ++ " arguments" + + +readBuiltin :: Builtin +readBuiltin = nArguments "read" 1 True go + where + go :: Builtin + go [VString s] = either (throwError . show) return $ parseExpression s + go _ = throwError "Can only 'read' a string" + +evalBuiltin :: Builtin +evalBuiltin = nArguments "eval" 1 True (evalValue . head) + +printBuiltin :: Builtin +printBuiltin args = do + args' <- mapM evalValue args + liftIO (putStrLn $ intercalate " " $ map printShow args') >> return (VList []) + where + printShow :: Value -> String + printShow (VString s) = s + printShow v = show v + +loopBuiltin :: Builtin +loopBuiltin = nArguments "loop" 1 False $ forever . evalValue . head + +getlineBuiltin :: Builtin +getlineBuiltin = nArguments "getline" 0 True $ const (liftIO getLine >>= return . VString) + +doBuiltin :: Builtin +doBuiltin [] = return (VList []) +doBuiltin args = fmap last $ mapM evalValue args + +ifBuiltin :: Builtin +ifBuiltin [cond, v1] = evalValue cond >>= \c -> if truthy c then evalValue v1 else return (VList []) +ifBuiltin [cond, v1, v2] = evalValue cond >>= \c -> if truthy c then evalValue v1 else evalValue v2 +ifBuiltin a = throwError $ "Cannot pass " ++ show (length a) ++ " arguments to 'if'" + +defineBuiltin :: Builtin +defineBuiltin [VName name, val] = do + val' <- evalValue val + stk <- gets varMapStk + let go [vm] _ = [Map.insert name val' vm] + go (vm : vms) (False : prs) = vm : go vms prs + go (vm : vms) (True : _) = Map.insert name val' vm : vms + go _ _ = undefined + modify $ \ctx -> ctx {varMapStk = go stk (map (isJust . Map.lookup name) stk)} + return (VList []) +defineBuiltin [name@(VName _), VList args, val] + | Just names <- mapM fromVName args = defineBuiltin [name, VLambda names val] + | otherwise = throwError "Invalid 'define' syntax: invalid argument list" +defineBuiltin _ = throwError "Invalid 'define' syntax" + +lambdaBuiltin :: Builtin +lambdaBuiltin = nArguments "lambda" 2 False go + where + go :: Builtin + go [VList args, body] + | Just names <- mapM fromVName args = return (VLambda names body) + | otherwise = throwError "Invalid 'lambda' syntax: invalid argument list" + go _ = throwError "Invalid 'lambda' syntax" + +matchBuiltin :: Builtin +matchBuiltin [] = throwError "Invalid 'match' syntax: empty match" +matchBuiltin [_] = throwError "Invalid 'match' syntax: no arms" +matchBuiltin (subject : arms) = do + subject' <- evalValue subject + go subject' arms + where + go :: Value -> [Value] -> IM Value + go _ [def] = evalValue def + go subject' (VList [pat, value] : rest) = + case match pat subject' Map.empty of + Nothing -> go subject' rest + Just mp -> withScopeMap mp (evalValue value) + go _ _ = throwError "Invalid 'match' syntax: invalid arm" + +plusBuiltin :: Builtin +plusBuiltin [] = return (VNum 0) +plusBuiltin args + | Just nums <- mapM fromVNum args = return (VNum (sum nums)) + | Just strs <- mapM maybeStrings args = return (VString (concat strs)) + | otherwise = throwError "Invalid argument types to operator '+'" + +arithBuiltin :: String -> (Int -> Int -> Int) -> Int -> Builtin +arithBuiltin name oper idelem args = do + args' <- mapM evalValue args + case mapM fromVNum args' of + Just [] -> return (VNum idelem) + Just (hd : tl) -> return (VNum (foldl oper hd tl)) + _ -> throwError $ "Invalid argument types to operator '" ++ name ++ "'" + +neqBuiltin :: Builtin +neqBuiltin = fmap (\(VNum x) -> VNum (1 - x)) . compareBuiltin "!=" (==) (==) + +compareBuiltin :: String -> (Int -> Int -> Bool) -> (String -> String -> Bool) -> Builtin +compareBuiltin name oper soper args = do + args' <- mapM evalValue args + res <- case () of + _ | Just nums <- mapM fromVNum args' -> return $ all (uncurry oper) (zip nums (tail nums)) + | Just strs <- mapM maybeStrings args' -> return $ all (uncurry soper) (zip strs (tail strs)) + | otherwise -> throwError $ "Invalid argument types to operator '" ++ name ++ "'" + return $ VNum $ fromIntegral $ fromEnum res + + +truthy :: Value -> Bool +truthy (VNum n) = n /= 0 +truthy _ = True + +match :: Value -> Value -> VarMap -> Maybe VarMap +match (VList []) (VList []) mp = Just mp +match (VList [VEllipsis]) (VList _) mp = Just mp +match (VList (pat : pats)) (VList (val : vals)) mp = match pat val mp >>= match (VList pats) (VList vals) +match (VName name) val mp = case Map.lookup name mp of + Nothing -> Just (Map.insert name val mp) + Just val' | val == val' -> Just mp + | otherwise -> Nothing +match (VQuoted a) (VQuoted b) mp = match a b mp +match (VLambda _ _) _ _ = Nothing +match a b mp | a == b = Just mp + | otherwise = Nothing + +maybeStrings :: Value -> Maybe String +maybeStrings = liftM2 (<|>) fromVString (fmap show . fromVNum) + + +evalValue :: Value -> IM Value +-- evalValue v | traceShow v False = undefined +evalValue (VList exs) = listCall exs +evalValue e@(VNum _) = return e +evalValue e@(VString _) = return e +evalValue (VName name) = lookupVar name >>= \mval -> case mval of + Just value -> return value + Nothing -> throwError $ "Use of undefined variable '" ++ name ++ "'" +evalValue (VQuoted e) = return e +evalValue e@(VLambda _ _) = return e +evalValue e@(VBuiltin _) = return e +evalValue VEllipsis = throwError "Unexpected ellipsis in code" + +listCall :: [Value] -> IM Value +listCall [] = throwError "Cannot call ()" +listCall (hd : args) = evalValue hd >>= \hd' -> case hd' of + VLambda names body + | length names == length args -> do + args' <- mapM evalValue args + withScopeMap (Map.fromList (zip names args')) (evalValue body) + | otherwise -> throwError $ "Invalid number of arguments in call to lambda " ++ + "(" ++ show (length args) ++ " found, " ++ show (length names) ++ " needed)" + VBuiltin name -> + case Map.lookup name builtins of + Just f -> f args + Nothing -> throwError $ "Unknown builtin '" ++ name ++ "'" + v -> throwError $ "Cannot call value: " ++ show v + + +interpret :: Context -> Value -> IO (Either String (Value, Context)) +interpret ctx val = + runExceptT $ flip runStateT ctx $ unIM $ evalValue val + +interpretProgram :: Context -> Program -> IO (Either String Context) +interpretProgram rootctx (Program l) = go l rootctx + where + go :: [Value] -> Context -> IO (Either String Context) + go [] ctx = return (Right ctx) + go (val : vals) ctx = do + e <- interpret ctx val + case e of + Left err -> return (Left err) + Right (_, ctx') -> go vals ctx' diff --git a/main.hs b/main.hs new file mode 100644 index 0000000..51e5815 --- /dev/null +++ b/main.hs @@ -0,0 +1,84 @@ +module Main where + +import Data.Char +import Data.List +import System.Console.Readline +import System.Environment +import System.Exit +import System.IO.Error + +import Compiler +import Interpreter +import Optimiser +import Parser +import Stdlib +import VM + + +usage :: IO () +usage = do + progname <- getProgName + putStrLn $ "Usage: " ++ progname ++ " [filename.lisp]" + +repl :: Context -> IO () +repl ctx = do + mline <- fmap (fmap strip) (readline "> ") + case mline of + Nothing -> putStrLn "" + Just "" -> repl ctx + Just (';' : _) -> repl ctx + Just line -> do + addHistory line + case parseExpression line of + Right val -> do + ires <- interpret ctx val + case ires of + Right (retval, ctx') -> do + putStrLn $ "\x1B[36m" ++ show retval ++ "\x1B[0m" + repl ctx' + Left err -> do + putStrLn $ "\x1B[31;1mError: " ++ err ++ "\x1B[0m" + repl ctx + Left err -> do + putStrLn $ "\x1B[31;1mParse error:\n" ++ show err ++ "\x1B[0m" + repl ctx + +runFile :: String -> Context -> IO () +runFile fname ctx = do + source <- readFile fname + case parseProgram source of + Right ast -> do + res <- interpretProgram ctx ast + case res of + Right _ -> return () + Left err -> die $ "Error: " ++ err + Left err -> die $ "Parse error:\n" ++ show err + +strip :: String -> String +strip = dropWhileEnd isSpace . dropWhile isSpace + +handleEOFError :: IO () -> IO () +handleEOFError op = catchIOError op (\e -> if isEOFError e then putStrLn "" else ioError e) + +-- main :: IO () +-- main = do +-- clargs <- getArgs +-- Right ctx <- interpretProgram newContext stdlib +-- case clargs of +-- [] -> handleEOFError (repl ctx) +-- [fname] -> runFile fname ctx +-- _ -> usage >> exitFailure + +main :: IO () +main = do + clargs <- getArgs + source <- case clargs of + [] -> getContents + [fname] -> readFile fname + _ -> usage >> exitFailure + + prog <- either (die . show) return (parseProgram source) + irprog <- either die return (compileProgram prog) + let opt = optimise irprog + -- print opt + vmRun opt diff --git a/match.lisp b/match.lisp new file mode 100644 index 0000000..84503e0 --- /dev/null +++ b/match.lisp @@ -0,0 +1,15 @@ +(define f (x) + (match x + ((1 2 3) "1-2-3") + ((1) "just 1") + ((1 ...) "1 something") + ((n ...) "number something") + ('v "something quoted") + "dunno")) + +(print (f '(1 2 3))) +(print (f '(1))) +(print (f '(1 2 3 4 5))) +(print (f '(2 3 4 5))) +(print (f ''"kaas")) +(print (f "kaas")) diff --git a/optimiser.hs b/optimiser.hs new file mode 100644 index 0000000..c4c60cb --- /dev/null +++ b/optimiser.hs @@ -0,0 +1,24 @@ +module Optimiser(optimise) where + +import Data.List + +import Intermediate + + +optimise :: IRProgram -> IRProgram +optimise (IRProgram bbs gfds datas) = IRProgram (mergeBlocks bbs) gfds datas + +mergeBlocks :: [BB] -> [BB] +mergeBlocks [] = [] +mergeBlocks allbbs@(BB startb _ _ : _) = + uncurry (++) (partition ((== startb) . bidOf) (go allbbs (length allbbs))) + where + go [] _ = [] + go bbs 0 = bbs + go (bb@(BB bid inss term) : bbs) n = case partition (hasJumpTo bid . termOf) bbs of + ([], _) -> go (bbs ++ [bb]) (n - 1) + ([BB bid' inss' _], rest) -> go (BB bid' (inss' ++ inss) term : rest) n + _ -> go (bbs ++ [bb]) (n - 1) + + hasJumpTo bid (IJmp a) = a == bid + hasJumpTo _ _ = False diff --git a/parser.hs b/parser.hs new file mode 100644 index 0000000..4f9e965 --- /dev/null +++ b/parser.hs @@ -0,0 +1,77 @@ +module Parser(parseProgram, parseExpression) where + +import Control.Monad +import Data.Char +import Text.Parsec + +import AST + + +type Parser = Parsec String () + + +parseProgram :: String -> Either ParseError Program +parseProgram = parse pProgram "" + +pProgram :: Parser Program +pProgram = between pWhiteComment eof (liftM Program (many pValue)) + + +parseExpression :: String -> Either ParseError Value +parseExpression = parse pExpression "" + +pExpression :: Parser Value +pExpression = between pWhiteComment eof pValue + + +pValue :: Parser Value +pValue = pVEllipsis <|> pVList <|> pVNum <|> pVString <|> pVName <|> pVQuoted "value" + +pVList :: Parser Value +pVList = flip label "list" $ do + symbol "(" + exs <- many pValue + symbol ")" + return $ VList exs + +pVNum :: Parser Value +pVNum = liftM (VNum . read) (many1 digit) <* pWhiteComment "number" + +pVString :: Parser Value +pVString = flip label "string" $ do + void $ char '"' + s <- manyTill anyChar (symbol "\"") + return $ VString s + +pVName :: Parser Value +pVName = flip label "name" $ do + first <- satisfy isFirstNameChar + rest <- many (satisfy isNameChar) + pWhiteComment + return $ VName $ first : rest + where + isNameChar c = ((c >= '*' && c <= 'z') || c `elem` "!#$%&|~") && c /= ';' + isFirstNameChar c = isNameChar c && not (isDigit c) + +pVQuoted :: Parser Value +pVQuoted = char '\'' >> liftM VQuoted pValue "quoted value" + +pVEllipsis :: Parser Value +pVEllipsis = symbol "..." >> return VEllipsis "ellipsis" + + +symbol :: String -> Parser () +symbol s = try (string s) >> pWhiteComment + +pWhiteComment :: Parser () +pWhiteComment = do + pWhitespace + void $ many $ pComment >> pWhitespace + where + pWhitespace :: Parser () + pWhitespace = void (many space) "whitespace" + + pComment :: Parser () + pComment = flip label "comment" $ do + void $ char ';' + void (manyTill anyChar (void endOfLine <|> eof)) diff --git a/stdlib.hs b/stdlib.hs new file mode 100644 index 0000000..6f7334f --- /dev/null +++ b/stdlib.hs @@ -0,0 +1,16 @@ +module Stdlib(stdlib) where + +import AST +import Parser + + +stdlib :: Program +stdlib = fromRight $ parseProgram + "(define not (x) (= x 0))\n\ + \(define id (x) x)\n\ + \(define nil (x) (= x '()))\n\ + \(define . (f g) (lambda (x) (f (g x))))\n" + +fromRight :: Show a => Either a b -> b +fromRight (Right b) = b +fromRight (Left a) = error $ "fromRight on Left: " ++ show a diff --git a/test.hs b/test.hs new file mode 100644 index 0000000..a763524 --- /dev/null +++ b/test.hs @@ -0,0 +1,16 @@ +import System.Exit + +import Compiler +import Optimiser +import Parser +import VM + +main :: IO () +main = do + -- let Right p = parseProgram "(print (lambda (n) ((lambda (helper) (if (<= n 0) 0 (if (<= n 2) 1 (helper helper 1 1 (- n 2))))) (lambda (recur a b n) (if (<= n 0) b (recur recur b (+ a b) (- n 1)))))))" + -- let Right p = parseProgram "(print ((lambda (n) ((lambda (helper) (if (<= n 0) 0 (if (<= n 2) 1 (helper helper 1 1 (- n 2))))) (lambda (recur a b n) (if (<= n 0) b (recur recur b (+ a b) (- n 1)))))) 6))" + let Right p = parseProgram "(do (define f (lambda (n) (+ n 1))) (print (f 10)))" + prog <- either die return (compileProgram p) + let opt = optimise prog + print opt + vmRun opt diff --git a/vm.hs b/vm.hs new file mode 100644 index 0000000..10c084f --- /dev/null +++ b/vm.hs @@ -0,0 +1,99 @@ +module VM(vmRun) where + +import Control.Monad +import Data.List +import Data.Maybe +import qualified Data.Map.Strict as Map +import Data.Map.Strict ((!)) +import qualified System.IO.Error as IO +import Debug.Trace + +import AST +import Intermediate + + +data Info = Info (Map.Map Int BB) (Map.Map Name GlobFuncDef) + +type TempMap = Map.Map Int RunValue + +data State = State TempMap ([RunValue], [RunValue]) + +data RunValue = RClosure Name [RunValue] | RValue Value + deriving Show + +kErrorExit :: String +kErrorExit = "VM:exit" + +vmRun :: IRProgram -> IO () +vmRun (IRProgram bbs gfds []) = + let bbmap = Map.fromList [(bidOf bb, bb) | bb <- bbs] + info = Info bbmap gfds + state = State Map.empty ([], []) + in IO.catchIOError (void $ vmRunBB info state (head bbs)) vmErrorHandler +vmRun _ = undefined + +vmErrorHandler :: IOError -> IO () +vmErrorHandler e = + if IO.isUserError e && IO.ioeGetErrorString e == kErrorExit then return () else IO.ioError e + +vmRunBB :: Info -> State -> BB -> IO (RunValue, State) +vmRunBB info state (BB _ inss term) = do + state' <- foldM (vmRunInstr info) state inss + vmRunTerm info state' term + +vmRunInstr :: Info -> State -> Instruction -> IO State +-- vmRunInstr _ _ ins | traceShow ins False = undefined +vmRunInstr info@(Info bbmap gfds) state@(State tmap (args, closure)) (dest, instr) = case instr of + IAssign ref -> return (assignRef state dest (findRef tmap ref)) + IParam i -> return (assignRef state dest (args !! i)) + IClosure i -> return (assignRef state dest (closure !! i)) + IData _ -> undefined + ICallC cl as -> case findRef tmap cl of + RClosure clname clvals -> case Map.lookup clname gfds of + Just (GlobFuncDef b _ _) -> + let Just bb = Map.lookup b bbmap + in do + -- traceM ("call " ++ show cl ++ " with arguments " ++ show (map (findRef tmap) as)) + (rv, _) <- vmRunBB info (State tmap (map (findRef tmap) as, clvals)) bb + return (assignRef state dest rv) + Nothing -> liftM (assignRef state dest) (vmRunBuiltin clname (map (findRef tmap) as)) + _ -> error "VM: Cannot call non-closure object" + IAllocClo name clrefs -> return (assignRef state dest (RClosure name (map (findRef tmap) clrefs))) + IDiscard _ -> return state + +vmRunTerm :: Info -> State -> Terminator -> IO (RunValue, State) +vmRunTerm info@(Info bbmap gfds) state@(State tmap (args, closure)) term = case term of + IBr ref b1 b2 -> vmRunBB info state . (bbmap !) $ if truthy (findRef tmap ref) then b1 else b2 + IJmp b -> vmRunBB info state (bbmap ! b) + IRet ref -> return (findRef tmap ref, state) + IExit -> IO.ioError (IO.userError kErrorExit) + IUnknown -> undefined + +findRef :: TempMap -> Ref -> RunValue +findRef _ (RConst n) = RValue (VNum n) +findRef tmap (RTemp i) = fromJust (Map.lookup i tmap) +findRef _ (RSClo name) = RClosure name [] +findRef _ RNone = error "VM: None ref used" + +assignRef :: State -> Ref -> RunValue -> State +assignRef (State tmap pair) (RTemp i) rv = State (Map.insert i rv tmap) pair +assignRef _ ref _ = error $ "VM: Unexpected ref " ++ show ref ++ " assigned" + +vmRunBuiltin :: Name -> [RunValue] -> IO RunValue +-- vmRunBuiltin name args | trace (name ++ " " ++ show args) False = undefined +vmRunBuiltin "print" as = putStrLn (intercalate ", " (map printshow as)) >> return (RValue (VList [])) +vmRunBuiltin "<=" [RValue (VNum a), RValue (VNum b)] = return (RValue (VNum (fromEnum (a <= b)))) +vmRunBuiltin "+" [RValue (VNum a), RValue (VNum b)] = return (RValue (VNum (a + b))) +vmRunBuiltin "-" [RValue (VNum a), RValue (VNum b)] = return (RValue (VNum (a - b))) +vmRunBuiltin "car" [RValue (VList (a:_))] = return (RValue a) +vmRunBuiltin "cdr" [RValue (VList (_:a))] = return (RValue (VList a)) +vmRunBuiltin name args = error (name ++ " " ++ show args) + +printshow :: RunValue -> String +printshow (RValue (VString str)) = str +printshow (RValue value) = show value +printshow (RClosure _ _) = "[closure]" + +truthy :: RunValue -> Bool +truthy (RValue (VNum n)) = n /= 0 +truthy _ = True -- cgit v1.2.3-54-g00ecf