summaryrefslogtreecommitdiff
path: root/Compiler.hs
diff options
context:
space:
mode:
Diffstat (limited to 'Compiler.hs')
-rw-r--r--Compiler.hs286
1 files changed, 286 insertions, 0 deletions
diff --git a/Compiler.hs b/Compiler.hs
new file mode 100644
index 0000000..2e3b80b
--- /dev/null
+++ b/Compiler.hs
@@ -0,0 +1,286 @@
+{-# LANGUAGE DeriveFunctor, GeneralizedNewtypeDeriving #-}
+module Compiler(IRProgram, compileProgram) where
+
+import Control.Monad.Except
+import Control.Monad.State.Strict
+import Data.List
+import qualified Data.Map.Strict as Map
+import qualified Data.Set as Set
+import Debug.Trace
+
+import AST
+import Intermediate
+
+
+data TaggedValue
+ = TVList [TaggedValue]
+ | TVNum Int
+ | TVString String
+ | TVName Name (Maybe Int) -- Nothing: unknown, Just n: defined n lambdas up (0 = current lambda arg)
+ | TVQuoted Value
+ | TVDefine Name TaggedValue
+ | TVLambda [Name] TaggedValue [Name] -- (args) (body) (closure slot names)
+ | TVEllipsis
+ deriving Show
+
+-- also does some preprocessing, like parsing lambda's and defines
+analyseValue :: Value -> TaggedValue
+analyseValue = go []
+ where
+ go :: [Set.Set Name] -> Value -> TaggedValue
+ go scopes (VList [VName "define", VName name, VList args, body])
+ | Just names <- mapM fromVName args = go scopes (VList [VName "define", VName name, VLambda names body])
+ | otherwise = error "Invalid 'define' shorthand syntax: Invalid argument list"
+ go scopes (VList [VName "define", VName name, value]) = TVDefine name (go scopes value)
+ go scopes (VList [VName "lambda", VList args, body])
+ | Just names <- mapM fromVName args = go scopes (VLambda names body)
+ | otherwise = error "Invalid 'lambda' syntax: Invalid argument list"
+ go _ (VList (VName "lambda" : _)) = error "Invalid 'lambda' syntax"
+ go scopes (VList values) = TVList (map (go scopes) values)
+ go _ (VNum n) = TVNum n
+ go _ (VString s) = TVString s
+ go scopes (VName name) = TVName name (findIndex id (map (Set.member name) scopes))
+ go _ (VQuoted value) = TVQuoted value
+ go scopes (VLambda args body) =
+ let t = go (Set.fromList args : scopes) body
+ in TVLambda args t (Set.toList (collectEscapes 0 t))
+ go _ (VBuiltin _) = undefined
+ go _ VEllipsis = TVEllipsis
+
+ collectEscapes :: Int -> TaggedValue -> Set.Set Name
+ collectEscapes limit (TVList values) = Set.unions (map (collectEscapes limit) values)
+ collectEscapes limit (TVName name (Just n)) | n > limit = Set.singleton name
+ collectEscapes limit (TVLambda _ body _) = collectEscapes (limit + 1) body
+ collectEscapes _ _ = Set.empty
+
+
+data CompState = CompState
+ { csNextId :: Int
+ , csBlocks :: Map.Map Int BB
+ , csCurrent :: Int
+ , csScopes :: [Map.Map Name ScopeItem]
+ , csDefines :: Map.Map Name Ref
+ , csBuiltins :: Map.Map Name ()
+ , csFunctions :: Map.Map Name GlobFuncDef
+ , csDatas :: [Value] }
+ deriving Show
+
+data ScopeItem = SIParam Int | SIClosure Int | SIGlobal
+ deriving Show
+
+newtype CM a = CM {unCM :: StateT CompState (Except String) a}
+ deriving (Functor, Applicative, Monad, MonadState CompState, MonadError String)
+
+-- TODO: extra info like number of arguments, dunno, might be useful
+builtinMap :: Map.Map Name ()
+builtinMap = Map.fromList [
+ ("+", ()), ("-", ()), ("<=", ()), ("print", ()),
+ ("list", ()), ("car", ()), ("cdr", ())]
+
+bbId :: BB -> Int
+bbId (BB i _ _) = i
+
+initState :: CompState
+initState = CompState 0 Map.empty undefined [] Map.empty builtinMap Map.empty []
+
+runCM :: CM a -> Either String a
+runCM act = runExcept $ evalStateT (unCM act) initState
+
+genId :: CM Int
+genId = state $ \s -> (csNextId s, s {csNextId = csNextId s + 1})
+
+genTemp :: CM Ref
+genTemp = liftM RTemp genId
+
+newBlock :: CM Int
+newBlock = do
+ i <- genId
+ modify $ \s -> s {csBlocks = Map.insert i (BB i [] IUnknown) (csBlocks s)}
+ return i
+
+switchBlock :: Int -> CM ()
+switchBlock i = modify $ \s -> s {csCurrent = i}
+
+newBlockSwitch :: CM Int
+newBlockSwitch = do
+ i <- newBlock
+ switchBlock i
+ return i
+
+rememberBlock :: CM a -> CM a
+rememberBlock act = do
+ b <- gets csCurrent
+ res <- act
+ switchBlock b
+ return res
+
+modifyBlock :: (BB -> BB) -> CM ()
+modifyBlock f = do
+ st <- get
+ let current = csCurrent st
+ Just bb = Map.lookup current (csBlocks st)
+ put $ st {csBlocks = Map.insert current (f bb) (csBlocks st)}
+
+addIns :: Instruction -> CM ()
+addIns ins = modifyBlock $ \(BB i inss term) -> BB i (inss ++ [ins]) term
+
+setTerm :: Terminator -> CM ()
+setTerm term = modifyBlock $ \(BB i inss _) -> BB i inss term
+
+lookupVar :: Name -> CM (Either ScopeItem Ref)
+lookupVar name = gets csScopes >>= \scopes -> case msum (map (Map.lookup name) scopes) of
+ Just si -> return (Left si)
+ Nothing -> gets csDefines >>= \defines -> case Map.lookup name defines of
+ Just ref -> return (Right ref)
+ Nothing -> return (Left SIGlobal)
+
+dataTableAdd :: Value -> CM Int
+dataTableAdd v = state $ \ctx -> (length (csDatas ctx), ctx {csDatas = csDatas ctx ++ [v]})
+
+functionAdd :: Name -> GlobFuncDef -> CM ()
+functionAdd name gfd = modify $ \s -> s {csFunctions = Map.insert name gfd (csFunctions s)}
+
+defineAdd :: Name -> Ref -> CM ()
+defineAdd name ref = modify $ \s -> s {csDefines = Map.insert name ref (csDefines s)}
+
+withScope :: Map.Map Name ScopeItem -> CM a -> CM a
+withScope sc act = do
+ modify $ \s -> s {csScopes = sc : csScopes s}
+ res <- act
+ modify $ \s -> s {csScopes = tail (csScopes s)}
+ return res
+
+
+compileProgram :: Program -> Either String IRProgram
+compileProgram (Program values) = runCM $ do
+ bstart <- newBlockSwitch
+ forM_ values $ \value -> do
+ bnext <- newBlock
+ ref <- genTValue (analyseValue value) bnext
+ switchBlock bnext
+ addIns (RNone, IDiscard ref)
+ setTerm IExit
+ ([firstbb], otherbbs) <- liftM (partition ((== bstart) . bbId) . Map.elems) (gets csBlocks)
+ funcs <- gets csFunctions
+ datas <- gets csDatas
+ return (IRProgram (firstbb : otherbbs) funcs datas)
+
+genTValue :: TaggedValue -> Int -> CM Ref
+genTValue (TVList []) _ = throwError "Empty call"
+genTValue (TVList (TVName "do" _ : stmts)) nextnext = do
+ forM_ (init stmts) $ \stmt -> do
+ b <- newBlock
+ r <- genTValue stmt b
+ switchBlock b
+ addIns (RNone, IDiscard r)
+ genTValue (last stmts) nextnext
+genTValue (TVList [TVName "if" _, cond, val1, val2]) nextnext = do
+ b1 <- newBlock
+ bthen <- newBlock
+ belse <- newBlock
+ bthen' <- newBlock
+ belse' <- newBlock
+
+ condref <- genTValue cond b1
+ switchBlock b1
+ setTerm $ IBr condref bthen belse
+ resref <- genTemp
+
+ switchBlock bthen
+ thenref <- genTValue val1 bthen'
+ switchBlock bthen'
+ addIns (resref, IAssign thenref)
+ setTerm $ IJmp nextnext
+
+ switchBlock belse
+ elseref <- genTValue val2 belse'
+ switchBlock belse'
+ addIns (resref, IAssign elseref)
+ setTerm $ IJmp nextnext
+
+ return resref
+genTValue (TVList (TVName "if" _ : _)) _ = throwError "Invalid 'if' syntax"
+genTValue (TVList (funcexpr : args)) nextnext = do
+ refs <- forM args $ \value -> do
+ bnext <- newBlock
+ ref <- genTValue value bnext
+ switchBlock bnext
+ return ref
+ b <- newBlock
+ funcref <- genTValue funcexpr b
+ switchBlock b
+ resref <- genTemp
+ addIns (resref, ICallC funcref refs)
+ setTerm $ IJmp nextnext
+ return resref
+genTValue (TVNum n) nextnext = do
+ setTerm $ IJmp nextnext
+ return (RConst n)
+genTValue (TVString s) nextnext = do
+ i <- dataTableAdd (VString s)
+ r <- genTemp
+ addIns (r, IData i)
+ setTerm $ IJmp nextnext
+ return r
+genTValue (TVQuoted v) nextnext = do
+ i <- dataTableAdd v
+ r <- genTemp
+ addIns (r, IData i)
+ setTerm $ IJmp nextnext
+ return r
+genTValue (TVDefine name value) nextnext = do
+ dref <- genTemp
+ defineAdd name dref
+ vref <- genTValue value nextnext
+ -- traceM $ "Defining '" ++ name ++ "', ref " ++ show dref ++ ", with value " ++ show vref
+ addIns (dref, IAssign vref)
+ return RNone
+genTValue (TVLambda args body closure) nextnext = do
+ startb <- rememberBlock $
+ withScope (Map.fromList (zip args (map SIParam [0..]) ++ zip closure (map SIClosure [0..]))) $ do
+ b <- newBlockSwitch
+ b2 <- newBlock
+ ref <- genTValue body b2
+ switchBlock b2
+ setTerm $ IRet ref
+ return b
+ uid <- genId
+ let uname = show uid ++ "L" -- starts with digit, so cannot clash with user-defined name
+ functionAdd uname (GlobFuncDef startb (length args) closure)
+ resref <- case closure of
+ [] -> return (RSClo uname)
+ _ -> do
+ refs <- forM closure $ \cname -> do
+ b <- newBlock
+ r <- genTValue (TVName cname undefined) b
+ switchBlock b
+ return r
+ r <- genTemp
+ addIns (r, IAllocClo uname refs)
+ return r
+ setTerm $ IJmp nextnext
+ return resref
+genTValue (TVName name _) nextnext = do
+ r <- genTemp
+ lookupVar name >>= \si -> case si of
+ Right ref -> addIns (r, IAssign ref)
+ Left (SIParam n) -> addIns (r, IParam n)
+ Left (SIClosure n) -> addIns (r, IClosure n)
+ Left SIGlobal -> do
+ funcs <- gets csFunctions
+ builtins <- gets csBuiltins
+ case (Map.lookup name funcs, Map.lookup name builtins) of
+ (Just (GlobFuncDef _ _ []), _) -> addIns (r, IAssign (RSClo name))
+ (Just (GlobFuncDef _ _ cs), _) -> do
+ refs <- foldM (\refs' cname -> do
+ b <- newBlock
+ r' <- genTValue (TVName cname undefined) b
+ switchBlock b
+ return (r' : refs'))
+ [] cs
+ addIns (r, IAllocClo name refs)
+ (_, Just ()) -> addIns (r, IAssign (RSClo name))
+ _ -> throwError $ "Use of undefined name \"" ++ name ++ "\""
+ setTerm $ IJmp nextnext
+ return r
+genTValue TVEllipsis _ = throwError "Ellipses not supported in compiler"