{-# LANGUAGE DeriveFunctor, GeneralizedNewtypeDeriving #-} module Compiler(IRProgram, compileProgram) where import Control.Monad.Except import Control.Monad.State.Strict import Data.List import qualified Data.Map.Strict as Map import qualified Data.Set as Set import Debug.Trace import AST import Intermediate data TaggedValue = TVList [TaggedValue] | TVNum Int | TVString String | TVName Name (Maybe Int) -- Nothing: unknown (external/builtin), Just n: defined n lambdas up (0 = current lambda arg) | TVQuoted Value | TVDefine Name TaggedValue | TVLambda [Name] TaggedValue [Name] -- (args) (body) (closure slot names) | TVLet (Name, TaggedValue) TaggedValue | TVEllipsis deriving Show -- also does some preprocessing, like parsing lambda's and defines analyseValue :: Value -> TaggedValue analyseValue = go (Map.empty, 0) where -- env: (name -> lambda depth at which name is defined, current lambda depth) go :: (Map.Map Name Int, Int) -> Value -> TaggedValue go env (VList [VName "define", VName name, VList args, body]) | Just names <- mapM fromVName args = go env (VList [VName "define", VName name, VLambda names body]) | otherwise = error "Invalid 'define' shorthand syntax: Invalid argument list" go env (VList [VName "define", VName name, value]) = TVDefine name (go env value) go _ (VList (VName "define" : _)) = error "Invalid 'define' syntax" go (envd, depth) (VList [VName "lambda", VList args, body]) | Just names <- mapM fromVName args = go (envd, depth + 1) (VLambda names body) | otherwise = error "Invalid 'lambda' syntax: Invalid argument list" go _ (VList (VName "lambda" : _)) = error "Invalid 'lambda' syntax" -- go env (VList [VName "let", VList args, body]) = -- case sequence [if length p == 2 then Just (p !! 0, p !! 1) else Nothing | VList p <- args] of -- Just pairs -> -- go env (VList (VList [VName "lambda", VList (map fst pairs), body] : map snd pairs)) -- Nothing -> -- error "Invalid 'let' syntax: Invalid variable list (not all pairs)" go env (VList [VName "let", VList args, body]) = case sequence [case p of { [VName n, v] -> Just (n, v); _ -> Nothing } | VList p <- args] of Just pairs -> go env (VLet pairs body) Nothing -> error "Invalid 'let' syntax: Invalid variable list (not all pairs)" go _ (VList (VName "let" : _)) = error "Invalid 'let' syntax: Invalid argument list" go env (VList values) = TVList (map (go env) values) go _ (VNum n) = TVNum n go _ (VString s) = TVString s go (envd, _) (VName name) = TVName name (Map.lookup name envd) go _ (VQuoted value) = TVQuoted value go (envd, depth) (VLambda args body) = let t = go (foldr (flip Map.insert depth) envd args, depth) body in TVLambda args t (Set.toList (collectEscapes depth t)) go (envd, depth) (VLet ((name, value) : args) body) = TVLet (name, go (envd, depth) value) (go (Map.insert name depth envd, depth) (VLet args body)) go env (VLet [] body) = go env body go _ (VBuiltin _) = undefined go _ VEllipsis = TVEllipsis collectEscapes :: Int -> TaggedValue -> Set.Set Name collectEscapes depth (TVList values) = Set.unions (map (collectEscapes depth) values) collectEscapes depth (TVName name (Just d)) = if d < depth then Set.singleton name else Set.empty collectEscapes depth (TVDefine _ value) = collectEscapes depth value collectEscapes depth (TVLambda _ body _) = collectEscapes depth body collectEscapes depth (TVLet (_, value) body) = collectEscapes depth value <> collectEscapes depth body collectEscapes _ (TVName _ Nothing) = Set.empty -- external/builtin names do not need to be captured collectEscapes _ (TVNum _) = Set.empty collectEscapes _ (TVString _) = Set.empty collectEscapes _ (TVQuoted _) = Set.empty collectEscapes _ TVEllipsis = Set.empty data CompState = CompState { csNextId :: Int , csBlocks :: Map.Map Int BB , csCurrent :: Int , csScopes :: [Map.Map Name ScopeItem] , csDefines :: Map.Map Name Ref , csBuiltins :: Map.Map Name () , csFunctions :: Map.Map Name GlobFuncDef , csDatas :: [Value] } deriving Show data ScopeItem = SIParam Int | SIClosure Int | SIGlobal | SIRef Ref deriving Show newtype CM a = CM {unCM :: StateT CompState (Except String) a} deriving (Functor, Applicative, Monad, MonadState CompState, MonadError String) -- TODO: extra info like number of arguments, dunno, might be useful builtinMap :: Map.Map Name () builtinMap = Map.fromList [ ("+", ()), ("-", ()), ("<=", ()), ("=", ()), ("print", ()), ("list", ()), ("car", ()), ("cdr", ()), ("null?", ()), ("sys-open-file", ()), ("sys-close-file", ()), ("sys-get-char", ()), ("sys-put-string", ())] bbId :: BB -> Int bbId (BB i _ _) = i initState :: CompState initState = CompState 0 Map.empty undefined [] Map.empty builtinMap Map.empty [] runCM :: CM a -> Either String a runCM act = runExcept $ evalStateT (unCM act) initState genId :: CM Int genId = state $ \s -> (csNextId s, s {csNextId = csNextId s + 1}) genTemp :: CM Ref genTemp = liftM RTemp genId newBlock :: CM Int newBlock = do i <- genId modify $ \s -> s {csBlocks = Map.insert i (BB i [] IUnknown) (csBlocks s)} return i switchBlock :: Int -> CM () switchBlock i = modify $ \s -> s {csCurrent = i} newBlockSwitch :: CM Int newBlockSwitch = do i <- newBlock switchBlock i return i rememberBlock :: CM a -> CM a rememberBlock act = do b <- gets csCurrent res <- act switchBlock b return res modifyBlock :: (BB -> BB) -> CM () modifyBlock f = do st <- get let current = csCurrent st Just bb = Map.lookup current (csBlocks st) put $ st {csBlocks = Map.insert current (f bb) (csBlocks st)} addIns :: Instruction -> CM () addIns ins = modifyBlock $ \(BB i inss term) -> BB i (inss ++ [ins]) term setTerm :: Terminator -> CM () setTerm term = modifyBlock $ \(BB i inss _) -> BB i inss term lookupVar :: Name -> CM (Either ScopeItem Ref) lookupVar name = gets csScopes >>= \scopes -> case msum (map (Map.lookup name) scopes) of Just si -> return (Left si) Nothing -> gets csDefines >>= \defines -> case Map.lookup name defines of Just ref -> return (Right ref) Nothing -> return (Left SIGlobal) dataTableAdd :: Value -> CM Int dataTableAdd v = state $ \ctx -> (length (csDatas ctx), ctx {csDatas = csDatas ctx ++ [v]}) functionAdd :: Name -> GlobFuncDef -> CM () functionAdd name gfd = modify $ \s -> s {csFunctions = Map.insert name gfd (csFunctions s)} defineAdd :: Name -> Ref -> CM () defineAdd name ref = modify $ \s -> s {csDefines = Map.insert name ref (csDefines s)} withScope :: Map.Map Name ScopeItem -> CM a -> CM a withScope sc act = do modify $ \s -> s {csScopes = sc : csScopes s} res <- act modify $ \s -> s {csScopes = tail (csScopes s)} return res compileProgram :: Program -> Either String IRProgram compileProgram (Program values) = runCM $ do bstart <- newBlockSwitch forM_ values $ \value -> do bnext <- newBlock ref <- genTValue (analyseValue value) bnext switchBlock bnext addIns (RNone, IDiscard ref) setTerm IExit (bbs, otherbbs) <- liftM (partition ((== bstart) . bbId) . Map.elems) (gets csBlocks) let firstbb = case bbs of [bb] -> bb _ -> error "Multiple bb's with the same ID!" funcs <- gets csFunctions datas <- gets csDatas return (IRProgram (firstbb : otherbbs) funcs datas) genTValue :: TaggedValue -> Int -> CM Ref genTValue (TVList []) _ = throwError "Empty call" genTValue (TVList (TVName "do" _ : stmts)) nextnext = do forM_ (init stmts) $ \stmt -> do b <- newBlock r <- genTValue stmt b switchBlock b addIns (RNone, IDiscard r) genTValue (last stmts) nextnext genTValue (TVList [TVName "if" _, cond, val1, val2]) nextnext = do b1 <- newBlock bthen <- newBlock belse <- newBlock bthen' <- newBlock belse' <- newBlock condref <- genTValue cond b1 switchBlock b1 setTerm $ IBr condref bthen belse resref <- genTemp switchBlock bthen thenref <- genTValue val1 bthen' switchBlock bthen' addIns (resref, IAssign thenref) setTerm $ IJmp nextnext switchBlock belse elseref <- genTValue val2 belse' switchBlock belse' addIns (resref, IAssign elseref) setTerm $ IJmp nextnext return resref genTValue (TVList (TVName "if" _ : _)) _ = throwError "Invalid 'if' syntax" genTValue (TVList (funcexpr : args)) nextnext = do refs <- forM args $ \value -> do bnext <- newBlock ref <- genTValue value bnext switchBlock bnext return ref b <- newBlock funcref <- genTValue funcexpr b switchBlock b resref <- genTemp addIns (resref, ICallC funcref refs) setTerm $ IJmp nextnext return resref genTValue (TVNum n) nextnext = do setTerm $ IJmp nextnext return (RConst n) genTValue (TVString s) nextnext = do i <- dataTableAdd (VString s) r <- genTemp addIns (r, IData i) setTerm $ IJmp nextnext return r genTValue (TVQuoted v) nextnext = do i <- dataTableAdd v r <- genTemp addIns (r, IData i) setTerm $ IJmp nextnext return r genTValue (TVDefine name value) nextnext = do dref <- genTemp defineAdd name dref vref <- genTValue value nextnext -- traceM $ "Defining '" ++ name ++ "', ref " ++ show dref ++ ", with value " ++ show vref addIns (dref, IAssign vref) return RNone genTValue (TVLambda args body closure) nextnext = do startb <- rememberBlock $ withScope (Map.fromList (zip args (map SIParam [0..]) ++ zip closure (map SIClosure [0..]))) $ do b <- newBlockSwitch b2 <- newBlock ref <- genTValue body b2 switchBlock b2 setTerm $ IRet ref return b uid <- genId let uname = show uid ++ "L" -- starts with digit, so cannot clash with user-defined name functionAdd uname (GlobFuncDef startb (length args) closure) resref <- case closure of [] -> return (RSClo uname) _ -> do refs <- forM closure $ \cname -> do b <- newBlock r <- genTValue (TVName cname undefined) b switchBlock b return r r <- genTemp addIns (r, IAllocClo uname refs) return r setTerm $ IJmp nextnext return resref genTValue (TVLet (name, value) body) nextnext = do b <- newBlock r <- genTValue value b switchBlock b withScope (Map.singleton name (SIRef r)) (genTValue body nextnext) genTValue (TVName name _) nextnext = do r <- genTemp lookupVar name >>= \si -> case si of Right ref -> addIns (r, IAssign ref) Left (SIRef ref) -> addIns (r, IAssign ref) Left (SIParam n) -> addIns (r, IParam n) Left (SIClosure n) -> addIns (r, IClosure n) Left SIGlobal -> do funcs <- gets csFunctions builtins <- gets csBuiltins case (Map.lookup name funcs, Map.lookup name builtins) of (Just (GlobFuncDef _ _ []), _) -> addIns (r, IAssign (RSClo name)) (Just (GlobFuncDef _ _ cs), _) -> do refs <- foldM (\refs' cname -> do b <- newBlock r' <- genTValue (TVName cname undefined) b switchBlock b return (r' : refs')) [] cs addIns (r, IAllocClo name refs) (_, Just ()) -> addIns (r, IAssign (RSClo name)) _ -> throwError $ "Use of undefined name \"" ++ name ++ "\"" setTerm $ IJmp nextnext return r genTValue TVEllipsis _ = throwError "Ellipses not supported in compiler"