{-# LANGUAGE DeriveFunctor, GeneralizedNewtypeDeriving, LambdaCase #-} module Compiler(IRProgram, compileProgram) where import Control.Monad.Except import Control.Monad.State.Strict import Data.List import qualified Data.Map.Strict as Map import qualified Data.Set as Set -- import Debug.Trace import AST import Intermediate data TaggedValue = TVList [TaggedValue] | TVNum Int | TVString String | TVName Name (Maybe Int) -- Nothing: unknown (external/builtin), Just n: defined n lambdas up (0 = current lambda arg) | TVQuoted Value | TVDeclare Name | TVDefine Name TaggedValue | TVLambda (Maybe Name) [Name] TaggedValue [Name] -- (recname) (args) (body) (closure slot names) | TVLet (Name, TaggedValue) TaggedValue | TVEllipsis deriving Show analyseValue :: Value -> TaggedValue analyseValue = go (Map.empty, 0) where -- env: (name -> lambda depth at which name is defined, current lambda depth) go :: (Map.Map Name Int, Int) -> Value -> TaggedValue go env@(envd, depth) = \case VList values -> TVList (map (go env) values) VNum n -> TVNum n VString s -> TVString s VName name -> TVName name (Map.lookup name envd) VQuoted value -> TVQuoted value VDeclare name -> TVDeclare name VDefine name value -> TVDefine name (go env value) VLambda args body -> let depth' = depth + 1 envd' = foldr (flip Map.insert depth') envd args t = go (envd', depth') body in TVLambda Nothing args t (Set.toList (collectEscapes depth' t)) VLambdaRec recname args body -> let depth' = depth + 1 envd' = Map.insert recname depth' (foldr (flip Map.insert depth') envd args) t = go (envd', depth') body in TVLambda (Just recname) args t (Set.toList (collectEscapes depth' t)) VLet ((name, value) : args) body -> TVLet (name, go (envd, depth) value) (go (Map.insert name depth envd, depth) (VLet args body)) VLet [] body -> go env body VBuiltin _ -> undefined VEllipsis -> TVEllipsis collectEscapes :: Int -> TaggedValue -> Set.Set Name collectEscapes depth (TVList values) = Set.unions (map (collectEscapes depth) values) collectEscapes depth (TVName name (Just d)) = if d < depth then Set.singleton name else Set.empty collectEscapes depth (TVDefine _ value) = collectEscapes depth value collectEscapes depth (TVLambda _ _ body _) = collectEscapes depth body collectEscapes depth (TVLet (_, value) body) = collectEscapes depth value <> collectEscapes depth body collectEscapes _ (TVName _ Nothing) = Set.empty -- external/builtin names do not need to be captured collectEscapes _ (TVNum _) = Set.empty collectEscapes _ (TVString _) = Set.empty collectEscapes _ (TVDeclare _) = Set.empty collectEscapes _ (TVQuoted _) = Set.empty collectEscapes _ TVEllipsis = Set.empty data CompState = CompState { csNextId :: Int , csBlocks :: Map.Map Int BB , csCurrent :: Int , csScopes :: [Map.Map Name ScopeItem] , csDeclares :: Map.Map Name Ref , csDefines :: Map.Map Name Ref , csBuiltins :: Set.Set Name , csFunctions :: Map.Map Name GlobFuncDef , csDatas :: [Value] } deriving Show data ScopeItem = SIParam Int | SIClosure Int | SIGlobal | SIRef Ref deriving Show newtype CM a = CM {unCM :: StateT CompState (Except String) a} deriving (Functor, Applicative, Monad, MonadState CompState, MonadError String) -- TODO: extra info like number of arguments, dunno, might be useful builtinSet :: Set.Set Name builtinSet = Set.fromList [ "+", "-", "*", "/", "mod", "<=", "=", "print", "list", "cons", "car", "cdr", "null?", "sys-open-file", "sys-close-file", "sys-get-char", "sys-put-string", "sys-flush", "sys-stdin", "sys-stdout", "sys-stderr", "length", "substr", "ord", "chr", "concat", "type-list?", "type-number?", "type-string?", "type-quoted?", "type-symbol?", "error"] bbId :: BB -> Int bbId (BB i _ _) = i initState :: CompState initState = CompState 0 Map.empty undefined [] Map.empty Map.empty builtinSet Map.empty [] runCM :: CM a -> Either String a runCM act = runExcept $ evalStateT (unCM act) initState genId :: CM Int genId = state $ \s -> (csNextId s, s {csNextId = csNextId s + 1}) genTemp :: CM Ref genTemp = liftM RTemp genId newBlock :: CM Int newBlock = do i <- genId modify $ \s -> s {csBlocks = Map.insert i (BB i [] IUnknown) (csBlocks s)} return i switchBlock :: Int -> CM () switchBlock i = modify $ \s -> s {csCurrent = i} newBlockSwitch :: CM Int newBlockSwitch = do i <- newBlock switchBlock i return i rememberBlock :: CM a -> CM a rememberBlock act = do b <- gets csCurrent res <- act switchBlock b return res modifyBlock :: (BB -> BB) -> CM () modifyBlock f = do st <- get let current = csCurrent st Just bb = Map.lookup current (csBlocks st) put $ st {csBlocks = Map.insert current (f bb) (csBlocks st)} addIns :: Instruction -> CM () addIns ins = modifyBlock $ \(BB i inss term) -> BB i (inss ++ [ins]) term setTerm :: Terminator -> CM () setTerm term = modifyBlock $ \(BB i inss _) -> BB i inss term lookupVar :: Name -> CM (Either ScopeItem Ref) lookupVar name = gets csScopes >>= \scopes -> case msum (map (Map.lookup name) scopes) of Just si -> return (Left si) Nothing -> gets csDefines >>= \defines -> case Map.lookup name defines of Just ref -> return (Right ref) Nothing -> gets csDeclares >>= \declares -> case Map.lookup name declares of Just ref -> return (Right ref) Nothing -> return (Left SIGlobal) dataTableAdd :: Value -> CM Int dataTableAdd v = state $ \ctx -> (length (csDatas ctx), ctx {csDatas = csDatas ctx ++ [v]}) functionAdd :: Name -> GlobFuncDef -> CM () functionAdd name gfd = modify $ \s -> s {csFunctions = Map.insert name gfd (csFunctions s)} declareAdd :: Name -> Ref -> CM () declareAdd name ref = modify $ \s -> s {csDeclares = Map.insert name ref (csDeclares s)} declarePop :: Name -> CM (Maybe Ref) declarePop name = state $ \s -> case Map.lookup name (csDeclares s) of Nothing -> (Nothing, s) Just ref -> (Just ref, s { csDeclares = Map.delete name (csDeclares s) }) defineAdd :: Name -> Ref -> CM () defineAdd name ref = modify $ \s -> s {csDefines = Map.insert name ref (csDefines s)} withScope :: Map.Map Name ScopeItem -> CM a -> CM a withScope sc act = do modify $ \s -> s {csScopes = sc : csScopes s} res <- act modify $ \s -> s {csScopes = tail (csScopes s)} return res compileProgram :: Program -> Either String IRProgram compileProgram (Program values) = runCM $ do bstart <- newBlockSwitch forM_ values $ \value -> do bnext <- newBlock let tvalue = analyseValue value ref <- genTValue tvalue bnext switchBlock bnext addIns (RNone, IDiscard ref) setTerm IExit (bbs, otherbbs) <- liftM (partition ((== bstart) . bbId) . Map.elems) (gets csBlocks) let firstbb = case bbs of [bb] -> bb _ -> error "Multiple bb's with the same ID!" funcs <- gets csFunctions datas <- gets csDatas return (IRProgram (firstbb : otherbbs) funcs datas) genTValue :: TaggedValue -> Int -> CM Ref genTValue (TVList []) _ = throwError "Empty call" genTValue (TVList [TVName "do" _]) _ = throwError "Empty 'do'" genTValue (TVList (TVName "do" _ : stmts)) nextnext = do forM_ (init stmts) $ \stmt -> do b <- newBlock r <- genTValue stmt b switchBlock b addIns (RNone, IDiscard r) genTValue (last stmts) nextnext genTValue (TVList [TVName "if" _, cond, val1, val2]) nextnext = do b1 <- newBlock bthen <- newBlock belse <- newBlock bthen' <- newBlock belse' <- newBlock condref <- genTValue cond b1 switchBlock b1 setTerm $ IBr condref bthen belse resref <- genTemp switchBlock bthen thenref <- genTValue val1 bthen' switchBlock bthen' addIns (resref, IAssign thenref) setTerm $ IJmp nextnext switchBlock belse elseref <- genTValue val2 belse' switchBlock belse' addIns (resref, IAssign elseref) setTerm $ IJmp nextnext return resref genTValue (TVList (TVName "if" _ : _)) _ = throwError "Invalid 'if' syntax" genTValue (TVList [TVName "exit" _]) _ = do setTerm IExit return RNone genTValue (TVList (funcexpr : args)) nextnext = do refs <- forM args $ \value -> do bnext <- newBlock ref <- genTValue value bnext switchBlock bnext return ref b <- newBlock funcref <- genTValue funcexpr b switchBlock b resref <- genTemp addIns (resref, ICallC funcref (funcref : refs)) setTerm $ IJmp nextnext return resref genTValue (TVNum n) nextnext = do setTerm $ IJmp nextnext return (RConst n) genTValue (TVString s) nextnext = do i <- dataTableAdd (VString s) r <- genTemp addIns (r, IData i) setTerm $ IJmp nextnext return r genTValue (TVQuoted v) nextnext = do i <- dataTableAdd v r <- genTemp addIns (r, IData i) setTerm $ IJmp nextnext return r genTValue (TVDeclare name) nextnext = do dref <- genTemp declareAdd name dref setTerm $ IJmp nextnext return RNone genTValue (TVDefine name value) nextnext = do dref <- declarePop name >>= maybe genTemp return defineAdd name dref b <- newBlock vref <- genTValue value b switchBlock b addIns (dref, IAssign vref) setTerm $ IJmp nextnext return RNone genTValue (TVLambda mrecname args body closure) nextnext = do let bindpairs = maybe [] (\n -> [(n, SIParam 0)]) mrecname ++ zip args (map SIParam [1..]) ++ zip closure (map SIClosure [0..]) startb <- rememberBlock $ withScope (Map.fromList bindpairs) $ do b <- newBlockSwitch b2 <- newBlock ref <- genTValue body b2 switchBlock b2 setTerm $ IRet ref return b uid <- genId let uname = show uid ++ "L" -- starts with digit, so cannot clash with user-defined name functionAdd uname (GlobFuncDef startb (length args) closure) resref <- case closure of [] -> return (RSClo uname) _ -> do refs <- forM closure $ \cname -> do b <- newBlock r <- genTValue (TVName cname undefined) b switchBlock b return r r <- genTemp addIns (r, IAllocClo uname refs) return r setTerm $ IJmp nextnext return resref genTValue (TVLet (name, value) body) nextnext = do b <- newBlock r <- genTValue value b switchBlock b withScope (Map.singleton name (SIRef r)) (genTValue body nextnext) genTValue (TVName name _) nextnext = do r <- genTemp lookupVar name >>= \si -> case si of Right ref -> addIns (r, IAssign ref) Left (SIRef ref) -> addIns (r, IAssign ref) Left (SIParam n) -> addIns (r, IParam n) Left (SIClosure n) -> addIns (r, IClosure n) Left SIGlobal -> do funcs <- gets csFunctions builtins <- gets csBuiltins case (Map.lookup name funcs, name `Set.member` builtins) of (Just (GlobFuncDef _ _ []), _) -> addIns (r, IAssign (RSClo name)) (Just (GlobFuncDef _ _ cs), _) -> do refs <- foldM (\refs' cname -> do b <- newBlock r' <- genTValue (TVName cname undefined) b switchBlock b return (r' : refs')) [] cs addIns (r, IAllocClo name refs) (_, True) -> addIns (r, IAssign (RSClo name)) _ -> throwError $ "Use of undefined name \"" ++ name ++ "\"" setTerm $ IJmp nextnext return r genTValue TVEllipsis _ = throwError "Ellipses not supported in compiler"