{-# LANGUAGE DeriveFunctor, GeneralizedNewtypeDeriving #-} module Compiler(IRProgram, compileProgram) where import Control.Monad.Except import Control.Monad.State.Strict import Data.List import qualified Data.Map.Strict as Map import qualified Data.Set as Set import Debug.Trace import AST import Intermediate data TaggedValue = TVList [TaggedValue] | TVNum Int | TVString String | TVName Name (Maybe Int) -- Nothing: unknown, Just n: defined n lambdas up (0 = current lambda arg) | TVQuoted Value | TVDefine Name TaggedValue | TVLambda [Name] TaggedValue [Name] -- (args) (body) (closure slot names) | TVEllipsis deriving Show -- also does some preprocessing, like parsing lambda's and defines analyseValue :: Value -> TaggedValue analyseValue = go [] where go :: [Set.Set Name] -> Value -> TaggedValue go scopes (VList [VName "define", VName name, VList args, body]) | Just names <- mapM fromVName args = go scopes (VList [VName "define", VName name, VLambda names body]) | otherwise = error "Invalid 'define' shorthand syntax: Invalid argument list" go scopes (VList [VName "define", VName name, value]) = TVDefine name (go scopes value) go scopes (VList [VName "lambda", VList args, body]) | Just names <- mapM fromVName args = go scopes (VLambda names body) | otherwise = error "Invalid 'lambda' syntax: Invalid argument list" go _ (VList (VName "lambda" : _)) = error "Invalid 'lambda' syntax" go scopes (VList values) = TVList (map (go scopes) values) go _ (VNum n) = TVNum n go _ (VString s) = TVString s go scopes (VName name) = TVName name (findIndex id (map (Set.member name) scopes)) go _ (VQuoted value) = TVQuoted value go scopes (VLambda args body) = let t = go (Set.fromList args : scopes) body in TVLambda args t (Set.toList (collectEscapes 0 t)) go _ (VBuiltin _) = undefined go _ VEllipsis = TVEllipsis collectEscapes :: Int -> TaggedValue -> Set.Set Name collectEscapes limit (TVList values) = Set.unions (map (collectEscapes limit) values) collectEscapes limit (TVName name (Just n)) | n > limit = Set.singleton name collectEscapes limit (TVLambda _ body _) = collectEscapes (limit + 1) body collectEscapes _ _ = Set.empty data CompState = CompState { csNextId :: Int , csBlocks :: Map.Map Int BB , csCurrent :: Int , csScopes :: [Map.Map Name ScopeItem] , csDefines :: Map.Map Name Ref , csBuiltins :: Map.Map Name () , csFunctions :: Map.Map Name GlobFuncDef , csDatas :: [Value] } deriving Show data ScopeItem = SIParam Int | SIClosure Int | SIGlobal deriving Show newtype CM a = CM {unCM :: StateT CompState (Except String) a} deriving (Functor, Applicative, Monad, MonadState CompState, MonadError String) -- TODO: extra info like number of arguments, dunno, might be useful builtinMap :: Map.Map Name () builtinMap = Map.fromList [ ("+", ()), ("-", ()), ("<=", ()), ("print", ()), ("list", ()), ("car", ()), ("cdr", ())] bbId :: BB -> Int bbId (BB i _ _) = i initState :: CompState initState = CompState 0 Map.empty undefined [] Map.empty builtinMap Map.empty [] runCM :: CM a -> Either String a runCM act = runExcept $ evalStateT (unCM act) initState genId :: CM Int genId = state $ \s -> (csNextId s, s {csNextId = csNextId s + 1}) genTemp :: CM Ref genTemp = liftM RTemp genId newBlock :: CM Int newBlock = do i <- genId modify $ \s -> s {csBlocks = Map.insert i (BB i [] IUnknown) (csBlocks s)} return i switchBlock :: Int -> CM () switchBlock i = modify $ \s -> s {csCurrent = i} newBlockSwitch :: CM Int newBlockSwitch = do i <- newBlock switchBlock i return i rememberBlock :: CM a -> CM a rememberBlock act = do b <- gets csCurrent res <- act switchBlock b return res modifyBlock :: (BB -> BB) -> CM () modifyBlock f = do st <- get let current = csCurrent st Just bb = Map.lookup current (csBlocks st) put $ st {csBlocks = Map.insert current (f bb) (csBlocks st)} addIns :: Instruction -> CM () addIns ins = modifyBlock $ \(BB i inss term) -> BB i (inss ++ [ins]) term setTerm :: Terminator -> CM () setTerm term = modifyBlock $ \(BB i inss _) -> BB i inss term lookupVar :: Name -> CM (Either ScopeItem Ref) lookupVar name = gets csScopes >>= \scopes -> case msum (map (Map.lookup name) scopes) of Just si -> return (Left si) Nothing -> gets csDefines >>= \defines -> case Map.lookup name defines of Just ref -> return (Right ref) Nothing -> return (Left SIGlobal) dataTableAdd :: Value -> CM Int dataTableAdd v = state $ \ctx -> (length (csDatas ctx), ctx {csDatas = csDatas ctx ++ [v]}) functionAdd :: Name -> GlobFuncDef -> CM () functionAdd name gfd = modify $ \s -> s {csFunctions = Map.insert name gfd (csFunctions s)} defineAdd :: Name -> Ref -> CM () defineAdd name ref = modify $ \s -> s {csDefines = Map.insert name ref (csDefines s)} withScope :: Map.Map Name ScopeItem -> CM a -> CM a withScope sc act = do modify $ \s -> s {csScopes = sc : csScopes s} res <- act modify $ \s -> s {csScopes = tail (csScopes s)} return res compileProgram :: Program -> Either String IRProgram compileProgram (Program values) = runCM $ do bstart <- newBlockSwitch forM_ values $ \value -> do bnext <- newBlock ref <- genTValue (analyseValue value) bnext switchBlock bnext addIns (RNone, IDiscard ref) setTerm IExit ([firstbb], otherbbs) <- liftM (partition ((== bstart) . bbId) . Map.elems) (gets csBlocks) funcs <- gets csFunctions datas <- gets csDatas return (IRProgram (firstbb : otherbbs) funcs datas) genTValue :: TaggedValue -> Int -> CM Ref genTValue (TVList []) _ = throwError "Empty call" genTValue (TVList (TVName "do" _ : stmts)) nextnext = do forM_ (init stmts) $ \stmt -> do b <- newBlock r <- genTValue stmt b switchBlock b addIns (RNone, IDiscard r) genTValue (last stmts) nextnext genTValue (TVList [TVName "if" _, cond, val1, val2]) nextnext = do b1 <- newBlock bthen <- newBlock belse <- newBlock bthen' <- newBlock belse' <- newBlock condref <- genTValue cond b1 switchBlock b1 setTerm $ IBr condref bthen belse resref <- genTemp switchBlock bthen thenref <- genTValue val1 bthen' switchBlock bthen' addIns (resref, IAssign thenref) setTerm $ IJmp nextnext switchBlock belse elseref <- genTValue val2 belse' switchBlock belse' addIns (resref, IAssign elseref) setTerm $ IJmp nextnext return resref genTValue (TVList (TVName "if" _ : _)) _ = throwError "Invalid 'if' syntax" genTValue (TVList (funcexpr : args)) nextnext = do refs <- forM args $ \value -> do bnext <- newBlock ref <- genTValue value bnext switchBlock bnext return ref b <- newBlock funcref <- genTValue funcexpr b switchBlock b resref <- genTemp addIns (resref, ICallC funcref refs) setTerm $ IJmp nextnext return resref genTValue (TVNum n) nextnext = do setTerm $ IJmp nextnext return (RConst n) genTValue (TVString s) nextnext = do i <- dataTableAdd (VString s) r <- genTemp addIns (r, IData i) setTerm $ IJmp nextnext return r genTValue (TVQuoted v) nextnext = do i <- dataTableAdd v r <- genTemp addIns (r, IData i) setTerm $ IJmp nextnext return r genTValue (TVDefine name value) nextnext = do dref <- genTemp defineAdd name dref vref <- genTValue value nextnext -- traceM $ "Defining '" ++ name ++ "', ref " ++ show dref ++ ", with value " ++ show vref addIns (dref, IAssign vref) return RNone genTValue (TVLambda args body closure) nextnext = do startb <- rememberBlock $ withScope (Map.fromList (zip args (map SIParam [0..]) ++ zip closure (map SIClosure [0..]))) $ do b <- newBlockSwitch b2 <- newBlock ref <- genTValue body b2 switchBlock b2 setTerm $ IRet ref return b uid <- genId let uname = show uid ++ "L" -- starts with digit, so cannot clash with user-defined name functionAdd uname (GlobFuncDef startb (length args) closure) resref <- case closure of [] -> return (RSClo uname) _ -> do refs <- forM closure $ \cname -> do b <- newBlock r <- genTValue (TVName cname undefined) b switchBlock b return r r <- genTemp addIns (r, IAllocClo uname refs) return r setTerm $ IJmp nextnext return resref genTValue (TVName name _) nextnext = do r <- genTemp lookupVar name >>= \si -> case si of Right ref -> addIns (r, IAssign ref) Left (SIParam n) -> addIns (r, IParam n) Left (SIClosure n) -> addIns (r, IClosure n) Left SIGlobal -> do funcs <- gets csFunctions builtins <- gets csBuiltins case (Map.lookup name funcs, Map.lookup name builtins) of (Just (GlobFuncDef _ _ []), _) -> addIns (r, IAssign (RSClo name)) (Just (GlobFuncDef _ _ cs), _) -> do refs <- foldM (\refs' cname -> do b <- newBlock r' <- genTValue (TVName cname undefined) b switchBlock b return (r' : refs')) [] cs addIns (r, IAllocClo name refs) (_, Just ()) -> addIns (r, IAssign (RSClo name)) _ -> throwError $ "Use of undefined name \"" ++ name ++ "\"" setTerm $ IJmp nextnext return r genTValue TVEllipsis _ = throwError "Ellipses not supported in compiler"