From 0e1f435314b382cb78056f04d0997df43e4f8fcf Mon Sep 17 00:00:00 2001 From: Tom Smeding Date: Mon, 26 Mar 2018 21:34:51 +0200 Subject: Rename files for case-sensitive file system --- Compiler.hs | 286 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 286 insertions(+) create mode 100644 Compiler.hs (limited to 'Compiler.hs') diff --git a/Compiler.hs b/Compiler.hs new file mode 100644 index 0000000..2e3b80b --- /dev/null +++ b/Compiler.hs @@ -0,0 +1,286 @@ +{-# LANGUAGE DeriveFunctor, GeneralizedNewtypeDeriving #-} +module Compiler(IRProgram, compileProgram) where + +import Control.Monad.Except +import Control.Monad.State.Strict +import Data.List +import qualified Data.Map.Strict as Map +import qualified Data.Set as Set +import Debug.Trace + +import AST +import Intermediate + + +data TaggedValue + = TVList [TaggedValue] + | TVNum Int + | TVString String + | TVName Name (Maybe Int) -- Nothing: unknown, Just n: defined n lambdas up (0 = current lambda arg) + | TVQuoted Value + | TVDefine Name TaggedValue + | TVLambda [Name] TaggedValue [Name] -- (args) (body) (closure slot names) + | TVEllipsis + deriving Show + +-- also does some preprocessing, like parsing lambda's and defines +analyseValue :: Value -> TaggedValue +analyseValue = go [] + where + go :: [Set.Set Name] -> Value -> TaggedValue + go scopes (VList [VName "define", VName name, VList args, body]) + | Just names <- mapM fromVName args = go scopes (VList [VName "define", VName name, VLambda names body]) + | otherwise = error "Invalid 'define' shorthand syntax: Invalid argument list" + go scopes (VList [VName "define", VName name, value]) = TVDefine name (go scopes value) + go scopes (VList [VName "lambda", VList args, body]) + | Just names <- mapM fromVName args = go scopes (VLambda names body) + | otherwise = error "Invalid 'lambda' syntax: Invalid argument list" + go _ (VList (VName "lambda" : _)) = error "Invalid 'lambda' syntax" + go scopes (VList values) = TVList (map (go scopes) values) + go _ (VNum n) = TVNum n + go _ (VString s) = TVString s + go scopes (VName name) = TVName name (findIndex id (map (Set.member name) scopes)) + go _ (VQuoted value) = TVQuoted value + go scopes (VLambda args body) = + let t = go (Set.fromList args : scopes) body + in TVLambda args t (Set.toList (collectEscapes 0 t)) + go _ (VBuiltin _) = undefined + go _ VEllipsis = TVEllipsis + + collectEscapes :: Int -> TaggedValue -> Set.Set Name + collectEscapes limit (TVList values) = Set.unions (map (collectEscapes limit) values) + collectEscapes limit (TVName name (Just n)) | n > limit = Set.singleton name + collectEscapes limit (TVLambda _ body _) = collectEscapes (limit + 1) body + collectEscapes _ _ = Set.empty + + +data CompState = CompState + { csNextId :: Int + , csBlocks :: Map.Map Int BB + , csCurrent :: Int + , csScopes :: [Map.Map Name ScopeItem] + , csDefines :: Map.Map Name Ref + , csBuiltins :: Map.Map Name () + , csFunctions :: Map.Map Name GlobFuncDef + , csDatas :: [Value] } + deriving Show + +data ScopeItem = SIParam Int | SIClosure Int | SIGlobal + deriving Show + +newtype CM a = CM {unCM :: StateT CompState (Except String) a} + deriving (Functor, Applicative, Monad, MonadState CompState, MonadError String) + +-- TODO: extra info like number of arguments, dunno, might be useful +builtinMap :: Map.Map Name () +builtinMap = Map.fromList [ + ("+", ()), ("-", ()), ("<=", ()), ("print", ()), + ("list", ()), ("car", ()), ("cdr", ())] + +bbId :: BB -> Int +bbId (BB i _ _) = i + +initState :: CompState +initState = CompState 0 Map.empty undefined [] Map.empty builtinMap Map.empty [] + +runCM :: CM a -> Either String a +runCM act = runExcept $ evalStateT (unCM act) initState + +genId :: CM Int +genId = state $ \s -> (csNextId s, s {csNextId = csNextId s + 1}) + +genTemp :: CM Ref +genTemp = liftM RTemp genId + +newBlock :: CM Int +newBlock = do + i <- genId + modify $ \s -> s {csBlocks = Map.insert i (BB i [] IUnknown) (csBlocks s)} + return i + +switchBlock :: Int -> CM () +switchBlock i = modify $ \s -> s {csCurrent = i} + +newBlockSwitch :: CM Int +newBlockSwitch = do + i <- newBlock + switchBlock i + return i + +rememberBlock :: CM a -> CM a +rememberBlock act = do + b <- gets csCurrent + res <- act + switchBlock b + return res + +modifyBlock :: (BB -> BB) -> CM () +modifyBlock f = do + st <- get + let current = csCurrent st + Just bb = Map.lookup current (csBlocks st) + put $ st {csBlocks = Map.insert current (f bb) (csBlocks st)} + +addIns :: Instruction -> CM () +addIns ins = modifyBlock $ \(BB i inss term) -> BB i (inss ++ [ins]) term + +setTerm :: Terminator -> CM () +setTerm term = modifyBlock $ \(BB i inss _) -> BB i inss term + +lookupVar :: Name -> CM (Either ScopeItem Ref) +lookupVar name = gets csScopes >>= \scopes -> case msum (map (Map.lookup name) scopes) of + Just si -> return (Left si) + Nothing -> gets csDefines >>= \defines -> case Map.lookup name defines of + Just ref -> return (Right ref) + Nothing -> return (Left SIGlobal) + +dataTableAdd :: Value -> CM Int +dataTableAdd v = state $ \ctx -> (length (csDatas ctx), ctx {csDatas = csDatas ctx ++ [v]}) + +functionAdd :: Name -> GlobFuncDef -> CM () +functionAdd name gfd = modify $ \s -> s {csFunctions = Map.insert name gfd (csFunctions s)} + +defineAdd :: Name -> Ref -> CM () +defineAdd name ref = modify $ \s -> s {csDefines = Map.insert name ref (csDefines s)} + +withScope :: Map.Map Name ScopeItem -> CM a -> CM a +withScope sc act = do + modify $ \s -> s {csScopes = sc : csScopes s} + res <- act + modify $ \s -> s {csScopes = tail (csScopes s)} + return res + + +compileProgram :: Program -> Either String IRProgram +compileProgram (Program values) = runCM $ do + bstart <- newBlockSwitch + forM_ values $ \value -> do + bnext <- newBlock + ref <- genTValue (analyseValue value) bnext + switchBlock bnext + addIns (RNone, IDiscard ref) + setTerm IExit + ([firstbb], otherbbs) <- liftM (partition ((== bstart) . bbId) . Map.elems) (gets csBlocks) + funcs <- gets csFunctions + datas <- gets csDatas + return (IRProgram (firstbb : otherbbs) funcs datas) + +genTValue :: TaggedValue -> Int -> CM Ref +genTValue (TVList []) _ = throwError "Empty call" +genTValue (TVList (TVName "do" _ : stmts)) nextnext = do + forM_ (init stmts) $ \stmt -> do + b <- newBlock + r <- genTValue stmt b + switchBlock b + addIns (RNone, IDiscard r) + genTValue (last stmts) nextnext +genTValue (TVList [TVName "if" _, cond, val1, val2]) nextnext = do + b1 <- newBlock + bthen <- newBlock + belse <- newBlock + bthen' <- newBlock + belse' <- newBlock + + condref <- genTValue cond b1 + switchBlock b1 + setTerm $ IBr condref bthen belse + resref <- genTemp + + switchBlock bthen + thenref <- genTValue val1 bthen' + switchBlock bthen' + addIns (resref, IAssign thenref) + setTerm $ IJmp nextnext + + switchBlock belse + elseref <- genTValue val2 belse' + switchBlock belse' + addIns (resref, IAssign elseref) + setTerm $ IJmp nextnext + + return resref +genTValue (TVList (TVName "if" _ : _)) _ = throwError "Invalid 'if' syntax" +genTValue (TVList (funcexpr : args)) nextnext = do + refs <- forM args $ \value -> do + bnext <- newBlock + ref <- genTValue value bnext + switchBlock bnext + return ref + b <- newBlock + funcref <- genTValue funcexpr b + switchBlock b + resref <- genTemp + addIns (resref, ICallC funcref refs) + setTerm $ IJmp nextnext + return resref +genTValue (TVNum n) nextnext = do + setTerm $ IJmp nextnext + return (RConst n) +genTValue (TVString s) nextnext = do + i <- dataTableAdd (VString s) + r <- genTemp + addIns (r, IData i) + setTerm $ IJmp nextnext + return r +genTValue (TVQuoted v) nextnext = do + i <- dataTableAdd v + r <- genTemp + addIns (r, IData i) + setTerm $ IJmp nextnext + return r +genTValue (TVDefine name value) nextnext = do + dref <- genTemp + defineAdd name dref + vref <- genTValue value nextnext + -- traceM $ "Defining '" ++ name ++ "', ref " ++ show dref ++ ", with value " ++ show vref + addIns (dref, IAssign vref) + return RNone +genTValue (TVLambda args body closure) nextnext = do + startb <- rememberBlock $ + withScope (Map.fromList (zip args (map SIParam [0..]) ++ zip closure (map SIClosure [0..]))) $ do + b <- newBlockSwitch + b2 <- newBlock + ref <- genTValue body b2 + switchBlock b2 + setTerm $ IRet ref + return b + uid <- genId + let uname = show uid ++ "L" -- starts with digit, so cannot clash with user-defined name + functionAdd uname (GlobFuncDef startb (length args) closure) + resref <- case closure of + [] -> return (RSClo uname) + _ -> do + refs <- forM closure $ \cname -> do + b <- newBlock + r <- genTValue (TVName cname undefined) b + switchBlock b + return r + r <- genTemp + addIns (r, IAllocClo uname refs) + return r + setTerm $ IJmp nextnext + return resref +genTValue (TVName name _) nextnext = do + r <- genTemp + lookupVar name >>= \si -> case si of + Right ref -> addIns (r, IAssign ref) + Left (SIParam n) -> addIns (r, IParam n) + Left (SIClosure n) -> addIns (r, IClosure n) + Left SIGlobal -> do + funcs <- gets csFunctions + builtins <- gets csBuiltins + case (Map.lookup name funcs, Map.lookup name builtins) of + (Just (GlobFuncDef _ _ []), _) -> addIns (r, IAssign (RSClo name)) + (Just (GlobFuncDef _ _ cs), _) -> do + refs <- foldM (\refs' cname -> do + b <- newBlock + r' <- genTValue (TVName cname undefined) b + switchBlock b + return (r' : refs')) + [] cs + addIns (r, IAllocClo name refs) + (_, Just ()) -> addIns (r, IAssign (RSClo name)) + _ -> throwError $ "Use of undefined name \"" ++ name ++ "\"" + setTerm $ IJmp nextnext + return r +genTValue TVEllipsis _ = throwError "Ellipses not supported in compiler" -- cgit v1.2.3-54-g00ecf