From 694ec05bcad01fd27606aace73b49cdade16945e Mon Sep 17 00:00:00 2001 From: tomsmeding Date: Sat, 19 Aug 2017 11:05:43 +0200 Subject: Initial --- .gitignore | 4 + AST.hs | 173 +++++++++++++++++++++++ BuildIR.hs | 367 +++++++++++++++++++++++++++++++++++++++++++++++++ CodeGen.hs | 388 ++++++++++++++++++++++++++++++++++++++++++++++++++++ Defs.hs | 10 ++ Intermediate.hs | 174 +++++++++++++++++++++++ LifetimeAnalysis.hs | 70 ++++++++++ Main.hs | 56 ++++++++ Makefile | 26 ++++ Optimiser.hs | 252 ++++++++++++++++++++++++++++++++++ Pretty.hs | 14 ++ ProgramParser.hs | 269 ++++++++++++++++++++++++++++++++++++ RegAlloc.hs | 87 ++++++++++++ ReplaceRefs.hs | 36 +++++ TypeCheck.hs | 234 +++++++++++++++++++++++++++++++ TypeRules.hs | 61 +++++++++ Utils.hs | 7 + Verify.hs | 28 ++++ X64.hs | 275 +++++++++++++++++++++++++++++++++++++ arrays.lang | 7 + bf.lang | 105 ++++++++++++++ chartest.lang | 19 +++ fibo.lang | 31 +++++ graph.png | Bin 0 -> 166622 bytes prologue.asm | 134 ++++++++++++++++++ putint.lang | 4 + putstr.lang | 43 ++++++ test.lang | 20 +++ 28 files changed, 2894 insertions(+) create mode 100644 .gitignore create mode 100644 AST.hs create mode 100644 BuildIR.hs create mode 100644 CodeGen.hs create mode 100644 Defs.hs create mode 100644 Intermediate.hs create mode 100644 LifetimeAnalysis.hs create mode 100644 Main.hs create mode 100644 Makefile create mode 100644 Optimiser.hs create mode 100644 Pretty.hs create mode 100644 ProgramParser.hs create mode 100644 RegAlloc.hs create mode 100644 ReplaceRefs.hs create mode 100644 TypeCheck.hs create mode 100644 TypeRules.hs create mode 100644 Utils.hs create mode 100644 Verify.hs create mode 100644 X64.hs create mode 100644 arrays.lang create mode 100644 bf.lang create mode 100644 chartest.lang create mode 100644 fibo.lang create mode 100644 graph.png create mode 100644 prologue.asm create mode 100644 putint.lang create mode 100644 putstr.lang create mode 100644 test.lang diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f220a21 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +main +obj +obsolete +z_output* diff --git a/AST.hs b/AST.hs new file mode 100644 index 0000000..3e80830 --- /dev/null +++ b/AST.hs @@ -0,0 +1,173 @@ +module AST where + +import Data.List + +import Defs +import Pretty + + +data Program = Program [DVar] [DFunc] + deriving (Show, Eq) + +data DVar = DVar Type Name Expression + deriving (Show, Eq) + +data DFunc = DFunc (Maybe Type) Name [(Type, Name)] Block + deriving (Show, Eq) + +data Type + = TInt | TChar | TArr Type (Maybe Size) + deriving (Show, Eq) + +data Block = Block [Statement] + deriving (Show, Eq) + +data Statement + = SDecl Type Name Expression + | SAs AsExpression Expression + | SIf Expression Block Block + | SWhile Expression Block + | SReturn (Maybe Expression) + | SExpr Expression + deriving (Show, Eq) + +data AsExpression + = AEVar Name (Maybe Type) + | AESubscript AsExpression Expression (Maybe Type) + deriving (Show, Eq) + +data Expression + = EBin BinaryOp Expression Expression (Maybe Type) + | EUn UnaryOp Expression (Maybe Type) + | ELit Literal (Maybe Type) + | ESubscript Expression Expression (Maybe Type) + | ECast Type Expression + | ENew Type Expression + deriving (Show, Eq) + +data BinaryOp + = BOAdd | BOSub | BOMul | BODiv | BOMod | BOPow + | BOAnd | BOOr + | BOEq | BONeq | BOGt | BOLt | BOGeq | BOLeq + deriving (Show, Eq) + +data UnaryOp + = UONot | UONeg + deriving (Show, Eq) + +data Literal + = LInt Integer + | LChar Char + | LVar Name + | LCall Name [Expression] + deriving (Show, Eq) + + +sizeof :: Type -> Size +sizeof TInt = 8 +sizeof TChar = 1 +sizeof (TArr _ _) = 8 + + +instance Pretty Program where + prettyI i (Program vars funcs) = + concatMap (++ ("\n" ++ indent i)) $ + map (prettyI i) vars ++ map (prettyI i) funcs + where + indent n = replicate (2*n) ' ' + +instance Pretty DVar where + prettyI i (DVar t n e) = + prettyI i t ++ " " ++ n ++ " := " ++ prettyI i e ++ ";" + +instance Pretty DFunc where + prettyI i (DFunc mt n al b) = + "func" ++ maybe "" ((' ' :) . prettyI i) mt ++ " " ++ n ++ "(" ++ + intercalate "," + (map (\(at,an) -> prettyI i at ++ " " ++ an) al) ++ + ") " ++ prettyI i b + +instance Pretty Type where + prettyI _ TInt = "int" + prettyI _ TChar = "char" + prettyI _ (TArr t Nothing) = pretty t ++ "[]" + prettyI _ (TArr t (Just sz)) = pretty t ++ "[" ++ show sz ++ "]" + +instance Pretty Block where + prettyI _ (Block []) = "{}" + prettyI i (Block l) = + "{" ++ + concatMap (("\n" ++ indent (i+1)) ++) (map (prettyI (i+1)) l) ++ + "\n" ++ indent i ++ "}" + where + indent n = replicate (2*n) ' ' + +instance Pretty Statement where + prettyI i (SDecl t n e) = + prettyI i t ++ " " ++ n ++ " := " ++ prettyI i e ++ ";" + prettyI i (SAs target e) = + prettyI i target ++ " = " ++ prettyI i e ++ ";" + prettyI i (SIf c b1 b2) = + "if " ++ prettyI i c ++ " " ++ prettyI i b1 ++ " else " ++ prettyI i b2 + prettyI i (SWhile c b) = + "while " ++ prettyI i c ++ " " ++ prettyI i b + prettyI _ (SReturn Nothing) = + "return;" + prettyI i (SReturn (Just e)) = + "return " ++ prettyI i e ++ ";" + prettyI i (SExpr e) = prettyI i e ++ ";" + +instance Pretty Expression where + prettyI i (EBin bo a b (Just t)) = + "(" ++ prettyI i (EBin bo a b Nothing) ++ " :: " ++ prettyI i t ++ ")" + prettyI i (EBin bo a b Nothing) = + "(" ++ prettyI i a ++ ") " ++ prettyI i bo ++ + " (" ++ prettyI i b ++ ")" + prettyI i (EUn uo e (Just t)) = + "(" ++ prettyI i (EUn uo e Nothing) ++ " :: " ++ prettyI i t ++ ")" + prettyI i (EUn uo e Nothing) = + prettyI i uo ++ "(" ++ prettyI i e ++ ")" + prettyI i (ELit l (Just t)) = + "(" ++ prettyI i (ELit l Nothing) ++ " :: " ++ prettyI i t ++ ")" + prettyI i (ELit l Nothing) = prettyI i l + prettyI i (ESubscript a b (Just t)) = + "(" ++ prettyI i (ESubscript a b Nothing) ++ " :: " ++ prettyI i t ++ ")" + prettyI i (ESubscript a b Nothing) = "(" ++ prettyI i a ++ ")[" ++ prettyI i b ++ "]" + prettyI i (ECast t e) = prettyI i t ++ "(" ++ prettyI i e ++ ")" + prettyI i (ENew t e) = "new " ++ prettyI i t ++ "[" ++ prettyI i e ++ "]" + +instance Pretty AsExpression where + prettyI i (AEVar n (Just t)) = + "(" ++ prettyI i (AEVar n Nothing) ++ " :: " ++ prettyI i t ++ ")" + prettyI _ (AEVar n Nothing) = n + prettyI i (AESubscript ae e (Just t)) = + "(" ++ prettyI i (AESubscript ae e Nothing) ++ " :: " ++ prettyI i t ++ ")" + prettyI i (AESubscript ae e Nothing) = prettyI i ae ++ "[" ++ prettyI i e ++ "]" + +instance Pretty BinaryOp where + prettyI _ BOAdd = "+" + prettyI _ BOSub = "-" + prettyI _ BOMul = "*" + prettyI _ BODiv = "/" + prettyI _ BOPow = "**" + prettyI _ BOMod = "%" + prettyI _ BOAnd = "&&" + prettyI _ BOOr = "||" + prettyI _ BOEq = "==" + prettyI _ BONeq = "!=" + prettyI _ BOGt = ">" + prettyI _ BOLt = "<" + prettyI _ BOGeq = ">=" + prettyI _ BOLeq = "<=" + + +instance Pretty UnaryOp where + prettyI _ UONot = "!" + prettyI _ UONeg = "-" + +instance Pretty Literal where + prettyI _ (LInt n) = show n + prettyI _ (LChar c) = show c + prettyI _ (LVar n) = n + prettyI i (LCall n al) = + n ++ "(" ++ intercalate ", " (map (prettyI i) al) ++ ")" diff --git a/BuildIR.hs b/BuildIR.hs new file mode 100644 index 0000000..28cbf5e --- /dev/null +++ b/BuildIR.hs @@ -0,0 +1,367 @@ +{-# LANGUAGE DeriveFunctor, GeneralizedNewtypeDeriving #-} + +module BuildIR(buildIR) where + +import Control.Monad.Except +import Control.Monad.State.Strict +import Data.Char +import Data.List +import qualified Data.Map.Strict as Map +import Data.Maybe + +import AST +import Defs +import Intermediate +import Pretty +import TypeRules + + +type Scope = Map.Map Name (Ref, Type) + +data BuildState = BuildState + { nextId :: Id, + scopeStack :: [Scope], + currentBlock :: Id, + blockMap :: Map.Map Id BB } + +initBuildState :: BuildState +initBuildState = BuildState + { nextId = 0, + scopeStack = [], + currentBlock = undefined, + blockMap = Map.empty } + +newtype BuildM a = BuildM {unBuildM :: StateT BuildState (Except String) a} + deriving (Functor, Applicative, Monad, MonadState BuildState, MonadError String) + +genId :: BuildM Id +genId = state $ \s -> (nextId s, s {nextId = nextId s + 1}) + +genTemp :: Size -> BuildM Ref +genTemp sz = liftM (Temp sz) genId + +newBlock :: BuildM Id +newBlock = do + i <- genId + let block = BB i [] ITermNone + modify $ \s -> s {currentBlock = i, blockMap = Map.insert i block (blockMap s)} + return i + +newBlockNoSwitch :: BuildM Id +newBlockNoSwitch = do + i <- genId + let block = BB i [] ITermNone + modify $ \s -> s {blockMap = Map.insert i block (blockMap s)} + return i + +addIns :: IRIns -> BuildM () +addIns ins = + modify $ \s -> s { + blockMap = Map.adjust bbAddIns (currentBlock s) (blockMap s)} + where + bbAddIns :: BB -> BB + bbAddIns (BB bid inss term) = BB bid (inss ++ [ins]) term + +setTerm :: IRTerm -> BuildM () +setTerm term = + modify $ \s -> s { + blockMap = Map.adjust bbSetTerm (currentBlock s) (blockMap s)} + where + bbSetTerm :: BB -> BB + bbSetTerm (BB bid inss oldterm) = case oldterm of + ITermNone -> BB bid inss term + _ -> error "setTerm: oldterm /= ITermNone" + +clearBlockMap :: BuildM () +clearBlockMap = modify $ \s -> s {currentBlock = undefined, blockMap = Map.empty} + +getAllBlocks :: BuildM [BB] +getAllBlocks = liftM Map.elems (gets blockMap) + +switchBlock :: Id -> BuildM () +switchBlock bid = modify $ \s -> s {currentBlock = bid} + +withScope :: BuildM a -> BuildM a +withScope act = do + modify $ \s -> s {scopeStack = Map.empty : scopeStack s} + res <- act + modify $ \s -> s {scopeStack = tail (scopeStack s)} + return res + +modifyScope :: (Scope -> Scope) -> BuildM () +modifyScope f = + modify $ \s -> s {scopeStack = f (head (scopeStack s)) : tail (scopeStack s)} + +scopeInsert :: Name -> Ref -> Type -> BuildM () +scopeInsert n ref t = modifyScope $ Map.insert n (ref, t) + +findVar :: Name -> BuildM (Maybe (Int, (Ref, Type))) +findVar n = do + stk <- gets scopeStack + let results = map (Map.lookup n) stk + return $ fmap (\idx -> (idx, fromJust (results !! idx))) $ + findIndex isJust results + + +buildIR :: Program -> Error IRProgram +buildIR (Program vars funcs) = + runExcept $ evalStateT (unBuildM result) initBuildState + where + goDFunc :: DFunc -> BuildM IRFunc + goDFunc (DFunc rt n al bl) = do + clearBlockMap + firstid <- newBlock + lastid <- newBlockNoSwitch + withScope $ do + forM_ al $ \(at, an) -> scopeInsert an (Argument (sizeof at) an) at + convertBlock bl lastid + switchBlock lastid + setTerm IRet + bblist <- getAllBlocks + return $ IRFunc rt n al bblist firstid + + result :: BuildM IRProgram + result = do + withScope $ do + mapM_ (\(DVar t n _) -> scopeInsert n (Global (sizeof t) n) t) vars + IRProgram vars <$> mapM goDFunc funcs + +convertBlock :: Block -> Id -> BuildM () +convertBlock (Block sts) nextnext = do + withScope $ forM_ sts $ \st -> do + endid <- newBlockNoSwitch + convertStatement st endid + switchBlock endid + setTerm $ IJmp nextnext + +convertStatement :: Statement -> Id -> BuildM () +convertStatement (SDecl t n e) nextnext = do + endid <- newBlockNoSwitch + ref <- convertExpression e endid + varref <- genTemp (sizeof t) + scopeInsert n varref t + switchBlock endid + addIns $ IMov varref ref + setTerm $ IJmp nextnext +convertStatement (SAs ae e) nextnext = do + bl2 <- newBlockNoSwitch + eref <- convertExpression e bl2 + switchBlock bl2 + convertAsExpression ae eref nextnext +convertStatement (SIf c b1 b2) nextnext = do + cend <- newBlockNoSwitch + blThen <- newBlockNoSwitch + blElse <- newBlockNoSwitch + + cref <- convertExpression c cend + switchBlock cend + setTerm $ IJcc CNeq cref (Constant (refSize cref) 0) blThen blElse + switchBlock blThen + convertBlock b1 nextnext + switchBlock blElse + convertBlock b2 nextnext +convertStatement (SWhile c b) nextnext = do + cond <- newBlockNoSwitch + setTerm $ IJmp cond + cend <- newBlockNoSwitch + body <- newBlockNoSwitch + bodyend <- newBlockNoSwitch + + switchBlock cond + cref <- convertExpression c cend + switchBlock cend + setTerm $ IJcc CNeq cref (Constant (refSize cref) 0) body nextnext + switchBlock body + convertBlock b bodyend + switchBlock bodyend + setTerm $ IJmp cond +convertStatement (SReturn Nothing) _ = do + setTerm IRet +convertStatement (SReturn (Just e)) _ = do + bl <- newBlockNoSwitch + ref <- convertExpression e bl + switchBlock bl + setTerm $ IRetr ref +convertStatement (SExpr e) nextnext = do + void $ convertExpression e nextnext + +convertExpression :: Expression -> Id -> BuildM Ref +convertExpression (ELit (LInt n) _) nextnext = do + ref <- genTemp (sizeof TInt) + addIns $ IMov ref (Constant (sizeof TInt) (fromInteger n)) + setTerm $ IJmp nextnext + return ref +convertExpression (ELit (LChar c) _) nextnext = do + ref <- genTemp (sizeof TChar) + addIns $ IMov ref (Constant (sizeof TChar) (fromIntegral $ ord c)) + setTerm $ IJmp nextnext + return ref +convertExpression (ELit (LVar n) _) nextnext = do + mres <- findVar n + case mres of + Just (_, (r, t)) -> do + ref <- genTemp (sizeof t) + addIns $ IMov ref r + setTerm $ IJmp nextnext + return ref + Nothing -> throwError $ "Undefined variable '" ++ n ++ "' referenced" +convertExpression (ELit (LCall n al) mrt) nextnext = do + refs <- withScope $ forM al $ \arg -> do + endid <- newBlockNoSwitch + r <- convertExpression arg endid + switchBlock endid + return r + destref <- case mrt of + Nothing -> do + addIns $ ICall n refs + return $ Temp 0 (-1) + Just typ -> do + r <- genTemp (sizeof typ) + addIns $ ICallr r n refs + return r + setTerm $ IJmp nextnext + return destref +convertExpression (EBin BOAnd e1 e2 _) nextnext = do + destref <- genTemp (sizeof TInt) + bl2 <- newBlockNoSwitch + blTryR <- newBlockNoSwitch + bl3 <- newBlockNoSwitch + blNope <- newBlockNoSwitch + blYes <- newBlockNoSwitch + + ref1 <- convertExpression e1 bl2 + + switchBlock bl2 + setTerm $ IJcc CNeq ref1 (Constant (refSize ref1) 0) blTryR blNope + + switchBlock blTryR + ref2 <- convertExpression e2 bl3 + + switchBlock bl3 + setTerm $ IJcc CNeq ref2 (Constant (refSize ref2) 0) blYes blNope + + switchBlock blYes + addIns $ IMov destref (Constant (refSize destref) 1) + setTerm $ IJmp nextnext + + switchBlock blNope + addIns $ IMov destref (Constant (refSize destref) 0) + setTerm $ IJmp nextnext + + return destref +convertExpression (EBin bo e1 e2 _) nextnext = do + bl2 <- newBlockNoSwitch + ref1 <- convertExpression e1 bl2 + switchBlock bl2 + bl3 <- newBlockNoSwitch + ref2 <- convertExpression e2 bl3 + switchBlock bl3 + ref <- genTemp (sizeof $ fromJust $ retTypeBO bo (fromJust $ typeof e1) (fromJust $ typeof e2)) + case bo of + BOAdd -> addIns $ IAri AAdd ref1 ref2 + BOSub -> addIns $ IAri ASub ref1 ref2 + BOMul -> addIns $ IAri AMul ref1 ref2 + BODiv -> addIns $ IAri ADiv ref1 ref2 + BOMod -> addIns $ IAri AMod ref1 ref2 + BOEq -> addIns $ IAri AEq ref1 ref2 + BONeq -> addIns $ IAri ANeq ref1 ref2 + BOGt -> addIns $ IAri AGt ref1 ref2 + BOLt -> addIns $ IAri ALt ref1 ref2 + BOGeq -> addIns $ IAri AGeq ref1 ref2 + BOLeq -> addIns $ IAri ALeq ref1 ref2 + BOPow -> error $ "Pow operator not implemented" + BOAnd -> undefined + BOOr -> undefined + addIns $ IMov ref ref1 + setTerm $ IJmp nextnext + return ref +convertExpression (EUn UONot e mt) nextnext = + convertExpression (EBin BOEq e (ELit (LInt 0) (typeof e)) mt) nextnext +convertExpression (EUn UONeg e mt) nextnext = + convertExpression (EBin BOSub (ELit (LInt 0) (typeof e)) e mt) nextnext +convertExpression (ESubscript arr sub t) nextnext = do + let elemsz = sizeof $ fromJust t + bl2 <- newBlockNoSwitch + arrref <- convertExpression arr bl2 + switchBlock bl2 + bl3 <- newBlockNoSwitch + subref <- convertExpression sub bl3 + switchBlock bl3 + addIns $ IAri AMul subref (Constant (refSize subref) (fromIntegral elemsz)) + addIns $ IAri AAdd subref (Constant (refSize subref) (fromIntegral $ sizeof TInt)) + addIns $ IAri AAdd arrref subref + ref <- genTemp elemsz + addIns $ ILoad ref arrref + setTerm $ IJmp nextnext + return ref +convertExpression (ECast dt e) nextnext = do + let typ = case typeof e of + Nothing -> error $ "Cast subject " ++ show e ++ " has Nothing type" + Just t -> t + when (not $ isIntegralType typ && isIntegralType dt) $ + error $ "convertExpression: unimplemented cast from " ++ pretty typ ++ " to " ++ pretty dt + ref <- genTemp (sizeof dt) + bl <- newBlockNoSwitch + eref <- convertExpression e bl + switchBlock bl + addIns $ IResize ref eref + setTerm $ IJmp nextnext + return ref +convertExpression (ENew t sze) nextnext = do + when (not $ isBasicType t) $ + throwError $ "Array element type in 'new' expression is not a basic type (" ++ pretty t ++ ")" + bl2 <- newBlockNoSwitch + szref <- convertExpression sze bl2 + switchBlock bl2 + ref <- genTemp (sizeof $ TArr t Nothing) + addIns $ IAri AMul szref (Constant (sizeof TInt) (fromIntegral $ sizeof t)) + addIns $ IAri AAdd szref (Constant (sizeof TInt) (fromIntegral $ sizeof TInt)) + addIns $ ICallr ref "_builtin_malloc" [szref] + setTerm $ IJmp nextnext + return ref + +convertAsExpression :: AsExpression -> Ref -> Id -> BuildM () +convertAsExpression (AEVar n _) valueref nextnext = do + mres <- findVar n + vref <- case mres of + Just (_, (r, _)) -> return r + Nothing -> throwError $ "Undefined variable '" ++ n ++ "'" ++ + " used in assignment expression" + addIns $ IMov vref valueref + setTerm $ IJmp nextnext +convertAsExpression (AESubscript ae2 expr mrt) valueref nextnext = do + let elemsz = sizeof $ fromJust mrt + ae2ref <- goLoad ae2 + bl2 <- newBlockNoSwitch + offref <- convertExpression expr bl2 + switchBlock bl2 + -- TODO: do bounds checking + addIns $ IAri AMul offref (Constant (sizeof TInt) (fromIntegral elemsz)) + addIns $ IAri AAdd offref (Constant (sizeof TInt) (fromIntegral $ sizeof TInt)) + addIns $ IAri AAdd ae2ref offref + addIns $ IStore ae2ref valueref + setTerm $ IJmp nextnext + where + goLoad :: AsExpression -> BuildM Ref + goLoad (AEVar n _) = do + mres <- findVar n + (vref, t) <- case mres of + Just (_, (r, t)) -> return (r, t) + Nothing -> throwError $ "Undefined variable '" ++ n ++ "'" ++ + " used in assignment expression" + ref <- genTemp (sizeof t) + addIns $ IMov ref vref + return ref + goLoad (AESubscript ae expr' _) = do + let elemsz = sizeof $ fromJust $ typeof ae + ref <- goLoad ae + bl2 <- newBlockNoSwitch + eref <- convertExpression expr' bl2 + switchBlock bl2 + -- TODO: do bounds checking + addIns $ IAri AMul eref (Constant (sizeof TInt) (fromIntegral elemsz)) + addIns $ IAri AAdd eref (Constant (sizeof TInt) (fromIntegral $ sizeof TInt)) + addIns $ IAri AAdd ref eref + dstref <- genTemp elemsz + addIns $ ILoad dstref ref + return dstref diff --git a/CodeGen.hs b/CodeGen.hs new file mode 100644 index 0000000..d4c9439 --- /dev/null +++ b/CodeGen.hs @@ -0,0 +1,388 @@ +{-# LANGUAGE DeriveFunctor, GeneralizedNewtypeDeriving, TupleSections, QuasiQuotes, ScopedTypeVariables #-} + +module CodeGen(codegen) where + +import Control.Monad +import Control.Monad.Except +import Control.Monad.State.Strict +import Data.List +import Data.Maybe +import Data.Map.Strict ((!)) +import qualified Data.Map.Strict as Map +import Text.Heredoc +import Debug.Trace + +import AST +import Defs +import Intermediate +import qualified LifetimeAnalysis as LA +import RegAlloc +import Utils +import X64 (Register(..), CondCode(..), XRef(..), Ins(..), xref) +import qualified X64 as X64 + + +data CGState = CGState + { nextId :: Int, + regsToRestore :: [Register], + spillSize :: Size, + x64Result :: X64.Asm } + +newtype CGMonad a = CGMonad { unCGMonad :: StateT CGState (Except String) a } + deriving (Functor, Applicative, Monad, MonadState CGState, MonadError String) + +initState :: CGState +initState = CGState {nextId = 1, regsToRestore = [], spillSize = 0, x64Result = X64.Asm []} + +execCGMonad :: CGMonad a -> Error X64.Asm +execCGMonad = fmap x64Result . runExcept . flip execStateT initState . unCGMonad + +addIns :: X64.Ins -> CGMonad () +addIns ins = modify $ \s -> + let (X64.Asm funcs) = x64Result s + (pre, (lab, inss)) = (init funcs, last funcs) + in s {x64Result = X64.Asm $ pre ++ [(lab, inss ++ [ins])]} + +newLabel :: String -> CGMonad () +newLabel lab = modify $ \s -> + let (X64.Asm funcs) = x64Result s + in s {x64Result = X64.Asm $ funcs ++ [(lab, [])]} + +-- genId :: CGMonad Int +-- genId = state $ \s -> (nextId s, s {nextId = nextId s + 1}) + +setRegsToRestore :: [Register] -> CGMonad () +setRegsToRestore regs = modify $ \s -> s {regsToRestore = regs} + +setSpillSize :: Size -> CGMonad () +setSpillSize sz = modify $ \s -> s {spillSize = sz} + + +codegen :: IRProgram -> Error String +codegen (IRProgram vars funcs) = do + x64 <- execCGMonad $ mapM_ codegenFunc funcs + -- traceShowM x64 + X64.verify x64 + varcg <- liftM unlines $ mapM codegenVar vars + return $ [there|prologue.asm|] ++ "\n" ++ X64.stringify x64 ++ + "\nsection .data\n" ++ (if length vars > 0 then varcg else "db 0 ; keep dyld happy\n") + + +codegenVar :: DVar -> Error String +codegenVar (DVar TInt n (ELit (LInt i) (Just TInt))) = Right $ n ++ ": dq " ++ show i +codegenVar _ = Left "Unsupported global variable declaration" + + +type AllocMap = Map.Map Ref XRef + +codegenFunc :: IRFunc -> CGMonad () +codegenFunc (IRFunc _ name al bbs sid) = do + let temprefsperbb = collectTempRefs bbs + alltemprefs = uniq $ sort $ map LA.unAccess $ concat $ concat $ map fst temprefsperbb + lifespans = map (\r -> (findLifeSpan r, r)) alltemprefs + where findLifeSpan ref = + fromJust $ findFirstLast id $ concat $ LA.lifetimeAnalysis ref temprefsperbb + + aliascandidates = findAliasCandidates bbs :: [(Ref, Ref)] + + gpRegs = [R8, R9, R10, R11, R12, R13, R14, R15] + allocation = regalloc lifespans gpRegs aliascandidates :: Map.Map Ref (Allocation Register) + + spillrefs = map fst $ filter (isAllocMem . snd) $ Map.toList allocation + (spilloffsets, spillsz) = initLast $ scanl (+) 0 $ map refSize spillrefs + spilloffsetmap = Map.fromList $ zip spillrefs spilloffsets + + usedregs = uniq $ sort $ catMaybes $ flip map (Map.toList allocation) $ \(_, a) -> case a of + AllocReg reg -> Just reg + AllocMem -> Nothing + + traceShowM temprefsperbb + traceShowM lifespans + -- traceM $ "ALLOCATION: " ++ show allocation + + let nsaves = length usedregs + allocationXref = flip Map.mapWithKey allocation $ \ref alloc -> case alloc of + AllocReg reg -> XReg (fromIntegral $ refSize ref) reg + AllocMem -> XMem (fromIntegral $ refSize ref) + (Just RSP) (0, RAX) Nothing + (fromIntegral $ spilloffsetmap ! ref) + allocmap = foldl inserter allocationXref (zip al [0::Int ..]) + where + inserter m ((t, n), i) = + let offset = fromIntegral spillsz + 8 * nsaves + 8 {- rbp -} + 8 {- ret addr -} + 8 * i + in Map.insert (Argument (sizeof t) n) + (XMem (fromIntegral $ sizeof t) + (Just RSP) (0, RAX) Nothing + (fromIntegral offset)) + m + + newLabel name + addIns $ PUSH (xref $ XReg 8 RBP) + addIns $ MOV (xref $ XReg 8 RBP) (xref $ XReg 8 RSP) + forM_ usedregs $ \reg -> addIns $ PUSH (xref $ XReg 8 reg) + when (spillsz /= 0) $ addIns $ SUB (xref $ XReg 8 RSP) (xref $ XImm $ fromIntegral spillsz) + setRegsToRestore usedregs + setSpillSize spillsz + + let ([startbb], rest) = partition (\(BB i _ _) -> i == sid) bbs + codegenBB allocmap startbb + mapM_ (codegenBB allocmap) rest + +findAliasCandidates :: [BB] -> [(Ref, Ref)] +findAliasCandidates = concatMap (\(BB _ inss _) -> concatMap goI inss) + where + goI :: IRIns -> [(Ref, Ref)] + goI (IMov d s) = [(d, s)] + goI _ = [] + +findFirstLast :: forall a. (a -> Bool) -> [a] -> Maybe (Int, Int) +findFirstLast f l = go Nothing 0 l + where + go :: Maybe (Int, Int) -> Int -> [a] -> Maybe (Int, Int) + go mr _ [] = mr + go mr i (x:xs) + | f x = go (note mr i) (i+1) xs + | otherwise = go mr (i+1) xs + + note :: Maybe (Int, Int) -> Int -> Maybe (Int, Int) + note Nothing i = Just (i, i) + note (Just (a, _)) i = Just (a, i) + +isAllocMem :: Allocation a -> Bool +isAllocMem AllocMem = True +isAllocMem _ = False + +initLast :: [a] -> ([a], a) +initLast [] = undefined +initLast [x] = ([], x) +initLast (x:xs) = let (acc, l) = initLast xs in (x : acc, l) + +codegenBB :: AllocMap -> BB -> CGMonad () +codegenBB allocmap (BB bid inss term) = do + newLabel $ ".bb" ++ show bid + mapM_ (codegenIns allocmap) inss + codegenTerm allocmap term + +mkxref :: Ref -> AllocMap -> XRef +mkxref (Constant _ v) _ = XImm v +mkxref (Global sz n) _ = XMem (fromIntegral sz) Nothing (0, RAX) (Just n) 0 +mkxref r m = fromJust $ Map.lookup r m + +mkmov :: XRef -> XRef -> X64.Ins +mkmov a@(XReg _ _) b@(XReg _ _) = MOV (xref a) (xref b) +mkmov a@(XReg _ _) b@(XMem _ _ _ _ _) = MOV (xref a) (xref b) +mkmov a@(XReg _ _) b@(XImm _) = MOVi64 (xref a) (xref b) +mkmov a@(XMem _ _ _ _ _) b@(XReg _ _) = MOV (xref a) (xref b) +mkmov a@(XMem _ _ _ _ _) b@(XImm v) | v < 2 ^ (32 :: Int) = MOVi (xref a) (xref b) +mkmov a b = error $ "Invalid mkmov: " ++ show a ++ "; " ++ show b + +mkcmp :: XRef -> XRef -> X64.Ins +mkcmp a b@(XImm _) = CMPi (xref a) (xref b) +mkcmp a b = CMP (xref a) (xref b) + +codegenIns :: AllocMap -> IRIns -> CGMonad () +codegenIns m (IMov d s) + | dm == sm = return () + | X64.isXMem dm && X64.isXMem sm = do + addIns $ mkmov (XReg (fromIntegral $ refSize s) RAX) sm + addIns $ mkmov dm (XReg (fromIntegral $ refSize d) RAX) + | otherwise = addIns $ mkmov dm sm + where dm = mkxref d m + sm = mkxref s m +codegenIns m (IStore d s) = do + sourcexref <- if X64.isXMem sm + then do + addIns $ mkmov (XReg sz RBX) sm + return $ XReg sz RBX + else return sm + destxref <- case dm of + XReg _ r -> return $ XMem sz (Just r) (0, RAX) Nothing 0 + x@(XMem xsz _ _ _ _) -> do + addIns $ mkmov (XReg xsz RAX) x + return $ XMem sz (Just RAX) (0, RAX) Nothing 0 + XImm _ -> throwError $ "IStore to [immediate] not expected" + addIns $ mkmov destxref sourcexref + where dm = mkxref d m + sm = mkxref s m + sz = fromIntegral $ refSize s +codegenIns m (ILoad d s) = do + sourcexref <- case sm of + XReg _ r -> return $ XMem sz (Just r) (0, RAX) Nothing 0 + x@(XMem xsz _ _ _ _) -> do + addIns $ mkmov (XReg xsz RAX) x + return $ XMem sz (Just RAX) (0, RAX) Nothing 0 + XImm _ -> throwError $ "ILoad from [immediate] not expected" + if X64.isXMem dm + then do + addIns $ mkmov (XReg sz RAX) sourcexref + addIns $ mkmov dm (XReg sz RAX) + else do + addIns $ mkmov dm sourcexref + where dm = mkxref d m + sm = mkxref s m + sz = fromIntegral $ refSize d +codegenIns m (IAri AMul d s) = do + let sz = fromIntegral $ refSize d + addIns $ mkmov (XReg sz RAX) (mkxref d m) + addIns $ mkmov (XReg sz RBX) (mkxref s m) + addIns $ IMULDA (xref $ XReg sz RBX) + addIns $ mkmov (mkxref d m) (XReg sz RAX) +codegenIns m (IAri ADiv d s) = do + let sz = fromIntegral $ refSize d + addIns $ XOR (xref $ XReg 4 RDX) (xref $ XReg 4 RDX) + addIns $ mkmov (XReg sz RAX) (mkxref d m) + addIns $ mkmov (XReg sz RBX) (mkxref s m) + addIns $ IDIVDA (xref $ XReg sz RBX) + addIns $ mkmov (mkxref d m) (XReg sz RAX) +codegenIns m (IAri AMod d s) = do + let sz = fromIntegral $ refSize d + addIns $ XOR (xref $ XReg 4 RDX) (xref $ XReg 4 RDX) + addIns $ mkmov (XReg sz RAX) (mkxref d m) + addIns $ mkmov (XReg sz RBX) (mkxref s m) + addIns $ IDIVDA (xref $ XReg sz RBX) + addIns $ mkmov (mkxref d m) (XReg sz RDX) +codegenIns m (IAri at d s) = case arithTypeToCondCode at of + Just cc -> do + arg2 <- if X64.isXMem dm && X64.isXMem sm + then do + addIns $ mkmov (XReg (fromIntegral $ refSize s) RAX) sm + return $ XReg (fromIntegral $ refSize s) RAX + else return sm + addIns $ mkcmp dm arg2 + addIns $ MOVi (xref dm) (xref $ XImm 0) + addIns $ SETCC cc (xref $ X64.xrefSetSize 1 dm) + Nothing -> do + arg2 <- if X64.isXMem dm && X64.isXMem sm + then do + addIns $ mkmov (XReg (fromIntegral $ refSize s) RAX) sm + return $ XReg (fromIntegral $ refSize s) RAX + else return sm + addIns $ fromJust (arithTypeToIns at) dm arg2 + where dm = mkxref d m + sm = mkxref s m +codegenIns m (ICall n rs) = do + forM_ (zip (reverse rs) [1::Int ..]) $ \(r, i) -> + let sz = fromIntegral $ refSize r + src = (mkxref r m) + dst = (XMem sz (Just RSP) (0, RAX) Nothing (fromIntegral $ (-8) * i)) + in if X64.isXMem (mkxref r m) + then do + addIns $ mkmov (XReg sz RAX) src + addIns $ mkmov dst (XReg sz RAX) + else do + addIns $ mkmov dst src + when (length rs > 0) $ addIns $ SUB (xref $ XReg 8 RSP) (xref $ XImm (fromIntegral $ 8 * length rs)) + addIns $ CALL n + when (length rs > 0) $ addIns $ ADD (xref $ XReg 8 RSP) (xref $ XImm (fromIntegral $ 8 * length rs)) +codegenIns m (ICallr d n rs) = do + codegenIns m (ICall n rs) + addIns $ mkmov (mkxref d m) (XReg (fromIntegral $ refSize d) RAX) +codegenIns m fullins@(IResize d s) = do + let dsz = fromIntegral $ refSize d + ssz = fromIntegral $ refSize s + dm = mkxref d m + sm = mkxref s m + when (X64.isXImm sm) $ + throwError $ "Resized value is an immediate in " ++ show fullins ++ + "; (dm = " ++ show dm ++ "; sm = " ++ show sm ++ ")" + case compare dsz ssz of + EQ -> codegenIns m (IMov d s) + GT -> if X64.isXMem dm + then do + addIns $ MOVSX (xref $ XReg dsz RAX) (xref sm) + addIns $ mkmov dm (XReg dsz RAX) + else do + addIns $ MOVSX (xref dm) (xref sm) + LT -> if X64.isXMem dm && X64.isXMem sm + then do + addIns $ mkmov (XReg dsz RAX) (X64.xrefSetSize dsz sm) + addIns $ mkmov dm (XReg dsz RAX) + else do + addIns $ mkmov dm (X64.xrefSetSize dsz sm) +codegenIns _ INop = return () + +arithTypeToCondCode :: ArithType -> Maybe X64.CondCode +arithTypeToCondCode AEq = Just CCE +arithTypeToCondCode ANeq = Just CCNE +arithTypeToCondCode AGt = Just CCG +arithTypeToCondCode ALt = Just CCL +arithTypeToCondCode AGeq = Just CCGE +arithTypeToCondCode ALeq = Just CCLE +arithTypeToCondCode _ = Nothing + +cmpTypeToCondCode :: CmpType -> X64.CondCode +cmpTypeToCondCode CEq = CCE +cmpTypeToCondCode CNeq = CCNE +cmpTypeToCondCode CGt = CCG +cmpTypeToCondCode CLt = CCL +cmpTypeToCondCode CGeq = CCGE +cmpTypeToCondCode CLeq = CCLE + +arithTypeToIns :: ArithType -> Maybe (XRef -> XRef -> X64.Ins) +arithTypeToIns AAdd = Just $ \a b -> ADD (xref a) (xref b) +arithTypeToIns ASub = Just $ \a b -> SUB (xref a) (xref b) +arithTypeToIns AAnd = Just $ \a b -> AND (xref a) (xref b) +arithTypeToIns AOr = Just $ \a b -> OR (xref a) (xref b) +arithTypeToIns AXor = Just $ \a b -> XOR (xref a) (xref b) +arithTypeToIns _ = Nothing + +codegenTerm :: AllocMap -> IRTerm -> CGMonad () +codegenTerm m (IJcc ct a b t e) = do + addIns $ mkcmp (mkxref a m) (mkxref b m) + addIns $ JCC (cmpTypeToCondCode ct) (".bb" ++ show t) + addIns $ JMP (".bb" ++ show e) +codegenTerm _ (IJmp i) = addIns $ JMP (".bb" ++ show i) +codegenTerm _ IRet = do + spillsz <- gets spillSize + when (spillsz /= 0) $ addIns $ ADD (xref $ XReg 8 RSP) (xref $ XImm $ fromIntegral spillsz) + usedregs <- gets regsToRestore + forM_ (reverse usedregs) $ \reg -> addIns $ POP (xref $ XReg 8 reg) + addIns $ mkmov (XReg 8 RSP) (XReg 8 RBP) + addIns $ POP (xref $ XReg 8 RBP) + addIns RET +codegenTerm m (IRetr r) = do + addIns $ mkmov (XReg (fromIntegral $ refSize r) RAX) (mkxref r m) + spillsz <- gets spillSize + when (spillsz /= 0) $ addIns $ ADD (xref $ XReg 8 RSP) (xref $ XImm $ fromIntegral spillsz) + usedregs <- gets regsToRestore + forM_ (reverse usedregs) $ \reg -> addIns $ POP (xref $ XReg 8 reg) + addIns $ mkmov (XReg 8 RSP) (XReg 8 RBP) + addIns $ POP (xref $ XReg 8 RBP) + addIns RET +codegenTerm _ ITermNone = undefined + + +collectTempRefs :: [BB] -> [([[LA.Access Ref]], [Int])] +collectTempRefs bbs = + flip map bbs $ \(BB _ inss term) -> + let refs = map (filter (isTemp . LA.unAccess)) $ concatMap listRefsIns inss ++ listRefsTerm term + nexts = map (\i -> fromJust $ findIndex (\(BB j _ _) -> j == i) bbs) $ listNextIds term + in (refs, nexts) + where + listRefsIns :: IRIns -> [[LA.Access Ref]] + listRefsIns (IMov a b) = [[LA.Read b], [LA.Write a]] + listRefsIns (IStore a b) = [[LA.Read a, LA.Read b]] + listRefsIns (ILoad a b) = [[LA.Read b], [LA.Write a]] + listRefsIns (IAri _ a b) = [[LA.Write a, LA.Read b]] + listRefsIns (ICall _ l) = [map LA.Read l] + listRefsIns (ICallr a _ l) = [LA.Write a : map LA.Read l] + listRefsIns (IResize a b) = [[LA.Read b], [LA.Write a]] + listRefsIns INop = [[]] + + listRefsTerm :: IRTerm -> [[LA.Access Ref]] + listRefsTerm (IJcc _ a b _ _) = [[LA.Read a, LA.Read b]] + listRefsTerm (IJmp _) = [[]] + listRefsTerm IRet = [[]] + listRefsTerm (IRetr a) = [[LA.Read a]] + listRefsTerm ITermNone = undefined + + listNextIds :: IRTerm -> [Id] + listNextIds (IJcc _ _ _ a b) = [a, b] + listNextIds (IJmp a) = [a] + listNextIds IRet = [] + listNextIds (IRetr _) = [] + listNextIds ITermNone = undefined + + isTemp :: Ref -> Bool + isTemp (Temp _ _) = True + isTemp _ = False diff --git a/Defs.hs b/Defs.hs new file mode 100644 index 0000000..4203a82 --- /dev/null +++ b/Defs.hs @@ -0,0 +1,10 @@ +module Defs where + +import Data.Int + + +type Name = String +type Id = Int +type Size = Integer +type Value = Int64 +type Error = Either String diff --git a/Intermediate.hs b/Intermediate.hs new file mode 100644 index 0000000..5f3a9f2 --- /dev/null +++ b/Intermediate.hs @@ -0,0 +1,174 @@ +module Intermediate where + +import Data.Bits +import Data.List + +import AST +import Defs +import Pretty + + +data BB = BB Id [IRIns] IRTerm + deriving (Show, Eq) + +data Ref = Temp Size Int | Argument Size Name | Global Size Name | Constant Size Value + deriving (Show, Eq, Ord) + +data IRProgram = IRProgram [DVar] [IRFunc] + deriving (Show, Eq) + +data IRFunc = IRFunc (Maybe Type) Name [(Type, Name)] [BB] Id + deriving (Show, Eq) + +data IRIns + = IMov Ref Ref + | IStore Ref Ref + | ILoad Ref Ref + | IAri ArithType Ref Ref + | ICall Name [Ref] + | ICallr Ref Name [Ref] + | IResize Ref Ref + | INop + deriving (Show, Eq) + +data IRTerm + = IJcc CmpType Ref Ref Id Id -- Id Id == if-yes if-no + | IJmp Id + | IRet + | IRetr Ref + | ITermNone + deriving (Show, Eq) + +data ArithType + = AAdd | ASub | AMul | ADiv | AMod + | AAnd | AOr | AXor + | AEq | ANeq | AGt | ALt | AGeq | ALeq + deriving (Show, Eq) + +data CmpType + = CEq | CNeq | CGt | CLt | CGeq | CLeq + deriving (Show, Eq) + + +refSize :: Ref -> Size +refSize (Temp sz _) = sz +refSize (Argument sz _) = sz +refSize (Global sz _) = sz +refSize (Constant sz _) = sz + + +instance Pretty BB where + prettyI i (BB bid inss term) = + "{{{(" ++ show bid ++ ")\n" ++ indent (i+1) ++ + intercalate ("\n" ++ indent (i+1)) (map pretty inss) ++ + (if null inss then "" else "\n" ++ indent (i+1)) ++ + pretty term ++ + "\n" ++ indent i ++ "}}}" + where + indent n = replicate (2*n) ' ' + +instance Pretty Ref where + prettyI _ (Temp sz k) = "t" ++ show k ++ pretty_sizeSuffix sz + prettyI _ (Argument sz n) = "a" ++ n ++ pretty_sizeSuffix sz + prettyI _ (Global sz n) = "g" ++ n ++ pretty_sizeSuffix sz + prettyI _ (Constant sz n) = show n ++ pretty_sizeSuffix sz + +pretty_sizeSuffix :: Size -> String +pretty_sizeSuffix 1 = "B" +pretty_sizeSuffix 2 = "W" +pretty_sizeSuffix 4 = "D" +pretty_sizeSuffix 8 = "Q" +pretty_sizeSuffix sz = "<" ++ show sz ++ ">" + +instance Pretty IRProgram where + prettyI i (IRProgram vars funcs) = + intercalate ("\n" ++ indent i) (map (prettyI i) vars) ++ + "\n" ++ indent i ++ + intercalate ("\n" ++ indent i) (map (prettyI (i+1)) funcs) ++ + "\n" + where + indent n = replicate (2*n) ' ' + +instance Pretty IRFunc where + prettyI i (IRFunc mt n al bbs sid) = + "irfunc" ++ maybe "" ((' ' :) . prettyI i) mt ++ " " ++ n ++ "(" ++ + intercalate "," + (map (\(at,an) -> prettyI i at ++ " " ++ an) al) ++ + ")\n" ++ indent i ++ + intercalate ("\n" ++ indent i) (map (prettyI i) sorted) + where + indent n' = replicate (2*n') ' ' + + sorted = uncurry (++) $ partition (\(BB bid _ _) -> bid == sid) bbs + +instance Pretty IRIns where + prettyI _ (IMov d s) = "mov " ++ pretty d ++ " <- " ++ pretty s + prettyI _ (IStore d s) = "store *" ++ pretty d ++ " <- " ++ pretty s + prettyI _ (ILoad d s) = "load " ++ pretty d ++ " <- *" ++ pretty s + prettyI _ (IAri at d s) = + pretty at ++ " " ++ pretty d ++ ", " ++ pretty s + prettyI _ (ICall n al) = + "call " ++ n ++ " (" ++ intercalate ", " (map pretty al) ++ ")" + prettyI _ (ICallr d n al) = + "call " ++ pretty d ++ " <- " ++ n ++ " (" ++ intercalate ", " (map pretty al) ++ ")" + prettyI _ (IResize d s) = "resize " ++ pretty d ++ " <- " ++ pretty s + prettyI _ INop = "nop" + +instance Pretty IRTerm where + prettyI _ (IJcc ct s1 s2 did1 did2) = + pretty ct ++ " " ++ pretty s1 ++ ", " ++ pretty s2 ++ " -> " ++ show did1 ++ " | " ++ show did2 + prettyI _ (IJmp did) = "jmp " ++ show did + prettyI _ IRet = "ret" + prettyI _ (IRetr ref) = "retr " ++ pretty ref + prettyI _ ITermNone = "?NONE?" + +instance Pretty ArithType where + prettyI _ AAdd = "add" + prettyI _ ASub = "sub" + prettyI _ AMul = "mul" + prettyI _ ADiv = "div" + prettyI _ AMod = "mod" + prettyI _ AAnd = "and" + prettyI _ AOr = "or" + prettyI _ AXor = "xor" + prettyI _ AEq = "eq" + prettyI _ ANeq = "neq" + prettyI _ AGt = "gt" + prettyI _ ALt = "lt" + prettyI _ AGeq = "geq" + prettyI _ ALeq = "leq" + +instance Pretty CmpType where + prettyI _ CEq = "jeq" + prettyI _ CNeq = "jne" + prettyI _ CGt = "jg" + prettyI _ CLt = "jl" + prettyI _ CGeq = "jge" + prettyI _ CLeq = "jle" + + +evaluateArith :: ArithType -> Value -> Value -> Value +evaluateArith at a b = case at of + AAdd -> a + b + ASub -> a - b + AMul -> a * b + ADiv -> if b == 0 then error "Division by zero detected" else a `div` b + AMod -> if b == 0 then error "Modulo by zero detected" else a `mod` b + AAnd -> a .&. b + AOr -> a .|. b + AXor -> a `xor` b + AEq -> if a == b then 1 else 0 + ANeq -> if a /= b then 1 else 0 + AGt -> if a > b then 1 else 0 + ALt -> if a < b then 1 else 0 + AGeq -> if a >= b then 1 else 0 + ALeq -> if a <= b then 1 else 0 + +evaluateCmp :: CmpType -> Value -> Value -> Bool +evaluateCmp ct a b = case ct of + CEq -> a == b + CNeq -> a /= b + CGt -> a > b + CLt -> a < b + CGeq -> a >= b + CLeq -> a <= b diff --git a/LifetimeAnalysis.hs b/LifetimeAnalysis.hs new file mode 100644 index 0000000..a590862 --- /dev/null +++ b/LifetimeAnalysis.hs @@ -0,0 +1,70 @@ +module LifetimeAnalysis(lifetimeAnalysis, Access(..), unAccess) where + +import Data.List +import Data.Maybe + + +data Access a = Write a | Read a + deriving (Show, Eq) + +unAccess :: Access a -> a +unAccess (Write x) = x +unAccess (Read x) = x + +type BB a = ([[Access a]], [Int]) + +lifetimeAnalysis :: Eq a => a -> [BB a] -> [[Bool]] +lifetimeAnalysis target bbs = + foldOr2 $ map (\p -> markAliveFrom bbs target p (emptyMark bbs)) occurrences + where + occurrences :: [(Int, Int)] + occurrences = do + (i, bb) <- zip [0..] bbs + (j, ins) <- zip [0..] (fst bb) + if ins `contains` Write target || ins `contains` Read target + then [(i, j)] + else [] + +emptyMark :: [BB a] -> [[Bool]] +emptyMark bbs = flip map bbs $ \(inss, _) -> map (const False) inss + +-- Assumes `target` is known to be alive at `pos`. +markAliveFrom :: Eq a => [BB a] -> a -> (Int, Int) -> [[Bool]] -> [[Bool]] +markAliveFrom bbs target pos topmark = setAt2 (maybe topmark id $ goNoCheck pos topmark) pos True + where + goNoCheck :: (Int, Int) -> [[Bool]] -> Maybe [[Bool]] + goNoCheck (i, j) mark = + let suc = flip filter (successors bbs (i, j)) $ \(i', j') -> + not $ mark !! i' !! j' + markset = setAt2 mark (i, j) True + in case catMaybes [go s markset | s <- suc] of + [] -> Nothing + l -> Just $ foldOr2 l + + go :: (Int, Int) -> [[Bool]] -> Maybe [[Bool]] + go (i, j) mark + | fst (bbs !! i) !! j `contains` Write target = Nothing + | fst (bbs !! i) !! j `contains` Read target = Just $ setAt2 mark (i, j) True + | otherwise = goNoCheck (i, j) mark + +successors :: [BB a] -> (Int, Int) -> [(Int, Int)] +successors bbs (i, j) = + let (inss, nexts) = bbs !! i + in if j < length inss - 1 + then [(i, j + 1)] + else [(n, 0) | n <- nexts] + +contains :: Eq a => [a] -> a -> Bool +contains l v = isJust $ find (== v) l + +modifyAt2 :: [[a]] -> (Int, Int) -> (a -> a) -> [[a]] +modifyAt2 l (i, j) f = modifyAt l i $ \li -> modifyAt li j f + +modifyAt :: [a] -> Int -> (a -> a) -> [a] +modifyAt l i f = let (pre, v : post) = splitAt i l in pre ++ f v : post + +setAt2 :: [[a]] -> (Int, Int) -> a -> [[a]] +setAt2 l p v = modifyAt2 l p (const v) + +foldOr2 :: [[[Bool]]] -> [[Bool]] +foldOr2 = foldl1 (\m1 m2 -> map (map (uncurry (||)) . uncurry zip) (zip m1 m2)) diff --git a/Main.hs b/Main.hs new file mode 100644 index 0000000..f1307c7 --- /dev/null +++ b/Main.hs @@ -0,0 +1,56 @@ +module Main where + +import System.Exit +import System.IO +import System.Process +import Debug.Trace + +import BuildIR +import CodeGen +import Defs +import Optimiser +import Pretty +import ProgramParser +import TypeCheck +import Verify + + +infix 2 +() :: (a -> Error b) -> String -> a -> Error b +f pre = \a -> case f a of + Left e -> Left $ pre ++ ": " ++ e + Right x -> Right x + + +tracePrettyId :: Pretty a => a -> a +tracePrettyId x = trace (pretty x) x + +eitherToIO :: Either String a -> IO a +eitherToIO = either die return + + +main :: IO () +main = do + source <- getContents + + let eres = return source + >>= parseProgram "Parse error" + -- >>= return . traceShowId + >>= typeCheck "Type error" + >>= buildIR "IR building error" + -- >>= return . tracePrettyId + >>= optimise "Error while optimising" + >>= verify "Verify error" + >>= return . tracePrettyId + >>= codegen "Codegen error" + + asm <- eitherToIO eres + -- hPutStr stderr asm + + writeFile "z_output.asm" asm + + hPutStrLn stderr "Assembling with yasm..." + callCommand "yasm -w+all -fmacho64 z_output.asm -o z_output.o" + + hPutStrLn stderr "Linking with ld..." + callCommand "ld z_output.o -o z_output" diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..3ce6d7e --- /dev/null +++ b/Makefile @@ -0,0 +1,26 @@ +RUNFLAGS = +GHCFLAGS = -Wall -Widentities -odir obj -hidir obj +ifneq ($(PROFILE),) + RUNFLAGS += +RTS -xc + GHCFLAGS += -prof -fprof-auto +else + GHCFLAGS += -O3 +endif + +TARGET = main + +.PHONY: all clean run + +all: $(TARGET) + +clean: + rm -f $(TARGET) + rm -rf obj + +run: $(TARGET) + ./$(TARGET) $(RUNFLAGS) + + +$(TARGET): $(wildcard *.hs) + @mkdir -p obj + ghc $(GHCFLAGS) Main.hs -o $@ diff --git a/Optimiser.hs b/Optimiser.hs new file mode 100644 index 0000000..6e6227c --- /dev/null +++ b/Optimiser.hs @@ -0,0 +1,252 @@ +module Optimiser(optimise) where + +import Data.List +import Data.Maybe +import qualified Data.Map.Strict as Map +import Debug.Trace + +import Defs +import Intermediate +import ReplaceRefs +import Utils + + +type Optimisation = IRProgram -> IRProgram +type FuncOptimisation = IRFunc -> IRFunc + +optimise :: IRProgram -> Error IRProgram +optimise prog = + let master = foldl1 (.) (reverse optimisations) {-. trace "-- OPT PASS --"-} + reslist = iterate master prog + pairs = zip reslist (tail reslist) + in Right $ fst $ fromJust $ find (uncurry (==)) pairs + where + optimisations = map funcopt $ + -- [chainJumps, removeUnusedBlocks] + [chainJumps, mergeTerminators, looseJumps, removeUnusedBlocks, identityOps, + constantPropagate, removeNops, movPush, evaluateInstructions, evaluateTerminators] + + +funcopt :: FuncOptimisation -> Optimisation +funcopt fo (IRProgram vars funcs) = IRProgram vars (map fo funcs) + + +chainJumps :: FuncOptimisation +chainJumps (IRFunc rt name al bbs sid) = IRFunc rt name al bbs' sid + where + bbs' = snd $ last $ takeWhile fst $ iterate (mergeChain . snd) (True, bbs) + + mergeChain :: [BB] -> (Bool, [BB]) + mergeChain [] = (False, []) + mergeChain bbs2 = case findIndex isSuitable bbs2 of + Nothing -> (False, bbs2) + Just idx -> + let (BB bid1 inss1 (IJmp target), rest) = + (bbs2 !! idx, take idx bbs2 ++ drop (idx+1) bbs2) + [BB _ inss2 term2] = filter (\(BB bid _ _) -> bid == target) rest + merged = BB bid1 (inss1 ++ inss2) term2 + in (True, merged : rest) + where + hasJmpTo :: Id -> BB -> Bool + hasJmpTo i (BB _ _ (IJmp i')) = i == i' + hasJmpTo _ _ = False + + isSuitable :: BB -> Bool + isSuitable (BB _ _ (IJmp target)) = sum (map (fromEnum . hasJmpTo target) bbs2) == 1 + isSuitable _ = False + +mergeTerminators :: FuncOptimisation +mergeTerminators (IRFunc rt name al bbs sid) = IRFunc rt name al bbs' sid + where + bbs' = flip map bbs $ \bb@(BB bid inss term) -> case term of + IJmp i -> case find ((== i) . fst) singles of + Just (_, t) -> BB bid inss t + Nothing -> bb + _ -> bb + + singles = map (\(BB i _ t) -> (i, t)) $ filter (\(BB _ inss _) -> null inss) bbs + +looseJumps :: FuncOptimisation +looseJumps (IRFunc rt name al bbs sid) = IRFunc rt name al bbs' sid + where + bbs' = flip map bbs $ \bb@(BB bid inss term) -> case term of + IJmp i -> BB bid inss (IJmp (translate i)) + IJcc ct r1 r2 i j -> BB bid inss (IJcc ct r1 r2 (translate i) (translate j)) + _ -> bb + + translate i = fromMaybe i $ Map.lookup i transmap + + transmap = Map.fromList $ catMaybes $ flip map bbs $ \bb -> case bb of + BB bid [] (IJmp i) -> Just (bid, i) + _ -> Nothing + +removeUnusedBlocks :: FuncOptimisation +removeUnusedBlocks (IRFunc rt name al bbs sid) = IRFunc rt name al bbs' sid + where + bbs' = filter isReachable bbs + + isReachable :: BB -> Bool + isReachable (BB bid _ _) + | bid == sid = True + | otherwise = isJust $ flip find bbs $ \(BB _ _ term) -> case term of + IJcc _ _ _ i1 i2 -> i1 == bid || i2 == bid + IJmp i -> i == bid + _ -> False + +identityOps :: FuncOptimisation +identityOps (IRFunc rt name al bbs sid) = IRFunc rt name al (map go bbs) sid + where + go :: BB -> BB + go (BB bid inss term) = BB bid (catMaybes $ map goI inss) term + + goI :: IRIns -> Maybe IRIns + goI (IAri AAdd _ (Constant _ 0)) = Nothing + goI (IAri ASub _ (Constant _ 0)) = Nothing + goI (IAri AMul _ (Constant _ 1)) = Nothing + goI (IAri ADiv _ (Constant _ 1)) = Nothing + goI i = Just i + +constantPropagate :: FuncOptimisation +constantPropagate (IRFunc rt name al bbs sid) = IRFunc rt name al bbs' sid + where + alltemps = findAllTemps' bbs + consttemps = catMaybes $ flip map alltemps $ \ref -> + let locs = findMutations' bbs ref + loc = head locs + ins = insAt bbs loc + + isIMov (IMov _ _) = True + isIMov _ = False + in {-trace ("Muts of " ++ show ref ++ ": " ++ show locs ++ ": " ++ + show (map (insAt bbs) locs)) $-} + if length locs == 1 && isIMov ins + then Just (loc, ins) + else Nothing + + bbs' = case consttemps of + [] -> bbs + ((loc, IMov ref value) : _) -> + replaceRefsBBList ref value (nopifyInsAt bbs loc) + _ -> undefined + +removeNops :: FuncOptimisation +removeNops (IRFunc rt name al bbs sid) = + IRFunc rt name al (map go bbs) sid + where + go (BB bid inss term) = BB bid (filter (not . isNop) inss) term + isNop INop = True + isNop _ = False + +movPush :: FuncOptimisation +movPush (IRFunc rt name al bbs sid) = IRFunc rt name al bbs' sid + where + bbs' = map goBB bbs + + goBB :: BB -> BB + goBB (BB bid inss term) = BB bid (go inss) term + + go :: [IRIns] -> [IRIns] + go [] = [] + go (ins@(IMov d _) : rest) | isJust (find (== d) (findAllRefsInss rest)) = push ins rest + go (ins : rest) = ins : go rest + + push :: IRIns -> [IRIns] -> [IRIns] + push mov [] = [mov] + push mov@(IMov d s) (ins@(IMov d' s') : rest) + | d' == d = if s' == d then push mov rest else push ins rest + | otherwise = replaceRefsIns d s ins : push mov rest + push mov@(IMov d s) (ins@(IResize d' s') : rest) + | d' == d = if s' == d then push mov rest else push ins rest + | otherwise = replaceRefsIns d s ins : push mov rest + push mov@(IMov d s) (ins@(ILoad d' _) : rest) + | d' == d = mov : ins : go rest + | otherwise = replaceRefsIns d s ins : push mov rest + push mov@(IMov d s) (ins@(IAri at d' s') : rest) + | d' == d = case (s, s') of + (Constant sza a, Constant szb b) + | sza == szb -> push (IMov d (Constant sza $ evaluateArith at a b)) rest + | otherwise -> error $ "Inconsistent sizes in " ++ show mov ++ "; " ++ show ins + _ -> mov : ins : go rest + | otherwise = replaceRefsIns d s ins : push mov rest + push mov@(IMov d s) (ins@(ICallr d' _ _) : rest) + | d' == d = mov : ins : go rest + | otherwise = replaceRefsIns d s ins : push mov rest + push mov@(IMov d s) (ins@(IStore _ _) : rest) = replaceRefsIns d s ins : push mov rest + push mov@(IMov d s) (ins@(ICall _ _) : rest) = replaceRefsIns d s ins : push mov rest + push mov (ins@INop : rest) = ins : push mov rest + push _ _ = undefined + +evaluateInstructions :: FuncOptimisation +evaluateInstructions (IRFunc rt name al bbs sid) = IRFunc rt name al (map goBB bbs) sid + where + goBB :: BB -> BB + goBB (BB bid inss term) = BB bid (map goI inss) term + + goI :: IRIns -> IRIns + goI (IResize ref (Constant _ v)) = IMov ref $ Constant (refSize ref) $ truncValue (refSize ref) v + goI ins = ins + + truncValue :: Size -> Value -> Value + truncValue sz v = fromIntegral $ (fromIntegral v :: Integer) `mod` (2 ^ (8 * sz)) + +evaluateTerminators :: FuncOptimisation +evaluateTerminators (IRFunc rt name al bbs sid) = IRFunc rt name al bbs' sid + where + bbs' = map (\(BB bid inss term) -> BB bid inss (go term)) bbs + + go :: IRTerm -> IRTerm + go term@(IJcc ct (Constant sza a) (Constant szb b) i1 i2) + | sza /= szb = error $ "Inconsistent sizes in " ++ show term + | evaluateCmp ct a b = IJmp i1 + | otherwise = IJmp i2 + go term = term + + +insAt :: [BB] -> (Int, Int) -> IRIns +insAt bbs (i, j) = + let (BB _ inss _) = bbs !! i + in inss !! j + +nopifyInsAt :: [BB] -> (Int, Int) -> [BB] +nopifyInsAt bbs (i, j) = + let (pre, BB bid inss term : post) = splitAt i bbs + (ipre, _ : ipost) = splitAt j inss + in pre ++ BB bid (ipre ++ INop : ipost) term : post + +findMutations :: BB -> Ref -> [Int] +findMutations (BB _ inss _) ref = + catMaybes $ flip map (zip inss [0..]) $ \(ins, idx) -> case ins of + (IMov r _) | r == ref -> Just idx + (IAri _ r _) | r == ref -> Just idx + (ICallr r _ _) | r == ref -> Just idx + _ -> Nothing + +findMutations' :: [BB] -> Ref -> [(Int, Int)] +findMutations' bbs ref = + [(i, j) | (bb, i) <- zip bbs [0..], j <- findMutations bb ref] + +findAllRefs :: BB -> [Ref] +findAllRefs (BB _ inss _) = findAllRefsInss inss + +findAllRefsInss :: [IRIns] -> [Ref] +findAllRefsInss inss = uniq $ sort $ concatMap go inss + where + go (IMov a b) = [a, b] + go (IStore a b) = [a, b] + go (ILoad a b) = [a, b] + go (IAri _ a b) = [a, b] + go (ICall _ al) = al + go (ICallr a _ al) = a : al + go (IResize a b) = [a, b] + go INop = [] + +-- findAllRefs' :: [BB] -> [Ref] +-- findAllRefs' = uniq . sort . concatMap findAllRefs + +findAllTemps :: BB -> [Ref] +findAllTemps bb = flip filter (findAllRefs bb) $ \ref -> case ref of + (Temp _ _) -> True + _ -> False + +findAllTemps' :: [BB] -> [Ref] +findAllTemps' = concatMap findAllTemps diff --git a/Pretty.hs b/Pretty.hs new file mode 100644 index 0000000..f2f36d8 --- /dev/null +++ b/Pretty.hs @@ -0,0 +1,14 @@ +module Pretty where + +import Data.List + + +class Pretty a where + pretty :: a -> String + pretty = prettyI 0 + + prettyI :: Int -> a -> String + + +instance Pretty a => Pretty [a] where + prettyI i l = "[" ++ intercalate ", " (map (prettyI i) l) ++ "]" diff --git a/ProgramParser.hs b/ProgramParser.hs new file mode 100644 index 0000000..2cacaf5 --- /dev/null +++ b/ProgramParser.hs @@ -0,0 +1,269 @@ +module ProgramParser(parseProgram) where + +import Control.Monad +import Data.Char +import Text.Parsec +import qualified Text.Parsec.Expr as E + +import AST +import Defs + + +type Parser = Parsec String () + + +parseProgram :: String -> Either String Program +parseProgram s = case parse pProgram "" s of + Left err -> Left $ show err + Right p -> Right p + +pProgram :: Parser Program +pProgram = do + pWhiteComment + decls <- many pDecl + eof + return $ Program (lefts decls) (rights decls) + +pDecl :: Parser (Either DVar DFunc) +pDecl = (Right <$> pDFunc) <|> (Left <$> pDVar) + +pDFunc :: Parser DFunc +pDFunc = do + symbol "func" + rt <- (Just <$> pType) <|> return Nothing + n <- pName + symbol "(" + args <- sepBy pTypeAndName (symbol ",") + symbol ")" + body <- pBlock + return $ DFunc rt n args body + +pDVar :: Parser DVar +pDVar = do + t <- pType + n <- pName + symbol ":=" + e <- pExpression + symbol ";" + return $ DVar t n e + +pTypeAndName :: Parser (Type, Name) +pTypeAndName = (,) <$> pType <*> pName + +pType :: Parser Type +pType = do + t <- pBasicType + (do + symbol "[" + msz <- optionMaybe pInteger + symbol "]" + return $ TArr t msz) <|> return t + +pBasicType :: Parser Type +pBasicType = (symbol "int" >> return TInt) <|> (symbol "char" >> return TChar) + +pBlock :: Parser Block +pBlock = do + symbol "{" + body <- many pStatement + symbol "}" + return $ Block body + +pStatement :: Parser Statement +pStatement = pSIf <|> pSWhile <|> pSReturn <|> pSDecl <|> pSAs <|> pSExpr + +pSDecl :: Parser Statement +pSDecl = do + (t, n) <- try $ do + t <- pType + n <- pName + symbol ":=" + return (t, n) + e <- pExpression + symbol ";" + return $ SDecl t n e + +pSAs :: Parser Statement +pSAs = do + n <- try $ pAsExpression <* symbol "=" + e <- pExpression + symbol ";" + return $ SAs n e + +pSIf :: Parser Statement +pSIf = do + symbol "if" + symbol "(" + cond <- pExpression + symbol ")" + bl1 <- pBlock + bl2 <- try (symbol "else" >> pBlock) <|> return (Block []) + return $ SIf cond bl1 bl2 + +pSWhile :: Parser Statement +pSWhile = do + symbol "while" + symbol "(" + cond <- pExpression + symbol ")" + bl <- pBlock + return $ SWhile cond bl + +pSReturn :: Parser Statement +pSReturn = do + symbol "return" + SReturn <$> ((symbol ";" >> return Nothing) <|> + ((Just <$> pExpression) <* symbol ";")) + +pSExpr :: Parser Statement +pSExpr = do + e <- pExpression + symbol ";" + return $ SExpr e + +pExpression :: Parser Expression +pExpression = E.buildExpressionParser optable litparser + where + optable = + [[E.Infix (symbol "**" >> return (mkEBin BOPow)) E.AssocRight], + [E.Infix (symbol "*" >> return (mkEBin BOMul)) E.AssocLeft, + E.Infix (symbol "/" >> return (mkEBin BODiv)) E.AssocLeft, + E.Infix (symbol "%" >> return (mkEBin BOMod)) E.AssocLeft], + [E.Infix (symbol "+" >> return (mkEBin BOAdd)) E.AssocLeft, + E.Infix (symbol "-" >> return (mkEBin BOSub)) E.AssocLeft], + [E.Infix (symbol ">=" >> return (mkEBin BOGeq)) E.AssocNone, + E.Infix (symbol "<=" >> return (mkEBin BOLeq)) E.AssocNone, + E.Infix (symbol ">" >> return (mkEBin BOGt)) E.AssocNone, + E.Infix (symbol "<" >> return (mkEBin BOLt)) E.AssocNone, + E.Infix (symbol "==" >> return (mkEBin BOEq)) E.AssocNone, + E.Infix (symbol "!=" >> return (mkEBin BONeq)) E.AssocNone], + [E.Infix (symbol "&&" >> return (mkEBin BOAnd)) E.AssocLeft], + [E.Infix (symbol "||" >> return (mkEBin BOOr)) E.AssocLeft]] + + mkEBin :: BinaryOp -> Expression -> Expression -> Expression + mkEBin bo a b = EBin bo a b Nothing + + mkELit :: Literal -> Expression + mkELit l = ELit l Nothing + + litparser :: Parser Expression + litparser = do + pops <- many pPrefixOp + e <- pParenExpr <|> pENew <|> pCastExpr <|> (mkELit <$> pLiteral) + subs <- many $ between (symbol "[") (symbol "]") pExpression + let e' = foldl (\ex sub -> ESubscript ex sub Nothing) e subs + e'' = foldl (\ex pop -> EUn pop ex Nothing) e' pops + return e'' + +pAsExpression :: Parser AsExpression +pAsExpression = do + n <- pName + subs <- many $ between (symbol "[") (symbol "]") pExpression + return $ foldl (\ae expr -> AESubscript ae expr Nothing) (AEVar n Nothing) subs + +pPrefixOp :: Parser UnaryOp +pPrefixOp = (symbol "!" >> return UONot) <|> + (symbol "-" >> return UONeg) + +pParenExpr :: Parser Expression +pParenExpr = do + symbol "(" + e <- pExpression + symbol ")" + return e + +pCastExpr :: Parser Expression +pCastExpr = do + t <- try $ pType <* symbol "(" + e <- pExpression + symbol ")" + return $ ECast t e + +pENew :: Parser Expression +pENew = do + symbol "new" + t <- pBasicType + symbol "[" + e <- pExpression + symbol "]" + return $ ENew t e + +pLiteral :: Parser Literal +pLiteral = (LInt <$> pInteger) <|> (LChar <$> pCharLit) <|> pLCall <|> (LVar <$> pName) + +pCharLit :: Parser Char +pCharLit = do + void $ char '\'' + c <- pStringChar + void $ char '\'' + pWhiteComment + return c + +pStringChar :: Parser Char +pStringChar = + (char '\\' >> ((char 'n' >> return '\n') <|> + (char 'r' >> return '\r') <|> + (char 't' >> return '\t') <|> + (char '0' >> return '\0') <|> + (char 'x' >> pHexDigit >>= \a -> pHexDigit >>= \b -> return (chr $ 16 * a + b)))) <|> + anyToken + where + pHexDigit :: Parser Int + pHexDigit = (subtract 48 . fromEnum <$> digit) + <|> ((+ (10 - 97)) . ord <$> oneOf "abcdef") + <|> ((+ (10 - 65)) . ord <$> oneOf "ABCDEF") + +pLCall :: Parser Literal +pLCall = do + n <- try $ pName <* symbol "(" + al <- sepBy pExpression (symbol ",") + symbol ")" + return $ LCall n al + + +pName :: Parser Name +pName = do + c0 <- satisfy (\c -> isAlpha c || c == '_') + cr <- many $ satisfy (\c -> isAlpha c || isDigit c || c == '_') + pWhiteComment + return $ c0 : cr + +pInteger :: Parser Integer +pInteger = read <$> many1 (satisfy isDigit) <* pWhiteComment + + +symbol :: String -> Parser () +symbol "" = error "symbol \"\"" +symbol s = try $ do + void $ string s + when (isAlpha (last s)) $ void $ notFollowedBy (satisfy isAlpha) + when (isDigit (last s)) $ void $ notFollowedBy (satisfy isDigit) + pWhiteComment + +pWhiteComment :: Parser () +pWhiteComment = void $ pWhite >> endBy pComment pWhite + +pWhite :: Parser () +pWhite = void $ many (oneOf " \t\n") + +pComment :: Parser () +pComment = pLineComment <|> pBlockComment + +pLineComment :: Parser () +pLineComment = do + void $ try $ string "//" + void $ many (satisfy (/= '\n')) + eof <|> void (char '\n') + +pBlockComment :: Parser () +pBlockComment = do + void $ try $ string "/*" + void $ sepEndBy (manyTill anyToken (lookAhead $ try (string "/*") <|> try (string "*/"))) + pBlockComment + void $ string "*/" + +lefts :: [Either a b] -> [a] +lefts = foldr (\e l -> either (:l) (const l) e) [] + +rights :: [Either a b] -> [b] +rights = foldr (\e l -> either (const l) (:l) e) [] diff --git a/RegAlloc.hs b/RegAlloc.hs new file mode 100644 index 0000000..d2b1717 --- /dev/null +++ b/RegAlloc.hs @@ -0,0 +1,87 @@ +module RegAlloc(regalloc, Allocation(..)) where + +import Data.Function +import Data.List +import Data.Maybe +import qualified Data.Map.Strict as Map +import Debug.Trace + +import Utils + + +-- Follows the Linear Scan Register Allocation algorithm specified in: +-- https://www.cs.purdue.edu/homes/suresh/502-Fall2008/papers/linear-scan.pdf + +-- Test case: +-- regalloc [((1, 3), "a"), ((2, 2), "x"), ((2, 10), "b"), ((3, 7), "c"), ((4, 5), "d")] ["ra", "rb"] + + +data Allocation a = AllocReg a | AllocMem + deriving Show + +type Interval = (Int, Int) + +data State a = State {stActive :: [Int], stAlloc :: [Allocation a], stFreeRegs :: [a]} + deriving Show + +regalloc :: (Show a, Show b, Ord a, Ord b) + => [(Interval, b)] -- [(live interval, name of variable)] + -> [a] -- the available registers + -> [(b, b)] -- pairs to be allocated to the same register if possible + -> Map.Map b (Allocation a) -- allocation map +regalloc vars' regs aliaspairs = + let foldfunc = \st' ((int, name), index) -> + let st = expireOldIntervals st' int + wanted = findWantedAliases aliaspairs name + wantedregs = uniq $ sort $ catMaybes $ flip map wanted $ \n -> + case findIndex (== n) intnames of + Nothing -> Nothing + Just idx | idx >= length (stAlloc st) -> Nothing + | otherwise -> case stAlloc st !! idx of + AllocMem -> Nothing + AllocReg r -> Just r + in if length (stActive st) == length regs + then spillAtInterval st index + else let -- ([regchoice], fr) = splitAt 1 (stFreeRegs st) + (regchoice, fr) = case find (`elem` wantedregs) (stFreeRegs st) of + Nothing -> (head (stFreeRegs st), tail (stFreeRegs st)) + Just wr -> trace ("Pair-allocated " ++ show name ++ " in " ++ show wr) $ + (wr, stFreeRegs st \\ [wr]) + allocrev = stAlloc st ++ [AllocReg regchoice] + active = sortBy (compare `on` snd . (ints !!)) $ index : stActive st + in State active allocrev fr + in Map.fromList $ zip intnames $ stAlloc $ + foldl foldfunc (State [] [] regs) (zip vars [0..]) + where + vars = sortBy (compare `on` fst . fst) vars' + (ints, intnames) = (map fst vars, map snd vars) + + expireOldIntervals :: State a -> Interval -> State a + expireOldIntervals st (intstart, _) = + let (dropped, active) = span ((< intstart) . snd . (ints !!)) (stActive st) + fr = selectAllocRegs (map (stAlloc st !!) dropped) ++ stFreeRegs st + in State active (stAlloc st) fr + + spillAtInterval :: State a -> Int -> State a + spillAtInterval st index = + let spill = last (stActive st) + in if snd (ints !! spill) > snd (ints !! index) + then let alloc = setAt spill AllocMem (stAlloc st) ++ [stAlloc st !! spill] + active = sortBy (compare `on` snd . (ints !!)) $ + index : (stActive st \\ [spill]) + in State active alloc (stFreeRegs st) + else State (stActive st) (stAlloc st ++ [AllocMem]) (stFreeRegs st) + + findWantedAliases :: (Ord b) => [(b, b)] -> b -> [b] + findWantedAliases pairs x = + uniq $ sort $ map snd (filter ((== x) . fst) pairs) ++ map fst (filter ((== x) . snd) pairs) + +selectAllocRegs :: [Allocation a] -> [a] +selectAllocRegs allocs = catMaybes $ flip map allocs $ \alloc -> case alloc of + (AllocReg r) -> Just r + AllocMem -> Nothing + +setAt :: Int -> a -> [a] -> [a] +setAt idx v l = + let (pre, _ : post) = splitAt idx l + in pre ++ v : post diff --git a/ReplaceRefs.hs b/ReplaceRefs.hs new file mode 100644 index 0000000..3ab73c3 --- /dev/null +++ b/ReplaceRefs.hs @@ -0,0 +1,36 @@ +module ReplaceRefs + (replaceRefsIns, replaceRefsTerm, replaceRefsBB, replaceRefsBBList) + where + +import Intermediate + + +replaceRefsIns :: Ref -> Ref -> IRIns -> IRIns +replaceRefsIns from to (IMov a b) = IMov (trans from to a) (trans from to b) +replaceRefsIns from to (IStore a b) = IStore (trans from to a) (trans from to b) +replaceRefsIns from to (ILoad a b) = ILoad (trans from to a) (trans from to b) +replaceRefsIns from to (IAri at a b) = IAri at (trans from to a) (trans from to b) +replaceRefsIns from to (ICall n al) = ICall n (map (trans from to) al) +replaceRefsIns from to (ICallr a n al) = ICallr (trans from to a) n (map (trans from to) al) +replaceRefsIns from to (IResize a b) = IResize (trans from to a) (trans from to b) +replaceRefsIns _ _ INop = INop + +replaceRefsTerm :: Ref -> Ref -> IRTerm -> IRTerm +replaceRefsTerm from to (IJcc ct a b i1 i2) = IJcc ct (trans from to a) (trans from to b) i1 i2 +replaceRefsTerm _ _ (IJmp i) = IJmp i +replaceRefsTerm _ _ IRet = IRet +replaceRefsTerm from to (IRetr a) = IRetr (trans from to a) +replaceRefsTerm _ _ ITermNone = ITermNone + +replaceRefsBB :: Ref -> Ref -> BB -> BB +replaceRefsBB from to (BB bid inss term) = + BB bid (map (replaceRefsIns from to) inss) (replaceRefsTerm from to term) + +replaceRefsBBList :: Ref -> Ref -> [BB] -> [BB] +replaceRefsBBList from to bbs = map (\bb -> replaceRefsBB from to bb) bbs + + +trans :: Ref -> Ref -> Ref -> Ref +trans from to ref + | ref == from = to + | otherwise = ref diff --git a/TypeCheck.hs b/TypeCheck.hs new file mode 100644 index 0000000..36d98c8 --- /dev/null +++ b/TypeCheck.hs @@ -0,0 +1,234 @@ +module TypeCheck(typeCheck) where + +import Control.Monad +import Data.Maybe +import qualified Data.Map.Strict as Map + +import AST +import Defs +import Pretty +import TypeRules + + +data DBItem = DBVar Type | DBFunc (Maybe Type) [Type] + +type TypeDB = [Map.Map Name DBItem] + +dbFind :: TypeDB -> Name -> Maybe DBItem +dbFind db name = findJust $ map (Map.lookup name) db + where findJust [] = Nothing + findJust (Just x:_) = Just x + findJust (Nothing:l) = findJust l + +dbFindTop :: TypeDB -> Name -> Maybe DBItem +dbFindTop [] _ = error "dbFindTop on empty scope stack" +dbFindTop (m:_) name = Map.lookup name m + +dbSet :: TypeDB -> Name -> DBItem -> TypeDB +dbSet [] _ _ = error "dbSet on empty scope stack" +dbSet (m:ms) name val = Map.insert name val m : ms + +emptyDB :: TypeDB +emptyDB = [Map.fromList + [("putc", DBFunc Nothing [TChar]), + ("putint", DBFunc Nothing [TInt]), + ("getc", DBFunc (Just TInt) []), + ("_builtin_malloc", DBFunc (Just $ TArr TChar Nothing) [TInt])]] + +withScope :: TypeDB -> (TypeDB -> a) -> a +withScope db f = f (Map.empty : db) + + +typeCheck :: Program -> Error Program +typeCheck (Program vars funcs) = do + db <- foldM registerDVar emptyDB vars + >>= \db' -> foldM registerDFunc db' funcs + vars' <- mapM (annotateDVar db) vars + funcs' <- mapM (annotateDFunc db) funcs + return $ Program vars' funcs' + + +registerDVar :: TypeDB -> DVar -> Error TypeDB +registerDVar db (DVar t n _) = case dbFind db n of + Nothing -> return $ dbSet db n (DBVar t) + Just _ -> Left $ "Duplicate top-level name '" ++ n ++ "'" + +registerDFunc :: TypeDB -> DFunc -> Error TypeDB +registerDFunc db (DFunc rt n al _) = case dbFind db n of + Nothing -> return $ dbSet db n (DBFunc rt (map fst al)) + Just _ -> Left $ "Duplicate top-level name '" ++ n ++ "'" + + +annotateDVar :: TypeDB -> DVar -> Error DVar +annotateDVar db (DVar toptype name expr) = do + expr' <- annotateExpr db expr + when (isNothing (typeof expr')) $ + Left $ "Cannot assign void value in global declaration of " ++ name + let typ = fromJust $ typeof expr' + if canCoerce typ toptype + then return $ DVar toptype name expr' + else Left $ "Cannot assign a value of type " ++ pretty typ ++ + " to a variable of type " ++ pretty toptype + +annotateDFunc :: TypeDB -> DFunc -> Error DFunc +annotateDFunc db dfunc@(DFunc rettype name arglist block) = do + when (name == "main" && rettype /= Just TInt) $ + Left $ "Function 'main' should return an int" + let db' = foldl registerArg db arglist + block' <- annotateBlock dfunc db' block + return $ DFunc rettype name arglist block' + where + registerArg :: TypeDB -> (Type, Name) -> TypeDB + registerArg db' (t, n) = dbSet db' n (DBVar t) + +annotateBlock :: DFunc -> TypeDB -> Block -> Error Block +annotateBlock dfunc db (Block sts) = + Block . snd <$> foldM (\(db', l) st -> + (\(db'', st') -> (db'', l ++ [st'])) <$> annotateStatement dfunc db' st) + (db, []) sts + +annotateStatement :: DFunc -> TypeDB -> Statement -> Error (TypeDB, Statement) +annotateStatement _ db (SDecl toptype name expr) = do + expr' <- annotateExpr db expr + when (isNothing (typeof expr')) $ + Left $ "Cannot assign void value in declaration of " ++ name + when (isJust (dbFindTop db name)) $ + Left $ "Duplicate declaration of variable " ++ name + let typ = fromJust $ typeof expr' + if canCoerce typ toptype + then return $ (dbSet db name (DBVar toptype), SDecl toptype name expr') + else Left $ "Cannot assign a value of type " ++ pretty typ ++ + " to a variable of type " ++ pretty toptype +annotateStatement _ db (SAs ae expr) = do + ae' <- annotateAsExpr db ae + expr' <- annotateExpr db expr + when (isNothing (typeof expr')) $ + Left $ "Cannot assign a void value in assignment of " ++ pretty ae + let typ = fromJust $ typeof expr' + let aetyp = fromJust $ typeof ae' + when (not $ isBasicType aetyp) $ + Left $ "Cannot assign to a location of type " ++ pretty aetyp + when (not $ canCoerce typ aetyp) $ + Left $ "Cannot assign a value of type " ++ pretty typ ++ + " to a location of type " ++ pretty aetyp + return (db, SAs ae' expr') +annotateStatement dfunc db (SIf expr bl1 bl2) = do + expr' <- annotateExpr db expr + when (isNothing (typeof expr')) $ + Left $ "Cannot use void value in 'if' condition" + when (not $ canCoerce (fromJust (typeof expr')) TInt) $ + Left $ "Cannot use type " ++ pretty (fromJust (typeof expr')) ++ " in 'if' condition" + bl1' <- withScope db $ flip (annotateBlock dfunc) bl1 + bl2' <- withScope db $ flip (annotateBlock dfunc) bl2 + return (db, SIf expr' bl1' bl2') +annotateStatement dfunc db (SWhile expr bl) = do + expr' <- annotateExpr db expr + when (isNothing (typeof expr')) $ + Left $ "Cannot use void value in 'while' condition" + when (not $ canCoerce (fromJust (typeof expr')) TInt) $ + Left $ "Cannot use type " ++ pretty (fromJust (typeof expr')) ++ " in 'while' condition" + bl' <- withScope db $ flip (annotateBlock dfunc) bl + return (db, SWhile expr' bl') +annotateStatement (DFunc Nothing _ _ _) db (SReturn Nothing) = + return (db, SReturn Nothing) +annotateStatement (DFunc (Just _) _ _ _) _ (SReturn Nothing) = + Left "Cannot return void value from non-void function" +annotateStatement (DFunc mrt _ _ _) db (SReturn (Just expr)) = do + expr' <- annotateExpr db expr + case mrt of + Nothing -> Left $ "Cannot return non-void value from void function" + Just rt -> do + when (isNothing (typeof expr')) $ + Left $ "Cannot use void value in 'return'" + when (not $ canCoerce (fromJust (typeof expr')) rt) $ + Left $ "Cannot coerce type " ++ pretty (fromJust (typeof expr')) ++ " to " ++ pretty rt ++ + " in 'return'" + return (db, SReturn (Just expr')) +annotateStatement _ db (SExpr expr) = (\expr' -> (db, SExpr expr')) <$> annotateExpr db expr + +annotateExpr :: TypeDB -> Expression -> Error Expression +annotateExpr db (EBin bo e1 e2 _) = do + e1' <- annotateExpr db e1 + e2' <- annotateExpr db e2 + when (isNothing (typeof e1')) $ Left $ "Use of void value in expression: " ++ show e1' + when (isNothing (typeof e2')) $ Left $ "Use of void value in expression: " ++ show e2' + let t1 = fromJust $ typeof e1' + t2 = fromJust $ typeof e2' + rt <- let errval = Left $ "Operator " ++ pretty bo ++ " doesn't take" ++ + " arguments of types " ++ pretty t1 ++ " and " ++ + pretty t2 + in maybe errval return $ retTypeBO bo t1 t2 + return $ EBin bo e1' e2' (Just rt) +annotateExpr db (EUn uo e _) = do + e' <- annotateExpr db e + when (isNothing (typeof e')) $ Left "Use of void value in expression" + let t = fromJust $ typeof e' + rt <- let errval = Left $ "Unary operator " ++ pretty uo ++ " doesn't take" ++ + " an argument of type " ++ pretty t + in maybe errval return $ retTypeUO uo t + return $ EUn uo e' (Just rt) +annotateExpr _ (ELit lit@(LInt _) _) = return $ ELit lit (Just TInt) +annotateExpr _ (ELit lit@(LChar _) _) = return $ ELit lit (Just TChar) +annotateExpr db (ELit lit@(LVar n) _) = case dbFind db n of + Nothing -> Left $ "Use of undeclared variable " ++ n ++ " in expression" + Just (DBVar t) -> return $ ELit lit (Just t) + Just (DBFunc _ _) -> Left $ "Cannot use function " ++ n ++ " in expression" +annotateExpr db (ELit (LCall n as) _) = do + as' <- mapM (annotateExpr db) as + case dbFind db n of + Nothing -> Left $ "Use of undeclared function " ++ n + Just (DBVar t) -> Left $ "Cannot call variable of type " ++ pretty t + Just (DBFunc mrt ats) -> do + when (length as' /= length ats) $ + Left $ "Function '" ++ n ++ "' expected " ++ show (length ats) ++ + " arguments but got " ++ show (length as') + forM_ (zip as' ats) $ \(arg, at) -> do + when (isNothing (typeof arg)) $ + Left "Use of void value in function argument" + if canCoerce (fromJust $ typeof arg) at + then return () + else Left $ "Argument of " ++ n ++ " has type " ++ pretty at ++ + " but value of type " ++ pretty (fromJust $ typeof arg) ++ + " was given" + return $ ELit (LCall n as') mrt +annotateExpr db (ESubscript arr sub _) = do + arr' <- annotateExpr db arr + sub' <- annotateExpr db sub + let subtyp = fromJust (typeof sub') + when (subtyp /= TInt) $ + Left $ "Type of array subscript should be int, but is " ++ pretty subtyp + case fromJust (typeof arr') of + TArr et _ -> return $ ESubscript arr' sub' (Just et) + _ -> Left $ "Subscripted expression is not an array: " ++ pretty arr +annotateExpr db (ECast t e) = do + e' <- annotateExpr db e + let typ = fromJust (typeof e') + if canCast typ t + then return $ ECast t e' + else Left $ "Cannot cast value of type " ++ pretty typ ++ " to type " ++ pretty t +annotateExpr db e@(ENew t sze) = do + sze' <- annotateExpr db sze + case typeof sze' of + Nothing -> Left $ "Use of void value in array length in 'new' expression: " ++ pretty e + Just TInt -> return () + Just szet -> Left $ "Type of array length in 'new' expression should be int, is " ++ pretty szet + if isBasicType t + then return $ ENew t sze' + else Left $ "Can only allocate arrays of basic types using 'new': " ++ pretty e + +annotateAsExpr :: TypeDB -> AsExpression -> Error AsExpression +annotateAsExpr db (AEVar n _) = case dbFind db n of + Nothing -> Left $ "Use of undeclared variable " ++ n ++ " in assignment expression" + Just (DBVar t) -> return $ AEVar n (Just t) + Just (DBFunc _ _) -> Left $ "Cannot use function " ++ n ++ " in assignment expression" +annotateAsExpr db (AESubscript ae expr _) = do + ae' <- annotateAsExpr db ae + expr' <- annotateExpr db expr + case typeof expr' of + Nothing -> Left $ "Use of void value in array index in assignment expression" + Just TInt -> return () + Just t -> Left $ "Use of non-int type " ++ pretty t ++ " in array index in assignment expression" + case fromJust (typeof ae') of + TArr t _ -> return $ AESubscript ae' expr' (Just t) + t -> Left $ "Indexed expression '" ++ pretty ae ++ "' has non-array type " ++ pretty t ++ + " in assignment expression" diff --git a/TypeRules.hs b/TypeRules.hs new file mode 100644 index 0000000..a3e7678 --- /dev/null +++ b/TypeRules.hs @@ -0,0 +1,61 @@ +module TypeRules where + +import AST + + +canCoerce :: Type -> Type -> Bool +canCoerce t1 t2 | t1 == t2 = True +canCoerce (TArr t1 (Just _)) (TArr t2 Nothing) | t1 == t2 = True +canCoerce _ _ = False + +canCast :: Type -> Type -> Bool +canCast t1 t2 | canCoerce t1 t2 = True +canCast t1 t2 | isIntegralType t1 && isIntegralType t2 = True +canCast _ _ = False + +isBasicType :: Type -> Bool +isBasicType = isIntegralType + +isIntegralType :: Type -> Bool +isIntegralType TInt = True +isIntegralType TChar = True +isIntegralType _ = False + +isSimpleArithBO :: BinaryOp -> Bool +isSimpleArithBO = flip elem [BOAdd, BOSub, BOMul, BODiv, BOMod] + +isBoolBO :: BinaryOp -> Bool +isBoolBO = flip elem [BOAnd, BOOr] + +isCompareBO :: BinaryOp -> Bool +isCompareBO = flip elem [BOEq, BONeq, BOGt, BOLt, BOGeq, BOLeq] + +retTypeBO :: BinaryOp -> Type -> Type -> Maybe Type +retTypeBO bo TInt TInt | isSimpleArithBO bo = Just TInt +retTypeBO bo TChar TChar | isSimpleArithBO bo = Just TChar +retTypeBO bo TInt TInt | isBoolBO bo = Just TInt +retTypeBO bo TInt TInt | isCompareBO bo = Just TInt +retTypeBO bo TChar TChar | isCompareBO bo = Just TInt +retTypeBO _ _ _ = Nothing +-- retTypeBO bo t1 t2 = error $ "retTypeBO " ++ show bo ++ " " ++ show t1 ++ " " ++ show t2 + +retTypeUO :: UnaryOp -> Type -> Maybe Type +retTypeUO UONot TInt = Just TInt +retTypeUO UONeg TInt = Just TInt +retTypeUO UONeg TChar = Just TChar +retTypeUO _ _ = Nothing + +class TypeOf a where + typeof :: a -> Maybe Type + +instance TypeOf Expression where + typeof (EBin _ _ _ mt) = mt + typeof (EUn _ _ mt) = mt + typeof (ELit _ mt) = mt + typeof (ESubscript _ _ mt) = mt + typeof (ECast t _) = Just t + typeof (ENew t _) = Just $ TArr t Nothing + +instance TypeOf AsExpression where + typeof (AEVar _ mt) = mt + typeof (AESubscript _ _ mt) = mt diff --git a/Utils.hs b/Utils.hs new file mode 100644 index 0000000..65bb651 --- /dev/null +++ b/Utils.hs @@ -0,0 +1,7 @@ +module Utils where + + +uniq :: Eq a => [a] -> [a] +uniq (a:b:cs) | a == b = uniq (b:cs) + | otherwise = a : uniq (b:cs) +uniq l = l diff --git a/Verify.hs b/Verify.hs new file mode 100644 index 0000000..18f90ce --- /dev/null +++ b/Verify.hs @@ -0,0 +1,28 @@ +module Verify(verify) where + +import AST +import Defs +import Intermediate + + +verify :: IRProgram -> Error IRProgram +verify (IRProgram vars funcs) = IRProgram <$> mapM verifyVar vars <*> mapM verifyFunc funcs + +verifyVar :: DVar -> Error DVar +verifyVar dvar@(DVar _ n e) = case e of + ELit _ _ -> return dvar + _ -> Left $ "Initialisation of global variable " ++ n ++ " is not a literal" + +verifyFunc :: IRFunc -> Error IRFunc +verifyFunc irfunc@(IRFunc mrt name _ bbs _) = + let terms = map (\(BB _ _ term) -> term) bbs + (nret', nretr') = unzip $ flip map terms $ \term -> case term of + IRet -> (1, 0) + IRetr _ -> (0, 1) + _ -> (0, 0) + (nret, nretr) = (sum nret', sum nretr') :: (Int, Int) + in case (mrt, nret, nretr) of + (Nothing, _, 0) -> return irfunc + (Just _, 0, _) -> return irfunc + (Nothing, _, _) -> Left $ "Some code paths of void function '" ++ name ++ "' return a value" + (Just _, _, _) -> Left $ "Not all code paths of non-void function '" ++ name ++ "' return a value" diff --git a/X64.hs b/X64.hs new file mode 100644 index 0000000..a2d63aa --- /dev/null +++ b/X64.hs @@ -0,0 +1,275 @@ +module X64 where + +import Control.Monad +import Data.Char +import Data.Int +import Data.List +import Data.Maybe + + +type Offset = Int64 + +class Stringifiable a where + stringify :: a -> String + +data Register = RAX | RBX | RCX | RDX | RSI | RDI | RSP | RBP | R8 | R9 | R10 | R11 | R12 | R13 | R14 | R15 + deriving (Show, Eq, Ord, Enum, Bounded) + +data XRef = XReg Int Register | XMem Int (Maybe Register) (Int, Register) (Maybe String) Offset | XImm Offset + deriving (Show, Eq) + +newtype Reg = Reg XRef deriving (Show, Eq) +newtype Mem = Mem XRef deriving (Show, Eq) +newtype Imm = Imm XRef deriving (Show, Eq) +newtype RegMem = RegMem XRef deriving (Show, Eq) +newtype RegMemImm = RegMemImm XRef deriving (Show, Eq) + +data CondCode = CCA | CCAE | CCB | CCBE | CCC | CCE | CCG | CCGE | CCL | CCLE | CCNA | CCNAE | CCNB | CCNBE | CCNC | CCNE | CCNG | CCNGE | CCNL | CCNLE + deriving (Show, Eq) + +data Ins + = MOV RegMem RegMem | MOVi RegMem Imm | MOVi64 Reg Imm + | MOVSX Reg RegMem + | ADD RegMem RegMemImm + | SUB RegMem RegMemImm + | AND RegMem RegMemImm + | OR RegMem RegMemImm + | XOR RegMem RegMemImm + | IMULDA RegMem | IMUL Reg RegMem | IMUL3 Reg RegMem Imm + | MULDA RegMem + | IDIVDA RegMem + | DIVDA RegMem + | CMP RegMem RegMem | CMPi RegMem Imm + | SETCC CondCode RegMem + | CALL String + | PUSH RegMemImm + | POP RegMem + | JMP String + | JCC CondCode String + | RET + deriving (Show, Eq) + +data Asm = Asm [(String, [Ins])] + deriving (Show, Eq) + + +class XRefSub a where + xref :: XRef -> a + +instance XRefSub Reg where + xref x@(XReg _ _) = Reg x + xref _ = undefined +instance XRefSub Mem where + xref x@(XMem _ _ _ _ _) = Mem x + xref _ = undefined +instance XRefSub Imm where + xref x@(XImm _) = Imm x + xref _ = undefined +instance XRefSub RegMem where + xref x@(XReg _ _) = RegMem x + xref x@(XMem _ _ _ _ _) = RegMem x + xref _ = undefined +instance XRefSub RegMemImm where + xref x = RegMemImm x + + +verify :: Asm -> Either String () +verify (Asm funcs) = mapM_ (\(_, inss) -> mapM_ goI inss) funcs + where + goI :: Ins -> Either String () + goI (MOV (RegMem a) (RegMem b)) = ckRegMem a >> ckRegMem b >> ck2mem a b >> ckSizes a b + goI (MOVi (RegMem a) (Imm b)) = ckRegMem a >> ckImm b >> ckSizes a b + goI (MOVi64 (Reg a) (Imm b)) = ckReg a >> ckImm b >> ckSizes64 a b + goI (MOVSX (Reg a) (RegMem b)) = ckReg a >> ckRegMem b >> ckMovsx a b + goI (ADD (RegMem a) (RegMemImm b)) = ckRegMem a >> ckRegMemImm b >> ck2mem a b >> ckSizes a b + goI (SUB (RegMem a) (RegMemImm b)) = ckRegMem a >> ckRegMemImm b >> ck2mem a b >> ckSizes a b + goI (AND (RegMem a) (RegMemImm b)) = ckRegMem a >> ckRegMemImm b >> ck2mem a b >> ckSizes a b + goI (OR (RegMem a) (RegMemImm b)) = ckRegMem a >> ckRegMemImm b >> ck2mem a b >> ckSizes a b + goI (XOR (RegMem a) (RegMemImm b)) = ckRegMem a >> ckRegMemImm b >> ck2mem a b >> ckSizes a b + goI (IMULDA (RegMem a)) = ckRegMem a + goI (IMUL (Reg a) (RegMem b)) = ckReg a >> ckRegMem b >> ck2mem a b >> ckSizes a b + goI (IMUL3 (Reg a) (RegMem b) (Imm c)) = ckReg a >> ckRegMem b >> ckImm c >> ckSizes a b >> ckSizes a c + goI (MULDA (RegMem a)) = ckRegMem a + goI (IDIVDA (RegMem a)) = ckRegMem a + goI (DIVDA (RegMem a)) = ckRegMem a + goI (CMP (RegMem a) (RegMem b)) = ckRegMem a >> ckRegMem b >> ck2mem a b >> ckSizes a b + goI (CMPi (RegMem a) (Imm b)) = ckRegMem a >> ckImm b >> ckSizes a b + goI (SETCC _ (RegMem a)) = ckRegMem a >> ckSizeEq 1 a + goI (CALL s) = when (null s) $ Left "Empty call target" + goI (PUSH (RegMemImm a)) = ckRegMemImm a + goI (POP (RegMem a)) = ckRegMem a + goI (JMP s) = when (null s) $ Left "Empty jump target" + goI (JCC _ s) = when (null s) $ Left "Empty jcc target" + goI RET = return () + + ckReg (XReg _ _) = return () + ckReg _ = Left "Argument is not a Reg" + + ckImm (XImm _) = return () + ckImm _ = Left "Argument is not an Imm" + + ckRegMem (XReg _ _) = return () + ckRegMem (XMem _ _ _ _ _) = return () + ckRegMem _ = Left "Argument is not a Reg or a Mem" + + ckRegMemImm _ = return () + + ck2mem x@(XMem _ _ _ _ _) y@(XMem _ _ _ _ _) = + Left $ "Invalid double-memory operands: " ++ show x ++ "; " ++ show y + ck2mem _ _ = return () + + ckSizes64 x@(XReg a _) y@(XReg b _) = + when (a /= b) $ Left $ "Inconsistent operand sizes: " ++ show x ++ "; " ++ show y + ckSizes64 x@(XReg a _) y@(XMem b _ _ _ _) = + when (a /= b) $ Left $ "Inconsistent operand sizes: " ++ show x ++ "; " ++ show y + ckSizes64 x@(XMem a _ _ _ _) y@(XReg b _) = + when (a /= b) $ Left $ "Inconsistent operand sizes: " ++ show x ++ "; " ++ show y + ckSizes64 x@(XMem a _ _ _ _) y@(XMem b _ _ _ _) = + when (a /= b) $ Left $ "Inconsistent operand sizes: " ++ show x ++ "; " ++ show y + ckSizes64 x@(XReg a _) y@(XImm v) = + when (not $ fitsIn v a) $ Left $ "Immediate too large: " ++ show x ++ "; " ++ show y + ckSizes64 x@(XMem a _ _ _ _) y@(XImm v) = + when (not $ fitsIn v a) $ Left $ "Immediate too large: " ++ show x ++ "; " ++ show y + ckSizes64 _ _ = undefined + + ckSizes a b@(XImm v) = when (v >= 2 ^ (32::Int)) (Left "Immediate too large") >> ckSizes64 a b + ckSizes a b = ckSizes64 a b + + ckMovsx (XReg a _) (XReg b _) = when (a <= b) $ Left "MOVSX with shrinking operands" + ckMovsx (XReg a _) (XMem b _ _ _ _) = when (a <= b) $ Left "MOVSX with shrinking operands" + ckMovsx (XMem a _ _ _ _) (XReg b _) = when (a <= b) $ Left "MOVSX with shrinking operands" + ckMovsx (XMem a _ _ _ _) (XMem b _ _ _ _) = when (a <= b) $ Left "MOVSX with shrinking operands" + ckMovsx _ _ = undefined + + ckSizeEq sz (XReg a _) = when (a /= sz) $ Left "Invalid operand size" + ckSizeEq sz (XMem a _ _ _ _) = when (a /= sz) $ Left "Invalid operand size" + ckSizeEq sz (XImm v) = when (v >= 2 ^ (8 * sz)) $ Left "Invalid operand size (immediate)" + + fitsIn v sz = (fromIntegral v :: Integer) < ((2 :: Integer) ^ (8 * sz)) + + +instance Stringifiable XRef where + stringify (XReg sz reg) = + let n = fromEnum reg + in case n of + _ | n < 4 -> erPrefix $ lxSuffix $ [chr (n + ord 'a')] + | n < 8 -> erPrefix $ lSuffix $ ["si", "di", "sp", "bp"] !! (n - 4) + | otherwise -> bwdSuffix $ 'r' : show n + where + erPrefix s = case sz of + 4 -> 'e' : s + 8 -> 'r' : s + _ -> s + lxSuffix s = case sz of + 1 -> s ++ "l" + _ -> s ++ "x" + lSuffix s = case sz of + 1 -> s ++ "l" + _ -> s + bwdSuffix s = case sz of + 1 -> s ++ "b" + 2 -> s ++ "w" + 4 -> s ++ "d" + _ -> s + + stringify (XMem _ _ (mult, _) _ _) | not (mult `elem` [0,1,2,4,8]) = + error $ "Register multiplier has invalid value " ++ show mult ++ " in XMem" + stringify (XMem sz mr pair lab off) = + let res = intercalate "+" $ catMaybes [goR1 mr, goPair pair, goLab lab, goOff off] + in szword sz ++ " " ++ if null res then "[0]" else "[" ++ res ++ "]" + where + szword 1 = "byte" + szword 2 = "word" + szword 4 = "dword" + szword 8 = "qword" + szword _ = undefined + goR1 Nothing = Nothing + goR1 (Just r) = Just $ stringify (XReg 8 r) + goPair (0, _) = Nothing + goPair (mult, r) = Just $ show mult ++ "*" ++ stringify (XReg 8 r) + goLab = id + goOff 0 = Nothing + goOff o = Just $ show o + + stringify (XImm imm) = show imm + +instance Stringifiable Reg where stringify (Reg x) = stringify x +instance Stringifiable Mem where stringify (Mem x) = stringify x +instance Stringifiable Imm where stringify (Imm x) = stringify x +instance Stringifiable RegMem where stringify (RegMem x) = stringify x +instance Stringifiable RegMemImm where stringify (RegMemImm x) = stringify x + +instance Stringifiable CondCode where + stringify CCA = "a" + stringify CCAE = "ae" + stringify CCB = "b" + stringify CCBE = "be" + stringify CCC = "c" + stringify CCE = "e" + stringify CCG = "g" + stringify CCGE = "ge" + stringify CCL = "l" + stringify CCLE = "le" + stringify CCNA = "na" + stringify CCNAE = "nae" + stringify CCNB = "nb" + stringify CCNBE = "nbe" + stringify CCNC = "nc" + stringify CCNE = "ne" + stringify CCNG = "ng" + stringify CCNGE = "nge" + stringify CCNL = "nl" + stringify CCNLE = "nle" + +instance Stringifiable Ins where + stringify (MOV a b) = "mov " ++ stringify a ++ ", " ++ stringify b + stringify (MOVi a b) = "mov " ++ stringify a ++ ", " ++ stringify b + stringify (MOVi64 a b) = "mov " ++ stringify a ++ ", " ++ stringify b + stringify (MOVSX a b@(RegMem bx)) = case compare (xrefGetSize bx) 4 of + EQ -> "movsxd " ++ stringify a ++ ", " ++ stringify b + LT -> "movsx " ++ stringify a ++ ", " ++ stringify b + GT -> undefined + stringify (ADD a b) = "add " ++ stringify a ++ ", " ++ stringify b + stringify (SUB a b) = "sub " ++ stringify a ++ ", " ++ stringify b + stringify (AND a b) = "and " ++ stringify a ++ ", " ++ stringify b + stringify (OR a b) = "or " ++ stringify a ++ ", " ++ stringify b + stringify (XOR a b) = "xor " ++ stringify a ++ ", " ++ stringify b + stringify (IMULDA a) = "imul " ++ stringify a + stringify (IMUL a b) = "imul " ++ stringify a ++ ", " ++ stringify b + stringify (IMUL3 a b c) = "imul " ++ stringify a ++ ", " ++ stringify b ++ ", " ++ stringify c + stringify (MULDA a) = "mul " ++ stringify a + stringify (IDIVDA a) = "idiv " ++ stringify a + stringify (DIVDA a) = "div " ++ stringify a + stringify (CMP a b) = "cmp " ++ stringify a ++ ", " ++ stringify b + stringify (CMPi a b) = "cmp " ++ stringify a ++ ", " ++ stringify b + stringify (SETCC cc a) = "set" ++ stringify cc ++ " " ++ stringify a + stringify (CALL a) = "call " ++ a + stringify (PUSH a) = "push " ++ stringify a + stringify (POP a) = "pop " ++ stringify a + stringify (JMP s) = "jmp " ++ s + stringify (JCC cc s) = "j" ++ stringify cc ++ " " ++ s + stringify RET = "ret" + +instance Stringifiable Asm where + stringify (Asm funcs) = intercalate "\n" $ map goF funcs + where + goF :: (String, [Ins]) -> String + goF (name, inss) = name ++ ":\n" ++ unlines (map (('\t' :) . stringify) inss) + +xrefGetSize :: XRef -> Int +xrefGetSize (XReg s _) = s +xrefGetSize (XMem s _ _ _ _) = s +xrefGetSize (XImm _) = undefined + +xrefSetSize :: Int -> XRef -> XRef +xrefSetSize sz (XReg _ r) = XReg sz r +xrefSetSize sz (XMem _ a b c d) = XMem sz a b c d +xrefSetSize _ x@(XImm _) = x + +isXMem :: XRef -> Bool +isXMem (XMem _ _ _ _ _) = True +isXMem _ = False + +isXImm :: XRef -> Bool +isXImm (XImm _) = True +isXImm _ = False diff --git a/arrays.lang b/arrays.lang new file mode 100644 index 0000000..8cab05f --- /dev/null +++ b/arrays.lang @@ -0,0 +1,7 @@ +func int main() { + int[] arr := new int[5]; + arr[0] = 97; + arr[1] = 98; + putc(char(arr[0])); + return 0; +} diff --git a/bf.lang b/bf.lang new file mode 100644 index 0000000..d9366fb --- /dev/null +++ b/bf.lang @@ -0,0 +1,105 @@ +func putstr(char[] str) { + int i := 0; + while (str[i] != '\0') { + putc(str[i]); + i = i + 1; + } +} + +func int strlen(char[] str) { + int i := 0; + char c := str[i]; + while (c != '\0') { + i = i + 1; + c = str[i]; + } + return i; +} + +func int[] makejumpmap(char[] src, int srclen) { + int[] jm := new int[srclen]; + int[] stack := new int[srclen]; + int stkp := 0; + int i := 0; + while (i < srclen) { + putint(i); putc(' '); putint(stkp); putc(' '); putc(src[i]); putc('\n'); + if (src[i] == '[') { + stack[stkp] = i; + stkp = stkp + 1; + } + if (src[i] == ']') { + stkp = stkp - 1; + int j := stack[stkp]; + jm[i] = j; + jm[j] = i; + } + i = i + 1; + } + /*i = 0; + while (i < srclen) { + putint(jm[i]); + putc(' '); + i = i + 1; + } + putc('\n');*/ + return jm; +} + +func interpret(char[] src, int srclen) { + int[] jm := makejumpmap(src, srclen); + char[] mem := new char[4088]; + int ip := 0; + int memp := 0; + while (src[ip] != '\0') { + // putint(ip); putc('\n'); + char c := src[ip]; + if (c == '+') {mem[memp] = mem[memp] + char(1);} + if (c == '-') {mem[memp] = mem[memp] - char(1);} + if (c == '>') {memp = memp + 1;} + if (c == '<') {memp = memp - 1;} + if (c == '.') {putc(mem[memp]);} + if (c == ',') {mem[memp] = char(getc());} + if (c == '[') { + if (mem[memp] == '\0') { + ip = jm[ip]; + } + } + if (c == ']') { + if (mem[memp] != '\0') { + ip = jm[ip]; + } + } + ip = ip + 1; + } + + memp = 0; + while (memp < 10) { + putint(int(mem[memp])); putc(' '); + memp = memp + 1; + } + putc('\n'); +} + +func int main() { + int bufsize := 4088; + char[] source := new char[4088]; + + int sourcelen := 0; + int done := 0; + while (done != 1) { + int c := getc(); + if (c < 0) { + done = 1; + } else { + source[sourcelen] = char(c); + sourcelen = sourcelen + 1; + if (sourcelen >= bufsize - 1) { + done = 1; + } + } + } + source[sourcelen] = '\0'; + + interpret(source, sourcelen); + return 0; +} diff --git a/chartest.lang b/chartest.lang new file mode 100644 index 0000000..208bcab --- /dev/null +++ b/chartest.lang @@ -0,0 +1,19 @@ +int glob := 42; + +func f(char x) { + putc(x); +} + +func char g() { + return '\n'; +} + +func int main() { + char c := 'a'; + f(c); + putc('\n'); + char d := g(); + c = d + d + d + d + d + d + d + d + 'a'; + putc(c); + return 0; +} diff --git a/fibo.lang b/fibo.lang new file mode 100644 index 0000000..e1a7a4a --- /dev/null +++ b/fibo.lang @@ -0,0 +1,31 @@ +func printnum(int n) { + if (n == 0) {putc('0'); return;} + if (n < 0) {putc('-'); n = -n;} + while (n > 0) { + putc('0' + char(n % 10)); + n = n / 10; + } +} + +func int fibo(int n) { + int a := 0; + int b := 1; + int i := 0; + while (i < n) { + int c := a + b; + a = b; + b = c; + i = i + 1; + } + return b; +} + +func int main() { + int i := 0; + while (i <= 20) { + printnum(fibo(i)); + putc('\n'); + i = i + 1; + } + return 0; +} diff --git a/graph.png b/graph.png new file mode 100644 index 0000000..0356b69 Binary files /dev/null and b/graph.png differ diff --git a/prologue.asm b/prologue.asm new file mode 100644 index 0000000..74d9de3 --- /dev/null +++ b/prologue.asm @@ -0,0 +1,134 @@ +; SYS_EXIT equ 0x2000001 ;code +; SYS_FORK equ 0x2000002 ;-- +; SYS_READ equ 0x2000003 ;fd, buf, len +; SYS_WRITE equ 0x2000004 ;fd, buf, len + +global start +default rel + +section .text +start: + call main + mov rdi, rax + mov eax, 0x2000001 + syscall + jmp $ + +putc: + push rax + push rdi + push rsi + push rdx + push rcx + push r11 + mov eax, 0x2000004 + mov edi, 1 + lea rsi, [rsp+56] + mov edx, edi + syscall + pop r11 + pop rcx + pop rdx + pop rsi + pop rdi + pop rax + ret + +putint: + push rdi + push rsi + push rax + push rbx + push rcx + push rdx + push r11 + mov rax, [rsp+64] + mov ebx, 18 + test rax, rax + jz .numzero + + mov ecx, 10 + +.strlp: + xor edx, edx + div rcx + add dl, '0' + dec rbx + mov [rsp+rbx], dl + test rax, rax + jnz .strlp + jmp .strdone + +.numzero: + dec rbx + mov byte [rsp+rbx], '0' + jmp .strdone + +.strdone: + mov eax, 0x2000004 + mov edi, 1 + lea rsi, [rsp+rbx] + mov edx, 18 + sub rdx, rbx + syscall + + pop r11 + pop rdx + pop rcx + pop rbx + pop rax + pop rsi + pop rdi + ret + +getc: + push rdi + push rsi + push rdx + push rcx + push r11 + mov eax, 0x2000003 + xor edi, edi + mov rsi, rsp + mov edx, 1 + syscall + cmp rax, 1 + jne .fail + mov rax, [rsp] +.finish: + pop r11 + pop rcx + pop rdx + pop rsi + pop rdi + ret +.fail: + mov rax, -1 + jmp .finish + +_builtin_malloc: + push rdi + push rsi + push rdx + push r8 + push r9 + push r10 + push r11 + push rcx + xor edi, edi + mov rsi, [rsp+72] + mov edx, 0x03 + mov r10d, 0x1001 + mov r8d, -1 + xor r9d, r9d + mov eax, 0x20000C5 + syscall + pop rcx + pop r11 + pop r10 + pop r9 + pop r8 + pop rdx + pop rsi + pop rdi + ret diff --git a/putint.lang b/putint.lang new file mode 100644 index 0000000..198b4ee --- /dev/null +++ b/putint.lang @@ -0,0 +1,4 @@ +func int main() { + putint(-12345678); putc('\n'); + return 0; +} diff --git a/putstr.lang b/putstr.lang new file mode 100644 index 0000000..b13e2c5 --- /dev/null +++ b/putstr.lang @@ -0,0 +1,43 @@ +func putstr(char[] str) { + int i := 0; + while (str[i] != '\0') { + putc(str[i]); + i = i + 1; + } +} + +func int main() {return 0;} + + + +/* +irfunc putstr(char[] str) + {{{(0) + mov t5Q <- 0Q + jmp 7 + }}} + {{{(7) + mov t15Q <- t5Q + add t15Q, 8Q + mov t13Q <- astrQ + add t13Q, t15Q + load t16B <- *t13Q + neq t16B, 0B + jne t16B, 0Q -> 9 | 6 + }}} + {{{(9) + mov t25Q <- t5Q + add t25Q, 8Q + mov t23Q <- astrQ + add t23Q, t25Q + load t26B <- *t23Q + call putc (t26B) + mov t30Q <- t5Q + add t30Q, 1Q + mov t5Q <- t30Q + jmp 7 + }}} + {{{(6) + ret + }}} +*/ diff --git a/test.lang b/test.lang new file mode 100644 index 0000000..f03b3e3 --- /dev/null +++ b/test.lang @@ -0,0 +1,20 @@ +int glob := 100; + +func f() { + glob = glob + 10; +} + +func g(int x , int y) { + glob = glob + x + 2 * y; +} + +func int main() { + int i := 0; /* block /* comments */ nest! */ + // int[4] arr := 2; + i = i + 1; + f(); + if (i > 0 && i < 10) { + int i := 20; + g(i , 2 * i); + } +} -- cgit v1.2.3-54-g00ecf