diff options
-rw-r--r-- | .gitignore | 4 | ||||
-rw-r--r-- | AST.hs | 173 | ||||
-rw-r--r-- | BuildIR.hs | 367 | ||||
-rw-r--r-- | CodeGen.hs | 388 | ||||
-rw-r--r-- | Defs.hs | 10 | ||||
-rw-r--r-- | Intermediate.hs | 174 | ||||
-rw-r--r-- | LifetimeAnalysis.hs | 70 | ||||
-rw-r--r-- | Main.hs | 56 | ||||
-rw-r--r-- | Makefile | 26 | ||||
-rw-r--r-- | Optimiser.hs | 252 | ||||
-rw-r--r-- | Pretty.hs | 14 | ||||
-rw-r--r-- | ProgramParser.hs | 269 | ||||
-rw-r--r-- | RegAlloc.hs | 87 | ||||
-rw-r--r-- | ReplaceRefs.hs | 36 | ||||
-rw-r--r-- | TypeCheck.hs | 234 | ||||
-rw-r--r-- | TypeRules.hs | 61 | ||||
-rw-r--r-- | Utils.hs | 7 | ||||
-rw-r--r-- | Verify.hs | 28 | ||||
-rw-r--r-- | X64.hs | 275 | ||||
-rw-r--r-- | arrays.lang | 7 | ||||
-rw-r--r-- | bf.lang | 105 | ||||
-rw-r--r-- | chartest.lang | 19 | ||||
-rw-r--r-- | fibo.lang | 31 | ||||
-rw-r--r-- | graph.png | bin | 0 -> 166622 bytes | |||
-rw-r--r-- | prologue.asm | 134 | ||||
-rw-r--r-- | putint.lang | 4 | ||||
-rw-r--r-- | putstr.lang | 43 | ||||
-rw-r--r-- | test.lang | 20 |
28 files changed, 2894 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f220a21 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +main +obj +obsolete +z_output* @@ -0,0 +1,173 @@ +module AST where + +import Data.List + +import Defs +import Pretty + + +data Program = Program [DVar] [DFunc] + deriving (Show, Eq) + +data DVar = DVar Type Name Expression + deriving (Show, Eq) + +data DFunc = DFunc (Maybe Type) Name [(Type, Name)] Block + deriving (Show, Eq) + +data Type + = TInt | TChar | TArr Type (Maybe Size) + deriving (Show, Eq) + +data Block = Block [Statement] + deriving (Show, Eq) + +data Statement + = SDecl Type Name Expression + | SAs AsExpression Expression + | SIf Expression Block Block + | SWhile Expression Block + | SReturn (Maybe Expression) + | SExpr Expression + deriving (Show, Eq) + +data AsExpression + = AEVar Name (Maybe Type) + | AESubscript AsExpression Expression (Maybe Type) + deriving (Show, Eq) + +data Expression + = EBin BinaryOp Expression Expression (Maybe Type) + | EUn UnaryOp Expression (Maybe Type) + | ELit Literal (Maybe Type) + | ESubscript Expression Expression (Maybe Type) + | ECast Type Expression + | ENew Type Expression + deriving (Show, Eq) + +data BinaryOp + = BOAdd | BOSub | BOMul | BODiv | BOMod | BOPow + | BOAnd | BOOr + | BOEq | BONeq | BOGt | BOLt | BOGeq | BOLeq + deriving (Show, Eq) + +data UnaryOp + = UONot | UONeg + deriving (Show, Eq) + +data Literal + = LInt Integer + | LChar Char + | LVar Name + | LCall Name [Expression] + deriving (Show, Eq) + + +sizeof :: Type -> Size +sizeof TInt = 8 +sizeof TChar = 1 +sizeof (TArr _ _) = 8 + + +instance Pretty Program where + prettyI i (Program vars funcs) = + concatMap (++ ("\n" ++ indent i)) $ + map (prettyI i) vars ++ map (prettyI i) funcs + where + indent n = replicate (2*n) ' ' + +instance Pretty DVar where + prettyI i (DVar t n e) = + prettyI i t ++ " " ++ n ++ " := " ++ prettyI i e ++ ";" + +instance Pretty DFunc where + prettyI i (DFunc mt n al b) = + "func" ++ maybe "" ((' ' :) . prettyI i) mt ++ " " ++ n ++ "(" ++ + intercalate "," + (map (\(at,an) -> prettyI i at ++ " " ++ an) al) ++ + ") " ++ prettyI i b + +instance Pretty Type where + prettyI _ TInt = "int" + prettyI _ TChar = "char" + prettyI _ (TArr t Nothing) = pretty t ++ "[]" + prettyI _ (TArr t (Just sz)) = pretty t ++ "[" ++ show sz ++ "]" + +instance Pretty Block where + prettyI _ (Block []) = "{}" + prettyI i (Block l) = + "{" ++ + concatMap (("\n" ++ indent (i+1)) ++) (map (prettyI (i+1)) l) ++ + "\n" ++ indent i ++ "}" + where + indent n = replicate (2*n) ' ' + +instance Pretty Statement where + prettyI i (SDecl t n e) = + prettyI i t ++ " " ++ n ++ " := " ++ prettyI i e ++ ";" + prettyI i (SAs target e) = + prettyI i target ++ " = " ++ prettyI i e ++ ";" + prettyI i (SIf c b1 b2) = + "if " ++ prettyI i c ++ " " ++ prettyI i b1 ++ " else " ++ prettyI i b2 + prettyI i (SWhile c b) = + "while " ++ prettyI i c ++ " " ++ prettyI i b + prettyI _ (SReturn Nothing) = + "return;" + prettyI i (SReturn (Just e)) = + "return " ++ prettyI i e ++ ";" + prettyI i (SExpr e) = prettyI i e ++ ";" + +instance Pretty Expression where + prettyI i (EBin bo a b (Just t)) = + "(" ++ prettyI i (EBin bo a b Nothing) ++ " :: " ++ prettyI i t ++ ")" + prettyI i (EBin bo a b Nothing) = + "(" ++ prettyI i a ++ ") " ++ prettyI i bo ++ + " (" ++ prettyI i b ++ ")" + prettyI i (EUn uo e (Just t)) = + "(" ++ prettyI i (EUn uo e Nothing) ++ " :: " ++ prettyI i t ++ ")" + prettyI i (EUn uo e Nothing) = + prettyI i uo ++ "(" ++ prettyI i e ++ ")" + prettyI i (ELit l (Just t)) = + "(" ++ prettyI i (ELit l Nothing) ++ " :: " ++ prettyI i t ++ ")" + prettyI i (ELit l Nothing) = prettyI i l + prettyI i (ESubscript a b (Just t)) = + "(" ++ prettyI i (ESubscript a b Nothing) ++ " :: " ++ prettyI i t ++ ")" + prettyI i (ESubscript a b Nothing) = "(" ++ prettyI i a ++ ")[" ++ prettyI i b ++ "]" + prettyI i (ECast t e) = prettyI i t ++ "(" ++ prettyI i e ++ ")" + prettyI i (ENew t e) = "new " ++ prettyI i t ++ "[" ++ prettyI i e ++ "]" + +instance Pretty AsExpression where + prettyI i (AEVar n (Just t)) = + "(" ++ prettyI i (AEVar n Nothing) ++ " :: " ++ prettyI i t ++ ")" + prettyI _ (AEVar n Nothing) = n + prettyI i (AESubscript ae e (Just t)) = + "(" ++ prettyI i (AESubscript ae e Nothing) ++ " :: " ++ prettyI i t ++ ")" + prettyI i (AESubscript ae e Nothing) = prettyI i ae ++ "[" ++ prettyI i e ++ "]" + +instance Pretty BinaryOp where + prettyI _ BOAdd = "+" + prettyI _ BOSub = "-" + prettyI _ BOMul = "*" + prettyI _ BODiv = "/" + prettyI _ BOPow = "**" + prettyI _ BOMod = "%" + prettyI _ BOAnd = "&&" + prettyI _ BOOr = "||" + prettyI _ BOEq = "==" + prettyI _ BONeq = "!=" + prettyI _ BOGt = ">" + prettyI _ BOLt = "<" + prettyI _ BOGeq = ">=" + prettyI _ BOLeq = "<=" + + +instance Pretty UnaryOp where + prettyI _ UONot = "!" + prettyI _ UONeg = "-" + +instance Pretty Literal where + prettyI _ (LInt n) = show n + prettyI _ (LChar c) = show c + prettyI _ (LVar n) = n + prettyI i (LCall n al) = + n ++ "(" ++ intercalate ", " (map (prettyI i) al) ++ ")" diff --git a/BuildIR.hs b/BuildIR.hs new file mode 100644 index 0000000..28cbf5e --- /dev/null +++ b/BuildIR.hs @@ -0,0 +1,367 @@ +{-# LANGUAGE DeriveFunctor, GeneralizedNewtypeDeriving #-} + +module BuildIR(buildIR) where + +import Control.Monad.Except +import Control.Monad.State.Strict +import Data.Char +import Data.List +import qualified Data.Map.Strict as Map +import Data.Maybe + +import AST +import Defs +import Intermediate +import Pretty +import TypeRules + + +type Scope = Map.Map Name (Ref, Type) + +data BuildState = BuildState + { nextId :: Id, + scopeStack :: [Scope], + currentBlock :: Id, + blockMap :: Map.Map Id BB } + +initBuildState :: BuildState +initBuildState = BuildState + { nextId = 0, + scopeStack = [], + currentBlock = undefined, + blockMap = Map.empty } + +newtype BuildM a = BuildM {unBuildM :: StateT BuildState (Except String) a} + deriving (Functor, Applicative, Monad, MonadState BuildState, MonadError String) + +genId :: BuildM Id +genId = state $ \s -> (nextId s, s {nextId = nextId s + 1}) + +genTemp :: Size -> BuildM Ref +genTemp sz = liftM (Temp sz) genId + +newBlock :: BuildM Id +newBlock = do + i <- genId + let block = BB i [] ITermNone + modify $ \s -> s {currentBlock = i, blockMap = Map.insert i block (blockMap s)} + return i + +newBlockNoSwitch :: BuildM Id +newBlockNoSwitch = do + i <- genId + let block = BB i [] ITermNone + modify $ \s -> s {blockMap = Map.insert i block (blockMap s)} + return i + +addIns :: IRIns -> BuildM () +addIns ins = + modify $ \s -> s { + blockMap = Map.adjust bbAddIns (currentBlock s) (blockMap s)} + where + bbAddIns :: BB -> BB + bbAddIns (BB bid inss term) = BB bid (inss ++ [ins]) term + +setTerm :: IRTerm -> BuildM () +setTerm term = + modify $ \s -> s { + blockMap = Map.adjust bbSetTerm (currentBlock s) (blockMap s)} + where + bbSetTerm :: BB -> BB + bbSetTerm (BB bid inss oldterm) = case oldterm of + ITermNone -> BB bid inss term + _ -> error "setTerm: oldterm /= ITermNone" + +clearBlockMap :: BuildM () +clearBlockMap = modify $ \s -> s {currentBlock = undefined, blockMap = Map.empty} + +getAllBlocks :: BuildM [BB] +getAllBlocks = liftM Map.elems (gets blockMap) + +switchBlock :: Id -> BuildM () +switchBlock bid = modify $ \s -> s {currentBlock = bid} + +withScope :: BuildM a -> BuildM a +withScope act = do + modify $ \s -> s {scopeStack = Map.empty : scopeStack s} + res <- act + modify $ \s -> s {scopeStack = tail (scopeStack s)} + return res + +modifyScope :: (Scope -> Scope) -> BuildM () +modifyScope f = + modify $ \s -> s {scopeStack = f (head (scopeStack s)) : tail (scopeStack s)} + +scopeInsert :: Name -> Ref -> Type -> BuildM () +scopeInsert n ref t = modifyScope $ Map.insert n (ref, t) + +findVar :: Name -> BuildM (Maybe (Int, (Ref, Type))) +findVar n = do + stk <- gets scopeStack + let results = map (Map.lookup n) stk + return $ fmap (\idx -> (idx, fromJust (results !! idx))) $ + findIndex isJust results + + +buildIR :: Program -> Error IRProgram +buildIR (Program vars funcs) = + runExcept $ evalStateT (unBuildM result) initBuildState + where + goDFunc :: DFunc -> BuildM IRFunc + goDFunc (DFunc rt n al bl) = do + clearBlockMap + firstid <- newBlock + lastid <- newBlockNoSwitch + withScope $ do + forM_ al $ \(at, an) -> scopeInsert an (Argument (sizeof at) an) at + convertBlock bl lastid + switchBlock lastid + setTerm IRet + bblist <- getAllBlocks + return $ IRFunc rt n al bblist firstid + + result :: BuildM IRProgram + result = do + withScope $ do + mapM_ (\(DVar t n _) -> scopeInsert n (Global (sizeof t) n) t) vars + IRProgram vars <$> mapM goDFunc funcs + +convertBlock :: Block -> Id -> BuildM () +convertBlock (Block sts) nextnext = do + withScope $ forM_ sts $ \st -> do + endid <- newBlockNoSwitch + convertStatement st endid + switchBlock endid + setTerm $ IJmp nextnext + +convertStatement :: Statement -> Id -> BuildM () +convertStatement (SDecl t n e) nextnext = do + endid <- newBlockNoSwitch + ref <- convertExpression e endid + varref <- genTemp (sizeof t) + scopeInsert n varref t + switchBlock endid + addIns $ IMov varref ref + setTerm $ IJmp nextnext +convertStatement (SAs ae e) nextnext = do + bl2 <- newBlockNoSwitch + eref <- convertExpression e bl2 + switchBlock bl2 + convertAsExpression ae eref nextnext +convertStatement (SIf c b1 b2) nextnext = do + cend <- newBlockNoSwitch + blThen <- newBlockNoSwitch + blElse <- newBlockNoSwitch + + cref <- convertExpression c cend + switchBlock cend + setTerm $ IJcc CNeq cref (Constant (refSize cref) 0) blThen blElse + switchBlock blThen + convertBlock b1 nextnext + switchBlock blElse + convertBlock b2 nextnext +convertStatement (SWhile c b) nextnext = do + cond <- newBlockNoSwitch + setTerm $ IJmp cond + cend <- newBlockNoSwitch + body <- newBlockNoSwitch + bodyend <- newBlockNoSwitch + + switchBlock cond + cref <- convertExpression c cend + switchBlock cend + setTerm $ IJcc CNeq cref (Constant (refSize cref) 0) body nextnext + switchBlock body + convertBlock b bodyend + switchBlock bodyend + setTerm $ IJmp cond +convertStatement (SReturn Nothing) _ = do + setTerm IRet +convertStatement (SReturn (Just e)) _ = do + bl <- newBlockNoSwitch + ref <- convertExpression e bl + switchBlock bl + setTerm $ IRetr ref +convertStatement (SExpr e) nextnext = do + void $ convertExpression e nextnext + +convertExpression :: Expression -> Id -> BuildM Ref +convertExpression (ELit (LInt n) _) nextnext = do + ref <- genTemp (sizeof TInt) + addIns $ IMov ref (Constant (sizeof TInt) (fromInteger n)) + setTerm $ IJmp nextnext + return ref +convertExpression (ELit (LChar c) _) nextnext = do + ref <- genTemp (sizeof TChar) + addIns $ IMov ref (Constant (sizeof TChar) (fromIntegral $ ord c)) + setTerm $ IJmp nextnext + return ref +convertExpression (ELit (LVar n) _) nextnext = do + mres <- findVar n + case mres of + Just (_, (r, t)) -> do + ref <- genTemp (sizeof t) + addIns $ IMov ref r + setTerm $ IJmp nextnext + return ref + Nothing -> throwError $ "Undefined variable '" ++ n ++ "' referenced" +convertExpression (ELit (LCall n al) mrt) nextnext = do + refs <- withScope $ forM al $ \arg -> do + endid <- newBlockNoSwitch + r <- convertExpression arg endid + switchBlock endid + return r + destref <- case mrt of + Nothing -> do + addIns $ ICall n refs + return $ Temp 0 (-1) + Just typ -> do + r <- genTemp (sizeof typ) + addIns $ ICallr r n refs + return r + setTerm $ IJmp nextnext + return destref +convertExpression (EBin BOAnd e1 e2 _) nextnext = do + destref <- genTemp (sizeof TInt) + bl2 <- newBlockNoSwitch + blTryR <- newBlockNoSwitch + bl3 <- newBlockNoSwitch + blNope <- newBlockNoSwitch + blYes <- newBlockNoSwitch + + ref1 <- convertExpression e1 bl2 + + switchBlock bl2 + setTerm $ IJcc CNeq ref1 (Constant (refSize ref1) 0) blTryR blNope + + switchBlock blTryR + ref2 <- convertExpression e2 bl3 + + switchBlock bl3 + setTerm $ IJcc CNeq ref2 (Constant (refSize ref2) 0) blYes blNope + + switchBlock blYes + addIns $ IMov destref (Constant (refSize destref) 1) + setTerm $ IJmp nextnext + + switchBlock blNope + addIns $ IMov destref (Constant (refSize destref) 0) + setTerm $ IJmp nextnext + + return destref +convertExpression (EBin bo e1 e2 _) nextnext = do + bl2 <- newBlockNoSwitch + ref1 <- convertExpression e1 bl2 + switchBlock bl2 + bl3 <- newBlockNoSwitch + ref2 <- convertExpression e2 bl3 + switchBlock bl3 + ref <- genTemp (sizeof $ fromJust $ retTypeBO bo (fromJust $ typeof e1) (fromJust $ typeof e2)) + case bo of + BOAdd -> addIns $ IAri AAdd ref1 ref2 + BOSub -> addIns $ IAri ASub ref1 ref2 + BOMul -> addIns $ IAri AMul ref1 ref2 + BODiv -> addIns $ IAri ADiv ref1 ref2 + BOMod -> addIns $ IAri AMod ref1 ref2 + BOEq -> addIns $ IAri AEq ref1 ref2 + BONeq -> addIns $ IAri ANeq ref1 ref2 + BOGt -> addIns $ IAri AGt ref1 ref2 + BOLt -> addIns $ IAri ALt ref1 ref2 + BOGeq -> addIns $ IAri AGeq ref1 ref2 + BOLeq -> addIns $ IAri ALeq ref1 ref2 + BOPow -> error $ "Pow operator not implemented" + BOAnd -> undefined + BOOr -> undefined + addIns $ IMov ref ref1 + setTerm $ IJmp nextnext + return ref +convertExpression (EUn UONot e mt) nextnext = + convertExpression (EBin BOEq e (ELit (LInt 0) (typeof e)) mt) nextnext +convertExpression (EUn UONeg e mt) nextnext = + convertExpression (EBin BOSub (ELit (LInt 0) (typeof e)) e mt) nextnext +convertExpression (ESubscript arr sub t) nextnext = do + let elemsz = sizeof $ fromJust t + bl2 <- newBlockNoSwitch + arrref <- convertExpression arr bl2 + switchBlock bl2 + bl3 <- newBlockNoSwitch + subref <- convertExpression sub bl3 + switchBlock bl3 + addIns $ IAri AMul subref (Constant (refSize subref) (fromIntegral elemsz)) + addIns $ IAri AAdd subref (Constant (refSize subref) (fromIntegral $ sizeof TInt)) + addIns $ IAri AAdd arrref subref + ref <- genTemp elemsz + addIns $ ILoad ref arrref + setTerm $ IJmp nextnext + return ref +convertExpression (ECast dt e) nextnext = do + let typ = case typeof e of + Nothing -> error $ "Cast subject " ++ show e ++ " has Nothing type" + Just t -> t + when (not $ isIntegralType typ && isIntegralType dt) $ + error $ "convertExpression: unimplemented cast from " ++ pretty typ ++ " to " ++ pretty dt + ref <- genTemp (sizeof dt) + bl <- newBlockNoSwitch + eref <- convertExpression e bl + switchBlock bl + addIns $ IResize ref eref + setTerm $ IJmp nextnext + return ref +convertExpression (ENew t sze) nextnext = do + when (not $ isBasicType t) $ + throwError $ "Array element type in 'new' expression is not a basic type (" ++ pretty t ++ ")" + bl2 <- newBlockNoSwitch + szref <- convertExpression sze bl2 + switchBlock bl2 + ref <- genTemp (sizeof $ TArr t Nothing) + addIns $ IAri AMul szref (Constant (sizeof TInt) (fromIntegral $ sizeof t)) + addIns $ IAri AAdd szref (Constant (sizeof TInt) (fromIntegral $ sizeof TInt)) + addIns $ ICallr ref "_builtin_malloc" [szref] + setTerm $ IJmp nextnext + return ref + +convertAsExpression :: AsExpression -> Ref -> Id -> BuildM () +convertAsExpression (AEVar n _) valueref nextnext = do + mres <- findVar n + vref <- case mres of + Just (_, (r, _)) -> return r + Nothing -> throwError $ "Undefined variable '" ++ n ++ "'" ++ + " used in assignment expression" + addIns $ IMov vref valueref + setTerm $ IJmp nextnext +convertAsExpression (AESubscript ae2 expr mrt) valueref nextnext = do + let elemsz = sizeof $ fromJust mrt + ae2ref <- goLoad ae2 + bl2 <- newBlockNoSwitch + offref <- convertExpression expr bl2 + switchBlock bl2 + -- TODO: do bounds checking + addIns $ IAri AMul offref (Constant (sizeof TInt) (fromIntegral elemsz)) + addIns $ IAri AAdd offref (Constant (sizeof TInt) (fromIntegral $ sizeof TInt)) + addIns $ IAri AAdd ae2ref offref + addIns $ IStore ae2ref valueref + setTerm $ IJmp nextnext + where + goLoad :: AsExpression -> BuildM Ref + goLoad (AEVar n _) = do + mres <- findVar n + (vref, t) <- case mres of + Just (_, (r, t)) -> return (r, t) + Nothing -> throwError $ "Undefined variable '" ++ n ++ "'" ++ + " used in assignment expression" + ref <- genTemp (sizeof t) + addIns $ IMov ref vref + return ref + goLoad (AESubscript ae expr' _) = do + let elemsz = sizeof $ fromJust $ typeof ae + ref <- goLoad ae + bl2 <- newBlockNoSwitch + eref <- convertExpression expr' bl2 + switchBlock bl2 + -- TODO: do bounds checking + addIns $ IAri AMul eref (Constant (sizeof TInt) (fromIntegral elemsz)) + addIns $ IAri AAdd eref (Constant (sizeof TInt) (fromIntegral $ sizeof TInt)) + addIns $ IAri AAdd ref eref + dstref <- genTemp elemsz + addIns $ ILoad dstref ref + return dstref diff --git a/CodeGen.hs b/CodeGen.hs new file mode 100644 index 0000000..d4c9439 --- /dev/null +++ b/CodeGen.hs @@ -0,0 +1,388 @@ +{-# LANGUAGE DeriveFunctor, GeneralizedNewtypeDeriving, TupleSections, QuasiQuotes, ScopedTypeVariables #-} + +module CodeGen(codegen) where + +import Control.Monad +import Control.Monad.Except +import Control.Monad.State.Strict +import Data.List +import Data.Maybe +import Data.Map.Strict ((!)) +import qualified Data.Map.Strict as Map +import Text.Heredoc +import Debug.Trace + +import AST +import Defs +import Intermediate +import qualified LifetimeAnalysis as LA +import RegAlloc +import Utils +import X64 (Register(..), CondCode(..), XRef(..), Ins(..), xref) +import qualified X64 as X64 + + +data CGState = CGState + { nextId :: Int, + regsToRestore :: [Register], + spillSize :: Size, + x64Result :: X64.Asm } + +newtype CGMonad a = CGMonad { unCGMonad :: StateT CGState (Except String) a } + deriving (Functor, Applicative, Monad, MonadState CGState, MonadError String) + +initState :: CGState +initState = CGState {nextId = 1, regsToRestore = [], spillSize = 0, x64Result = X64.Asm []} + +execCGMonad :: CGMonad a -> Error X64.Asm +execCGMonad = fmap x64Result . runExcept . flip execStateT initState . unCGMonad + +addIns :: X64.Ins -> CGMonad () +addIns ins = modify $ \s -> + let (X64.Asm funcs) = x64Result s + (pre, (lab, inss)) = (init funcs, last funcs) + in s {x64Result = X64.Asm $ pre ++ [(lab, inss ++ [ins])]} + +newLabel :: String -> CGMonad () +newLabel lab = modify $ \s -> + let (X64.Asm funcs) = x64Result s + in s {x64Result = X64.Asm $ funcs ++ [(lab, [])]} + +-- genId :: CGMonad Int +-- genId = state $ \s -> (nextId s, s {nextId = nextId s + 1}) + +setRegsToRestore :: [Register] -> CGMonad () +setRegsToRestore regs = modify $ \s -> s {regsToRestore = regs} + +setSpillSize :: Size -> CGMonad () +setSpillSize sz = modify $ \s -> s {spillSize = sz} + + +codegen :: IRProgram -> Error String +codegen (IRProgram vars funcs) = do + x64 <- execCGMonad $ mapM_ codegenFunc funcs + -- traceShowM x64 + X64.verify x64 + varcg <- liftM unlines $ mapM codegenVar vars + return $ [there|prologue.asm|] ++ "\n" ++ X64.stringify x64 ++ + "\nsection .data\n" ++ (if length vars > 0 then varcg else "db 0 ; keep dyld happy\n") + + +codegenVar :: DVar -> Error String +codegenVar (DVar TInt n (ELit (LInt i) (Just TInt))) = Right $ n ++ ": dq " ++ show i +codegenVar _ = Left "Unsupported global variable declaration" + + +type AllocMap = Map.Map Ref XRef + +codegenFunc :: IRFunc -> CGMonad () +codegenFunc (IRFunc _ name al bbs sid) = do + let temprefsperbb = collectTempRefs bbs + alltemprefs = uniq $ sort $ map LA.unAccess $ concat $ concat $ map fst temprefsperbb + lifespans = map (\r -> (findLifeSpan r, r)) alltemprefs + where findLifeSpan ref = + fromJust $ findFirstLast id $ concat $ LA.lifetimeAnalysis ref temprefsperbb + + aliascandidates = findAliasCandidates bbs :: [(Ref, Ref)] + + gpRegs = [R8, R9, R10, R11, R12, R13, R14, R15] + allocation = regalloc lifespans gpRegs aliascandidates :: Map.Map Ref (Allocation Register) + + spillrefs = map fst $ filter (isAllocMem . snd) $ Map.toList allocation + (spilloffsets, spillsz) = initLast $ scanl (+) 0 $ map refSize spillrefs + spilloffsetmap = Map.fromList $ zip spillrefs spilloffsets + + usedregs = uniq $ sort $ catMaybes $ flip map (Map.toList allocation) $ \(_, a) -> case a of + AllocReg reg -> Just reg + AllocMem -> Nothing + + traceShowM temprefsperbb + traceShowM lifespans + -- traceM $ "ALLOCATION: " ++ show allocation + + let nsaves = length usedregs + allocationXref = flip Map.mapWithKey allocation $ \ref alloc -> case alloc of + AllocReg reg -> XReg (fromIntegral $ refSize ref) reg + AllocMem -> XMem (fromIntegral $ refSize ref) + (Just RSP) (0, RAX) Nothing + (fromIntegral $ spilloffsetmap ! ref) + allocmap = foldl inserter allocationXref (zip al [0::Int ..]) + where + inserter m ((t, n), i) = + let offset = fromIntegral spillsz + 8 * nsaves + 8 {- rbp -} + 8 {- ret addr -} + 8 * i + in Map.insert (Argument (sizeof t) n) + (XMem (fromIntegral $ sizeof t) + (Just RSP) (0, RAX) Nothing + (fromIntegral offset)) + m + + newLabel name + addIns $ PUSH (xref $ XReg 8 RBP) + addIns $ MOV (xref $ XReg 8 RBP) (xref $ XReg 8 RSP) + forM_ usedregs $ \reg -> addIns $ PUSH (xref $ XReg 8 reg) + when (spillsz /= 0) $ addIns $ SUB (xref $ XReg 8 RSP) (xref $ XImm $ fromIntegral spillsz) + setRegsToRestore usedregs + setSpillSize spillsz + + let ([startbb], rest) = partition (\(BB i _ _) -> i == sid) bbs + codegenBB allocmap startbb + mapM_ (codegenBB allocmap) rest + +findAliasCandidates :: [BB] -> [(Ref, Ref)] +findAliasCandidates = concatMap (\(BB _ inss _) -> concatMap goI inss) + where + goI :: IRIns -> [(Ref, Ref)] + goI (IMov d s) = [(d, s)] + goI _ = [] + +findFirstLast :: forall a. (a -> Bool) -> [a] -> Maybe (Int, Int) +findFirstLast f l = go Nothing 0 l + where + go :: Maybe (Int, Int) -> Int -> [a] -> Maybe (Int, Int) + go mr _ [] = mr + go mr i (x:xs) + | f x = go (note mr i) (i+1) xs + | otherwise = go mr (i+1) xs + + note :: Maybe (Int, Int) -> Int -> Maybe (Int, Int) + note Nothing i = Just (i, i) + note (Just (a, _)) i = Just (a, i) + +isAllocMem :: Allocation a -> Bool +isAllocMem AllocMem = True +isAllocMem _ = False + +initLast :: [a] -> ([a], a) +initLast [] = undefined +initLast [x] = ([], x) +initLast (x:xs) = let (acc, l) = initLast xs in (x : acc, l) + +codegenBB :: AllocMap -> BB -> CGMonad () +codegenBB allocmap (BB bid inss term) = do + newLabel $ ".bb" ++ show bid + mapM_ (codegenIns allocmap) inss + codegenTerm allocmap term + +mkxref :: Ref -> AllocMap -> XRef +mkxref (Constant _ v) _ = XImm v +mkxref (Global sz n) _ = XMem (fromIntegral sz) Nothing (0, RAX) (Just n) 0 +mkxref r m = fromJust $ Map.lookup r m + +mkmov :: XRef -> XRef -> X64.Ins +mkmov a@(XReg _ _) b@(XReg _ _) = MOV (xref a) (xref b) +mkmov a@(XReg _ _) b@(XMem _ _ _ _ _) = MOV (xref a) (xref b) +mkmov a@(XReg _ _) b@(XImm _) = MOVi64 (xref a) (xref b) +mkmov a@(XMem _ _ _ _ _) b@(XReg _ _) = MOV (xref a) (xref b) +mkmov a@(XMem _ _ _ _ _) b@(XImm v) | v < 2 ^ (32 :: Int) = MOVi (xref a) (xref b) +mkmov a b = error $ "Invalid mkmov: " ++ show a ++ "; " ++ show b + +mkcmp :: XRef -> XRef -> X64.Ins +mkcmp a b@(XImm _) = CMPi (xref a) (xref b) +mkcmp a b = CMP (xref a) (xref b) + +codegenIns :: AllocMap -> IRIns -> CGMonad () +codegenIns m (IMov d s) + | dm == sm = return () + | X64.isXMem dm && X64.isXMem sm = do + addIns $ mkmov (XReg (fromIntegral $ refSize s) RAX) sm + addIns $ mkmov dm (XReg (fromIntegral $ refSize d) RAX) + | otherwise = addIns $ mkmov dm sm + where dm = mkxref d m + sm = mkxref s m +codegenIns m (IStore d s) = do + sourcexref <- if X64.isXMem sm + then do + addIns $ mkmov (XReg sz RBX) sm + return $ XReg sz RBX + else return sm + destxref <- case dm of + XReg _ r -> return $ XMem sz (Just r) (0, RAX) Nothing 0 + x@(XMem xsz _ _ _ _) -> do + addIns $ mkmov (XReg xsz RAX) x + return $ XMem sz (Just RAX) (0, RAX) Nothing 0 + XImm _ -> throwError $ "IStore to [immediate] not expected" + addIns $ mkmov destxref sourcexref + where dm = mkxref d m + sm = mkxref s m + sz = fromIntegral $ refSize s +codegenIns m (ILoad d s) = do + sourcexref <- case sm of + XReg _ r -> return $ XMem sz (Just r) (0, RAX) Nothing 0 + x@(XMem xsz _ _ _ _) -> do + addIns $ mkmov (XReg xsz RAX) x + return $ XMem sz (Just RAX) (0, RAX) Nothing 0 + XImm _ -> throwError $ "ILoad from [immediate] not expected" + if X64.isXMem dm + then do + addIns $ mkmov (XReg sz RAX) sourcexref + addIns $ mkmov dm (XReg sz RAX) + else do + addIns $ mkmov dm sourcexref + where dm = mkxref d m + sm = mkxref s m + sz = fromIntegral $ refSize d +codegenIns m (IAri AMul d s) = do + let sz = fromIntegral $ refSize d + addIns $ mkmov (XReg sz RAX) (mkxref d m) + addIns $ mkmov (XReg sz RBX) (mkxref s m) + addIns $ IMULDA (xref $ XReg sz RBX) + addIns $ mkmov (mkxref d m) (XReg sz RAX) +codegenIns m (IAri ADiv d s) = do + let sz = fromIntegral $ refSize d + addIns $ XOR (xref $ XReg 4 RDX) (xref $ XReg 4 RDX) + addIns $ mkmov (XReg sz RAX) (mkxref d m) + addIns $ mkmov (XReg sz RBX) (mkxref s m) + addIns $ IDIVDA (xref $ XReg sz RBX) + addIns $ mkmov (mkxref d m) (XReg sz RAX) +codegenIns m (IAri AMod d s) = do + let sz = fromIntegral $ refSize d + addIns $ XOR (xref $ XReg 4 RDX) (xref $ XReg 4 RDX) + addIns $ mkmov (XReg sz RAX) (mkxref d m) + addIns $ mkmov (XReg sz RBX) (mkxref s m) + addIns $ IDIVDA (xref $ XReg sz RBX) + addIns $ mkmov (mkxref d m) (XReg sz RDX) +codegenIns m (IAri at d s) = case arithTypeToCondCode at of + Just cc -> do + arg2 <- if X64.isXMem dm && X64.isXMem sm + then do + addIns $ mkmov (XReg (fromIntegral $ refSize s) RAX) sm + return $ XReg (fromIntegral $ refSize s) RAX + else return sm + addIns $ mkcmp dm arg2 + addIns $ MOVi (xref dm) (xref $ XImm 0) + addIns $ SETCC cc (xref $ X64.xrefSetSize 1 dm) + Nothing -> do + arg2 <- if X64.isXMem dm && X64.isXMem sm + then do + addIns $ mkmov (XReg (fromIntegral $ refSize s) RAX) sm + return $ XReg (fromIntegral $ refSize s) RAX + else return sm + addIns $ fromJust (arithTypeToIns at) dm arg2 + where dm = mkxref d m + sm = mkxref s m +codegenIns m (ICall n rs) = do + forM_ (zip (reverse rs) [1::Int ..]) $ \(r, i) -> + let sz = fromIntegral $ refSize r + src = (mkxref r m) + dst = (XMem sz (Just RSP) (0, RAX) Nothing (fromIntegral $ (-8) * i)) + in if X64.isXMem (mkxref r m) + then do + addIns $ mkmov (XReg sz RAX) src + addIns $ mkmov dst (XReg sz RAX) + else do + addIns $ mkmov dst src + when (length rs > 0) $ addIns $ SUB (xref $ XReg 8 RSP) (xref $ XImm (fromIntegral $ 8 * length rs)) + addIns $ CALL n + when (length rs > 0) $ addIns $ ADD (xref $ XReg 8 RSP) (xref $ XImm (fromIntegral $ 8 * length rs)) +codegenIns m (ICallr d n rs) = do + codegenIns m (ICall n rs) + addIns $ mkmov (mkxref d m) (XReg (fromIntegral $ refSize d) RAX) +codegenIns m fullins@(IResize d s) = do + let dsz = fromIntegral $ refSize d + ssz = fromIntegral $ refSize s + dm = mkxref d m + sm = mkxref s m + when (X64.isXImm sm) $ + throwError $ "Resized value is an immediate in " ++ show fullins ++ + "; (dm = " ++ show dm ++ "; sm = " ++ show sm ++ ")" + case compare dsz ssz of + EQ -> codegenIns m (IMov d s) + GT -> if X64.isXMem dm + then do + addIns $ MOVSX (xref $ XReg dsz RAX) (xref sm) + addIns $ mkmov dm (XReg dsz RAX) + else do + addIns $ MOVSX (xref dm) (xref sm) + LT -> if X64.isXMem dm && X64.isXMem sm + then do + addIns $ mkmov (XReg dsz RAX) (X64.xrefSetSize dsz sm) + addIns $ mkmov dm (XReg dsz RAX) + else do + addIns $ mkmov dm (X64.xrefSetSize dsz sm) +codegenIns _ INop = return () + +arithTypeToCondCode :: ArithType -> Maybe X64.CondCode +arithTypeToCondCode AEq = Just CCE +arithTypeToCondCode ANeq = Just CCNE +arithTypeToCondCode AGt = Just CCG +arithTypeToCondCode ALt = Just CCL +arithTypeToCondCode AGeq = Just CCGE +arithTypeToCondCode ALeq = Just CCLE +arithTypeToCondCode _ = Nothing + +cmpTypeToCondCode :: CmpType -> X64.CondCode +cmpTypeToCondCode CEq = CCE +cmpTypeToCondCode CNeq = CCNE +cmpTypeToCondCode CGt = CCG +cmpTypeToCondCode CLt = CCL +cmpTypeToCondCode CGeq = CCGE +cmpTypeToCondCode CLeq = CCLE + +arithTypeToIns :: ArithType -> Maybe (XRef -> XRef -> X64.Ins) +arithTypeToIns AAdd = Just $ \a b -> ADD (xref a) (xref b) +arithTypeToIns ASub = Just $ \a b -> SUB (xref a) (xref b) +arithTypeToIns AAnd = Just $ \a b -> AND (xref a) (xref b) +arithTypeToIns AOr = Just $ \a b -> OR (xref a) (xref b) +arithTypeToIns AXor = Just $ \a b -> XOR (xref a) (xref b) +arithTypeToIns _ = Nothing + +codegenTerm :: AllocMap -> IRTerm -> CGMonad () +codegenTerm m (IJcc ct a b t e) = do + addIns $ mkcmp (mkxref a m) (mkxref b m) + addIns $ JCC (cmpTypeToCondCode ct) (".bb" ++ show t) + addIns $ JMP (".bb" ++ show e) +codegenTerm _ (IJmp i) = addIns $ JMP (".bb" ++ show i) +codegenTerm _ IRet = do + spillsz <- gets spillSize + when (spillsz /= 0) $ addIns $ ADD (xref $ XReg 8 RSP) (xref $ XImm $ fromIntegral spillsz) + usedregs <- gets regsToRestore + forM_ (reverse usedregs) $ \reg -> addIns $ POP (xref $ XReg 8 reg) + addIns $ mkmov (XReg 8 RSP) (XReg 8 RBP) + addIns $ POP (xref $ XReg 8 RBP) + addIns RET +codegenTerm m (IRetr r) = do + addIns $ mkmov (XReg (fromIntegral $ refSize r) RAX) (mkxref r m) + spillsz <- gets spillSize + when (spillsz /= 0) $ addIns $ ADD (xref $ XReg 8 RSP) (xref $ XImm $ fromIntegral spillsz) + usedregs <- gets regsToRestore + forM_ (reverse usedregs) $ \reg -> addIns $ POP (xref $ XReg 8 reg) + addIns $ mkmov (XReg 8 RSP) (XReg 8 RBP) + addIns $ POP (xref $ XReg 8 RBP) + addIns RET +codegenTerm _ ITermNone = undefined + + +collectTempRefs :: [BB] -> [([[LA.Access Ref]], [Int])] +collectTempRefs bbs = + flip map bbs $ \(BB _ inss term) -> + let refs = map (filter (isTemp . LA.unAccess)) $ concatMap listRefsIns inss ++ listRefsTerm term + nexts = map (\i -> fromJust $ findIndex (\(BB j _ _) -> j == i) bbs) $ listNextIds term + in (refs, nexts) + where + listRefsIns :: IRIns -> [[LA.Access Ref]] + listRefsIns (IMov a b) = [[LA.Read b], [LA.Write a]] + listRefsIns (IStore a b) = [[LA.Read a, LA.Read b]] + listRefsIns (ILoad a b) = [[LA.Read b], [LA.Write a]] + listRefsIns (IAri _ a b) = [[LA.Write a, LA.Read b]] + listRefsIns (ICall _ l) = [map LA.Read l] + listRefsIns (ICallr a _ l) = [LA.Write a : map LA.Read l] + listRefsIns (IResize a b) = [[LA.Read b], [LA.Write a]] + listRefsIns INop = [[]] + + listRefsTerm :: IRTerm -> [[LA.Access Ref]] + listRefsTerm (IJcc _ a b _ _) = [[LA.Read a, LA.Read b]] + listRefsTerm (IJmp _) = [[]] + listRefsTerm IRet = [[]] + listRefsTerm (IRetr a) = [[LA.Read a]] + listRefsTerm ITermNone = undefined + + listNextIds :: IRTerm -> [Id] + listNextIds (IJcc _ _ _ a b) = [a, b] + listNextIds (IJmp a) = [a] + listNextIds IRet = [] + listNextIds (IRetr _) = [] + listNextIds ITermNone = undefined + + isTemp :: Ref -> Bool + isTemp (Temp _ _) = True + isTemp _ = False @@ -0,0 +1,10 @@ +module Defs where + +import Data.Int + + +type Name = String +type Id = Int +type Size = Integer +type Value = Int64 +type Error = Either String diff --git a/Intermediate.hs b/Intermediate.hs new file mode 100644 index 0000000..5f3a9f2 --- /dev/null +++ b/Intermediate.hs @@ -0,0 +1,174 @@ +module Intermediate where + +import Data.Bits +import Data.List + +import AST +import Defs +import Pretty + + +data BB = BB Id [IRIns] IRTerm + deriving (Show, Eq) + +data Ref = Temp Size Int | Argument Size Name | Global Size Name | Constant Size Value + deriving (Show, Eq, Ord) + +data IRProgram = IRProgram [DVar] [IRFunc] + deriving (Show, Eq) + +data IRFunc = IRFunc (Maybe Type) Name [(Type, Name)] [BB] Id + deriving (Show, Eq) + +data IRIns + = IMov Ref Ref + | IStore Ref Ref + | ILoad Ref Ref + | IAri ArithType Ref Ref + | ICall Name [Ref] + | ICallr Ref Name [Ref] + | IResize Ref Ref + | INop + deriving (Show, Eq) + +data IRTerm + = IJcc CmpType Ref Ref Id Id -- Id Id == if-yes if-no + | IJmp Id + | IRet + | IRetr Ref + | ITermNone + deriving (Show, Eq) + +data ArithType + = AAdd | ASub | AMul | ADiv | AMod + | AAnd | AOr | AXor + | AEq | ANeq | AGt | ALt | AGeq | ALeq + deriving (Show, Eq) + +data CmpType + = CEq | CNeq | CGt | CLt | CGeq | CLeq + deriving (Show, Eq) + + +refSize :: Ref -> Size +refSize (Temp sz _) = sz +refSize (Argument sz _) = sz +refSize (Global sz _) = sz +refSize (Constant sz _) = sz + + +instance Pretty BB where + prettyI i (BB bid inss term) = + "{{{(" ++ show bid ++ ")\n" ++ indent (i+1) ++ + intercalate ("\n" ++ indent (i+1)) (map pretty inss) ++ + (if null inss then "" else "\n" ++ indent (i+1)) ++ + pretty term ++ + "\n" ++ indent i ++ "}}}" + where + indent n = replicate (2*n) ' ' + +instance Pretty Ref where + prettyI _ (Temp sz k) = "t" ++ show k ++ pretty_sizeSuffix sz + prettyI _ (Argument sz n) = "a" ++ n ++ pretty_sizeSuffix sz + prettyI _ (Global sz n) = "g" ++ n ++ pretty_sizeSuffix sz + prettyI _ (Constant sz n) = show n ++ pretty_sizeSuffix sz + +pretty_sizeSuffix :: Size -> String +pretty_sizeSuffix 1 = "B" +pretty_sizeSuffix 2 = "W" +pretty_sizeSuffix 4 = "D" +pretty_sizeSuffix 8 = "Q" +pretty_sizeSuffix sz = "<" ++ show sz ++ ">" + +instance Pretty IRProgram where + prettyI i (IRProgram vars funcs) = + intercalate ("\n" ++ indent i) (map (prettyI i) vars) ++ + "\n" ++ indent i ++ + intercalate ("\n" ++ indent i) (map (prettyI (i+1)) funcs) ++ + "\n" + where + indent n = replicate (2*n) ' ' + +instance Pretty IRFunc where + prettyI i (IRFunc mt n al bbs sid) = + "irfunc" ++ maybe "" ((' ' :) . prettyI i) mt ++ " " ++ n ++ "(" ++ + intercalate "," + (map (\(at,an) -> prettyI i at ++ " " ++ an) al) ++ + ")\n" ++ indent i ++ + intercalate ("\n" ++ indent i) (map (prettyI i) sorted) + where + indent n' = replicate (2*n') ' ' + + sorted = uncurry (++) $ partition (\(BB bid _ _) -> bid == sid) bbs + +instance Pretty IRIns where + prettyI _ (IMov d s) = "mov " ++ pretty d ++ " <- " ++ pretty s + prettyI _ (IStore d s) = "store *" ++ pretty d ++ " <- " ++ pretty s + prettyI _ (ILoad d s) = "load " ++ pretty d ++ " <- *" ++ pretty s + prettyI _ (IAri at d s) = + pretty at ++ " " ++ pretty d ++ ", " ++ pretty s + prettyI _ (ICall n al) = + "call " ++ n ++ " (" ++ intercalate ", " (map pretty al) ++ ")" + prettyI _ (ICallr d n al) = + "call " ++ pretty d ++ " <- " ++ n ++ " (" ++ intercalate ", " (map pretty al) ++ ")" + prettyI _ (IResize d s) = "resize " ++ pretty d ++ " <- " ++ pretty s + prettyI _ INop = "nop" + +instance Pretty IRTerm where + prettyI _ (IJcc ct s1 s2 did1 did2) = + pretty ct ++ " " ++ pretty s1 ++ ", " ++ pretty s2 ++ " -> " ++ show did1 ++ " | " ++ show did2 + prettyI _ (IJmp did) = "jmp " ++ show did + prettyI _ IRet = "ret" + prettyI _ (IRetr ref) = "retr " ++ pretty ref + prettyI _ ITermNone = "?NONE?" + +instance Pretty ArithType where + prettyI _ AAdd = "add" + prettyI _ ASub = "sub" + prettyI _ AMul = "mul" + prettyI _ ADiv = "div" + prettyI _ AMod = "mod" + prettyI _ AAnd = "and" + prettyI _ AOr = "or" + prettyI _ AXor = "xor" + prettyI _ AEq = "eq" + prettyI _ ANeq = "neq" + prettyI _ AGt = "gt" + prettyI _ ALt = "lt" + prettyI _ AGeq = "geq" + prettyI _ ALeq = "leq" + +instance Pretty CmpType where + prettyI _ CEq = "jeq" + prettyI _ CNeq = "jne" + prettyI _ CGt = "jg" + prettyI _ CLt = "jl" + prettyI _ CGeq = "jge" + prettyI _ CLeq = "jle" + + +evaluateArith :: ArithType -> Value -> Value -> Value +evaluateArith at a b = case at of + AAdd -> a + b + ASub -> a - b + AMul -> a * b + ADiv -> if b == 0 then error "Division by zero detected" else a `div` b + AMod -> if b == 0 then error "Modulo by zero detected" else a `mod` b + AAnd -> a .&. b + AOr -> a .|. b + AXor -> a `xor` b + AEq -> if a == b then 1 else 0 + ANeq -> if a /= b then 1 else 0 + AGt -> if a > b then 1 else 0 + ALt -> if a < b then 1 else 0 + AGeq -> if a >= b then 1 else 0 + ALeq -> if a <= b then 1 else 0 + +evaluateCmp :: CmpType -> Value -> Value -> Bool +evaluateCmp ct a b = case ct of + CEq -> a == b + CNeq -> a /= b + CGt -> a > b + CLt -> a < b + CGeq -> a >= b + CLeq -> a <= b diff --git a/LifetimeAnalysis.hs b/LifetimeAnalysis.hs new file mode 100644 index 0000000..a590862 --- /dev/null +++ b/LifetimeAnalysis.hs @@ -0,0 +1,70 @@ +module LifetimeAnalysis(lifetimeAnalysis, Access(..), unAccess) where + +import Data.List +import Data.Maybe + + +data Access a = Write a | Read a + deriving (Show, Eq) + +unAccess :: Access a -> a +unAccess (Write x) = x +unAccess (Read x) = x + +type BB a = ([[Access a]], [Int]) + +lifetimeAnalysis :: Eq a => a -> [BB a] -> [[Bool]] +lifetimeAnalysis target bbs = + foldOr2 $ map (\p -> markAliveFrom bbs target p (emptyMark bbs)) occurrences + where + occurrences :: [(Int, Int)] + occurrences = do + (i, bb) <- zip [0..] bbs + (j, ins) <- zip [0..] (fst bb) + if ins `contains` Write target || ins `contains` Read target + then [(i, j)] + else [] + +emptyMark :: [BB a] -> [[Bool]] +emptyMark bbs = flip map bbs $ \(inss, _) -> map (const False) inss + +-- Assumes `target` is known to be alive at `pos`. +markAliveFrom :: Eq a => [BB a] -> a -> (Int, Int) -> [[Bool]] -> [[Bool]] +markAliveFrom bbs target pos topmark = setAt2 (maybe topmark id $ goNoCheck pos topmark) pos True + where + goNoCheck :: (Int, Int) -> [[Bool]] -> Maybe [[Bool]] + goNoCheck (i, j) mark = + let suc = flip filter (successors bbs (i, j)) $ \(i', j') -> + not $ mark !! i' !! j' + markset = setAt2 mark (i, j) True + in case catMaybes [go s markset | s <- suc] of + [] -> Nothing + l -> Just $ foldOr2 l + + go :: (Int, Int) -> [[Bool]] -> Maybe [[Bool]] + go (i, j) mark + | fst (bbs !! i) !! j `contains` Write target = Nothing + | fst (bbs !! i) !! j `contains` Read target = Just $ setAt2 mark (i, j) True + | otherwise = goNoCheck (i, j) mark + +successors :: [BB a] -> (Int, Int) -> [(Int, Int)] +successors bbs (i, j) = + let (inss, nexts) = bbs !! i + in if j < length inss - 1 + then [(i, j + 1)] + else [(n, 0) | n <- nexts] + +contains :: Eq a => [a] -> a -> Bool +contains l v = isJust $ find (== v) l + +modifyAt2 :: [[a]] -> (Int, Int) -> (a -> a) -> [[a]] +modifyAt2 l (i, j) f = modifyAt l i $ \li -> modifyAt li j f + +modifyAt :: [a] -> Int -> (a -> a) -> [a] +modifyAt l i f = let (pre, v : post) = splitAt i l in pre ++ f v : post + +setAt2 :: [[a]] -> (Int, Int) -> a -> [[a]] +setAt2 l p v = modifyAt2 l p (const v) + +foldOr2 :: [[[Bool]]] -> [[Bool]] +foldOr2 = foldl1 (\m1 m2 -> map (map (uncurry (||)) . uncurry zip) (zip m1 m2)) @@ -0,0 +1,56 @@ +module Main where + +import System.Exit +import System.IO +import System.Process +import Debug.Trace + +import BuildIR +import CodeGen +import Defs +import Optimiser +import Pretty +import ProgramParser +import TypeCheck +import Verify + + +infix 2 <?> +(<?>) :: (a -> Error b) -> String -> a -> Error b +f <?> pre = \a -> case f a of + Left e -> Left $ pre ++ ": " ++ e + Right x -> Right x + + +tracePrettyId :: Pretty a => a -> a +tracePrettyId x = trace (pretty x) x + +eitherToIO :: Either String a -> IO a +eitherToIO = either die return + + +main :: IO () +main = do + source <- getContents + + let eres = return source + >>= parseProgram <?> "Parse error" + -- >>= return . traceShowId + >>= typeCheck <?> "Type error" + >>= buildIR <?> "IR building error" + -- >>= return . tracePrettyId + >>= optimise <?> "Error while optimising" + >>= verify <?> "Verify error" + >>= return . tracePrettyId + >>= codegen <?> "Codegen error" + + asm <- eitherToIO eres + -- hPutStr stderr asm + + writeFile "z_output.asm" asm + + hPutStrLn stderr "Assembling with yasm..." + callCommand "yasm -w+all -fmacho64 z_output.asm -o z_output.o" + + hPutStrLn stderr "Linking with ld..." + callCommand "ld z_output.o -o z_output" diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..3ce6d7e --- /dev/null +++ b/Makefile @@ -0,0 +1,26 @@ +RUNFLAGS = +GHCFLAGS = -Wall -Widentities -odir obj -hidir obj +ifneq ($(PROFILE),) + RUNFLAGS += +RTS -xc + GHCFLAGS += -prof -fprof-auto +else + GHCFLAGS += -O3 +endif + +TARGET = main + +.PHONY: all clean run + +all: $(TARGET) + +clean: + rm -f $(TARGET) + rm -rf obj + +run: $(TARGET) + ./$(TARGET) $(RUNFLAGS) + + +$(TARGET): $(wildcard *.hs) + @mkdir -p obj + ghc $(GHCFLAGS) Main.hs -o $@ diff --git a/Optimiser.hs b/Optimiser.hs new file mode 100644 index 0000000..6e6227c --- /dev/null +++ b/Optimiser.hs @@ -0,0 +1,252 @@ +module Optimiser(optimise) where + +import Data.List +import Data.Maybe +import qualified Data.Map.Strict as Map +import Debug.Trace + +import Defs +import Intermediate +import ReplaceRefs +import Utils + + +type Optimisation = IRProgram -> IRProgram +type FuncOptimisation = IRFunc -> IRFunc + +optimise :: IRProgram -> Error IRProgram +optimise prog = + let master = foldl1 (.) (reverse optimisations) {-. trace "-- OPT PASS --"-} + reslist = iterate master prog + pairs = zip reslist (tail reslist) + in Right $ fst $ fromJust $ find (uncurry (==)) pairs + where + optimisations = map funcopt $ + -- [chainJumps, removeUnusedBlocks] + [chainJumps, mergeTerminators, looseJumps, removeUnusedBlocks, identityOps, + constantPropagate, removeNops, movPush, evaluateInstructions, evaluateTerminators] + + +funcopt :: FuncOptimisation -> Optimisation +funcopt fo (IRProgram vars funcs) = IRProgram vars (map fo funcs) + + +chainJumps :: FuncOptimisation +chainJumps (IRFunc rt name al bbs sid) = IRFunc rt name al bbs' sid + where + bbs' = snd $ last $ takeWhile fst $ iterate (mergeChain . snd) (True, bbs) + + mergeChain :: [BB] -> (Bool, [BB]) + mergeChain [] = (False, []) + mergeChain bbs2 = case findIndex isSuitable bbs2 of + Nothing -> (False, bbs2) + Just idx -> + let (BB bid1 inss1 (IJmp target), rest) = + (bbs2 !! idx, take idx bbs2 ++ drop (idx+1) bbs2) + [BB _ inss2 term2] = filter (\(BB bid _ _) -> bid == target) rest + merged = BB bid1 (inss1 ++ inss2) term2 + in (True, merged : rest) + where + hasJmpTo :: Id -> BB -> Bool + hasJmpTo i (BB _ _ (IJmp i')) = i == i' + hasJmpTo _ _ = False + + isSuitable :: BB -> Bool + isSuitable (BB _ _ (IJmp target)) = sum (map (fromEnum . hasJmpTo target) bbs2) == 1 + isSuitable _ = False + +mergeTerminators :: FuncOptimisation +mergeTerminators (IRFunc rt name al bbs sid) = IRFunc rt name al bbs' sid + where + bbs' = flip map bbs $ \bb@(BB bid inss term) -> case term of + IJmp i -> case find ((== i) . fst) singles of + Just (_, t) -> BB bid inss t + Nothing -> bb + _ -> bb + + singles = map (\(BB i _ t) -> (i, t)) $ filter (\(BB _ inss _) -> null inss) bbs + +looseJumps :: FuncOptimisation +looseJumps (IRFunc rt name al bbs sid) = IRFunc rt name al bbs' sid + where + bbs' = flip map bbs $ \bb@(BB bid inss term) -> case term of + IJmp i -> BB bid inss (IJmp (translate i)) + IJcc ct r1 r2 i j -> BB bid inss (IJcc ct r1 r2 (translate i) (translate j)) + _ -> bb + + translate i = fromMaybe i $ Map.lookup i transmap + + transmap = Map.fromList $ catMaybes $ flip map bbs $ \bb -> case bb of + BB bid [] (IJmp i) -> Just (bid, i) + _ -> Nothing + +removeUnusedBlocks :: FuncOptimisation +removeUnusedBlocks (IRFunc rt name al bbs sid) = IRFunc rt name al bbs' sid + where + bbs' = filter isReachable bbs + + isReachable :: BB -> Bool + isReachable (BB bid _ _) + | bid == sid = True + | otherwise = isJust $ flip find bbs $ \(BB _ _ term) -> case term of + IJcc _ _ _ i1 i2 -> i1 == bid || i2 == bid + IJmp i -> i == bid + _ -> False + +identityOps :: FuncOptimisation +identityOps (IRFunc rt name al bbs sid) = IRFunc rt name al (map go bbs) sid + where + go :: BB -> BB + go (BB bid inss term) = BB bid (catMaybes $ map goI inss) term + + goI :: IRIns -> Maybe IRIns + goI (IAri AAdd _ (Constant _ 0)) = Nothing + goI (IAri ASub _ (Constant _ 0)) = Nothing + goI (IAri AMul _ (Constant _ 1)) = Nothing + goI (IAri ADiv _ (Constant _ 1)) = Nothing + goI i = Just i + +constantPropagate :: FuncOptimisation +constantPropagate (IRFunc rt name al bbs sid) = IRFunc rt name al bbs' sid + where + alltemps = findAllTemps' bbs + consttemps = catMaybes $ flip map alltemps $ \ref -> + let locs = findMutations' bbs ref + loc = head locs + ins = insAt bbs loc + + isIMov (IMov _ _) = True + isIMov _ = False + in {-trace ("Muts of " ++ show ref ++ ": " ++ show locs ++ ": " ++ + show (map (insAt bbs) locs)) $-} + if length locs == 1 && isIMov ins + then Just (loc, ins) + else Nothing + + bbs' = case consttemps of + [] -> bbs + ((loc, IMov ref value) : _) -> + replaceRefsBBList ref value (nopifyInsAt bbs loc) + _ -> undefined + +removeNops :: FuncOptimisation +removeNops (IRFunc rt name al bbs sid) = + IRFunc rt name al (map go bbs) sid + where + go (BB bid inss term) = BB bid (filter (not . isNop) inss) term + isNop INop = True + isNop _ = False + +movPush :: FuncOptimisation +movPush (IRFunc rt name al bbs sid) = IRFunc rt name al bbs' sid + where + bbs' = map goBB bbs + + goBB :: BB -> BB + goBB (BB bid inss term) = BB bid (go inss) term + + go :: [IRIns] -> [IRIns] + go [] = [] + go (ins@(IMov d _) : rest) | isJust (find (== d) (findAllRefsInss rest)) = push ins rest + go (ins : rest) = ins : go rest + + push :: IRIns -> [IRIns] -> [IRIns] + push mov [] = [mov] + push mov@(IMov d s) (ins@(IMov d' s') : rest) + | d' == d = if s' == d then push mov rest else push ins rest + | otherwise = replaceRefsIns d s ins : push mov rest + push mov@(IMov d s) (ins@(IResize d' s') : rest) + | d' == d = if s' == d then push mov rest else push ins rest + | otherwise = replaceRefsIns d s ins : push mov rest + push mov@(IMov d s) (ins@(ILoad d' _) : rest) + | d' == d = mov : ins : go rest + | otherwise = replaceRefsIns d s ins : push mov rest + push mov@(IMov d s) (ins@(IAri at d' s') : rest) + | d' == d = case (s, s') of + (Constant sza a, Constant szb b) + | sza == szb -> push (IMov d (Constant sza $ evaluateArith at a b)) rest + | otherwise -> error $ "Inconsistent sizes in " ++ show mov ++ "; " ++ show ins + _ -> mov : ins : go rest + | otherwise = replaceRefsIns d s ins : push mov rest + push mov@(IMov d s) (ins@(ICallr d' _ _) : rest) + | d' == d = mov : ins : go rest + | otherwise = replaceRefsIns d s ins : push mov rest + push mov@(IMov d s) (ins@(IStore _ _) : rest) = replaceRefsIns d s ins : push mov rest + push mov@(IMov d s) (ins@(ICall _ _) : rest) = replaceRefsIns d s ins : push mov rest + push mov (ins@INop : rest) = ins : push mov rest + push _ _ = undefined + +evaluateInstructions :: FuncOptimisation +evaluateInstructions (IRFunc rt name al bbs sid) = IRFunc rt name al (map goBB bbs) sid + where + goBB :: BB -> BB + goBB (BB bid inss term) = BB bid (map goI inss) term + + goI :: IRIns -> IRIns + goI (IResize ref (Constant _ v)) = IMov ref $ Constant (refSize ref) $ truncValue (refSize ref) v + goI ins = ins + + truncValue :: Size -> Value -> Value + truncValue sz v = fromIntegral $ (fromIntegral v :: Integer) `mod` (2 ^ (8 * sz)) + +evaluateTerminators :: FuncOptimisation +evaluateTerminators (IRFunc rt name al bbs sid) = IRFunc rt name al bbs' sid + where + bbs' = map (\(BB bid inss term) -> BB bid inss (go term)) bbs + + go :: IRTerm -> IRTerm + go term@(IJcc ct (Constant sza a) (Constant szb b) i1 i2) + | sza /= szb = error $ "Inconsistent sizes in " ++ show term + | evaluateCmp ct a b = IJmp i1 + | otherwise = IJmp i2 + go term = term + + +insAt :: [BB] -> (Int, Int) -> IRIns +insAt bbs (i, j) = + let (BB _ inss _) = bbs !! i + in inss !! j + +nopifyInsAt :: [BB] -> (Int, Int) -> [BB] +nopifyInsAt bbs (i, j) = + let (pre, BB bid inss term : post) = splitAt i bbs + (ipre, _ : ipost) = splitAt j inss + in pre ++ BB bid (ipre ++ INop : ipost) term : post + +findMutations :: BB -> Ref -> [Int] +findMutations (BB _ inss _) ref = + catMaybes $ flip map (zip inss [0..]) $ \(ins, idx) -> case ins of + (IMov r _) | r == ref -> Just idx + (IAri _ r _) | r == ref -> Just idx + (ICallr r _ _) | r == ref -> Just idx + _ -> Nothing + +findMutations' :: [BB] -> Ref -> [(Int, Int)] +findMutations' bbs ref = + [(i, j) | (bb, i) <- zip bbs [0..], j <- findMutations bb ref] + +findAllRefs :: BB -> [Ref] +findAllRefs (BB _ inss _) = findAllRefsInss inss + +findAllRefsInss :: [IRIns] -> [Ref] +findAllRefsInss inss = uniq $ sort $ concatMap go inss + where + go (IMov a b) = [a, b] + go (IStore a b) = [a, b] + go (ILoad a b) = [a, b] + go (IAri _ a b) = [a, b] + go (ICall _ al) = al + go (ICallr a _ al) = a : al + go (IResize a b) = [a, b] + go INop = [] + +-- findAllRefs' :: [BB] -> [Ref] +-- findAllRefs' = uniq . sort . concatMap findAllRefs + +findAllTemps :: BB -> [Ref] +findAllTemps bb = flip filter (findAllRefs bb) $ \ref -> case ref of + (Temp _ _) -> True + _ -> False + +findAllTemps' :: [BB] -> [Ref] +findAllTemps' = concatMap findAllTemps diff --git a/Pretty.hs b/Pretty.hs new file mode 100644 index 0000000..f2f36d8 --- /dev/null +++ b/Pretty.hs @@ -0,0 +1,14 @@ +module Pretty where + +import Data.List + + +class Pretty a where + pretty :: a -> String + pretty = prettyI 0 + + prettyI :: Int -> a -> String + + +instance Pretty a => Pretty [a] where + prettyI i l = "[" ++ intercalate ", " (map (prettyI i) l) ++ "]" diff --git a/ProgramParser.hs b/ProgramParser.hs new file mode 100644 index 0000000..2cacaf5 --- /dev/null +++ b/ProgramParser.hs @@ -0,0 +1,269 @@ +module ProgramParser(parseProgram) where + +import Control.Monad +import Data.Char +import Text.Parsec +import qualified Text.Parsec.Expr as E + +import AST +import Defs + + +type Parser = Parsec String () + + +parseProgram :: String -> Either String Program +parseProgram s = case parse pProgram "" s of + Left err -> Left $ show err + Right p -> Right p + +pProgram :: Parser Program +pProgram = do + pWhiteComment + decls <- many pDecl + eof + return $ Program (lefts decls) (rights decls) + +pDecl :: Parser (Either DVar DFunc) +pDecl = (Right <$> pDFunc) <|> (Left <$> pDVar) + +pDFunc :: Parser DFunc +pDFunc = do + symbol "func" + rt <- (Just <$> pType) <|> return Nothing + n <- pName + symbol "(" + args <- sepBy pTypeAndName (symbol ",") + symbol ")" + body <- pBlock + return $ DFunc rt n args body + +pDVar :: Parser DVar +pDVar = do + t <- pType + n <- pName + symbol ":=" + e <- pExpression + symbol ";" + return $ DVar t n e + +pTypeAndName :: Parser (Type, Name) +pTypeAndName = (,) <$> pType <*> pName + +pType :: Parser Type +pType = do + t <- pBasicType + (do + symbol "[" + msz <- optionMaybe pInteger + symbol "]" + return $ TArr t msz) <|> return t + +pBasicType :: Parser Type +pBasicType = (symbol "int" >> return TInt) <|> (symbol "char" >> return TChar) + +pBlock :: Parser Block +pBlock = do + symbol "{" + body <- many pStatement + symbol "}" + return $ Block body + +pStatement :: Parser Statement +pStatement = pSIf <|> pSWhile <|> pSReturn <|> pSDecl <|> pSAs <|> pSExpr + +pSDecl :: Parser Statement +pSDecl = do + (t, n) <- try $ do + t <- pType + n <- pName + symbol ":=" + return (t, n) + e <- pExpression + symbol ";" + return $ SDecl t n e + +pSAs :: Parser Statement +pSAs = do + n <- try $ pAsExpression <* symbol "=" + e <- pExpression + symbol ";" + return $ SAs n e + +pSIf :: Parser Statement +pSIf = do + symbol "if" + symbol "(" + cond <- pExpression + symbol ")" + bl1 <- pBlock + bl2 <- try (symbol "else" >> pBlock) <|> return (Block []) + return $ SIf cond bl1 bl2 + +pSWhile :: Parser Statement +pSWhile = do + symbol "while" + symbol "(" + cond <- pExpression + symbol ")" + bl <- pBlock + return $ SWhile cond bl + +pSReturn :: Parser Statement +pSReturn = do + symbol "return" + SReturn <$> ((symbol ";" >> return Nothing) <|> + ((Just <$> pExpression) <* symbol ";")) + +pSExpr :: Parser Statement +pSExpr = do + e <- pExpression + symbol ";" + return $ SExpr e + +pExpression :: Parser Expression +pExpression = E.buildExpressionParser optable litparser + where + optable = + [[E.Infix (symbol "**" >> return (mkEBin BOPow)) E.AssocRight], + [E.Infix (symbol "*" >> return (mkEBin BOMul)) E.AssocLeft, + E.Infix (symbol "/" >> return (mkEBin BODiv)) E.AssocLeft, + E.Infix (symbol "%" >> return (mkEBin BOMod)) E.AssocLeft], + [E.Infix (symbol "+" >> return (mkEBin BOAdd)) E.AssocLeft, + E.Infix (symbol "-" >> return (mkEBin BOSub)) E.AssocLeft], + [E.Infix (symbol ">=" >> return (mkEBin BOGeq)) E.AssocNone, + E.Infix (symbol "<=" >> return (mkEBin BOLeq)) E.AssocNone, + E.Infix (symbol ">" >> return (mkEBin BOGt)) E.AssocNone, + E.Infix (symbol "<" >> return (mkEBin BOLt)) E.AssocNone, + E.Infix (symbol "==" >> return (mkEBin BOEq)) E.AssocNone, + E.Infix (symbol "!=" >> return (mkEBin BONeq)) E.AssocNone], + [E.Infix (symbol "&&" >> return (mkEBin BOAnd)) E.AssocLeft], + [E.Infix (symbol "||" >> return (mkEBin BOOr)) E.AssocLeft]] + + mkEBin :: BinaryOp -> Expression -> Expression -> Expression + mkEBin bo a b = EBin bo a b Nothing + + mkELit :: Literal -> Expression + mkELit l = ELit l Nothing + + litparser :: Parser Expression + litparser = do + pops <- many pPrefixOp + e <- pParenExpr <|> pENew <|> pCastExpr <|> (mkELit <$> pLiteral) + subs <- many $ between (symbol "[") (symbol "]") pExpression + let e' = foldl (\ex sub -> ESubscript ex sub Nothing) e subs + e'' = foldl (\ex pop -> EUn pop ex Nothing) e' pops + return e'' + +pAsExpression :: Parser AsExpression +pAsExpression = do + n <- pName + subs <- many $ between (symbol "[") (symbol "]") pExpression + return $ foldl (\ae expr -> AESubscript ae expr Nothing) (AEVar n Nothing) subs + +pPrefixOp :: Parser UnaryOp +pPrefixOp = (symbol "!" >> return UONot) <|> + (symbol "-" >> return UONeg) + +pParenExpr :: Parser Expression +pParenExpr = do + symbol "(" + e <- pExpression + symbol ")" + return e + +pCastExpr :: Parser Expression +pCastExpr = do + t <- try $ pType <* symbol "(" + e <- pExpression + symbol ")" + return $ ECast t e + +pENew :: Parser Expression +pENew = do + symbol "new" + t <- pBasicType + symbol "[" + e <- pExpression + symbol "]" + return $ ENew t e + +pLiteral :: Parser Literal +pLiteral = (LInt <$> pInteger) <|> (LChar <$> pCharLit) <|> pLCall <|> (LVar <$> pName) + +pCharLit :: Parser Char +pCharLit = do + void $ char '\'' + c <- pStringChar + void $ char '\'' + pWhiteComment + return c + +pStringChar :: Parser Char +pStringChar = + (char '\\' >> ((char 'n' >> return '\n') <|> + (char 'r' >> return '\r') <|> + (char 't' >> return '\t') <|> + (char '0' >> return '\0') <|> + (char 'x' >> pHexDigit >>= \a -> pHexDigit >>= \b -> return (chr $ 16 * a + b)))) <|> + anyToken + where + pHexDigit :: Parser Int + pHexDigit = (subtract 48 . fromEnum <$> digit) + <|> ((+ (10 - 97)) . ord <$> oneOf "abcdef") + <|> ((+ (10 - 65)) . ord <$> oneOf "ABCDEF") + +pLCall :: Parser Literal +pLCall = do + n <- try $ pName <* symbol "(" + al <- sepBy pExpression (symbol ",") + symbol ")" + return $ LCall n al + + +pName :: Parser Name +pName = do + c0 <- satisfy (\c -> isAlpha c || c == '_') + cr <- many $ satisfy (\c -> isAlpha c || isDigit c || c == '_') + pWhiteComment + return $ c0 : cr + +pInteger :: Parser Integer +pInteger = read <$> many1 (satisfy isDigit) <* pWhiteComment + + +symbol :: String -> Parser () +symbol "" = error "symbol \"\"" +symbol s = try $ do + void $ string s + when (isAlpha (last s)) $ void $ notFollowedBy (satisfy isAlpha) + when (isDigit (last s)) $ void $ notFollowedBy (satisfy isDigit) + pWhiteComment + +pWhiteComment :: Parser () +pWhiteComment = void $ pWhite >> endBy pComment pWhite + +pWhite :: Parser () +pWhite = void $ many (oneOf " \t\n") + +pComment :: Parser () +pComment = pLineComment <|> pBlockComment + +pLineComment :: Parser () +pLineComment = do + void $ try $ string "//" + void $ many (satisfy (/= '\n')) + eof <|> void (char '\n') + +pBlockComment :: Parser () +pBlockComment = do + void $ try $ string "/*" + void $ sepEndBy (manyTill anyToken (lookAhead $ try (string "/*") <|> try (string "*/"))) + pBlockComment + void $ string "*/" + +lefts :: [Either a b] -> [a] +lefts = foldr (\e l -> either (:l) (const l) e) [] + +rights :: [Either a b] -> [b] +rights = foldr (\e l -> either (const l) (:l) e) [] diff --git a/RegAlloc.hs b/RegAlloc.hs new file mode 100644 index 0000000..d2b1717 --- /dev/null +++ b/RegAlloc.hs @@ -0,0 +1,87 @@ +module RegAlloc(regalloc, Allocation(..)) where + +import Data.Function +import Data.List +import Data.Maybe +import qualified Data.Map.Strict as Map +import Debug.Trace + +import Utils + + +-- Follows the Linear Scan Register Allocation algorithm specified in: +-- https://www.cs.purdue.edu/homes/suresh/502-Fall2008/papers/linear-scan.pdf + +-- Test case: +-- regalloc [((1, 3), "a"), ((2, 2), "x"), ((2, 10), "b"), ((3, 7), "c"), ((4, 5), "d")] ["ra", "rb"] + + +data Allocation a = AllocReg a | AllocMem + deriving Show + +type Interval = (Int, Int) + +data State a = State {stActive :: [Int], stAlloc :: [Allocation a], stFreeRegs :: [a]} + deriving Show + +regalloc :: (Show a, Show b, Ord a, Ord b) + => [(Interval, b)] -- [(live interval, name of variable)] + -> [a] -- the available registers + -> [(b, b)] -- pairs to be allocated to the same register if possible + -> Map.Map b (Allocation a) -- allocation map +regalloc vars' regs aliaspairs = + let foldfunc = \st' ((int, name), index) -> + let st = expireOldIntervals st' int + wanted = findWantedAliases aliaspairs name + wantedregs = uniq $ sort $ catMaybes $ flip map wanted $ \n -> + case findIndex (== n) intnames of + Nothing -> Nothing + Just idx | idx >= length (stAlloc st) -> Nothing + | otherwise -> case stAlloc st !! idx of + AllocMem -> Nothing + AllocReg r -> Just r + in if length (stActive st) == length regs + then spillAtInterval st index + else let -- ([regchoice], fr) = splitAt 1 (stFreeRegs st) + (regchoice, fr) = case find (`elem` wantedregs) (stFreeRegs st) of + Nothing -> (head (stFreeRegs st), tail (stFreeRegs st)) + Just wr -> trace ("Pair-allocated " ++ show name ++ " in " ++ show wr) $ + (wr, stFreeRegs st \\ [wr]) + allocrev = stAlloc st ++ [AllocReg regchoice] + active = sortBy (compare `on` snd . (ints !!)) $ index : stActive st + in State active allocrev fr + in Map.fromList $ zip intnames $ stAlloc $ + foldl foldfunc (State [] [] regs) (zip vars [0..]) + where + vars = sortBy (compare `on` fst . fst) vars' + (ints, intnames) = (map fst vars, map snd vars) + + expireOldIntervals :: State a -> Interval -> State a + expireOldIntervals st (intstart, _) = + let (dropped, active) = span ((< intstart) . snd . (ints !!)) (stActive st) + fr = selectAllocRegs (map (stAlloc st !!) dropped) ++ stFreeRegs st + in State active (stAlloc st) fr + + spillAtInterval :: State a -> Int -> State a + spillAtInterval st index = + let spill = last (stActive st) + in if snd (ints !! spill) > snd (ints !! index) + then let alloc = setAt spill AllocMem (stAlloc st) ++ [stAlloc st !! spill] + active = sortBy (compare `on` snd . (ints !!)) $ + index : (stActive st \\ [spill]) + in State active alloc (stFreeRegs st) + else State (stActive st) (stAlloc st ++ [AllocMem]) (stFreeRegs st) + + findWantedAliases :: (Ord b) => [(b, b)] -> b -> [b] + findWantedAliases pairs x = + uniq $ sort $ map snd (filter ((== x) . fst) pairs) ++ map fst (filter ((== x) . snd) pairs) + +selectAllocRegs :: [Allocation a] -> [a] +selectAllocRegs allocs = catMaybes $ flip map allocs $ \alloc -> case alloc of + (AllocReg r) -> Just r + AllocMem -> Nothing + +setAt :: Int -> a -> [a] -> [a] +setAt idx v l = + let (pre, _ : post) = splitAt idx l + in pre ++ v : post diff --git a/ReplaceRefs.hs b/ReplaceRefs.hs new file mode 100644 index 0000000..3ab73c3 --- /dev/null +++ b/ReplaceRefs.hs @@ -0,0 +1,36 @@ +module ReplaceRefs + (replaceRefsIns, replaceRefsTerm, replaceRefsBB, replaceRefsBBList) + where + +import Intermediate + + +replaceRefsIns :: Ref -> Ref -> IRIns -> IRIns +replaceRefsIns from to (IMov a b) = IMov (trans from to a) (trans from to b) +replaceRefsIns from to (IStore a b) = IStore (trans from to a) (trans from to b) +replaceRefsIns from to (ILoad a b) = ILoad (trans from to a) (trans from to b) +replaceRefsIns from to (IAri at a b) = IAri at (trans from to a) (trans from to b) +replaceRefsIns from to (ICall n al) = ICall n (map (trans from to) al) +replaceRefsIns from to (ICallr a n al) = ICallr (trans from to a) n (map (trans from to) al) +replaceRefsIns from to (IResize a b) = IResize (trans from to a) (trans from to b) +replaceRefsIns _ _ INop = INop + +replaceRefsTerm :: Ref -> Ref -> IRTerm -> IRTerm +replaceRefsTerm from to (IJcc ct a b i1 i2) = IJcc ct (trans from to a) (trans from to b) i1 i2 +replaceRefsTerm _ _ (IJmp i) = IJmp i +replaceRefsTerm _ _ IRet = IRet +replaceRefsTerm from to (IRetr a) = IRetr (trans from to a) +replaceRefsTerm _ _ ITermNone = ITermNone + +replaceRefsBB :: Ref -> Ref -> BB -> BB +replaceRefsBB from to (BB bid inss term) = + BB bid (map (replaceRefsIns from to) inss) (replaceRefsTerm from to term) + +replaceRefsBBList :: Ref -> Ref -> [BB] -> [BB] +replaceRefsBBList from to bbs = map (\bb -> replaceRefsBB from to bb) bbs + + +trans :: Ref -> Ref -> Ref -> Ref +trans from to ref + | ref == from = to + | otherwise = ref diff --git a/TypeCheck.hs b/TypeCheck.hs new file mode 100644 index 0000000..36d98c8 --- /dev/null +++ b/TypeCheck.hs @@ -0,0 +1,234 @@ +module TypeCheck(typeCheck) where + +import Control.Monad +import Data.Maybe +import qualified Data.Map.Strict as Map + +import AST +import Defs +import Pretty +import TypeRules + + +data DBItem = DBVar Type | DBFunc (Maybe Type) [Type] + +type TypeDB = [Map.Map Name DBItem] + +dbFind :: TypeDB -> Name -> Maybe DBItem +dbFind db name = findJust $ map (Map.lookup name) db + where findJust [] = Nothing + findJust (Just x:_) = Just x + findJust (Nothing:l) = findJust l + +dbFindTop :: TypeDB -> Name -> Maybe DBItem +dbFindTop [] _ = error "dbFindTop on empty scope stack" +dbFindTop (m:_) name = Map.lookup name m + +dbSet :: TypeDB -> Name -> DBItem -> TypeDB +dbSet [] _ _ = error "dbSet on empty scope stack" +dbSet (m:ms) name val = Map.insert name val m : ms + +emptyDB :: TypeDB +emptyDB = [Map.fromList + [("putc", DBFunc Nothing [TChar]), + ("putint", DBFunc Nothing [TInt]), + ("getc", DBFunc (Just TInt) []), + ("_builtin_malloc", DBFunc (Just $ TArr TChar Nothing) [TInt])]] + +withScope :: TypeDB -> (TypeDB -> a) -> a +withScope db f = f (Map.empty : db) + + +typeCheck :: Program -> Error Program +typeCheck (Program vars funcs) = do + db <- foldM registerDVar emptyDB vars + >>= \db' -> foldM registerDFunc db' funcs + vars' <- mapM (annotateDVar db) vars + funcs' <- mapM (annotateDFunc db) funcs + return $ Program vars' funcs' + + +registerDVar :: TypeDB -> DVar -> Error TypeDB +registerDVar db (DVar t n _) = case dbFind db n of + Nothing -> return $ dbSet db n (DBVar t) + Just _ -> Left $ "Duplicate top-level name '" ++ n ++ "'" + +registerDFunc :: TypeDB -> DFunc -> Error TypeDB +registerDFunc db (DFunc rt n al _) = case dbFind db n of + Nothing -> return $ dbSet db n (DBFunc rt (map fst al)) + Just _ -> Left $ "Duplicate top-level name '" ++ n ++ "'" + + +annotateDVar :: TypeDB -> DVar -> Error DVar +annotateDVar db (DVar toptype name expr) = do + expr' <- annotateExpr db expr + when (isNothing (typeof expr')) $ + Left $ "Cannot assign void value in global declaration of " ++ name + let typ = fromJust $ typeof expr' + if canCoerce typ toptype + then return $ DVar toptype name expr' + else Left $ "Cannot assign a value of type " ++ pretty typ ++ + " to a variable of type " ++ pretty toptype + +annotateDFunc :: TypeDB -> DFunc -> Error DFunc +annotateDFunc db dfunc@(DFunc rettype name arglist block) = do + when (name == "main" && rettype /= Just TInt) $ + Left $ "Function 'main' should return an int" + let db' = foldl registerArg db arglist + block' <- annotateBlock dfunc db' block + return $ DFunc rettype name arglist block' + where + registerArg :: TypeDB -> (Type, Name) -> TypeDB + registerArg db' (t, n) = dbSet db' n (DBVar t) + +annotateBlock :: DFunc -> TypeDB -> Block -> Error Block +annotateBlock dfunc db (Block sts) = + Block . snd <$> foldM (\(db', l) st -> + (\(db'', st') -> (db'', l ++ [st'])) <$> annotateStatement dfunc db' st) + (db, []) sts + +annotateStatement :: DFunc -> TypeDB -> Statement -> Error (TypeDB, Statement) +annotateStatement _ db (SDecl toptype name expr) = do + expr' <- annotateExpr db expr + when (isNothing (typeof expr')) $ + Left $ "Cannot assign void value in declaration of " ++ name + when (isJust (dbFindTop db name)) $ + Left $ "Duplicate declaration of variable " ++ name + let typ = fromJust $ typeof expr' + if canCoerce typ toptype + then return $ (dbSet db name (DBVar toptype), SDecl toptype name expr') + else Left $ "Cannot assign a value of type " ++ pretty typ ++ + " to a variable of type " ++ pretty toptype +annotateStatement _ db (SAs ae expr) = do + ae' <- annotateAsExpr db ae + expr' <- annotateExpr db expr + when (isNothing (typeof expr')) $ + Left $ "Cannot assign a void value in assignment of " ++ pretty ae + let typ = fromJust $ typeof expr' + let aetyp = fromJust $ typeof ae' + when (not $ isBasicType aetyp) $ + Left $ "Cannot assign to a location of type " ++ pretty aetyp + when (not $ canCoerce typ aetyp) $ + Left $ "Cannot assign a value of type " ++ pretty typ ++ + " to a location of type " ++ pretty aetyp + return (db, SAs ae' expr') +annotateStatement dfunc db (SIf expr bl1 bl2) = do + expr' <- annotateExpr db expr + when (isNothing (typeof expr')) $ + Left $ "Cannot use void value in 'if' condition" + when (not $ canCoerce (fromJust (typeof expr')) TInt) $ + Left $ "Cannot use type " ++ pretty (fromJust (typeof expr')) ++ " in 'if' condition" + bl1' <- withScope db $ flip (annotateBlock dfunc) bl1 + bl2' <- withScope db $ flip (annotateBlock dfunc) bl2 + return (db, SIf expr' bl1' bl2') +annotateStatement dfunc db (SWhile expr bl) = do + expr' <- annotateExpr db expr + when (isNothing (typeof expr')) $ + Left $ "Cannot use void value in 'while' condition" + when (not $ canCoerce (fromJust (typeof expr')) TInt) $ + Left $ "Cannot use type " ++ pretty (fromJust (typeof expr')) ++ " in 'while' condition" + bl' <- withScope db $ flip (annotateBlock dfunc) bl + return (db, SWhile expr' bl') +annotateStatement (DFunc Nothing _ _ _) db (SReturn Nothing) = + return (db, SReturn Nothing) +annotateStatement (DFunc (Just _) _ _ _) _ (SReturn Nothing) = + Left "Cannot return void value from non-void function" +annotateStatement (DFunc mrt _ _ _) db (SReturn (Just expr)) = do + expr' <- annotateExpr db expr + case mrt of + Nothing -> Left $ "Cannot return non-void value from void function" + Just rt -> do + when (isNothing (typeof expr')) $ + Left $ "Cannot use void value in 'return'" + when (not $ canCoerce (fromJust (typeof expr')) rt) $ + Left $ "Cannot coerce type " ++ pretty (fromJust (typeof expr')) ++ " to " ++ pretty rt ++ + " in 'return'" + return (db, SReturn (Just expr')) +annotateStatement _ db (SExpr expr) = (\expr' -> (db, SExpr expr')) <$> annotateExpr db expr + +annotateExpr :: TypeDB -> Expression -> Error Expression +annotateExpr db (EBin bo e1 e2 _) = do + e1' <- annotateExpr db e1 + e2' <- annotateExpr db e2 + when (isNothing (typeof e1')) $ Left $ "Use of void value in expression: " ++ show e1' + when (isNothing (typeof e2')) $ Left $ "Use of void value in expression: " ++ show e2' + let t1 = fromJust $ typeof e1' + t2 = fromJust $ typeof e2' + rt <- let errval = Left $ "Operator " ++ pretty bo ++ " doesn't take" ++ + " arguments of types " ++ pretty t1 ++ " and " ++ + pretty t2 + in maybe errval return $ retTypeBO bo t1 t2 + return $ EBin bo e1' e2' (Just rt) +annotateExpr db (EUn uo e _) = do + e' <- annotateExpr db e + when (isNothing (typeof e')) $ Left "Use of void value in expression" + let t = fromJust $ typeof e' + rt <- let errval = Left $ "Unary operator " ++ pretty uo ++ " doesn't take" ++ + " an argument of type " ++ pretty t + in maybe errval return $ retTypeUO uo t + return $ EUn uo e' (Just rt) +annotateExpr _ (ELit lit@(LInt _) _) = return $ ELit lit (Just TInt) +annotateExpr _ (ELit lit@(LChar _) _) = return $ ELit lit (Just TChar) +annotateExpr db (ELit lit@(LVar n) _) = case dbFind db n of + Nothing -> Left $ "Use of undeclared variable " ++ n ++ " in expression" + Just (DBVar t) -> return $ ELit lit (Just t) + Just (DBFunc _ _) -> Left $ "Cannot use function " ++ n ++ " in expression" +annotateExpr db (ELit (LCall n as) _) = do + as' <- mapM (annotateExpr db) as + case dbFind db n of + Nothing -> Left $ "Use of undeclared function " ++ n + Just (DBVar t) -> Left $ "Cannot call variable of type " ++ pretty t + Just (DBFunc mrt ats) -> do + when (length as' /= length ats) $ + Left $ "Function '" ++ n ++ "' expected " ++ show (length ats) ++ + " arguments but got " ++ show (length as') + forM_ (zip as' ats) $ \(arg, at) -> do + when (isNothing (typeof arg)) $ + Left "Use of void value in function argument" + if canCoerce (fromJust $ typeof arg) at + then return () + else Left $ "Argument of " ++ n ++ " has type " ++ pretty at ++ + " but value of type " ++ pretty (fromJust $ typeof arg) ++ + " was given" + return $ ELit (LCall n as') mrt +annotateExpr db (ESubscript arr sub _) = do + arr' <- annotateExpr db arr + sub' <- annotateExpr db sub + let subtyp = fromJust (typeof sub') + when (subtyp /= TInt) $ + Left $ "Type of array subscript should be int, but is " ++ pretty subtyp + case fromJust (typeof arr') of + TArr et _ -> return $ ESubscript arr' sub' (Just et) + _ -> Left $ "Subscripted expression is not an array: " ++ pretty arr +annotateExpr db (ECast t e) = do + e' <- annotateExpr db e + let typ = fromJust (typeof e') + if canCast typ t + then return $ ECast t e' + else Left $ "Cannot cast value of type " ++ pretty typ ++ " to type " ++ pretty t +annotateExpr db e@(ENew t sze) = do + sze' <- annotateExpr db sze + case typeof sze' of + Nothing -> Left $ "Use of void value in array length in 'new' expression: " ++ pretty e + Just TInt -> return () + Just szet -> Left $ "Type of array length in 'new' expression should be int, is " ++ pretty szet + if isBasicType t + then return $ ENew t sze' + else Left $ "Can only allocate arrays of basic types using 'new': " ++ pretty e + +annotateAsExpr :: TypeDB -> AsExpression -> Error AsExpression +annotateAsExpr db (AEVar n _) = case dbFind db n of + Nothing -> Left $ "Use of undeclared variable " ++ n ++ " in assignment expression" + Just (DBVar t) -> return $ AEVar n (Just t) + Just (DBFunc _ _) -> Left $ "Cannot use function " ++ n ++ " in assignment expression" +annotateAsExpr db (AESubscript ae expr _) = do + ae' <- annotateAsExpr db ae + expr' <- annotateExpr db expr + case typeof expr' of + Nothing -> Left $ "Use of void value in array index in assignment expression" + Just TInt -> return () + Just t -> Left $ "Use of non-int type " ++ pretty t ++ " in array index in assignment expression" + case fromJust (typeof ae') of + TArr t _ -> return $ AESubscript ae' expr' (Just t) + t -> Left $ "Indexed expression '" ++ pretty ae ++ "' has non-array type " ++ pretty t ++ + " in assignment expression" diff --git a/TypeRules.hs b/TypeRules.hs new file mode 100644 index 0000000..a3e7678 --- /dev/null +++ b/TypeRules.hs @@ -0,0 +1,61 @@ +module TypeRules where + +import AST + + +canCoerce :: Type -> Type -> Bool +canCoerce t1 t2 | t1 == t2 = True +canCoerce (TArr t1 (Just _)) (TArr t2 Nothing) | t1 == t2 = True +canCoerce _ _ = False + +canCast :: Type -> Type -> Bool +canCast t1 t2 | canCoerce t1 t2 = True +canCast t1 t2 | isIntegralType t1 && isIntegralType t2 = True +canCast _ _ = False + +isBasicType :: Type -> Bool +isBasicType = isIntegralType + +isIntegralType :: Type -> Bool +isIntegralType TInt = True +isIntegralType TChar = True +isIntegralType _ = False + +isSimpleArithBO :: BinaryOp -> Bool +isSimpleArithBO = flip elem [BOAdd, BOSub, BOMul, BODiv, BOMod] + +isBoolBO :: BinaryOp -> Bool +isBoolBO = flip elem [BOAnd, BOOr] + +isCompareBO :: BinaryOp -> Bool +isCompareBO = flip elem [BOEq, BONeq, BOGt, BOLt, BOGeq, BOLeq] + +retTypeBO :: BinaryOp -> Type -> Type -> Maybe Type +retTypeBO bo TInt TInt | isSimpleArithBO bo = Just TInt +retTypeBO bo TChar TChar | isSimpleArithBO bo = Just TChar +retTypeBO bo TInt TInt | isBoolBO bo = Just TInt +retTypeBO bo TInt TInt | isCompareBO bo = Just TInt +retTypeBO bo TChar TChar | isCompareBO bo = Just TInt +retTypeBO _ _ _ = Nothing +-- retTypeBO bo t1 t2 = error $ "retTypeBO " ++ show bo ++ " " ++ show t1 ++ " " ++ show t2 + +retTypeUO :: UnaryOp -> Type -> Maybe Type +retTypeUO UONot TInt = Just TInt +retTypeUO UONeg TInt = Just TInt +retTypeUO UONeg TChar = Just TChar +retTypeUO _ _ = Nothing + +class TypeOf a where + typeof :: a -> Maybe Type + +instance TypeOf Expression where + typeof (EBin _ _ _ mt) = mt + typeof (EUn _ _ mt) = mt + typeof (ELit _ mt) = mt + typeof (ESubscript _ _ mt) = mt + typeof (ECast t _) = Just t + typeof (ENew t _) = Just $ TArr t Nothing + +instance TypeOf AsExpression where + typeof (AEVar _ mt) = mt + typeof (AESubscript _ _ mt) = mt diff --git a/Utils.hs b/Utils.hs new file mode 100644 index 0000000..65bb651 --- /dev/null +++ b/Utils.hs @@ -0,0 +1,7 @@ +module Utils where + + +uniq :: Eq a => [a] -> [a] +uniq (a:b:cs) | a == b = uniq (b:cs) + | otherwise = a : uniq (b:cs) +uniq l = l diff --git a/Verify.hs b/Verify.hs new file mode 100644 index 0000000..18f90ce --- /dev/null +++ b/Verify.hs @@ -0,0 +1,28 @@ +module Verify(verify) where + +import AST +import Defs +import Intermediate + + +verify :: IRProgram -> Error IRProgram +verify (IRProgram vars funcs) = IRProgram <$> mapM verifyVar vars <*> mapM verifyFunc funcs + +verifyVar :: DVar -> Error DVar +verifyVar dvar@(DVar _ n e) = case e of + ELit _ _ -> return dvar + _ -> Left $ "Initialisation of global variable " ++ n ++ " is not a literal" + +verifyFunc :: IRFunc -> Error IRFunc +verifyFunc irfunc@(IRFunc mrt name _ bbs _) = + let terms = map (\(BB _ _ term) -> term) bbs + (nret', nretr') = unzip $ flip map terms $ \term -> case term of + IRet -> (1, 0) + IRetr _ -> (0, 1) + _ -> (0, 0) + (nret, nretr) = (sum nret', sum nretr') :: (Int, Int) + in case (mrt, nret, nretr) of + (Nothing, _, 0) -> return irfunc + (Just _, 0, _) -> return irfunc + (Nothing, _, _) -> Left $ "Some code paths of void function '" ++ name ++ "' return a value" + (Just _, _, _) -> Left $ "Not all code paths of non-void function '" ++ name ++ "' return a value" @@ -0,0 +1,275 @@ +module X64 where + +import Control.Monad +import Data.Char +import Data.Int +import Data.List +import Data.Maybe + + +type Offset = Int64 + +class Stringifiable a where + stringify :: a -> String + +data Register = RAX | RBX | RCX | RDX | RSI | RDI | RSP | RBP | R8 | R9 | R10 | R11 | R12 | R13 | R14 | R15 + deriving (Show, Eq, Ord, Enum, Bounded) + +data XRef = XReg Int Register | XMem Int (Maybe Register) (Int, Register) (Maybe String) Offset | XImm Offset + deriving (Show, Eq) + +newtype Reg = Reg XRef deriving (Show, Eq) +newtype Mem = Mem XRef deriving (Show, Eq) +newtype Imm = Imm XRef deriving (Show, Eq) +newtype RegMem = RegMem XRef deriving (Show, Eq) +newtype RegMemImm = RegMemImm XRef deriving (Show, Eq) + +data CondCode = CCA | CCAE | CCB | CCBE | CCC | CCE | CCG | CCGE | CCL | CCLE | CCNA | CCNAE | CCNB | CCNBE | CCNC | CCNE | CCNG | CCNGE | CCNL | CCNLE + deriving (Show, Eq) + +data Ins + = MOV RegMem RegMem | MOVi RegMem Imm | MOVi64 Reg Imm + | MOVSX Reg RegMem + | ADD RegMem RegMemImm + | SUB RegMem RegMemImm + | AND RegMem RegMemImm + | OR RegMem RegMemImm + | XOR RegMem RegMemImm + | IMULDA RegMem | IMUL Reg RegMem | IMUL3 Reg RegMem Imm + | MULDA RegMem + | IDIVDA RegMem + | DIVDA RegMem + | CMP RegMem RegMem | CMPi RegMem Imm + | SETCC CondCode RegMem + | CALL String + | PUSH RegMemImm + | POP RegMem + | JMP String + | JCC CondCode String + | RET + deriving (Show, Eq) + +data Asm = Asm [(String, [Ins])] + deriving (Show, Eq) + + +class XRefSub a where + xref :: XRef -> a + +instance XRefSub Reg where + xref x@(XReg _ _) = Reg x + xref _ = undefined +instance XRefSub Mem where + xref x@(XMem _ _ _ _ _) = Mem x + xref _ = undefined +instance XRefSub Imm where + xref x@(XImm _) = Imm x + xref _ = undefined +instance XRefSub RegMem where + xref x@(XReg _ _) = RegMem x + xref x@(XMem _ _ _ _ _) = RegMem x + xref _ = undefined +instance XRefSub RegMemImm where + xref x = RegMemImm x + + +verify :: Asm -> Either String () +verify (Asm funcs) = mapM_ (\(_, inss) -> mapM_ goI inss) funcs + where + goI :: Ins -> Either String () + goI (MOV (RegMem a) (RegMem b)) = ckRegMem a >> ckRegMem b >> ck2mem a b >> ckSizes a b + goI (MOVi (RegMem a) (Imm b)) = ckRegMem a >> ckImm b >> ckSizes a b + goI (MOVi64 (Reg a) (Imm b)) = ckReg a >> ckImm b >> ckSizes64 a b + goI (MOVSX (Reg a) (RegMem b)) = ckReg a >> ckRegMem b >> ckMovsx a b + goI (ADD (RegMem a) (RegMemImm b)) = ckRegMem a >> ckRegMemImm b >> ck2mem a b >> ckSizes a b + goI (SUB (RegMem a) (RegMemImm b)) = ckRegMem a >> ckRegMemImm b >> ck2mem a b >> ckSizes a b + goI (AND (RegMem a) (RegMemImm b)) = ckRegMem a >> ckRegMemImm b >> ck2mem a b >> ckSizes a b + goI (OR (RegMem a) (RegMemImm b)) = ckRegMem a >> ckRegMemImm b >> ck2mem a b >> ckSizes a b + goI (XOR (RegMem a) (RegMemImm b)) = ckRegMem a >> ckRegMemImm b >> ck2mem a b >> ckSizes a b + goI (IMULDA (RegMem a)) = ckRegMem a + goI (IMUL (Reg a) (RegMem b)) = ckReg a >> ckRegMem b >> ck2mem a b >> ckSizes a b + goI (IMUL3 (Reg a) (RegMem b) (Imm c)) = ckReg a >> ckRegMem b >> ckImm c >> ckSizes a b >> ckSizes a c + goI (MULDA (RegMem a)) = ckRegMem a + goI (IDIVDA (RegMem a)) = ckRegMem a + goI (DIVDA (RegMem a)) = ckRegMem a + goI (CMP (RegMem a) (RegMem b)) = ckRegMem a >> ckRegMem b >> ck2mem a b >> ckSizes a b + goI (CMPi (RegMem a) (Imm b)) = ckRegMem a >> ckImm b >> ckSizes a b + goI (SETCC _ (RegMem a)) = ckRegMem a >> ckSizeEq 1 a + goI (CALL s) = when (null s) $ Left "Empty call target" + goI (PUSH (RegMemImm a)) = ckRegMemImm a + goI (POP (RegMem a)) = ckRegMem a + goI (JMP s) = when (null s) $ Left "Empty jump target" + goI (JCC _ s) = when (null s) $ Left "Empty jcc target" + goI RET = return () + + ckReg (XReg _ _) = return () + ckReg _ = Left "Argument is not a Reg" + + ckImm (XImm _) = return () + ckImm _ = Left "Argument is not an Imm" + + ckRegMem (XReg _ _) = return () + ckRegMem (XMem _ _ _ _ _) = return () + ckRegMem _ = Left "Argument is not a Reg or a Mem" + + ckRegMemImm _ = return () + + ck2mem x@(XMem _ _ _ _ _) y@(XMem _ _ _ _ _) = + Left $ "Invalid double-memory operands: " ++ show x ++ "; " ++ show y + ck2mem _ _ = return () + + ckSizes64 x@(XReg a _) y@(XReg b _) = + when (a /= b) $ Left $ "Inconsistent operand sizes: " ++ show x ++ "; " ++ show y + ckSizes64 x@(XReg a _) y@(XMem b _ _ _ _) = + when (a /= b) $ Left $ "Inconsistent operand sizes: " ++ show x ++ "; " ++ show y + ckSizes64 x@(XMem a _ _ _ _) y@(XReg b _) = + when (a /= b) $ Left $ "Inconsistent operand sizes: " ++ show x ++ "; " ++ show y + ckSizes64 x@(XMem a _ _ _ _) y@(XMem b _ _ _ _) = + when (a /= b) $ Left $ "Inconsistent operand sizes: " ++ show x ++ "; " ++ show y + ckSizes64 x@(XReg a _) y@(XImm v) = + when (not $ fitsIn v a) $ Left $ "Immediate too large: " ++ show x ++ "; " ++ show y + ckSizes64 x@(XMem a _ _ _ _) y@(XImm v) = + when (not $ fitsIn v a) $ Left $ "Immediate too large: " ++ show x ++ "; " ++ show y + ckSizes64 _ _ = undefined + + ckSizes a b@(XImm v) = when (v >= 2 ^ (32::Int)) (Left "Immediate too large") >> ckSizes64 a b + ckSizes a b = ckSizes64 a b + + ckMovsx (XReg a _) (XReg b _) = when (a <= b) $ Left "MOVSX with shrinking operands" + ckMovsx (XReg a _) (XMem b _ _ _ _) = when (a <= b) $ Left "MOVSX with shrinking operands" + ckMovsx (XMem a _ _ _ _) (XReg b _) = when (a <= b) $ Left "MOVSX with shrinking operands" + ckMovsx (XMem a _ _ _ _) (XMem b _ _ _ _) = when (a <= b) $ Left "MOVSX with shrinking operands" + ckMovsx _ _ = undefined + + ckSizeEq sz (XReg a _) = when (a /= sz) $ Left "Invalid operand size" + ckSizeEq sz (XMem a _ _ _ _) = when (a /= sz) $ Left "Invalid operand size" + ckSizeEq sz (XImm v) = when (v >= 2 ^ (8 * sz)) $ Left "Invalid operand size (immediate)" + + fitsIn v sz = (fromIntegral v :: Integer) < ((2 :: Integer) ^ (8 * sz)) + + +instance Stringifiable XRef where + stringify (XReg sz reg) = + let n = fromEnum reg + in case n of + _ | n < 4 -> erPrefix $ lxSuffix $ [chr (n + ord 'a')] + | n < 8 -> erPrefix $ lSuffix $ ["si", "di", "sp", "bp"] !! (n - 4) + | otherwise -> bwdSuffix $ 'r' : show n + where + erPrefix s = case sz of + 4 -> 'e' : s + 8 -> 'r' : s + _ -> s + lxSuffix s = case sz of + 1 -> s ++ "l" + _ -> s ++ "x" + lSuffix s = case sz of + 1 -> s ++ "l" + _ -> s + bwdSuffix s = case sz of + 1 -> s ++ "b" + 2 -> s ++ "w" + 4 -> s ++ "d" + _ -> s + + stringify (XMem _ _ (mult, _) _ _) | not (mult `elem` [0,1,2,4,8]) = + error $ "Register multiplier has invalid value " ++ show mult ++ " in XMem" + stringify (XMem sz mr pair lab off) = + let res = intercalate "+" $ catMaybes [goR1 mr, goPair pair, goLab lab, goOff off] + in szword sz ++ " " ++ if null res then "[0]" else "[" ++ res ++ "]" + where + szword 1 = "byte" + szword 2 = "word" + szword 4 = "dword" + szword 8 = "qword" + szword _ = undefined + goR1 Nothing = Nothing + goR1 (Just r) = Just $ stringify (XReg 8 r) + goPair (0, _) = Nothing + goPair (mult, r) = Just $ show mult ++ "*" ++ stringify (XReg 8 r) + goLab = id + goOff 0 = Nothing + goOff o = Just $ show o + + stringify (XImm imm) = show imm + +instance Stringifiable Reg where stringify (Reg x) = stringify x +instance Stringifiable Mem where stringify (Mem x) = stringify x +instance Stringifiable Imm where stringify (Imm x) = stringify x +instance Stringifiable RegMem where stringify (RegMem x) = stringify x +instance Stringifiable RegMemImm where stringify (RegMemImm x) = stringify x + +instance Stringifiable CondCode where + stringify CCA = "a" + stringify CCAE = "ae" + stringify CCB = "b" + stringify CCBE = "be" + stringify CCC = "c" + stringify CCE = "e" + stringify CCG = "g" + stringify CCGE = "ge" + stringify CCL = "l" + stringify CCLE = "le" + stringify CCNA = "na" + stringify CCNAE = "nae" + stringify CCNB = "nb" + stringify CCNBE = "nbe" + stringify CCNC = "nc" + stringify CCNE = "ne" + stringify CCNG = "ng" + stringify CCNGE = "nge" + stringify CCNL = "nl" + stringify CCNLE = "nle" + +instance Stringifiable Ins where + stringify (MOV a b) = "mov " ++ stringify a ++ ", " ++ stringify b + stringify (MOVi a b) = "mov " ++ stringify a ++ ", " ++ stringify b + stringify (MOVi64 a b) = "mov " ++ stringify a ++ ", " ++ stringify b + stringify (MOVSX a b@(RegMem bx)) = case compare (xrefGetSize bx) 4 of + EQ -> "movsxd " ++ stringify a ++ ", " ++ stringify b + LT -> "movsx " ++ stringify a ++ ", " ++ stringify b + GT -> undefined + stringify (ADD a b) = "add " ++ stringify a ++ ", " ++ stringify b + stringify (SUB a b) = "sub " ++ stringify a ++ ", " ++ stringify b + stringify (AND a b) = "and " ++ stringify a ++ ", " ++ stringify b + stringify (OR a b) = "or " ++ stringify a ++ ", " ++ stringify b + stringify (XOR a b) = "xor " ++ stringify a ++ ", " ++ stringify b + stringify (IMULDA a) = "imul " ++ stringify a + stringify (IMUL a b) = "imul " ++ stringify a ++ ", " ++ stringify b + stringify (IMUL3 a b c) = "imul " ++ stringify a ++ ", " ++ stringify b ++ ", " ++ stringify c + stringify (MULDA a) = "mul " ++ stringify a + stringify (IDIVDA a) = "idiv " ++ stringify a + stringify (DIVDA a) = "div " ++ stringify a + stringify (CMP a b) = "cmp " ++ stringify a ++ ", " ++ stringify b + stringify (CMPi a b) = "cmp " ++ stringify a ++ ", " ++ stringify b + stringify (SETCC cc a) = "set" ++ stringify cc ++ " " ++ stringify a + stringify (CALL a) = "call " ++ a + stringify (PUSH a) = "push " ++ stringify a + stringify (POP a) = "pop " ++ stringify a + stringify (JMP s) = "jmp " ++ s + stringify (JCC cc s) = "j" ++ stringify cc ++ " " ++ s + stringify RET = "ret" + +instance Stringifiable Asm where + stringify (Asm funcs) = intercalate "\n" $ map goF funcs + where + goF :: (String, [Ins]) -> String + goF (name, inss) = name ++ ":\n" ++ unlines (map (('\t' :) . stringify) inss) + +xrefGetSize :: XRef -> Int +xrefGetSize (XReg s _) = s +xrefGetSize (XMem s _ _ _ _) = s +xrefGetSize (XImm _) = undefined + +xrefSetSize :: Int -> XRef -> XRef +xrefSetSize sz (XReg _ r) = XReg sz r +xrefSetSize sz (XMem _ a b c d) = XMem sz a b c d +xrefSetSize _ x@(XImm _) = x + +isXMem :: XRef -> Bool +isXMem (XMem _ _ _ _ _) = True +isXMem _ = False + +isXImm :: XRef -> Bool +isXImm (XImm _) = True +isXImm _ = False diff --git a/arrays.lang b/arrays.lang new file mode 100644 index 0000000..8cab05f --- /dev/null +++ b/arrays.lang @@ -0,0 +1,7 @@ +func int main() { + int[] arr := new int[5]; + arr[0] = 97; + arr[1] = 98; + putc(char(arr[0])); + return 0; +} @@ -0,0 +1,105 @@ +func putstr(char[] str) { + int i := 0; + while (str[i] != '\0') { + putc(str[i]); + i = i + 1; + } +} + +func int strlen(char[] str) { + int i := 0; + char c := str[i]; + while (c != '\0') { + i = i + 1; + c = str[i]; + } + return i; +} + +func int[] makejumpmap(char[] src, int srclen) { + int[] jm := new int[srclen]; + int[] stack := new int[srclen]; + int stkp := 0; + int i := 0; + while (i < srclen) { + putint(i); putc(' '); putint(stkp); putc(' '); putc(src[i]); putc('\n'); + if (src[i] == '[') { + stack[stkp] = i; + stkp = stkp + 1; + } + if (src[i] == ']') { + stkp = stkp - 1; + int j := stack[stkp]; + jm[i] = j; + jm[j] = i; + } + i = i + 1; + } + /*i = 0; + while (i < srclen) { + putint(jm[i]); + putc(' '); + i = i + 1; + } + putc('\n');*/ + return jm; +} + +func interpret(char[] src, int srclen) { + int[] jm := makejumpmap(src, srclen); + char[] mem := new char[4088]; + int ip := 0; + int memp := 0; + while (src[ip] != '\0') { + // putint(ip); putc('\n'); + char c := src[ip]; + if (c == '+') {mem[memp] = mem[memp] + char(1);} + if (c == '-') {mem[memp] = mem[memp] - char(1);} + if (c == '>') {memp = memp + 1;} + if (c == '<') {memp = memp - 1;} + if (c == '.') {putc(mem[memp]);} + if (c == ',') {mem[memp] = char(getc());} + if (c == '[') { + if (mem[memp] == '\0') { + ip = jm[ip]; + } + } + if (c == ']') { + if (mem[memp] != '\0') { + ip = jm[ip]; + } + } + ip = ip + 1; + } + + memp = 0; + while (memp < 10) { + putint(int(mem[memp])); putc(' '); + memp = memp + 1; + } + putc('\n'); +} + +func int main() { + int bufsize := 4088; + char[] source := new char[4088]; + + int sourcelen := 0; + int done := 0; + while (done != 1) { + int c := getc(); + if (c < 0) { + done = 1; + } else { + source[sourcelen] = char(c); + sourcelen = sourcelen + 1; + if (sourcelen >= bufsize - 1) { + done = 1; + } + } + } + source[sourcelen] = '\0'; + + interpret(source, sourcelen); + return 0; +} diff --git a/chartest.lang b/chartest.lang new file mode 100644 index 0000000..208bcab --- /dev/null +++ b/chartest.lang @@ -0,0 +1,19 @@ +int glob := 42; + +func f(char x) { + putc(x); +} + +func char g() { + return '\n'; +} + +func int main() { + char c := 'a'; + f(c); + putc('\n'); + char d := g(); + c = d + d + d + d + d + d + d + d + 'a'; + putc(c); + return 0; +} diff --git a/fibo.lang b/fibo.lang new file mode 100644 index 0000000..e1a7a4a --- /dev/null +++ b/fibo.lang @@ -0,0 +1,31 @@ +func printnum(int n) { + if (n == 0) {putc('0'); return;} + if (n < 0) {putc('-'); n = -n;} + while (n > 0) { + putc('0' + char(n % 10)); + n = n / 10; + } +} + +func int fibo(int n) { + int a := 0; + int b := 1; + int i := 0; + while (i < n) { + int c := a + b; + a = b; + b = c; + i = i + 1; + } + return b; +} + +func int main() { + int i := 0; + while (i <= 20) { + printnum(fibo(i)); + putc('\n'); + i = i + 1; + } + return 0; +} diff --git a/graph.png b/graph.png Binary files differnew file mode 100644 index 0000000..0356b69 --- /dev/null +++ b/graph.png diff --git a/prologue.asm b/prologue.asm new file mode 100644 index 0000000..74d9de3 --- /dev/null +++ b/prologue.asm @@ -0,0 +1,134 @@ +; SYS_EXIT equ 0x2000001 ;code +; SYS_FORK equ 0x2000002 ;-- +; SYS_READ equ 0x2000003 ;fd, buf, len +; SYS_WRITE equ 0x2000004 ;fd, buf, len + +global start +default rel + +section .text +start: + call main + mov rdi, rax + mov eax, 0x2000001 + syscall + jmp $ + +putc: + push rax + push rdi + push rsi + push rdx + push rcx + push r11 + mov eax, 0x2000004 + mov edi, 1 + lea rsi, [rsp+56] + mov edx, edi + syscall + pop r11 + pop rcx + pop rdx + pop rsi + pop rdi + pop rax + ret + +putint: + push rdi + push rsi + push rax + push rbx + push rcx + push rdx + push r11 + mov rax, [rsp+64] + mov ebx, 18 + test rax, rax + jz .numzero + + mov ecx, 10 + +.strlp: + xor edx, edx + div rcx + add dl, '0' + dec rbx + mov [rsp+rbx], dl + test rax, rax + jnz .strlp + jmp .strdone + +.numzero: + dec rbx + mov byte [rsp+rbx], '0' + jmp .strdone + +.strdone: + mov eax, 0x2000004 + mov edi, 1 + lea rsi, [rsp+rbx] + mov edx, 18 + sub rdx, rbx + syscall + + pop r11 + pop rdx + pop rcx + pop rbx + pop rax + pop rsi + pop rdi + ret + +getc: + push rdi + push rsi + push rdx + push rcx + push r11 + mov eax, 0x2000003 + xor edi, edi + mov rsi, rsp + mov edx, 1 + syscall + cmp rax, 1 + jne .fail + mov rax, [rsp] +.finish: + pop r11 + pop rcx + pop rdx + pop rsi + pop rdi + ret +.fail: + mov rax, -1 + jmp .finish + +_builtin_malloc: + push rdi + push rsi + push rdx + push r8 + push r9 + push r10 + push r11 + push rcx + xor edi, edi + mov rsi, [rsp+72] + mov edx, 0x03 + mov r10d, 0x1001 + mov r8d, -1 + xor r9d, r9d + mov eax, 0x20000C5 + syscall + pop rcx + pop r11 + pop r10 + pop r9 + pop r8 + pop rdx + pop rsi + pop rdi + ret diff --git a/putint.lang b/putint.lang new file mode 100644 index 0000000..198b4ee --- /dev/null +++ b/putint.lang @@ -0,0 +1,4 @@ +func int main() { + putint(-12345678); putc('\n'); + return 0; +} diff --git a/putstr.lang b/putstr.lang new file mode 100644 index 0000000..b13e2c5 --- /dev/null +++ b/putstr.lang @@ -0,0 +1,43 @@ +func putstr(char[] str) { + int i := 0; + while (str[i] != '\0') { + putc(str[i]); + i = i + 1; + } +} + +func int main() {return 0;} + + + +/* +irfunc putstr(char[] str) + {{{(0) + mov t5Q <- 0Q + jmp 7 + }}} + {{{(7) + mov t15Q <- t5Q + add t15Q, 8Q + mov t13Q <- astrQ + add t13Q, t15Q + load t16B <- *t13Q + neq t16B, 0B + jne t16B, 0Q -> 9 | 6 + }}} + {{{(9) + mov t25Q <- t5Q + add t25Q, 8Q + mov t23Q <- astrQ + add t23Q, t25Q + load t26B <- *t23Q + call putc (t26B) + mov t30Q <- t5Q + add t30Q, 1Q + mov t5Q <- t30Q + jmp 7 + }}} + {{{(6) + ret + }}} +*/ diff --git a/test.lang b/test.lang new file mode 100644 index 0000000..f03b3e3 --- /dev/null +++ b/test.lang @@ -0,0 +1,20 @@ +int glob := 100; + +func f() { + glob = glob + 10; +} + +func g(int x , int y) { + glob = glob + x + 2 * y; +} + +func int main() { + int i := 0; /* block /* comments */ nest! */ + // int[4] arr := 2; + i = i + 1; + f(); + if (i > 0 && i < 10) { + int i := 20; + g(i , 2 * i); + } +} |