diff options
| -rw-r--r-- | .gitignore | 4 | ||||
| -rw-r--r-- | AST.hs | 173 | ||||
| -rw-r--r-- | BuildIR.hs | 367 | ||||
| -rw-r--r-- | CodeGen.hs | 388 | ||||
| -rw-r--r-- | Defs.hs | 10 | ||||
| -rw-r--r-- | Intermediate.hs | 174 | ||||
| -rw-r--r-- | LifetimeAnalysis.hs | 70 | ||||
| -rw-r--r-- | Main.hs | 56 | ||||
| -rw-r--r-- | Makefile | 26 | ||||
| -rw-r--r-- | Optimiser.hs | 252 | ||||
| -rw-r--r-- | Pretty.hs | 14 | ||||
| -rw-r--r-- | ProgramParser.hs | 269 | ||||
| -rw-r--r-- | RegAlloc.hs | 87 | ||||
| -rw-r--r-- | ReplaceRefs.hs | 36 | ||||
| -rw-r--r-- | TypeCheck.hs | 234 | ||||
| -rw-r--r-- | TypeRules.hs | 61 | ||||
| -rw-r--r-- | Utils.hs | 7 | ||||
| -rw-r--r-- | Verify.hs | 28 | ||||
| -rw-r--r-- | X64.hs | 275 | ||||
| -rw-r--r-- | arrays.lang | 7 | ||||
| -rw-r--r-- | bf.lang | 105 | ||||
| -rw-r--r-- | chartest.lang | 19 | ||||
| -rw-r--r-- | fibo.lang | 31 | ||||
| -rw-r--r-- | graph.png | bin | 0 -> 166622 bytes | |||
| -rw-r--r-- | prologue.asm | 134 | ||||
| -rw-r--r-- | putint.lang | 4 | ||||
| -rw-r--r-- | putstr.lang | 43 | ||||
| -rw-r--r-- | test.lang | 20 | 
28 files changed, 2894 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f220a21 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +main +obj +obsolete +z_output* @@ -0,0 +1,173 @@ +module AST where + +import Data.List + +import Defs +import Pretty + + +data Program = Program [DVar] [DFunc] +  deriving (Show, Eq) + +data DVar = DVar Type Name Expression +  deriving (Show, Eq) + +data DFunc = DFunc (Maybe Type) Name [(Type, Name)] Block +  deriving (Show, Eq) + +data Type +    = TInt | TChar | TArr Type (Maybe Size) +  deriving (Show, Eq) + +data Block = Block [Statement] +  deriving (Show, Eq) + +data Statement +    = SDecl Type Name Expression +    | SAs AsExpression Expression +    | SIf Expression Block Block +    | SWhile Expression Block +    | SReturn (Maybe Expression) +    | SExpr Expression +  deriving (Show, Eq) + +data AsExpression +    = AEVar Name (Maybe Type) +    | AESubscript AsExpression Expression (Maybe Type) +  deriving (Show, Eq) + +data Expression +    = EBin BinaryOp Expression Expression (Maybe Type) +    | EUn UnaryOp Expression (Maybe Type) +    | ELit Literal (Maybe Type) +    | ESubscript Expression Expression (Maybe Type) +    | ECast Type Expression +    | ENew Type Expression +  deriving (Show, Eq) + +data BinaryOp +    = BOAdd | BOSub | BOMul | BODiv | BOMod | BOPow +    | BOAnd | BOOr +    | BOEq | BONeq | BOGt | BOLt | BOGeq | BOLeq +  deriving (Show, Eq) + +data UnaryOp +    = UONot | UONeg +  deriving (Show, Eq) + +data Literal +    = LInt Integer +    | LChar Char +    | LVar Name +    | LCall Name [Expression] +  deriving (Show, Eq) + + +sizeof :: Type -> Size +sizeof TInt = 8 +sizeof TChar = 1 +sizeof (TArr _ _) = 8 + + +instance Pretty Program where +    prettyI i (Program vars funcs) = +        concatMap (++ ("\n" ++ indent i)) $ +            map (prettyI i) vars ++ map (prettyI i) funcs +      where +        indent n = replicate (2*n) ' ' + +instance Pretty DVar where +    prettyI i (DVar t n e) = +        prettyI i t ++ " " ++ n ++ " := " ++ prettyI i e ++ ";" + +instance Pretty DFunc where +    prettyI i (DFunc mt n al b) = +        "func" ++ maybe "" ((' ' :) . prettyI i) mt ++ " " ++ n ++ "(" ++ +            intercalate "," +                (map (\(at,an) -> prettyI i at ++ " " ++ an) al) ++ +            ") " ++ prettyI i b +  +instance Pretty Type where +    prettyI _ TInt = "int" +    prettyI _ TChar = "char" +    prettyI _ (TArr t Nothing) = pretty t ++ "[]" +    prettyI _ (TArr t (Just sz)) = pretty t ++ "[" ++ show sz ++ "]" + +instance Pretty Block where +    prettyI _ (Block []) = "{}" +    prettyI i (Block l) = +        "{" ++ +            concatMap (("\n" ++ indent (i+1)) ++) (map (prettyI (i+1)) l) ++ +            "\n" ++ indent i ++ "}" +      where +        indent n = replicate (2*n) ' ' + +instance Pretty Statement where +    prettyI i (SDecl t n e) = +        prettyI i t ++ " " ++ n ++ " := " ++ prettyI i e ++ ";" +    prettyI i (SAs target e) = +        prettyI i target ++ " = " ++ prettyI i e ++ ";" +    prettyI i (SIf c b1 b2) = +        "if " ++ prettyI i c ++ " " ++ prettyI i b1 ++ " else " ++ prettyI i b2 +    prettyI i (SWhile c b) = +        "while " ++ prettyI i c ++ " " ++ prettyI i b +    prettyI _ (SReturn Nothing) = +        "return;" +    prettyI i (SReturn (Just e)) = +        "return " ++ prettyI i e ++ ";" +    prettyI i (SExpr e) = prettyI i e ++ ";" + +instance Pretty Expression where +    prettyI i (EBin bo a b (Just t)) = +        "(" ++ prettyI i (EBin bo a b Nothing) ++ " :: " ++ prettyI i t ++ ")" +    prettyI i (EBin bo a b Nothing) = +        "(" ++ prettyI i a ++ ") " ++ prettyI i bo ++ +            " (" ++ prettyI i b ++ ")" +    prettyI i (EUn uo e (Just t)) = +        "(" ++ prettyI i (EUn uo e Nothing) ++ " :: " ++ prettyI i t ++ ")" +    prettyI i (EUn uo e Nothing) = +        prettyI i uo ++ "(" ++ prettyI i e ++ ")" +    prettyI i (ELit l (Just t)) = +        "(" ++ prettyI i (ELit l Nothing) ++ " :: " ++ prettyI i t ++ ")" +    prettyI i (ELit l Nothing) = prettyI i l +    prettyI i (ESubscript a b (Just t)) = +        "(" ++ prettyI i (ESubscript a b Nothing) ++ " :: " ++ prettyI i t ++ ")" +    prettyI i (ESubscript a b Nothing) = "(" ++ prettyI i a ++ ")[" ++ prettyI i b ++ "]" +    prettyI i (ECast t e) = prettyI i t ++ "(" ++ prettyI i e ++ ")" +    prettyI i (ENew t e) = "new " ++ prettyI i t ++ "[" ++ prettyI i e ++ "]" + +instance Pretty AsExpression where +    prettyI i (AEVar n (Just t)) = +        "(" ++ prettyI i (AEVar n Nothing) ++ " :: " ++ prettyI i t ++ ")" +    prettyI _ (AEVar n Nothing) = n +    prettyI i (AESubscript ae e (Just t)) = +        "(" ++ prettyI i (AESubscript ae e Nothing) ++ " :: " ++ prettyI i t ++ ")" +    prettyI i (AESubscript ae e Nothing) = prettyI i ae ++ "[" ++ prettyI i e ++ "]" + +instance Pretty BinaryOp where +    prettyI _ BOAdd = "+" +    prettyI _ BOSub = "-" +    prettyI _ BOMul = "*" +    prettyI _ BODiv = "/" +    prettyI _ BOPow = "**" +    prettyI _ BOMod = "%" +    prettyI _ BOAnd = "&&" +    prettyI _ BOOr = "||" +    prettyI _ BOEq = "==" +    prettyI _ BONeq = "!=" +    prettyI _ BOGt = ">" +    prettyI _ BOLt = "<" +    prettyI _ BOGeq = ">=" +    prettyI _ BOLeq = "<=" + + +instance Pretty UnaryOp where +    prettyI _ UONot = "!" +    prettyI _ UONeg = "-" + +instance Pretty Literal where +    prettyI _ (LInt n) = show n +    prettyI _ (LChar c) = show c +    prettyI _ (LVar n) = n +    prettyI i (LCall n al) = +        n ++ "(" ++ intercalate ", " (map (prettyI i) al) ++ ")" diff --git a/BuildIR.hs b/BuildIR.hs new file mode 100644 index 0000000..28cbf5e --- /dev/null +++ b/BuildIR.hs @@ -0,0 +1,367 @@ +{-# LANGUAGE DeriveFunctor, GeneralizedNewtypeDeriving #-} + +module BuildIR(buildIR) where + +import Control.Monad.Except +import Control.Monad.State.Strict +import Data.Char +import Data.List +import qualified Data.Map.Strict as Map +import Data.Maybe + +import AST +import Defs +import Intermediate +import Pretty +import TypeRules + + +type Scope = Map.Map Name (Ref, Type) + +data BuildState = BuildState +    { nextId :: Id, +      scopeStack :: [Scope], +      currentBlock :: Id, +      blockMap :: Map.Map Id BB } + +initBuildState :: BuildState +initBuildState = BuildState +    { nextId = 0, +      scopeStack = [], +      currentBlock = undefined, +      blockMap = Map.empty } + +newtype BuildM a = BuildM {unBuildM :: StateT BuildState (Except String) a} +  deriving (Functor, Applicative, Monad, MonadState BuildState, MonadError String) + +genId :: BuildM Id +genId = state $ \s -> (nextId s, s {nextId = nextId s + 1}) + +genTemp :: Size -> BuildM Ref +genTemp sz = liftM (Temp sz) genId + +newBlock :: BuildM Id +newBlock = do +    i <- genId +    let block = BB i [] ITermNone +    modify $ \s -> s {currentBlock = i, blockMap = Map.insert i block (blockMap s)} +    return i + +newBlockNoSwitch :: BuildM Id +newBlockNoSwitch = do +    i <- genId +    let block = BB i [] ITermNone +    modify $ \s -> s {blockMap = Map.insert i block (blockMap s)} +    return i + +addIns :: IRIns -> BuildM () +addIns ins = +    modify $ \s -> s { +        blockMap = Map.adjust bbAddIns (currentBlock s) (blockMap s)} +  where +    bbAddIns :: BB -> BB +    bbAddIns (BB bid inss term) = BB bid (inss ++ [ins]) term + +setTerm :: IRTerm -> BuildM () +setTerm term = +    modify $ \s -> s { +        blockMap = Map.adjust bbSetTerm (currentBlock s) (blockMap s)} +  where +    bbSetTerm :: BB -> BB +    bbSetTerm (BB bid inss oldterm) = case oldterm of +        ITermNone -> BB bid inss term +        _ -> error "setTerm: oldterm /= ITermNone" + +clearBlockMap :: BuildM () +clearBlockMap = modify $ \s -> s {currentBlock = undefined, blockMap = Map.empty} + +getAllBlocks :: BuildM [BB] +getAllBlocks = liftM Map.elems (gets blockMap) + +switchBlock :: Id -> BuildM () +switchBlock bid = modify $ \s -> s {currentBlock = bid} + +withScope :: BuildM a -> BuildM a +withScope act = do +    modify $ \s -> s {scopeStack = Map.empty : scopeStack s} +    res <- act +    modify $ \s -> s {scopeStack = tail (scopeStack s)} +    return res + +modifyScope :: (Scope -> Scope) -> BuildM () +modifyScope f = +    modify $ \s -> s {scopeStack = f (head (scopeStack s)) : tail (scopeStack s)} + +scopeInsert :: Name -> Ref -> Type -> BuildM () +scopeInsert n ref t = modifyScope $ Map.insert n (ref, t) + +findVar :: Name -> BuildM (Maybe (Int, (Ref, Type))) +findVar n = do +    stk <- gets scopeStack +    let results = map (Map.lookup n) stk +    return $ fmap (\idx -> (idx, fromJust (results !! idx))) $ +        findIndex isJust results + + +buildIR :: Program -> Error IRProgram +buildIR (Program vars funcs) = +    runExcept $ evalStateT (unBuildM result) initBuildState +  where +    goDFunc :: DFunc -> BuildM IRFunc +    goDFunc (DFunc rt n al bl) = do +        clearBlockMap  +        firstid <- newBlock +        lastid <- newBlockNoSwitch +        withScope $ do +            forM_ al $ \(at, an) -> scopeInsert an (Argument (sizeof at) an) at +            convertBlock bl lastid +        switchBlock lastid +        setTerm IRet +        bblist <- getAllBlocks +        return $ IRFunc rt n al bblist firstid + +    result :: BuildM IRProgram +    result = do +        withScope $ do +            mapM_ (\(DVar t n _) -> scopeInsert n (Global (sizeof t) n) t) vars +            IRProgram vars <$> mapM goDFunc funcs + +convertBlock :: Block -> Id -> BuildM () +convertBlock (Block sts) nextnext = do +    withScope $ forM_ sts $ \st -> do +        endid <- newBlockNoSwitch +        convertStatement st endid +        switchBlock endid +    setTerm $ IJmp nextnext + +convertStatement :: Statement -> Id -> BuildM () +convertStatement (SDecl t n e) nextnext = do +    endid <- newBlockNoSwitch +    ref <- convertExpression e endid +    varref <- genTemp (sizeof t) +    scopeInsert n varref t +    switchBlock endid +    addIns $ IMov varref ref +    setTerm $ IJmp nextnext +convertStatement (SAs ae e) nextnext = do +    bl2 <- newBlockNoSwitch +    eref <- convertExpression e bl2 +    switchBlock bl2 +    convertAsExpression ae eref nextnext +convertStatement (SIf c b1 b2) nextnext = do +    cend <- newBlockNoSwitch +    blThen <- newBlockNoSwitch +    blElse <- newBlockNoSwitch + +    cref <- convertExpression c cend +    switchBlock cend +    setTerm $ IJcc CNeq cref (Constant (refSize cref) 0) blThen blElse +    switchBlock blThen +    convertBlock b1 nextnext +    switchBlock blElse +    convertBlock b2 nextnext +convertStatement (SWhile c b) nextnext = do +    cond <- newBlockNoSwitch +    setTerm $ IJmp cond +    cend <- newBlockNoSwitch +    body <- newBlockNoSwitch +    bodyend <- newBlockNoSwitch + +    switchBlock cond +    cref <- convertExpression c cend +    switchBlock cend +    setTerm $ IJcc CNeq cref (Constant (refSize cref) 0) body nextnext +    switchBlock body +    convertBlock b bodyend +    switchBlock bodyend +    setTerm $ IJmp cond +convertStatement (SReturn Nothing) _ = do +    setTerm IRet +convertStatement (SReturn (Just e)) _ = do +    bl <- newBlockNoSwitch +    ref <- convertExpression e bl +    switchBlock bl +    setTerm $ IRetr ref +convertStatement (SExpr e) nextnext = do +    void $ convertExpression e nextnext + +convertExpression :: Expression -> Id -> BuildM Ref +convertExpression (ELit (LInt n) _) nextnext = do +    ref <- genTemp (sizeof TInt) +    addIns $ IMov ref (Constant (sizeof TInt) (fromInteger n)) +    setTerm $ IJmp nextnext +    return ref +convertExpression (ELit (LChar c) _) nextnext = do +    ref <- genTemp (sizeof TChar) +    addIns $ IMov ref (Constant (sizeof TChar) (fromIntegral $ ord c)) +    setTerm $ IJmp nextnext +    return ref +convertExpression (ELit (LVar n) _) nextnext = do +    mres <- findVar n +    case mres of +        Just (_, (r, t)) -> do +            ref <- genTemp (sizeof t) +            addIns $ IMov ref r +            setTerm $ IJmp nextnext +            return ref +        Nothing -> throwError $ "Undefined variable '" ++ n ++ "' referenced" +convertExpression (ELit (LCall n al) mrt) nextnext = do +    refs <- withScope $ forM al $ \arg -> do +        endid <- newBlockNoSwitch +        r <- convertExpression arg endid +        switchBlock endid +        return r +    destref <- case mrt of +        Nothing -> do +            addIns $ ICall n refs +            return $ Temp 0 (-1) +        Just typ -> do +            r <- genTemp (sizeof typ) +            addIns $ ICallr r n refs +            return r +    setTerm $ IJmp nextnext +    return destref +convertExpression (EBin BOAnd e1 e2 _) nextnext = do +    destref <- genTemp (sizeof TInt) +    bl2 <- newBlockNoSwitch +    blTryR <- newBlockNoSwitch +    bl3 <- newBlockNoSwitch +    blNope <- newBlockNoSwitch +    blYes <- newBlockNoSwitch + +    ref1 <- convertExpression e1 bl2 + +    switchBlock bl2 +    setTerm $ IJcc CNeq ref1 (Constant (refSize ref1) 0) blTryR blNope + +    switchBlock blTryR +    ref2 <- convertExpression e2 bl3 + +    switchBlock bl3 +    setTerm $ IJcc CNeq ref2 (Constant (refSize ref2) 0) blYes blNope + +    switchBlock blYes +    addIns $ IMov destref (Constant (refSize destref) 1) +    setTerm $ IJmp nextnext + +    switchBlock blNope +    addIns $ IMov destref (Constant (refSize destref) 0) +    setTerm $ IJmp nextnext + +    return destref +convertExpression (EBin bo e1 e2 _) nextnext = do +    bl2 <- newBlockNoSwitch +    ref1 <- convertExpression e1 bl2 +    switchBlock bl2 +    bl3 <- newBlockNoSwitch +    ref2 <- convertExpression e2 bl3 +    switchBlock bl3 +    ref <- genTemp (sizeof $ fromJust $ retTypeBO bo (fromJust $ typeof e1) (fromJust $ typeof e2)) +    case bo of +        BOAdd -> addIns $ IAri AAdd ref1 ref2 +        BOSub -> addIns $ IAri ASub ref1 ref2 +        BOMul -> addIns $ IAri AMul ref1 ref2 +        BODiv -> addIns $ IAri ADiv ref1 ref2 +        BOMod -> addIns $ IAri AMod ref1 ref2 +        BOEq -> addIns $ IAri AEq ref1 ref2 +        BONeq -> addIns $ IAri ANeq ref1 ref2 +        BOGt -> addIns $ IAri AGt ref1 ref2 +        BOLt -> addIns $ IAri ALt ref1 ref2 +        BOGeq -> addIns $ IAri AGeq ref1 ref2 +        BOLeq -> addIns $ IAri ALeq ref1 ref2 +        BOPow -> error $ "Pow operator not implemented" +        BOAnd -> undefined +        BOOr -> undefined +    addIns $ IMov ref ref1 +    setTerm $ IJmp nextnext +    return ref +convertExpression (EUn UONot e mt) nextnext = +    convertExpression (EBin BOEq e (ELit (LInt 0) (typeof e)) mt) nextnext +convertExpression (EUn UONeg e mt) nextnext = +    convertExpression (EBin BOSub (ELit (LInt 0) (typeof e)) e mt) nextnext +convertExpression (ESubscript arr sub t) nextnext = do +    let elemsz = sizeof $ fromJust t +    bl2 <- newBlockNoSwitch +    arrref <- convertExpression arr bl2 +    switchBlock bl2 +    bl3 <- newBlockNoSwitch +    subref <- convertExpression sub bl3 +    switchBlock bl3 +    addIns $ IAri AMul subref (Constant (refSize subref) (fromIntegral elemsz)) +    addIns $ IAri AAdd subref (Constant (refSize subref) (fromIntegral $ sizeof TInt)) +    addIns $ IAri AAdd arrref subref +    ref <- genTemp elemsz +    addIns $ ILoad ref arrref +    setTerm $ IJmp nextnext +    return ref +convertExpression (ECast dt e) nextnext = do +    let typ = case typeof e of +                  Nothing -> error $ "Cast subject " ++ show e ++ " has Nothing type" +                  Just t -> t +    when (not $ isIntegralType typ && isIntegralType dt) $ +        error $ "convertExpression: unimplemented cast from " ++ pretty typ ++ " to " ++ pretty dt +    ref <- genTemp (sizeof dt) +    bl <- newBlockNoSwitch +    eref <- convertExpression e bl +    switchBlock bl +    addIns $ IResize ref eref +    setTerm $ IJmp nextnext +    return ref +convertExpression (ENew t sze) nextnext = do +    when (not $ isBasicType t) $ +        throwError $ "Array element type in 'new' expression is not a basic type (" ++ pretty t ++ ")" +    bl2 <- newBlockNoSwitch +    szref <- convertExpression sze bl2 +    switchBlock bl2 +    ref <- genTemp (sizeof $ TArr t Nothing) +    addIns $ IAri AMul szref (Constant (sizeof TInt) (fromIntegral $ sizeof t)) +    addIns $ IAri AAdd szref (Constant (sizeof TInt) (fromIntegral $ sizeof TInt)) +    addIns $ ICallr ref "_builtin_malloc" [szref] +    setTerm $ IJmp nextnext +    return ref + +convertAsExpression :: AsExpression -> Ref -> Id -> BuildM () +convertAsExpression (AEVar n _) valueref nextnext = do +    mres <- findVar n +    vref <- case mres of +                Just (_, (r, _)) -> return r +                Nothing -> throwError $ "Undefined variable '" ++ n ++ "'" ++ +                                        " used in assignment expression" +    addIns $ IMov vref valueref +    setTerm $ IJmp nextnext +convertAsExpression (AESubscript ae2 expr mrt) valueref nextnext = do +    let elemsz = sizeof $ fromJust mrt +    ae2ref <- goLoad ae2 +    bl2 <- newBlockNoSwitch +    offref <- convertExpression expr bl2 +    switchBlock bl2 +    -- TODO: do bounds checking +    addIns $ IAri AMul offref (Constant (sizeof TInt) (fromIntegral elemsz)) +    addIns $ IAri AAdd offref (Constant (sizeof TInt) (fromIntegral $ sizeof TInt)) +    addIns $ IAri AAdd ae2ref offref +    addIns $ IStore ae2ref valueref +    setTerm $ IJmp nextnext +  where +    goLoad :: AsExpression -> BuildM Ref +    goLoad (AEVar n _) = do +        mres <- findVar n +        (vref, t) <- case mres of +                        Just (_, (r, t)) -> return (r, t) +                        Nothing -> throwError $ "Undefined variable '" ++ n ++ "'" ++ +                                                " used in assignment expression" +        ref <- genTemp (sizeof t) +        addIns $ IMov ref vref +        return ref +    goLoad (AESubscript ae expr' _) = do +        let elemsz = sizeof $ fromJust $ typeof ae +        ref <- goLoad ae +        bl2 <- newBlockNoSwitch +        eref <- convertExpression expr' bl2 +        switchBlock bl2 +        -- TODO: do bounds checking +        addIns $ IAri AMul eref (Constant (sizeof TInt) (fromIntegral elemsz)) +        addIns $ IAri AAdd eref (Constant (sizeof TInt) (fromIntegral $ sizeof TInt)) +        addIns $ IAri AAdd ref eref +        dstref <- genTemp elemsz +        addIns $ ILoad dstref ref +        return dstref diff --git a/CodeGen.hs b/CodeGen.hs new file mode 100644 index 0000000..d4c9439 --- /dev/null +++ b/CodeGen.hs @@ -0,0 +1,388 @@ +{-# LANGUAGE DeriveFunctor, GeneralizedNewtypeDeriving, TupleSections, QuasiQuotes, ScopedTypeVariables #-} + +module CodeGen(codegen) where + +import Control.Monad +import Control.Monad.Except +import Control.Monad.State.Strict +import Data.List +import Data.Maybe +import Data.Map.Strict ((!)) +import qualified Data.Map.Strict as Map +import Text.Heredoc +import Debug.Trace + +import AST +import Defs +import Intermediate +import qualified LifetimeAnalysis as LA +import RegAlloc +import Utils +import X64 (Register(..), CondCode(..), XRef(..), Ins(..), xref) +import qualified X64 as X64 + + +data CGState = CGState +    { nextId :: Int, +      regsToRestore :: [Register], +      spillSize :: Size, +      x64Result :: X64.Asm } + +newtype CGMonad a = CGMonad { unCGMonad :: StateT CGState (Except String) a } +  deriving (Functor, Applicative, Monad, MonadState CGState, MonadError String) + +initState :: CGState +initState = CGState {nextId = 1, regsToRestore = [], spillSize = 0, x64Result = X64.Asm []} + +execCGMonad :: CGMonad a -> Error X64.Asm +execCGMonad = fmap x64Result . runExcept . flip execStateT initState . unCGMonad + +addIns :: X64.Ins -> CGMonad () +addIns ins = modify $ \s -> +    let (X64.Asm funcs) = x64Result s +        (pre, (lab, inss)) = (init funcs, last funcs) +    in s {x64Result = X64.Asm $ pre ++ [(lab, inss ++ [ins])]} + +newLabel :: String -> CGMonad () +newLabel lab = modify $ \s -> +    let (X64.Asm funcs) = x64Result s +    in s {x64Result = X64.Asm $ funcs ++ [(lab, [])]} + +-- genId :: CGMonad Int +-- genId = state $ \s -> (nextId s, s {nextId = nextId s + 1}) + +setRegsToRestore :: [Register] -> CGMonad () +setRegsToRestore regs = modify $ \s -> s {regsToRestore = regs} + +setSpillSize :: Size -> CGMonad () +setSpillSize sz = modify $ \s -> s {spillSize = sz} + + +codegen :: IRProgram -> Error String +codegen (IRProgram vars funcs) = do +    x64 <- execCGMonad $ mapM_ codegenFunc funcs +    -- traceShowM x64 +    X64.verify x64 +    varcg <- liftM unlines $ mapM codegenVar vars +    return $ [there|prologue.asm|] ++ "\n" ++ X64.stringify x64 ++ +             "\nsection .data\n" ++ (if length vars > 0 then varcg else "db 0  ; keep dyld happy\n") + + +codegenVar :: DVar -> Error String +codegenVar (DVar TInt n (ELit (LInt i) (Just TInt))) = Right $ n ++ ": dq " ++ show i +codegenVar _ = Left "Unsupported global variable declaration" + + +type AllocMap = Map.Map Ref XRef + +codegenFunc :: IRFunc -> CGMonad () +codegenFunc (IRFunc _ name al bbs sid) = do +    let temprefsperbb = collectTempRefs bbs +        alltemprefs = uniq $ sort $ map LA.unAccess $ concat $ concat $ map fst temprefsperbb +        lifespans = map (\r -> (findLifeSpan r, r)) alltemprefs +            where findLifeSpan ref = +                      fromJust $ findFirstLast id $ concat $ LA.lifetimeAnalysis ref temprefsperbb + +        aliascandidates = findAliasCandidates bbs :: [(Ref, Ref)] + +        gpRegs = [R8, R9, R10, R11, R12, R13, R14, R15] +        allocation = regalloc lifespans gpRegs aliascandidates :: Map.Map Ref (Allocation Register) + +        spillrefs = map fst $ filter (isAllocMem . snd) $ Map.toList allocation +        (spilloffsets, spillsz) = initLast $ scanl (+) 0 $ map refSize spillrefs +        spilloffsetmap = Map.fromList $ zip spillrefs spilloffsets + +        usedregs = uniq $ sort $ catMaybes $ flip map (Map.toList allocation) $ \(_, a) -> case a of +            AllocReg reg -> Just reg +            AllocMem -> Nothing + +    traceShowM temprefsperbb +    traceShowM lifespans +    -- traceM $ "ALLOCATION: " ++ show allocation + +    let nsaves = length usedregs +        allocationXref = flip Map.mapWithKey allocation $ \ref alloc -> case alloc of +            AllocReg reg -> XReg (fromIntegral $ refSize ref) reg +            AllocMem -> XMem (fromIntegral $ refSize ref) +                             (Just RSP) (0, RAX) Nothing +                             (fromIntegral $ spilloffsetmap ! ref) +        allocmap = foldl inserter allocationXref (zip al [0::Int ..]) +          where +            inserter m ((t, n), i) = +                let offset = fromIntegral spillsz + 8 * nsaves + 8 {- rbp -} + 8 {- ret addr -} + 8 * i +                in Map.insert (Argument (sizeof t) n) +                              (XMem (fromIntegral $ sizeof t) +                                    (Just RSP) (0, RAX) Nothing +                                    (fromIntegral offset)) +                              m + +    newLabel name +    addIns $ PUSH (xref $ XReg 8 RBP) +    addIns $ MOV (xref $ XReg 8 RBP) (xref $ XReg 8 RSP) +    forM_ usedregs $ \reg -> addIns $ PUSH (xref $ XReg 8 reg) +    when (spillsz /= 0) $ addIns $ SUB (xref $ XReg 8 RSP) (xref $ XImm $ fromIntegral spillsz) +    setRegsToRestore usedregs +    setSpillSize spillsz + +    let ([startbb], rest) = partition (\(BB i _ _) -> i == sid) bbs +    codegenBB allocmap startbb +    mapM_ (codegenBB allocmap) rest + +findAliasCandidates :: [BB] -> [(Ref, Ref)] +findAliasCandidates = concatMap (\(BB _ inss _) -> concatMap goI inss) +  where +    goI :: IRIns -> [(Ref, Ref)] +    goI (IMov d s) = [(d, s)] +    goI _ = [] + +findFirstLast :: forall a. (a -> Bool) -> [a] -> Maybe (Int, Int) +findFirstLast f l = go Nothing 0 l +  where +    go :: Maybe (Int, Int) -> Int -> [a] -> Maybe (Int, Int) +    go mr _ [] = mr +    go mr i (x:xs) +        | f x = go (note mr i) (i+1) xs +        | otherwise = go mr (i+1) xs + +    note :: Maybe (Int, Int) -> Int -> Maybe (Int, Int) +    note Nothing i = Just (i, i) +    note (Just (a, _)) i = Just (a, i) + +isAllocMem :: Allocation a -> Bool +isAllocMem AllocMem = True +isAllocMem _ = False + +initLast :: [a] -> ([a], a) +initLast [] = undefined +initLast [x] = ([], x) +initLast (x:xs) = let (acc, l) = initLast xs in (x : acc, l) + +codegenBB :: AllocMap -> BB -> CGMonad () +codegenBB allocmap (BB bid inss term) = do +    newLabel $ ".bb" ++ show bid +    mapM_ (codegenIns allocmap) inss +    codegenTerm allocmap term + +mkxref :: Ref -> AllocMap -> XRef +mkxref (Constant _ v) _ = XImm v +mkxref (Global sz n) _ = XMem (fromIntegral sz) Nothing (0, RAX) (Just n) 0 +mkxref r m = fromJust $ Map.lookup r m + +mkmov :: XRef -> XRef -> X64.Ins +mkmov a@(XReg _ _) b@(XReg _ _) = MOV (xref a) (xref b) +mkmov a@(XReg _ _) b@(XMem _ _ _ _ _) = MOV (xref a) (xref b) +mkmov a@(XReg _ _) b@(XImm _) = MOVi64 (xref a) (xref b) +mkmov a@(XMem _ _ _ _ _) b@(XReg _ _) = MOV (xref a) (xref b) +mkmov a@(XMem _ _ _ _ _) b@(XImm v) | v < 2 ^ (32 :: Int) = MOVi (xref a) (xref b) +mkmov a b = error $ "Invalid mkmov: " ++ show a ++ "; " ++ show b + +mkcmp :: XRef -> XRef -> X64.Ins +mkcmp a b@(XImm _) = CMPi (xref a) (xref b) +mkcmp a b = CMP (xref a) (xref b) + +codegenIns :: AllocMap -> IRIns -> CGMonad () +codegenIns m (IMov d s) +    | dm == sm = return () +    | X64.isXMem dm && X64.isXMem sm = do +        addIns $ mkmov (XReg (fromIntegral $ refSize s) RAX) sm +        addIns $ mkmov dm (XReg (fromIntegral $ refSize d) RAX) +    | otherwise = addIns $ mkmov dm sm +  where dm = mkxref d m +        sm = mkxref s m +codegenIns m (IStore d s) = do +    sourcexref <- if X64.isXMem sm +        then do +            addIns $ mkmov (XReg sz RBX) sm +            return $ XReg sz RBX +        else return sm +    destxref <- case dm of +        XReg _ r -> return $ XMem sz (Just r) (0, RAX) Nothing 0 +        x@(XMem xsz _ _ _ _) -> do +            addIns $ mkmov (XReg xsz RAX) x +            return $ XMem sz (Just RAX) (0, RAX) Nothing 0 +        XImm _ -> throwError $ "IStore to [immediate] not expected" +    addIns $ mkmov destxref sourcexref +  where dm = mkxref d m +        sm = mkxref s m +        sz = fromIntegral $ refSize s +codegenIns m (ILoad d s) = do +    sourcexref <- case sm of +        XReg _ r -> return $ XMem sz (Just r) (0, RAX) Nothing 0 +        x@(XMem xsz _ _ _ _) -> do +            addIns $ mkmov (XReg xsz RAX) x +            return $ XMem sz (Just RAX) (0, RAX) Nothing 0 +        XImm _ -> throwError $ "ILoad from [immediate] not expected" +    if X64.isXMem dm +        then do +            addIns $ mkmov (XReg sz RAX) sourcexref +            addIns $ mkmov dm (XReg sz RAX) +        else do +            addIns $ mkmov dm sourcexref +  where dm = mkxref d m +        sm = mkxref s m +        sz = fromIntegral $ refSize d +codegenIns m (IAri AMul d s) = do +    let sz = fromIntegral $ refSize d +    addIns $ mkmov (XReg sz RAX) (mkxref d m) +    addIns $ mkmov (XReg sz RBX) (mkxref s m) +    addIns $ IMULDA (xref $ XReg sz RBX) +    addIns $ mkmov (mkxref d m) (XReg sz RAX) +codegenIns m (IAri ADiv d s) = do +    let sz = fromIntegral $ refSize d +    addIns $ XOR (xref $ XReg 4 RDX) (xref $ XReg 4 RDX) +    addIns $ mkmov (XReg sz RAX) (mkxref d m) +    addIns $ mkmov (XReg sz RBX) (mkxref s m) +    addIns $ IDIVDA (xref $ XReg sz RBX) +    addIns $ mkmov (mkxref d m) (XReg sz RAX) +codegenIns m (IAri AMod d s) = do +    let sz = fromIntegral $ refSize d +    addIns $ XOR (xref $ XReg 4 RDX) (xref $ XReg 4 RDX) +    addIns $ mkmov (XReg sz RAX) (mkxref d m) +    addIns $ mkmov (XReg sz RBX) (mkxref s m) +    addIns $ IDIVDA (xref $ XReg sz RBX) +    addIns $ mkmov (mkxref d m) (XReg sz RDX) +codegenIns m (IAri at d s) = case arithTypeToCondCode at of +    Just cc -> do +        arg2 <- if X64.isXMem dm && X64.isXMem sm +                    then do +                        addIns $ mkmov (XReg (fromIntegral $ refSize s) RAX) sm +                        return $ XReg (fromIntegral $ refSize s) RAX +                    else return sm +        addIns $ mkcmp dm arg2 +        addIns $ MOVi (xref dm) (xref $ XImm 0) +        addIns $ SETCC cc (xref $ X64.xrefSetSize 1 dm) +    Nothing -> do +        arg2 <- if X64.isXMem dm && X64.isXMem sm +                    then do +                        addIns $ mkmov (XReg (fromIntegral $ refSize s) RAX) sm +                        return $ XReg (fromIntegral $ refSize s) RAX +                    else return sm +        addIns $ fromJust (arithTypeToIns at) dm arg2 +  where dm = mkxref d m +        sm = mkxref s m +codegenIns m (ICall n rs) = do +    forM_ (zip (reverse rs) [1::Int ..]) $ \(r, i) -> +        let sz = fromIntegral $ refSize r +            src = (mkxref r m) +            dst = (XMem sz (Just RSP) (0, RAX) Nothing (fromIntegral $ (-8) * i)) +        in if X64.isXMem (mkxref r m) +            then do +                addIns $ mkmov (XReg sz RAX) src +                addIns $ mkmov dst (XReg sz RAX) +            else do +                addIns $ mkmov dst src +    when (length rs > 0) $ addIns $ SUB (xref $ XReg 8 RSP) (xref $ XImm (fromIntegral $ 8 * length rs)) +    addIns $ CALL n +    when (length rs > 0) $ addIns $ ADD (xref $ XReg 8 RSP) (xref $ XImm (fromIntegral $ 8 * length rs)) +codegenIns m (ICallr d n rs) = do +    codegenIns m (ICall n rs) +    addIns $ mkmov (mkxref d m) (XReg (fromIntegral $ refSize d) RAX) +codegenIns m fullins@(IResize d s) = do +    let dsz = fromIntegral $ refSize d +        ssz = fromIntegral $ refSize s +        dm = mkxref d m +        sm = mkxref s m +    when (X64.isXImm sm) $ +        throwError $ "Resized value is an immediate in " ++ show fullins ++ +                     "; (dm = " ++ show dm ++ "; sm = " ++ show sm ++ ")" +    case compare dsz ssz of +        EQ -> codegenIns m (IMov d s) +        GT -> if X64.isXMem dm +                then do +                    addIns $ MOVSX (xref $ XReg dsz RAX) (xref sm) +                    addIns $ mkmov dm (XReg dsz RAX) +                else do +                    addIns $ MOVSX (xref dm) (xref sm) +        LT -> if X64.isXMem dm && X64.isXMem sm +                then do +                    addIns $ mkmov (XReg dsz RAX) (X64.xrefSetSize dsz sm) +                    addIns $ mkmov dm (XReg dsz RAX) +                else do +                    addIns $ mkmov dm (X64.xrefSetSize dsz sm) +codegenIns _ INop = return () + +arithTypeToCondCode :: ArithType -> Maybe X64.CondCode +arithTypeToCondCode AEq = Just CCE +arithTypeToCondCode ANeq = Just CCNE +arithTypeToCondCode AGt = Just CCG +arithTypeToCondCode ALt = Just CCL +arithTypeToCondCode AGeq = Just CCGE +arithTypeToCondCode ALeq = Just CCLE +arithTypeToCondCode _ = Nothing + +cmpTypeToCondCode :: CmpType -> X64.CondCode +cmpTypeToCondCode CEq = CCE +cmpTypeToCondCode CNeq = CCNE +cmpTypeToCondCode CGt = CCG +cmpTypeToCondCode CLt = CCL +cmpTypeToCondCode CGeq = CCGE +cmpTypeToCondCode CLeq = CCLE + +arithTypeToIns :: ArithType -> Maybe (XRef -> XRef -> X64.Ins) +arithTypeToIns AAdd = Just $ \a b -> ADD (xref a) (xref b) +arithTypeToIns ASub = Just $ \a b -> SUB (xref a) (xref b) +arithTypeToIns AAnd = Just $ \a b -> AND (xref a) (xref b) +arithTypeToIns AOr = Just $ \a b -> OR (xref a) (xref b) +arithTypeToIns AXor = Just $ \a b -> XOR (xref a) (xref b) +arithTypeToIns _ = Nothing + +codegenTerm :: AllocMap -> IRTerm -> CGMonad () +codegenTerm m (IJcc ct a b t e) = do +    addIns $ mkcmp (mkxref a m) (mkxref b m) +    addIns $ JCC (cmpTypeToCondCode ct) (".bb" ++ show t) +    addIns $ JMP (".bb" ++ show e) +codegenTerm _ (IJmp i) = addIns $ JMP (".bb" ++ show i) +codegenTerm _ IRet = do +    spillsz <- gets spillSize +    when (spillsz /= 0) $ addIns $ ADD (xref $ XReg 8 RSP) (xref $ XImm $ fromIntegral spillsz) +    usedregs <- gets regsToRestore +    forM_ (reverse usedregs) $ \reg -> addIns $ POP (xref $ XReg 8 reg) +    addIns $ mkmov (XReg 8 RSP) (XReg 8 RBP) +    addIns $ POP (xref $ XReg 8 RBP) +    addIns RET +codegenTerm m (IRetr r) = do +    addIns $ mkmov (XReg (fromIntegral $ refSize r) RAX) (mkxref r m) +    spillsz <- gets spillSize +    when (spillsz /= 0) $ addIns $ ADD (xref $ XReg 8 RSP) (xref $ XImm $ fromIntegral spillsz) +    usedregs <- gets regsToRestore +    forM_ (reverse usedregs) $ \reg -> addIns $ POP (xref $ XReg 8 reg) +    addIns $ mkmov (XReg 8 RSP) (XReg 8 RBP) +    addIns $ POP (xref $ XReg 8 RBP) +    addIns RET +codegenTerm _ ITermNone = undefined + + +collectTempRefs :: [BB] -> [([[LA.Access Ref]], [Int])] +collectTempRefs bbs = +    flip map bbs $ \(BB _ inss term) -> +        let refs = map (filter (isTemp . LA.unAccess)) $ concatMap listRefsIns inss ++ listRefsTerm term +            nexts = map (\i -> fromJust $ findIndex (\(BB j _ _) -> j == i) bbs) $ listNextIds term +        in (refs, nexts) +  where +    listRefsIns :: IRIns -> [[LA.Access Ref]] +    listRefsIns (IMov a b) = [[LA.Read b], [LA.Write a]] +    listRefsIns (IStore a b) = [[LA.Read a, LA.Read b]] +    listRefsIns (ILoad a b) = [[LA.Read b], [LA.Write a]] +    listRefsIns (IAri _ a b) = [[LA.Write a, LA.Read b]] +    listRefsIns (ICall _ l) = [map LA.Read l] +    listRefsIns (ICallr a _ l) = [LA.Write a : map LA.Read l] +    listRefsIns (IResize a b) = [[LA.Read b], [LA.Write a]] +    listRefsIns INop = [[]] + +    listRefsTerm :: IRTerm -> [[LA.Access Ref]] +    listRefsTerm (IJcc _ a b _ _) = [[LA.Read a, LA.Read b]] +    listRefsTerm (IJmp _) = [[]] +    listRefsTerm IRet = [[]] +    listRefsTerm (IRetr a) = [[LA.Read a]] +    listRefsTerm ITermNone = undefined + +    listNextIds :: IRTerm -> [Id] +    listNextIds (IJcc _ _ _ a b) = [a, b] +    listNextIds (IJmp a) = [a] +    listNextIds IRet = [] +    listNextIds (IRetr _) = [] +    listNextIds ITermNone = undefined + +    isTemp :: Ref -> Bool +    isTemp (Temp _ _) = True +    isTemp _ = False @@ -0,0 +1,10 @@ +module Defs where + +import Data.Int + + +type Name = String +type Id = Int +type Size = Integer +type Value = Int64 +type Error = Either String diff --git a/Intermediate.hs b/Intermediate.hs new file mode 100644 index 0000000..5f3a9f2 --- /dev/null +++ b/Intermediate.hs @@ -0,0 +1,174 @@ +module Intermediate where + +import Data.Bits +import Data.List + +import AST +import Defs +import Pretty + + +data BB = BB Id [IRIns] IRTerm +  deriving (Show, Eq) + +data Ref = Temp Size Int | Argument Size Name | Global Size Name | Constant Size Value +  deriving (Show, Eq, Ord) + +data IRProgram = IRProgram [DVar] [IRFunc] +  deriving (Show, Eq) + +data IRFunc = IRFunc (Maybe Type) Name [(Type, Name)] [BB] Id +  deriving (Show, Eq) + +data IRIns +    = IMov Ref Ref +    | IStore Ref Ref +    | ILoad Ref Ref +    | IAri ArithType Ref Ref +    | ICall Name [Ref] +    | ICallr Ref Name [Ref] +    | IResize Ref Ref +    | INop +  deriving (Show, Eq) + +data IRTerm +    = IJcc CmpType Ref Ref Id Id  -- Id Id == if-yes if-no +    | IJmp Id +    | IRet +    | IRetr Ref +    | ITermNone +  deriving (Show, Eq) + +data ArithType +    = AAdd | ASub | AMul | ADiv | AMod +    | AAnd | AOr | AXor +    | AEq | ANeq | AGt | ALt | AGeq | ALeq +  deriving (Show, Eq) + +data CmpType +    = CEq | CNeq | CGt | CLt | CGeq | CLeq +  deriving (Show, Eq) + + +refSize :: Ref -> Size +refSize (Temp sz _) = sz +refSize (Argument sz _) = sz +refSize (Global sz _) = sz +refSize (Constant sz _) = sz + + +instance Pretty BB where +    prettyI i (BB bid inss term) = +        "{{{(" ++ show bid ++ ")\n" ++ indent (i+1) ++ +            intercalate ("\n" ++ indent (i+1)) (map pretty inss) ++ +            (if null inss then "" else "\n" ++ indent (i+1)) ++ +                pretty term ++ +            "\n" ++ indent i ++ "}}}" +      where +        indent n = replicate (2*n) ' ' + +instance Pretty Ref where +    prettyI _ (Temp sz k) = "t" ++ show k ++ pretty_sizeSuffix sz +    prettyI _ (Argument sz n) = "a" ++ n ++ pretty_sizeSuffix sz +    prettyI _ (Global sz n) = "g" ++ n ++ pretty_sizeSuffix sz +    prettyI _ (Constant sz n) = show n ++ pretty_sizeSuffix sz + +pretty_sizeSuffix :: Size -> String +pretty_sizeSuffix 1 = "B" +pretty_sizeSuffix 2 = "W" +pretty_sizeSuffix 4 = "D" +pretty_sizeSuffix 8 = "Q" +pretty_sizeSuffix sz = "<" ++ show sz ++ ">" + +instance Pretty IRProgram where +    prettyI i (IRProgram vars funcs) = +        intercalate ("\n" ++ indent i) (map (prettyI i) vars) ++ +            "\n" ++ indent i ++ +            intercalate ("\n" ++ indent i) (map (prettyI (i+1)) funcs) ++ +            "\n" +      where +        indent n = replicate (2*n) ' ' + +instance Pretty IRFunc where +    prettyI i (IRFunc mt n al bbs sid) = +        "irfunc" ++ maybe "" ((' ' :) . prettyI i) mt ++ " " ++ n ++ "(" ++ +            intercalate "," +                (map (\(at,an) -> prettyI i at ++ " " ++ an) al) ++ +            ")\n" ++ indent i ++ +            intercalate ("\n" ++ indent i) (map (prettyI i) sorted) +      where +        indent n' = replicate (2*n') ' ' + +        sorted = uncurry (++) $ partition (\(BB bid _ _) -> bid == sid) bbs + +instance Pretty IRIns where +    prettyI _ (IMov d s) = "mov " ++ pretty d ++ " <- " ++ pretty s +    prettyI _ (IStore d s) = "store *" ++ pretty d ++ " <- " ++ pretty s +    prettyI _ (ILoad d s) = "load " ++ pretty d ++ " <- *" ++ pretty s +    prettyI _ (IAri at d s) = +        pretty at ++ " " ++ pretty d ++ ", " ++ pretty s +    prettyI _ (ICall n al) = +        "call " ++ n ++ " (" ++ intercalate ", " (map pretty al) ++ ")" +    prettyI _ (ICallr d n al) = +        "call " ++ pretty d ++ " <- " ++ n ++ " (" ++ intercalate ", " (map pretty al) ++ ")" +    prettyI _ (IResize d s) = "resize " ++ pretty d ++ " <- " ++ pretty s +    prettyI _ INop = "nop" + +instance Pretty IRTerm where +    prettyI _ (IJcc ct s1 s2 did1 did2) = +        pretty ct ++ " " ++ pretty s1 ++ ", " ++ pretty s2 ++ " -> " ++ show did1 ++ " | " ++ show did2 +    prettyI _ (IJmp did) = "jmp " ++ show did +    prettyI _ IRet = "ret" +    prettyI _ (IRetr ref) = "retr " ++ pretty ref +    prettyI _ ITermNone = "?NONE?" +  +instance Pretty ArithType where +    prettyI _ AAdd = "add" +    prettyI _ ASub = "sub" +    prettyI _ AMul = "mul" +    prettyI _ ADiv = "div" +    prettyI _ AMod = "mod" +    prettyI _ AAnd = "and" +    prettyI _ AOr = "or" +    prettyI _ AXor = "xor" +    prettyI _ AEq = "eq" +    prettyI _ ANeq = "neq" +    prettyI _ AGt = "gt" +    prettyI _ ALt = "lt" +    prettyI _ AGeq = "geq" +    prettyI _ ALeq = "leq" + +instance Pretty CmpType where +    prettyI _ CEq = "jeq" +    prettyI _ CNeq = "jne" +    prettyI _ CGt = "jg" +    prettyI _ CLt = "jl" +    prettyI _ CGeq = "jge" +    prettyI _ CLeq = "jle" + + +evaluateArith :: ArithType -> Value -> Value -> Value +evaluateArith at a b = case at of +    AAdd -> a + b +    ASub -> a - b +    AMul -> a * b +    ADiv -> if b == 0 then error "Division by zero detected" else a `div` b +    AMod -> if b == 0 then error "Modulo by zero detected" else a `mod` b +    AAnd -> a .&. b +    AOr -> a .|. b +    AXor -> a `xor` b +    AEq -> if a == b then 1 else 0 +    ANeq -> if a /= b then 1 else 0 +    AGt -> if a > b then 1 else 0 +    ALt -> if a < b then 1 else 0 +    AGeq -> if a >= b then 1 else 0 +    ALeq -> if a <= b then 1 else 0 + +evaluateCmp :: CmpType -> Value -> Value -> Bool +evaluateCmp ct a b = case ct of +    CEq -> a == b +    CNeq -> a /= b +    CGt -> a > b +    CLt -> a < b +    CGeq -> a >= b +    CLeq -> a <= b diff --git a/LifetimeAnalysis.hs b/LifetimeAnalysis.hs new file mode 100644 index 0000000..a590862 --- /dev/null +++ b/LifetimeAnalysis.hs @@ -0,0 +1,70 @@ +module LifetimeAnalysis(lifetimeAnalysis, Access(..), unAccess) where + +import Data.List +import Data.Maybe + + +data Access a = Write a | Read a +  deriving (Show, Eq) + +unAccess :: Access a -> a +unAccess (Write x) = x +unAccess (Read x) = x + +type BB a = ([[Access a]], [Int]) + +lifetimeAnalysis :: Eq a => a -> [BB a] -> [[Bool]] +lifetimeAnalysis target bbs = +    foldOr2 $ map (\p -> markAliveFrom bbs target p (emptyMark bbs)) occurrences +  where +    occurrences :: [(Int, Int)] +    occurrences = do +        (i, bb) <- zip [0..] bbs +        (j, ins) <- zip [0..] (fst bb) +        if ins `contains` Write target || ins `contains` Read target +            then [(i, j)] +            else [] +     +emptyMark :: [BB a] -> [[Bool]] +emptyMark bbs = flip map bbs $ \(inss, _) -> map (const False) inss + +-- Assumes `target` is known to be alive at `pos`. +markAliveFrom :: Eq a => [BB a] -> a -> (Int, Int) -> [[Bool]] -> [[Bool]] +markAliveFrom bbs target pos topmark = setAt2 (maybe topmark id $ goNoCheck pos topmark) pos True +  where +    goNoCheck :: (Int, Int) -> [[Bool]] -> Maybe [[Bool]] +    goNoCheck (i, j) mark = +        let suc = flip filter (successors bbs (i, j)) $ \(i', j') -> +                      not $ mark !! i' !! j' +            markset = setAt2 mark (i, j) True +        in case catMaybes [go s markset | s <- suc] of +               [] -> Nothing +               l -> Just $ foldOr2 l + +    go :: (Int, Int) -> [[Bool]] -> Maybe [[Bool]] +    go (i, j) mark +        | fst (bbs !! i) !! j `contains` Write target = Nothing +        | fst (bbs !! i) !! j `contains` Read target = Just $ setAt2 mark (i, j) True +        | otherwise = goNoCheck (i, j) mark + +successors :: [BB a] -> (Int, Int) -> [(Int, Int)] +successors bbs (i, j) = +    let (inss, nexts) = bbs !! i +    in if j < length inss - 1 +        then [(i, j + 1)] +        else [(n, 0) | n <- nexts] + +contains :: Eq a => [a] -> a -> Bool +contains l v = isJust $ find (== v) l + +modifyAt2 :: [[a]] -> (Int, Int) -> (a -> a) -> [[a]] +modifyAt2 l (i, j) f = modifyAt l i $ \li -> modifyAt li j f + +modifyAt :: [a] -> Int -> (a -> a) -> [a] +modifyAt l i f = let (pre, v : post) = splitAt i l in pre ++ f v : post + +setAt2 :: [[a]] -> (Int, Int) -> a -> [[a]] +setAt2 l p v = modifyAt2 l p (const v) + +foldOr2 :: [[[Bool]]] -> [[Bool]] +foldOr2 = foldl1 (\m1 m2 -> map (map (uncurry (||)) . uncurry zip) (zip m1 m2)) @@ -0,0 +1,56 @@ +module Main where + +import System.Exit +import System.IO +import System.Process +import Debug.Trace + +import BuildIR +import CodeGen +import Defs +import Optimiser +import Pretty +import ProgramParser +import TypeCheck +import Verify + + +infix 2 <?> +(<?>) :: (a -> Error b) -> String -> a -> Error b +f <?> pre = \a -> case f a of +    Left e -> Left $ pre ++ ": " ++ e +    Right x -> Right x + + +tracePrettyId :: Pretty a => a -> a +tracePrettyId x = trace (pretty x) x + +eitherToIO :: Either String a -> IO a +eitherToIO = either die return + + +main :: IO () +main = do +    source <- getContents + +    let eres = return source +                 >>= parseProgram <?> "Parse error" +                 -- >>= return . traceShowId +                 >>= typeCheck <?> "Type error" +                 >>= buildIR <?> "IR building error" +                 -- >>= return . tracePrettyId +                 >>= optimise <?> "Error while optimising" +                 >>= verify <?> "Verify error" +                 >>= return . tracePrettyId +                 >>= codegen <?> "Codegen error" + +    asm <- eitherToIO eres +    -- hPutStr stderr asm + +    writeFile "z_output.asm" asm + +    hPutStrLn stderr "Assembling with yasm..." +    callCommand "yasm -w+all -fmacho64 z_output.asm -o z_output.o" + +    hPutStrLn stderr "Linking with ld..." +    callCommand "ld z_output.o -o z_output" diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..3ce6d7e --- /dev/null +++ b/Makefile @@ -0,0 +1,26 @@ +RUNFLAGS = +GHCFLAGS = -Wall -Widentities -odir obj -hidir obj +ifneq ($(PROFILE),) +	RUNFLAGS += +RTS -xc +	GHCFLAGS += -prof -fprof-auto +else +	GHCFLAGS += -O3 +endif + +TARGET = main + +.PHONY: all clean run + +all: $(TARGET) + +clean: +	rm -f $(TARGET) +	rm -rf obj + +run: $(TARGET) +	./$(TARGET) $(RUNFLAGS) + + +$(TARGET): $(wildcard *.hs) +	@mkdir -p obj +	ghc $(GHCFLAGS) Main.hs -o $@ diff --git a/Optimiser.hs b/Optimiser.hs new file mode 100644 index 0000000..6e6227c --- /dev/null +++ b/Optimiser.hs @@ -0,0 +1,252 @@ +module Optimiser(optimise) where + +import Data.List +import Data.Maybe +import qualified Data.Map.Strict as Map +import Debug.Trace + +import Defs +import Intermediate +import ReplaceRefs +import Utils + + +type Optimisation = IRProgram -> IRProgram +type FuncOptimisation = IRFunc -> IRFunc + +optimise :: IRProgram -> Error IRProgram +optimise prog = +    let master = foldl1 (.) (reverse optimisations) {-. trace "-- OPT PASS --"-} +        reslist = iterate master prog +        pairs = zip reslist (tail reslist) +    in Right $ fst $ fromJust $ find (uncurry (==)) pairs +  where +    optimisations = map funcopt $ +        -- [chainJumps, removeUnusedBlocks] +        [chainJumps, mergeTerminators, looseJumps, removeUnusedBlocks, identityOps, +         constantPropagate, removeNops, movPush, evaluateInstructions, evaluateTerminators] + + +funcopt :: FuncOptimisation -> Optimisation +funcopt fo (IRProgram vars funcs) = IRProgram vars (map fo funcs) + + +chainJumps :: FuncOptimisation +chainJumps (IRFunc rt name al bbs sid) = IRFunc rt name al bbs' sid +  where +    bbs' = snd $ last $ takeWhile fst $ iterate (mergeChain . snd) (True, bbs) +     +    mergeChain :: [BB] -> (Bool, [BB]) +    mergeChain [] = (False, []) +    mergeChain bbs2 = case findIndex isSuitable bbs2 of +        Nothing -> (False, bbs2) +        Just idx -> +            let (BB bid1 inss1 (IJmp target), rest) = +                    (bbs2 !! idx, take idx bbs2 ++ drop (idx+1) bbs2) +                [BB _ inss2 term2] = filter (\(BB bid _ _) -> bid == target) rest +                merged = BB bid1 (inss1 ++ inss2) term2 +            in (True, merged : rest) +      where +        hasJmpTo :: Id -> BB -> Bool +        hasJmpTo i (BB _ _ (IJmp i')) = i == i' +        hasJmpTo _ _ = False + +        isSuitable :: BB -> Bool +        isSuitable (BB _ _ (IJmp target)) = sum (map (fromEnum . hasJmpTo target) bbs2) == 1 +        isSuitable _ = False + +mergeTerminators :: FuncOptimisation +mergeTerminators (IRFunc rt name al bbs sid) = IRFunc rt name al bbs' sid +  where +    bbs' = flip map bbs $ \bb@(BB bid inss term) -> case term of +        IJmp i -> case find ((== i) . fst) singles of +            Just (_, t) -> BB bid inss t +            Nothing -> bb +        _ -> bb + +    singles = map (\(BB i _ t) -> (i, t)) $ filter (\(BB _ inss _) -> null inss) bbs + +looseJumps :: FuncOptimisation +looseJumps (IRFunc rt name al bbs sid) = IRFunc rt name al bbs' sid +  where +    bbs' = flip map bbs $ \bb@(BB bid inss term) -> case term of +        IJmp i -> BB bid inss (IJmp (translate i)) +        IJcc ct r1 r2 i j -> BB bid inss (IJcc ct r1 r2 (translate i) (translate j)) +        _ -> bb + +    translate i = fromMaybe i $ Map.lookup i transmap + +    transmap = Map.fromList $ catMaybes $ flip map bbs $ \bb -> case bb of +        BB bid [] (IJmp i) -> Just (bid, i) +        _ -> Nothing + +removeUnusedBlocks :: FuncOptimisation +removeUnusedBlocks (IRFunc rt name al bbs sid) = IRFunc rt name al bbs' sid +  where +    bbs' = filter isReachable bbs + +    isReachable :: BB -> Bool +    isReachable (BB bid _ _) +        | bid == sid = True +        | otherwise = isJust $ flip find bbs $ \(BB _ _ term) -> case term of +            IJcc _ _ _ i1 i2 -> i1 == bid || i2 == bid +            IJmp i -> i == bid +            _ -> False + +identityOps :: FuncOptimisation +identityOps (IRFunc rt name al bbs sid) = IRFunc rt name al (map go bbs) sid +  where +    go :: BB -> BB +    go (BB bid inss term) = BB bid (catMaybes $ map goI inss) term + +    goI :: IRIns -> Maybe IRIns +    goI (IAri AAdd _ (Constant _ 0)) = Nothing +    goI (IAri ASub _ (Constant _ 0)) = Nothing +    goI (IAri AMul _ (Constant _ 1)) = Nothing +    goI (IAri ADiv _ (Constant _ 1)) = Nothing +    goI i = Just i + +constantPropagate :: FuncOptimisation +constantPropagate (IRFunc rt name al bbs sid) = IRFunc rt name al bbs' sid +  where +    alltemps = findAllTemps' bbs +    consttemps = catMaybes $ flip map alltemps $ \ref -> +        let locs = findMutations' bbs ref +            loc = head locs +            ins = insAt bbs loc + +            isIMov (IMov _ _) = True +            isIMov _ = False +        in {-trace ("Muts of " ++ show ref ++ ": " ++ show locs ++ ": " ++ +                    show (map (insAt bbs) locs)) $-} +           if length locs == 1 && isIMov ins +               then Just (loc, ins) +               else Nothing + +    bbs' = case consttemps of +                [] -> bbs +                ((loc, IMov ref value) : _) -> +                    replaceRefsBBList ref value (nopifyInsAt bbs loc) +                _ -> undefined + +removeNops :: FuncOptimisation +removeNops (IRFunc rt name al bbs sid) = +    IRFunc rt name al (map go bbs) sid +  where +    go (BB bid inss term) = BB bid (filter (not . isNop) inss) term +    isNop INop = True +    isNop _ = False + +movPush :: FuncOptimisation +movPush (IRFunc rt name al bbs sid) = IRFunc rt name al bbs' sid +  where +    bbs' = map goBB bbs + +    goBB :: BB -> BB +    goBB (BB bid inss term) = BB bid (go inss) term + +    go :: [IRIns] -> [IRIns] +    go [] = [] +    go (ins@(IMov d _) : rest) | isJust (find (== d) (findAllRefsInss rest)) = push ins rest +    go (ins : rest) = ins : go rest + +    push :: IRIns -> [IRIns] -> [IRIns] +    push mov [] = [mov] +    push mov@(IMov d s) (ins@(IMov d' s') : rest) +        | d' == d = if s' == d then push mov rest else push ins rest +        | otherwise = replaceRefsIns d s ins : push mov rest +    push mov@(IMov d s) (ins@(IResize d' s') : rest) +        | d' == d = if s' == d then push mov rest else push ins rest +        | otherwise = replaceRefsIns d s ins : push mov rest +    push mov@(IMov d s) (ins@(ILoad d' _) : rest) +        | d' == d = mov : ins : go rest +        | otherwise = replaceRefsIns d s ins : push mov rest +    push mov@(IMov d s) (ins@(IAri at d' s') : rest) +        | d' == d = case (s, s') of +            (Constant sza a, Constant szb b) +                | sza == szb -> push (IMov d (Constant sza $ evaluateArith at a b)) rest +                | otherwise -> error $ "Inconsistent sizes in " ++ show mov ++ "; " ++ show ins +            _ -> mov : ins : go rest +        | otherwise = replaceRefsIns d s ins : push mov rest +    push mov@(IMov d s) (ins@(ICallr d' _ _) : rest) +        | d' == d = mov : ins : go rest +        | otherwise = replaceRefsIns d s ins : push mov rest +    push mov@(IMov d s) (ins@(IStore _ _) : rest) = replaceRefsIns d s ins : push mov rest +    push mov@(IMov d s) (ins@(ICall _ _) : rest) = replaceRefsIns d s ins : push mov rest +    push mov (ins@INop : rest) = ins : push mov rest +    push _ _ = undefined + +evaluateInstructions :: FuncOptimisation +evaluateInstructions (IRFunc rt name al bbs sid) = IRFunc rt name al (map goBB bbs) sid +  where +    goBB :: BB -> BB +    goBB (BB bid inss term) = BB bid (map goI inss) term + +    goI :: IRIns -> IRIns +    goI (IResize ref (Constant _ v)) = IMov ref $ Constant (refSize ref) $ truncValue (refSize ref) v +    goI ins = ins + +    truncValue :: Size -> Value -> Value +    truncValue sz v = fromIntegral $ (fromIntegral v :: Integer) `mod` (2 ^ (8 * sz)) + +evaluateTerminators :: FuncOptimisation +evaluateTerminators (IRFunc rt name al bbs sid) = IRFunc rt name al bbs' sid +  where +    bbs' = map (\(BB bid inss term) -> BB bid inss (go term)) bbs + +    go :: IRTerm -> IRTerm +    go term@(IJcc ct (Constant sza a) (Constant szb b) i1 i2) +        | sza /= szb = error $ "Inconsistent sizes in " ++ show term +        | evaluateCmp ct a b = IJmp i1 +        | otherwise = IJmp i2 +    go term = term + + +insAt :: [BB] -> (Int, Int) -> IRIns +insAt bbs (i, j) = +    let (BB _ inss _) = bbs !! i +    in inss !! j + +nopifyInsAt :: [BB] -> (Int, Int) -> [BB] +nopifyInsAt bbs (i, j) = +    let (pre, BB bid inss term : post) = splitAt i bbs +        (ipre, _ : ipost) = splitAt j inss +    in pre ++ BB bid (ipre ++ INop : ipost) term : post + +findMutations :: BB -> Ref -> [Int] +findMutations (BB _ inss _) ref = +    catMaybes $ flip map (zip inss [0..]) $ \(ins, idx) -> case ins of +        (IMov r _) | r == ref -> Just idx +        (IAri _ r _) | r == ref -> Just idx +        (ICallr r _ _) | r == ref -> Just idx +        _ -> Nothing + +findMutations' :: [BB] -> Ref -> [(Int, Int)] +findMutations' bbs ref = +    [(i, j) | (bb, i) <- zip bbs [0..], j <- findMutations bb ref] + +findAllRefs :: BB -> [Ref] +findAllRefs (BB _ inss _) = findAllRefsInss inss + +findAllRefsInss :: [IRIns] -> [Ref] +findAllRefsInss inss = uniq $ sort $ concatMap go inss +  where +    go (IMov a b) = [a, b] +    go (IStore a b) = [a, b] +    go (ILoad a b) = [a, b] +    go (IAri _ a b) = [a, b] +    go (ICall _ al) = al +    go (ICallr a _ al) = a : al +    go (IResize a b) = [a, b] +    go INop = [] + +-- findAllRefs' :: [BB] -> [Ref] +-- findAllRefs' = uniq . sort . concatMap findAllRefs + +findAllTemps :: BB -> [Ref] +findAllTemps bb = flip filter (findAllRefs bb) $ \ref -> case ref of +    (Temp _ _) -> True +    _ -> False + +findAllTemps' :: [BB] -> [Ref] +findAllTemps' = concatMap findAllTemps diff --git a/Pretty.hs b/Pretty.hs new file mode 100644 index 0000000..f2f36d8 --- /dev/null +++ b/Pretty.hs @@ -0,0 +1,14 @@ +module Pretty where + +import Data.List + + +class Pretty a where +    pretty :: a -> String +    pretty = prettyI 0 + +    prettyI :: Int -> a -> String + + +instance Pretty a => Pretty [a] where +    prettyI i l = "[" ++ intercalate ", " (map (prettyI i) l) ++ "]" diff --git a/ProgramParser.hs b/ProgramParser.hs new file mode 100644 index 0000000..2cacaf5 --- /dev/null +++ b/ProgramParser.hs @@ -0,0 +1,269 @@ +module ProgramParser(parseProgram) where + +import Control.Monad +import Data.Char +import Text.Parsec +import qualified Text.Parsec.Expr as E + +import AST +import Defs + + +type Parser = Parsec String () + + +parseProgram :: String -> Either String Program +parseProgram s = case parse pProgram "" s of +    Left err -> Left $ show err +    Right p -> Right p + +pProgram :: Parser Program +pProgram = do +    pWhiteComment +    decls <- many pDecl +    eof +    return $ Program (lefts decls) (rights decls) + +pDecl :: Parser (Either DVar DFunc) +pDecl = (Right <$> pDFunc) <|> (Left <$> pDVar) + +pDFunc :: Parser DFunc +pDFunc = do +    symbol "func" +    rt <- (Just <$> pType) <|> return Nothing +    n <- pName +    symbol "(" +    args <- sepBy pTypeAndName (symbol ",") +    symbol ")" +    body <- pBlock +    return $ DFunc rt n args body + +pDVar :: Parser DVar +pDVar = do +    t <- pType +    n <- pName +    symbol ":=" +    e <- pExpression +    symbol ";" +    return $ DVar t n e + +pTypeAndName :: Parser (Type, Name) +pTypeAndName = (,) <$> pType <*> pName + +pType :: Parser Type +pType = do +    t <- pBasicType +    (do +        symbol "[" +        msz <- optionMaybe pInteger +        symbol "]" +        return $ TArr t msz) <|> return t + +pBasicType :: Parser Type +pBasicType = (symbol "int" >> return TInt) <|> (symbol "char" >> return TChar) + +pBlock :: Parser Block +pBlock = do +    symbol "{" +    body <- many pStatement +    symbol "}" +    return $ Block body + +pStatement :: Parser Statement +pStatement = pSIf <|> pSWhile <|> pSReturn <|> pSDecl <|> pSAs <|> pSExpr + +pSDecl :: Parser Statement +pSDecl = do +    (t, n) <- try $ do +        t <- pType +        n <- pName +        symbol ":=" +        return (t, n) +    e <- pExpression +    symbol ";" +    return $ SDecl t n e + +pSAs :: Parser Statement +pSAs = do +    n <- try $ pAsExpression <* symbol "=" +    e <- pExpression +    symbol ";" +    return $ SAs n e + +pSIf :: Parser Statement +pSIf = do +    symbol "if" +    symbol "(" +    cond <- pExpression +    symbol ")" +    bl1 <- pBlock +    bl2 <- try (symbol "else" >> pBlock) <|> return (Block []) +    return $ SIf cond bl1 bl2 + +pSWhile :: Parser Statement +pSWhile = do +    symbol "while" +    symbol "(" +    cond <- pExpression +    symbol ")" +    bl <- pBlock +    return $ SWhile cond bl + +pSReturn :: Parser Statement +pSReturn = do +    symbol "return" +    SReturn <$> ((symbol ";" >> return Nothing) <|> +                 ((Just <$> pExpression) <* symbol ";")) + +pSExpr :: Parser Statement +pSExpr = do +    e <- pExpression +    symbol ";" +    return $ SExpr e + +pExpression :: Parser Expression +pExpression = E.buildExpressionParser optable litparser +  where +    optable = +        [[E.Infix (symbol "**" >> return (mkEBin BOPow)) E.AssocRight], +         [E.Infix (symbol "*" >> return (mkEBin BOMul)) E.AssocLeft, +          E.Infix (symbol "/" >> return (mkEBin BODiv)) E.AssocLeft, +          E.Infix (symbol "%" >> return (mkEBin BOMod)) E.AssocLeft], +         [E.Infix (symbol "+" >> return (mkEBin BOAdd)) E.AssocLeft, +          E.Infix (symbol "-" >> return (mkEBin BOSub)) E.AssocLeft], +         [E.Infix (symbol ">=" >> return (mkEBin BOGeq)) E.AssocNone, +          E.Infix (symbol "<=" >> return (mkEBin BOLeq)) E.AssocNone, +          E.Infix (symbol ">" >> return (mkEBin BOGt)) E.AssocNone, +          E.Infix (symbol "<" >> return (mkEBin BOLt)) E.AssocNone, +          E.Infix (symbol "==" >> return (mkEBin BOEq)) E.AssocNone, +          E.Infix (symbol "!=" >> return (mkEBin BONeq)) E.AssocNone], +         [E.Infix (symbol "&&" >> return (mkEBin BOAnd)) E.AssocLeft], +         [E.Infix (symbol "||" >> return (mkEBin BOOr)) E.AssocLeft]] + +    mkEBin :: BinaryOp -> Expression -> Expression -> Expression +    mkEBin bo a b = EBin bo a b Nothing + +    mkELit :: Literal -> Expression +    mkELit l = ELit l Nothing + +    litparser :: Parser Expression +    litparser = do +        pops <- many pPrefixOp +        e <- pParenExpr <|> pENew <|> pCastExpr <|> (mkELit <$> pLiteral) +        subs <- many $ between (symbol "[") (symbol "]") pExpression +        let e' = foldl (\ex sub -> ESubscript ex sub Nothing) e subs +            e'' = foldl (\ex pop -> EUn pop ex Nothing) e' pops +        return e'' + +pAsExpression :: Parser AsExpression +pAsExpression = do +    n <- pName +    subs <- many $ between (symbol "[") (symbol "]") pExpression +    return $ foldl (\ae expr -> AESubscript ae expr Nothing) (AEVar n Nothing) subs + +pPrefixOp :: Parser UnaryOp +pPrefixOp = (symbol "!" >> return UONot) <|> +            (symbol "-" >> return UONeg) + +pParenExpr :: Parser Expression +pParenExpr = do +    symbol "(" +    e <- pExpression +    symbol ")" +    return e + +pCastExpr :: Parser Expression +pCastExpr = do +    t <- try $ pType <* symbol "(" +    e <- pExpression +    symbol ")" +    return $ ECast t e + +pENew :: Parser Expression +pENew = do +    symbol "new" +    t <- pBasicType +    symbol "[" +    e <- pExpression +    symbol "]" +    return $ ENew t e + +pLiteral :: Parser Literal +pLiteral = (LInt <$> pInteger) <|> (LChar <$> pCharLit) <|> pLCall <|> (LVar <$> pName) + +pCharLit :: Parser Char +pCharLit = do +    void $ char '\'' +    c <- pStringChar +    void $ char '\'' +    pWhiteComment +    return c + +pStringChar :: Parser Char +pStringChar = +    (char '\\' >> ((char 'n' >> return '\n') <|> +                   (char 'r' >> return '\r') <|> +                   (char 't' >> return '\t') <|> +                   (char '0' >> return '\0') <|> +                   (char 'x' >> pHexDigit >>= \a -> pHexDigit >>= \b -> return (chr $ 16 * a + b)))) <|> +    anyToken +  where +    pHexDigit :: Parser Int +    pHexDigit = (subtract 48 . fromEnum <$> digit) +                    <|> ((+ (10 - 97)) . ord <$> oneOf "abcdef") +                    <|> ((+ (10 - 65)) . ord <$> oneOf "ABCDEF") + +pLCall :: Parser Literal +pLCall = do +    n <- try $ pName <* symbol "(" +    al <- sepBy pExpression (symbol ",") +    symbol ")" +    return $ LCall n al + + +pName :: Parser Name +pName = do +    c0 <- satisfy (\c -> isAlpha c || c == '_') +    cr <- many $ satisfy (\c -> isAlpha c || isDigit c || c == '_') +    pWhiteComment +    return $ c0 : cr + +pInteger :: Parser Integer +pInteger = read <$> many1 (satisfy isDigit) <* pWhiteComment + + +symbol :: String -> Parser () +symbol "" = error "symbol \"\"" +symbol s = try $ do +    void $ string s +    when (isAlpha (last s)) $ void $ notFollowedBy (satisfy isAlpha) +    when (isDigit (last s)) $ void $ notFollowedBy (satisfy isDigit) +    pWhiteComment + +pWhiteComment :: Parser () +pWhiteComment = void $ pWhite >> endBy pComment pWhite + +pWhite :: Parser () +pWhite = void $ many (oneOf " \t\n") + +pComment :: Parser () +pComment = pLineComment <|> pBlockComment + +pLineComment :: Parser () +pLineComment = do +    void $ try $ string "//" +    void $ many (satisfy (/= '\n')) +    eof <|> void (char '\n') + +pBlockComment :: Parser () +pBlockComment = do +    void $ try $ string "/*" +    void $ sepEndBy (manyTill anyToken (lookAhead $ try (string "/*") <|> try (string "*/"))) +                    pBlockComment +    void $ string "*/" + +lefts :: [Either a b] -> [a] +lefts = foldr (\e l -> either (:l) (const l) e) [] + +rights :: [Either a b] -> [b] +rights = foldr (\e l -> either (const l) (:l) e) [] diff --git a/RegAlloc.hs b/RegAlloc.hs new file mode 100644 index 0000000..d2b1717 --- /dev/null +++ b/RegAlloc.hs @@ -0,0 +1,87 @@ +module RegAlloc(regalloc, Allocation(..)) where + +import Data.Function +import Data.List +import Data.Maybe +import qualified Data.Map.Strict as Map +import Debug.Trace + +import Utils + + +-- Follows the Linear Scan Register Allocation algorithm specified in: +-- https://www.cs.purdue.edu/homes/suresh/502-Fall2008/papers/linear-scan.pdf + +-- Test case: +-- regalloc [((1, 3), "a"), ((2, 2), "x"), ((2, 10), "b"), ((3, 7), "c"), ((4, 5), "d")] ["ra", "rb"] + + +data Allocation a = AllocReg a | AllocMem +  deriving Show + +type Interval = (Int, Int) + +data State a = State {stActive :: [Int], stAlloc :: [Allocation a], stFreeRegs :: [a]} +  deriving Show + +regalloc :: (Show a, Show b, Ord a, Ord b) +         => [(Interval, b)]           -- [(live interval, name of variable)] +         -> [a]                       -- the available registers +         -> [(b, b)]                  -- pairs to be allocated to the same register if possible +         -> Map.Map b (Allocation a)  -- allocation map +regalloc vars' regs aliaspairs = +    let foldfunc = \st' ((int, name), index) -> +            let st = expireOldIntervals st' int +                wanted = findWantedAliases aliaspairs name +                wantedregs = uniq $ sort $ catMaybes $ flip map wanted $ \n -> +                                case findIndex (== n) intnames of +                                    Nothing -> Nothing +                                    Just idx | idx >= length (stAlloc st) -> Nothing +                                             | otherwise -> case stAlloc st !! idx of +                                        AllocMem -> Nothing +                                        AllocReg r -> Just r +            in if length (stActive st) == length regs +                   then spillAtInterval st index +                   else let -- ([regchoice], fr) = splitAt 1 (stFreeRegs st) +                            (regchoice, fr) = case find (`elem` wantedregs) (stFreeRegs st) of +                                Nothing -> (head (stFreeRegs st), tail (stFreeRegs st)) +                                Just wr -> trace ("Pair-allocated " ++ show name ++ " in " ++ show wr) $ +                                           (wr, stFreeRegs st \\ [wr]) +                            allocrev = stAlloc st ++ [AllocReg regchoice] +                            active = sortBy (compare `on` snd . (ints !!)) $ index : stActive st +                        in State active allocrev fr +    in Map.fromList $ zip intnames $ stAlloc $ +               foldl foldfunc (State [] [] regs) (zip vars [0..]) +  where +    vars = sortBy (compare `on` fst . fst) vars' +    (ints, intnames) = (map fst vars, map snd vars) + +    expireOldIntervals :: State a -> Interval -> State a +    expireOldIntervals st (intstart, _) = +        let (dropped, active) = span ((< intstart) . snd . (ints !!)) (stActive st) +            fr = selectAllocRegs (map (stAlloc st !!) dropped) ++ stFreeRegs st +        in State active (stAlloc st) fr + +    spillAtInterval :: State a -> Int -> State a +    spillAtInterval st index = +        let spill = last (stActive st) +        in if snd (ints !! spill) > snd (ints !! index) +               then let alloc = setAt spill AllocMem (stAlloc st) ++ [stAlloc st !! spill] +                        active = sortBy (compare `on` snd . (ints !!)) $ +                                     index : (stActive st \\ [spill]) +                    in State active alloc (stFreeRegs st) +               else State (stActive st) (stAlloc st ++ [AllocMem]) (stFreeRegs st) + +    findWantedAliases :: (Ord b) => [(b, b)] -> b -> [b] +    findWantedAliases pairs x = +        uniq $ sort $ map snd (filter ((== x) . fst) pairs) ++ map fst (filter ((== x) . snd) pairs) + +selectAllocRegs :: [Allocation a] -> [a] +selectAllocRegs allocs = catMaybes $ flip map allocs $ \alloc -> case alloc of +        (AllocReg r) -> Just r +        AllocMem -> Nothing + +setAt :: Int -> a -> [a] -> [a] +setAt idx v l = +    let (pre, _ : post) = splitAt idx l +    in pre ++ v : post diff --git a/ReplaceRefs.hs b/ReplaceRefs.hs new file mode 100644 index 0000000..3ab73c3 --- /dev/null +++ b/ReplaceRefs.hs @@ -0,0 +1,36 @@ +module ReplaceRefs +    (replaceRefsIns, replaceRefsTerm, replaceRefsBB, replaceRefsBBList) +    where + +import Intermediate + + +replaceRefsIns :: Ref -> Ref -> IRIns -> IRIns +replaceRefsIns from to (IMov a b) = IMov (trans from to a) (trans from to b) +replaceRefsIns from to (IStore a b) = IStore (trans from to a) (trans from to b) +replaceRefsIns from to (ILoad a b) = ILoad (trans from to a) (trans from to b) +replaceRefsIns from to (IAri at a b) = IAri at (trans from to a) (trans from to b) +replaceRefsIns from to (ICall n al) = ICall n (map (trans from to) al) +replaceRefsIns from to (ICallr a n al) = ICallr (trans from to a) n (map (trans from to) al) +replaceRefsIns from to (IResize a b) = IResize (trans from to a) (trans from to b) +replaceRefsIns _ _ INop = INop + +replaceRefsTerm :: Ref -> Ref -> IRTerm -> IRTerm +replaceRefsTerm from to (IJcc ct a b i1 i2) = IJcc ct (trans from to a) (trans from to b) i1 i2 +replaceRefsTerm _ _ (IJmp i) = IJmp i +replaceRefsTerm _ _ IRet = IRet +replaceRefsTerm from to (IRetr a) = IRetr (trans from to a) +replaceRefsTerm _ _ ITermNone = ITermNone + +replaceRefsBB :: Ref -> Ref -> BB -> BB +replaceRefsBB from to (BB bid inss term) = +    BB bid (map (replaceRefsIns from to) inss) (replaceRefsTerm from to term) + +replaceRefsBBList :: Ref -> Ref -> [BB] -> [BB] +replaceRefsBBList from to bbs = map (\bb -> replaceRefsBB from to bb) bbs + + +trans :: Ref -> Ref -> Ref -> Ref +trans from to ref +    | ref == from = to +    | otherwise = ref diff --git a/TypeCheck.hs b/TypeCheck.hs new file mode 100644 index 0000000..36d98c8 --- /dev/null +++ b/TypeCheck.hs @@ -0,0 +1,234 @@ +module TypeCheck(typeCheck) where + +import Control.Monad +import Data.Maybe +import qualified Data.Map.Strict as Map + +import AST +import Defs +import Pretty +import TypeRules + + +data DBItem = DBVar Type | DBFunc (Maybe Type) [Type] + +type TypeDB = [Map.Map Name DBItem] + +dbFind :: TypeDB -> Name -> Maybe DBItem +dbFind db name = findJust $ map (Map.lookup name) db +  where findJust [] = Nothing +        findJust (Just x:_) = Just x +        findJust (Nothing:l) = findJust l + +dbFindTop :: TypeDB -> Name -> Maybe DBItem +dbFindTop [] _ = error "dbFindTop on empty scope stack"  +dbFindTop (m:_) name = Map.lookup name m + +dbSet :: TypeDB -> Name -> DBItem -> TypeDB +dbSet [] _ _ = error "dbSet on empty scope stack" +dbSet (m:ms) name val = Map.insert name val m : ms + +emptyDB :: TypeDB +emptyDB = [Map.fromList +    [("putc", DBFunc Nothing [TChar]), +     ("putint", DBFunc Nothing [TInt]), +     ("getc", DBFunc (Just TInt) []), +     ("_builtin_malloc", DBFunc (Just $ TArr TChar Nothing) [TInt])]] + +withScope :: TypeDB -> (TypeDB -> a) -> a +withScope db f = f (Map.empty : db) + + +typeCheck :: Program -> Error Program +typeCheck (Program vars funcs) = do +    db <- foldM registerDVar emptyDB vars +            >>= \db' -> foldM registerDFunc db' funcs +    vars' <- mapM (annotateDVar db) vars +    funcs' <- mapM (annotateDFunc db) funcs +    return $ Program vars' funcs' + + +registerDVar :: TypeDB -> DVar -> Error TypeDB +registerDVar db (DVar t n _) = case dbFind db n of +    Nothing -> return $ dbSet db n (DBVar t) +    Just _ -> Left $ "Duplicate top-level name '" ++ n ++ "'" + +registerDFunc :: TypeDB -> DFunc -> Error TypeDB +registerDFunc db (DFunc rt n al _) = case dbFind db n of +    Nothing -> return $ dbSet db n (DBFunc rt (map fst al)) +    Just _ -> Left $ "Duplicate top-level name '" ++ n ++ "'" + + +annotateDVar :: TypeDB -> DVar -> Error DVar +annotateDVar db (DVar toptype name expr) = do +    expr' <- annotateExpr db expr +    when (isNothing (typeof expr')) $ +        Left $ "Cannot assign void value in global declaration of " ++ name +    let typ = fromJust $ typeof expr' +    if canCoerce typ toptype +        then return $ DVar toptype name expr' +        else Left $ "Cannot assign a value of type " ++ pretty typ ++ +                    " to a variable of type " ++ pretty toptype + +annotateDFunc :: TypeDB -> DFunc -> Error DFunc +annotateDFunc db dfunc@(DFunc rettype name arglist block) = do +    when (name == "main" && rettype /= Just TInt) $ +        Left $ "Function 'main' should return an int" +    let db' = foldl registerArg db arglist +    block' <- annotateBlock dfunc db' block +    return $ DFunc rettype name arglist block' +  where +    registerArg :: TypeDB -> (Type, Name) -> TypeDB +    registerArg db' (t, n) = dbSet db' n (DBVar t) + +annotateBlock :: DFunc -> TypeDB -> Block -> Error Block +annotateBlock dfunc db (Block sts) = +    Block . snd <$> foldM (\(db', l) st -> +                               (\(db'', st') -> (db'', l ++ [st'])) <$> annotateStatement dfunc db' st) +                          (db, []) sts + +annotateStatement :: DFunc -> TypeDB -> Statement -> Error (TypeDB, Statement) +annotateStatement _ db (SDecl toptype name expr) = do +    expr' <- annotateExpr db expr +    when (isNothing (typeof expr')) $ +        Left $ "Cannot assign void value in declaration of " ++ name +    when (isJust (dbFindTop db name)) $ +        Left $ "Duplicate declaration of variable " ++ name +    let typ = fromJust $ typeof expr' +    if canCoerce typ toptype +        then return $ (dbSet db name (DBVar toptype), SDecl toptype name expr') +        else Left $ "Cannot assign a value of type " ++ pretty typ ++ +                    " to a variable of type " ++ pretty toptype +annotateStatement _ db (SAs ae expr) = do +    ae' <- annotateAsExpr db ae +    expr' <- annotateExpr db expr +    when (isNothing (typeof expr')) $ +        Left $ "Cannot assign a void value in assignment of " ++ pretty ae +    let typ = fromJust $ typeof expr' +    let aetyp = fromJust $ typeof ae' +    when (not $ isBasicType aetyp) $ +        Left $ "Cannot assign to a location of type " ++ pretty aetyp +    when (not $ canCoerce typ aetyp) $ +        Left $ "Cannot assign a value of type " ++ pretty typ ++ +               " to a location of type " ++ pretty aetyp +    return (db, SAs ae' expr') +annotateStatement dfunc db (SIf expr bl1 bl2) = do +    expr' <- annotateExpr db expr +    when (isNothing (typeof expr')) $ +        Left $ "Cannot use void value in 'if' condition" +    when (not $ canCoerce (fromJust (typeof expr')) TInt) $ +        Left $ "Cannot use type " ++ pretty (fromJust (typeof expr')) ++ " in 'if' condition" +    bl1' <- withScope db $ flip (annotateBlock dfunc) bl1 +    bl2' <- withScope db $ flip (annotateBlock dfunc) bl2 +    return (db, SIf expr' bl1' bl2') +annotateStatement dfunc db (SWhile expr bl) = do +    expr' <- annotateExpr db expr +    when (isNothing (typeof expr')) $ +        Left $ "Cannot use void value in 'while' condition" +    when (not $ canCoerce (fromJust (typeof expr')) TInt) $ +        Left $ "Cannot use type " ++ pretty (fromJust (typeof expr')) ++ " in 'while' condition" +    bl' <- withScope db $ flip (annotateBlock dfunc) bl +    return (db, SWhile expr' bl') +annotateStatement (DFunc Nothing _ _ _) db (SReturn Nothing) = +    return (db, SReturn Nothing) +annotateStatement (DFunc (Just _) _ _ _) _ (SReturn Nothing) = +    Left "Cannot return void value from non-void function" +annotateStatement (DFunc mrt _ _ _) db (SReturn (Just expr)) = do +    expr' <- annotateExpr db expr +    case mrt of +        Nothing -> Left $ "Cannot return non-void value from void function" +        Just rt -> do +            when (isNothing (typeof expr')) $ +                Left $ "Cannot use void value in 'return'" +            when (not $ canCoerce (fromJust (typeof expr')) rt) $ +                Left $ "Cannot coerce type " ++ pretty (fromJust (typeof expr')) ++ " to " ++ pretty rt ++ +                       " in 'return'" +            return (db, SReturn (Just expr')) +annotateStatement _ db (SExpr expr) = (\expr' -> (db, SExpr expr')) <$> annotateExpr db expr + +annotateExpr :: TypeDB -> Expression -> Error Expression +annotateExpr db (EBin bo e1 e2 _) = do +    e1' <- annotateExpr db e1 +    e2' <- annotateExpr db e2 +    when (isNothing (typeof e1')) $ Left $ "Use of void value in expression: " ++ show e1' +    when (isNothing (typeof e2')) $ Left $ "Use of void value in expression: " ++ show e2' +    let t1 = fromJust $ typeof e1' +        t2 = fromJust $ typeof e2' +    rt <- let errval = Left $ "Operator " ++ pretty bo ++ " doesn't take" ++ +                              " arguments of types " ++ pretty t1 ++ " and " ++ +                              pretty t2 +          in maybe errval return $ retTypeBO bo t1 t2 +    return $ EBin bo e1' e2' (Just rt) +annotateExpr db (EUn uo e _) = do +    e' <- annotateExpr db e +    when (isNothing (typeof e')) $ Left "Use of void value in expression" +    let t = fromJust $ typeof e' +    rt <- let errval = Left $ "Unary operator " ++ pretty uo ++ " doesn't take" ++ +                              " an argument of type " ++ pretty t +          in maybe errval return $ retTypeUO uo t +    return $ EUn uo e' (Just rt) +annotateExpr _ (ELit lit@(LInt _) _) = return $ ELit lit (Just TInt) +annotateExpr _ (ELit lit@(LChar _) _) = return $ ELit lit (Just TChar) +annotateExpr db (ELit lit@(LVar n) _) = case dbFind db n of +    Nothing -> Left $ "Use of undeclared variable " ++ n ++ " in expression" +    Just (DBVar t) -> return $ ELit lit (Just t) +    Just (DBFunc _ _) -> Left $ "Cannot use function " ++ n ++ " in expression" +annotateExpr db (ELit (LCall n as) _) = do +    as' <- mapM (annotateExpr db) as +    case dbFind db n of +        Nothing -> Left $ "Use of undeclared function " ++ n +        Just (DBVar t) -> Left $ "Cannot call variable of type " ++ pretty t +        Just (DBFunc mrt ats) -> do +            when (length as' /= length ats) $ +                Left $ "Function '" ++ n ++ "' expected " ++ show (length ats) ++ +                       " arguments but got " ++ show (length as') +            forM_ (zip as' ats) $ \(arg, at) -> do +                when (isNothing (typeof arg)) $ +                        Left "Use of void value in function argument" +                if canCoerce (fromJust $ typeof arg) at +                    then return () +                    else Left $ "Argument of " ++ n ++ " has type " ++ pretty at ++ +                                " but value of type " ++ pretty (fromJust $ typeof arg) ++ +                                " was given" +            return $ ELit (LCall n as') mrt +annotateExpr db (ESubscript arr sub _) = do +    arr' <- annotateExpr db arr +    sub' <- annotateExpr db sub +    let subtyp = fromJust (typeof sub') +    when (subtyp /= TInt) $ +        Left $ "Type of array subscript should be int, but is " ++ pretty subtyp +    case fromJust (typeof arr') of +        TArr et _ -> return $ ESubscript arr' sub' (Just et) +        _ -> Left $ "Subscripted expression is not an array: " ++ pretty arr +annotateExpr db (ECast t e) = do +    e' <- annotateExpr db e +    let typ = fromJust (typeof e') +    if canCast typ t +        then return $ ECast t e' +        else Left $ "Cannot cast value of type " ++ pretty typ ++ " to type " ++ pretty t +annotateExpr db e@(ENew t sze) = do +    sze' <- annotateExpr db sze +    case typeof sze' of +        Nothing -> Left $ "Use of void value in array length in 'new' expression: " ++ pretty e +        Just TInt -> return () +        Just szet -> Left $ "Type of array length in 'new' expression should be int, is " ++ pretty szet +    if isBasicType t +        then return $ ENew t sze' +        else Left $ "Can only allocate arrays of basic types using 'new': " ++ pretty e + +annotateAsExpr :: TypeDB -> AsExpression -> Error AsExpression +annotateAsExpr db (AEVar n _) = case dbFind db n of +    Nothing -> Left $ "Use of undeclared variable " ++ n ++ " in assignment expression" +    Just (DBVar t) -> return $ AEVar n (Just t) +    Just (DBFunc _ _) -> Left $ "Cannot use function " ++ n ++ " in assignment expression" +annotateAsExpr db (AESubscript ae expr _) = do +    ae' <- annotateAsExpr db ae +    expr' <- annotateExpr db expr +    case typeof expr' of +        Nothing -> Left $ "Use of void value in array index in assignment expression" +        Just TInt -> return () +        Just t -> Left $ "Use of non-int type " ++ pretty t ++ " in array index in assignment expression" +    case fromJust (typeof ae') of +        TArr t _ -> return $ AESubscript ae' expr' (Just t) +        t -> Left $ "Indexed expression '" ++ pretty ae ++ "' has non-array type " ++ pretty t ++ +                    " in assignment expression" diff --git a/TypeRules.hs b/TypeRules.hs new file mode 100644 index 0000000..a3e7678 --- /dev/null +++ b/TypeRules.hs @@ -0,0 +1,61 @@ +module TypeRules where + +import AST + + +canCoerce :: Type -> Type -> Bool +canCoerce t1 t2 | t1 == t2 = True +canCoerce (TArr t1 (Just _)) (TArr t2 Nothing) | t1 == t2 = True +canCoerce _ _ = False + +canCast :: Type -> Type -> Bool +canCast t1 t2 | canCoerce t1 t2 = True +canCast t1 t2 | isIntegralType t1 && isIntegralType t2 = True +canCast _ _ = False + +isBasicType :: Type -> Bool +isBasicType = isIntegralType + +isIntegralType :: Type -> Bool +isIntegralType TInt = True +isIntegralType TChar = True +isIntegralType _ = False + +isSimpleArithBO :: BinaryOp -> Bool +isSimpleArithBO = flip elem [BOAdd, BOSub, BOMul, BODiv, BOMod] + +isBoolBO :: BinaryOp -> Bool +isBoolBO = flip elem [BOAnd, BOOr] + +isCompareBO :: BinaryOp -> Bool +isCompareBO = flip elem [BOEq, BONeq, BOGt, BOLt, BOGeq, BOLeq] + +retTypeBO :: BinaryOp -> Type -> Type -> Maybe Type +retTypeBO bo TInt TInt | isSimpleArithBO bo = Just TInt +retTypeBO bo TChar TChar | isSimpleArithBO bo = Just TChar +retTypeBO bo TInt TInt | isBoolBO bo = Just TInt +retTypeBO bo TInt TInt | isCompareBO bo = Just TInt +retTypeBO bo TChar TChar | isCompareBO bo = Just TInt +retTypeBO _ _ _ = Nothing +-- retTypeBO bo t1 t2 = error $ "retTypeBO " ++ show bo ++ " " ++ show t1 ++ " " ++ show t2 + +retTypeUO :: UnaryOp -> Type -> Maybe Type +retTypeUO UONot TInt = Just TInt +retTypeUO UONeg TInt = Just TInt +retTypeUO UONeg TChar = Just TChar +retTypeUO _ _ = Nothing + +class TypeOf a where +    typeof :: a -> Maybe Type + +instance TypeOf Expression where +    typeof (EBin _ _ _ mt) = mt +    typeof (EUn _ _ mt) = mt +    typeof (ELit _ mt) = mt +    typeof (ESubscript _ _ mt) = mt +    typeof (ECast t _) = Just t +    typeof (ENew t _) = Just $ TArr t Nothing + +instance TypeOf AsExpression where +    typeof (AEVar _ mt) = mt +    typeof (AESubscript _ _ mt) = mt diff --git a/Utils.hs b/Utils.hs new file mode 100644 index 0000000..65bb651 --- /dev/null +++ b/Utils.hs @@ -0,0 +1,7 @@ +module Utils where + + +uniq :: Eq a => [a] -> [a] +uniq (a:b:cs) | a == b = uniq (b:cs) +              | otherwise = a : uniq (b:cs) +uniq l = l diff --git a/Verify.hs b/Verify.hs new file mode 100644 index 0000000..18f90ce --- /dev/null +++ b/Verify.hs @@ -0,0 +1,28 @@ +module Verify(verify) where + +import AST +import Defs +import Intermediate + + +verify :: IRProgram -> Error IRProgram +verify (IRProgram vars funcs) = IRProgram <$> mapM verifyVar vars <*> mapM verifyFunc funcs + +verifyVar :: DVar -> Error DVar +verifyVar dvar@(DVar _ n e) = case e of +    ELit _ _ -> return dvar +    _ -> Left $ "Initialisation of global variable " ++ n ++ " is not a literal" + +verifyFunc :: IRFunc -> Error IRFunc +verifyFunc irfunc@(IRFunc mrt name _ bbs _) = +    let terms = map (\(BB _ _ term) -> term) bbs +        (nret', nretr') = unzip $ flip map terms $ \term -> case term of +            IRet -> (1, 0) +            IRetr _ -> (0, 1) +            _ -> (0, 0) +        (nret, nretr) = (sum nret', sum nretr') :: (Int, Int) +    in case (mrt, nret, nretr) of +        (Nothing, _, 0) -> return irfunc +        (Just _, 0, _) -> return irfunc +        (Nothing, _, _) -> Left $ "Some code paths of void function '" ++ name ++ "' return a value" +        (Just _, _, _) -> Left $ "Not all code paths of non-void function '" ++ name ++ "' return a value" @@ -0,0 +1,275 @@ +module X64 where + +import Control.Monad +import Data.Char +import Data.Int +import Data.List +import Data.Maybe + + +type Offset = Int64 + +class Stringifiable a where +    stringify :: a -> String + +data Register = RAX | RBX | RCX | RDX | RSI | RDI | RSP | RBP | R8 | R9 | R10 | R11 | R12 | R13 | R14 | R15 +  deriving (Show, Eq, Ord, Enum, Bounded) + +data XRef = XReg Int Register | XMem Int (Maybe Register) (Int, Register) (Maybe String) Offset | XImm Offset +  deriving (Show, Eq) + +newtype Reg = Reg XRef deriving (Show, Eq) +newtype Mem = Mem XRef deriving (Show, Eq) +newtype Imm = Imm XRef deriving (Show, Eq) +newtype RegMem = RegMem XRef deriving (Show, Eq) +newtype RegMemImm = RegMemImm XRef deriving (Show, Eq) + +data CondCode = CCA | CCAE | CCB | CCBE | CCC | CCE | CCG | CCGE | CCL | CCLE | CCNA | CCNAE | CCNB | CCNBE | CCNC | CCNE | CCNG | CCNGE | CCNL | CCNLE +  deriving (Show, Eq) + +data Ins +    = MOV RegMem RegMem | MOVi RegMem Imm | MOVi64 Reg Imm +    | MOVSX Reg RegMem +    | ADD RegMem RegMemImm +    | SUB RegMem RegMemImm +    | AND RegMem RegMemImm +    | OR RegMem RegMemImm +    | XOR RegMem RegMemImm +    | IMULDA RegMem | IMUL Reg RegMem | IMUL3 Reg RegMem Imm +    | MULDA RegMem +    | IDIVDA RegMem +    | DIVDA RegMem +    | CMP RegMem RegMem | CMPi RegMem Imm +    | SETCC CondCode RegMem +    | CALL String +    | PUSH RegMemImm +    | POP RegMem +    | JMP String +    | JCC CondCode String +    | RET +  deriving (Show, Eq) + +data Asm = Asm [(String, [Ins])] +  deriving (Show, Eq) + + +class XRefSub a where +    xref :: XRef -> a + +instance XRefSub Reg where +    xref x@(XReg _ _) = Reg x +    xref _ = undefined +instance XRefSub Mem where +    xref x@(XMem _ _ _ _ _) = Mem x +    xref _ = undefined +instance XRefSub Imm where +    xref x@(XImm _) = Imm x +    xref _ = undefined +instance XRefSub RegMem where +    xref x@(XReg _ _) = RegMem x +    xref x@(XMem _ _ _ _ _) = RegMem x +    xref _ = undefined +instance XRefSub RegMemImm where +    xref x = RegMemImm x + + +verify :: Asm -> Either String () +verify (Asm funcs) = mapM_ (\(_, inss) -> mapM_ goI inss) funcs +  where +    goI :: Ins -> Either String () +    goI (MOV (RegMem a) (RegMem b)) = ckRegMem a >> ckRegMem b >> ck2mem a b >> ckSizes a b +    goI (MOVi (RegMem a) (Imm b)) = ckRegMem a >> ckImm b >> ckSizes a b +    goI (MOVi64 (Reg a) (Imm b)) = ckReg a >> ckImm b >> ckSizes64 a b +    goI (MOVSX (Reg a) (RegMem b)) = ckReg a >> ckRegMem b >> ckMovsx a b +    goI (ADD (RegMem a) (RegMemImm b)) = ckRegMem a >> ckRegMemImm b >> ck2mem a b >> ckSizes a b +    goI (SUB (RegMem a) (RegMemImm b)) = ckRegMem a >> ckRegMemImm b >> ck2mem a b >> ckSizes a b +    goI (AND (RegMem a) (RegMemImm b)) = ckRegMem a >> ckRegMemImm b >> ck2mem a b >> ckSizes a b +    goI (OR (RegMem a) (RegMemImm b)) = ckRegMem a >> ckRegMemImm b >> ck2mem a b >> ckSizes a b +    goI (XOR (RegMem a) (RegMemImm b)) = ckRegMem a >> ckRegMemImm b >> ck2mem a b >> ckSizes a b +    goI (IMULDA (RegMem a)) = ckRegMem a +    goI (IMUL (Reg a) (RegMem b)) = ckReg a >> ckRegMem b >> ck2mem a b >> ckSizes a b +    goI (IMUL3 (Reg a) (RegMem b) (Imm c)) = ckReg a >> ckRegMem b >> ckImm c >> ckSizes a b >> ckSizes a c +    goI (MULDA (RegMem a)) = ckRegMem a +    goI (IDIVDA (RegMem a)) = ckRegMem a +    goI (DIVDA (RegMem a)) = ckRegMem a +    goI (CMP (RegMem a) (RegMem b)) = ckRegMem a >> ckRegMem b >> ck2mem a b >> ckSizes a b +    goI (CMPi (RegMem a) (Imm b)) = ckRegMem a >> ckImm b >> ckSizes a b +    goI (SETCC _ (RegMem a)) = ckRegMem a >> ckSizeEq 1 a +    goI (CALL s) = when (null s) $ Left "Empty call target" +    goI (PUSH (RegMemImm a)) = ckRegMemImm a +    goI (POP (RegMem a)) = ckRegMem a +    goI (JMP s) = when (null s) $ Left "Empty jump target" +    goI (JCC _ s) = when (null s) $ Left "Empty jcc target" +    goI RET = return () + +    ckReg (XReg _ _) = return () +    ckReg _ = Left "Argument is not a Reg" + +    ckImm (XImm _) = return () +    ckImm _ = Left "Argument is not an Imm" + +    ckRegMem (XReg _ _) = return () +    ckRegMem (XMem _ _ _ _ _) = return () +    ckRegMem _ = Left "Argument is not a Reg or a Mem" + +    ckRegMemImm _ = return () + +    ck2mem x@(XMem _ _ _ _ _) y@(XMem _ _ _ _ _) = +        Left $ "Invalid double-memory operands: " ++ show x ++ "; " ++ show y +    ck2mem _ _ = return () + +    ckSizes64 x@(XReg a _) y@(XReg b _) = +        when (a /= b) $ Left $ "Inconsistent operand sizes: " ++ show x ++ "; " ++ show y +    ckSizes64 x@(XReg a _) y@(XMem b _ _ _ _) = +        when (a /= b) $ Left $ "Inconsistent operand sizes: " ++ show x ++ "; " ++ show y +    ckSizes64 x@(XMem a _ _ _ _) y@(XReg b _) = +        when (a /= b) $ Left $ "Inconsistent operand sizes: " ++ show x ++ "; " ++ show y +    ckSizes64 x@(XMem a _ _ _ _) y@(XMem b _ _ _ _) = +        when (a /= b) $ Left $ "Inconsistent operand sizes: " ++ show x ++ "; " ++ show y +    ckSizes64 x@(XReg a _) y@(XImm v) = +        when (not $ fitsIn v a) $ Left $ "Immediate too large: " ++ show x ++ "; " ++ show y +    ckSizes64 x@(XMem a _ _ _ _) y@(XImm v) = +        when (not $ fitsIn v a) $ Left $ "Immediate too large: " ++ show x ++ "; " ++ show y +    ckSizes64 _ _ = undefined + +    ckSizes a b@(XImm v) = when (v >= 2 ^ (32::Int)) (Left "Immediate too large") >> ckSizes64 a b +    ckSizes a b = ckSizes64 a b + +    ckMovsx (XReg a _) (XReg b _) = when (a <= b) $ Left "MOVSX with shrinking operands" +    ckMovsx (XReg a _) (XMem b _ _ _ _) = when (a <= b) $ Left "MOVSX with shrinking operands" +    ckMovsx (XMem a _ _ _ _) (XReg b _) = when (a <= b) $ Left "MOVSX with shrinking operands" +    ckMovsx (XMem a _ _ _ _) (XMem b _ _ _ _) = when (a <= b) $ Left "MOVSX with shrinking operands" +    ckMovsx _ _ = undefined + +    ckSizeEq sz (XReg a _) = when (a /= sz) $ Left "Invalid operand size" +    ckSizeEq sz (XMem a _ _ _ _) = when (a /= sz) $ Left "Invalid operand size" +    ckSizeEq sz (XImm v) = when (v >= 2 ^ (8 * sz)) $ Left "Invalid operand size (immediate)" + +    fitsIn v sz = (fromIntegral v :: Integer) < ((2 :: Integer) ^ (8 * sz)) + + +instance Stringifiable XRef where +    stringify (XReg sz reg) = +        let n = fromEnum reg +        in case n of +               _ | n < 4 -> erPrefix $ lxSuffix $ [chr (n + ord 'a')] +                 | n < 8 -> erPrefix $ lSuffix $ ["si", "di", "sp", "bp"] !! (n - 4) +                 | otherwise -> bwdSuffix $ 'r' : show n +      where +        erPrefix s = case sz of +            4 -> 'e' : s +            8 -> 'r' : s +            _ -> s +        lxSuffix s = case sz of +            1 -> s ++ "l" +            _ -> s ++ "x" +        lSuffix s = case sz of +            1 -> s ++ "l" +            _ -> s +        bwdSuffix s = case sz of +            1 -> s ++ "b" +            2 -> s ++ "w" +            4 -> s ++ "d" +            _ -> s + +    stringify (XMem _ _ (mult, _) _ _) | not (mult `elem` [0,1,2,4,8]) = +        error $ "Register multiplier has invalid value " ++ show mult ++ " in XMem" +    stringify (XMem sz mr pair lab off) = +        let res = intercalate "+" $ catMaybes [goR1 mr, goPair pair, goLab lab, goOff off] +        in szword sz ++ " " ++ if null res then "[0]" else "[" ++ res ++ "]" +      where +        szword 1 = "byte" +        szword 2 = "word" +        szword 4 = "dword" +        szword 8 = "qword" +        szword _ = undefined +        goR1 Nothing = Nothing +        goR1 (Just r) = Just $ stringify (XReg 8 r) +        goPair (0, _) = Nothing +        goPair (mult, r) = Just $ show mult ++ "*" ++ stringify (XReg 8 r) +        goLab = id +        goOff 0 = Nothing +        goOff o = Just $ show o + +    stringify (XImm imm) = show imm + +instance Stringifiable Reg where stringify (Reg x) = stringify x +instance Stringifiable Mem where stringify (Mem x) = stringify x +instance Stringifiable Imm where stringify (Imm x) = stringify x +instance Stringifiable RegMem where stringify (RegMem x) = stringify x +instance Stringifiable RegMemImm where stringify (RegMemImm x) = stringify x + +instance Stringifiable CondCode where +    stringify CCA = "a" +    stringify CCAE = "ae" +    stringify CCB = "b" +    stringify CCBE = "be" +    stringify CCC = "c" +    stringify CCE = "e" +    stringify CCG = "g" +    stringify CCGE = "ge" +    stringify CCL = "l" +    stringify CCLE = "le" +    stringify CCNA = "na" +    stringify CCNAE = "nae" +    stringify CCNB = "nb" +    stringify CCNBE = "nbe" +    stringify CCNC = "nc" +    stringify CCNE = "ne" +    stringify CCNG = "ng" +    stringify CCNGE = "nge" +    stringify CCNL = "nl" +    stringify CCNLE = "nle" + +instance Stringifiable Ins where +    stringify (MOV a b) = "mov " ++ stringify a ++ ", " ++ stringify b +    stringify (MOVi a b) = "mov " ++ stringify a ++ ", " ++ stringify b +    stringify (MOVi64 a b) = "mov " ++ stringify a ++ ", " ++ stringify b +    stringify (MOVSX a b@(RegMem bx)) = case compare (xrefGetSize bx) 4 of +        EQ -> "movsxd " ++ stringify a ++ ", " ++ stringify b +        LT -> "movsx " ++ stringify a ++ ", " ++ stringify b +        GT -> undefined +    stringify (ADD a b) = "add " ++ stringify a ++ ", " ++ stringify b +    stringify (SUB a b) = "sub " ++ stringify a ++ ", " ++ stringify b +    stringify (AND a b) = "and " ++ stringify a ++ ", " ++ stringify b +    stringify (OR a b) = "or " ++ stringify a ++ ", " ++ stringify b +    stringify (XOR a b) = "xor " ++ stringify a ++ ", " ++ stringify b +    stringify (IMULDA a) = "imul " ++ stringify a +    stringify (IMUL a b) = "imul " ++ stringify a ++ ", " ++ stringify b +    stringify (IMUL3 a b c) = "imul " ++ stringify a ++ ", " ++ stringify b ++ ", " ++ stringify c +    stringify (MULDA a) = "mul " ++ stringify a +    stringify (IDIVDA a) = "idiv " ++ stringify a +    stringify (DIVDA a) = "div " ++ stringify a +    stringify (CMP a b) = "cmp " ++ stringify a ++ ", " ++ stringify b +    stringify (CMPi a b) = "cmp " ++ stringify a ++ ", " ++ stringify b +    stringify (SETCC cc a) = "set" ++ stringify cc ++ " " ++ stringify a +    stringify (CALL a) = "call " ++ a +    stringify (PUSH a) = "push " ++ stringify a +    stringify (POP a) = "pop " ++ stringify a +    stringify (JMP s) = "jmp " ++ s +    stringify (JCC cc s) = "j" ++ stringify cc ++ " " ++ s +    stringify RET = "ret" + +instance Stringifiable Asm where +    stringify (Asm funcs) = intercalate "\n" $ map goF funcs +      where +        goF :: (String, [Ins]) -> String +        goF (name, inss) = name ++ ":\n" ++ unlines (map (('\t' :) . stringify) inss) + +xrefGetSize :: XRef -> Int +xrefGetSize (XReg s _) = s +xrefGetSize (XMem s _ _ _ _) = s +xrefGetSize (XImm _) = undefined + +xrefSetSize :: Int -> XRef -> XRef +xrefSetSize sz (XReg _ r) = XReg sz r +xrefSetSize sz (XMem _ a b c d) = XMem sz a b c d +xrefSetSize _ x@(XImm _) = x + +isXMem :: XRef -> Bool +isXMem (XMem _ _ _ _ _) = True +isXMem _ = False + +isXImm :: XRef -> Bool +isXImm (XImm _) = True +isXImm _ = False diff --git a/arrays.lang b/arrays.lang new file mode 100644 index 0000000..8cab05f --- /dev/null +++ b/arrays.lang @@ -0,0 +1,7 @@ +func int main() { +	int[] arr := new int[5]; +	arr[0] = 97; +	arr[1] = 98; +	putc(char(arr[0])); +	return 0; +} @@ -0,0 +1,105 @@ +func putstr(char[] str) { +	int i := 0; +	while (str[i] != '\0') { +		putc(str[i]); +		i = i + 1; +	} +} + +func int strlen(char[] str) { +	int i := 0; +	char c := str[i]; +	while (c != '\0') { +		i = i + 1; +		c = str[i]; +	} +	return i; +} + +func int[] makejumpmap(char[] src, int srclen) { +	int[] jm := new int[srclen]; +	int[] stack := new int[srclen]; +	int stkp := 0; +	int i := 0; +	while (i < srclen) { +		putint(i); putc(' '); putint(stkp); putc(' '); putc(src[i]); putc('\n'); +		if (src[i] == '[') { +			stack[stkp] = i; +			stkp = stkp + 1; +		} +		if (src[i] == ']') { +			stkp = stkp - 1; +			int j := stack[stkp]; +			jm[i] = j; +			jm[j] = i; +		} +		i = i + 1; +	} +	/*i = 0; +	while (i < srclen) { +		putint(jm[i]); +		putc(' '); +		i = i + 1; +	} +	putc('\n');*/ +	return jm; +} + +func interpret(char[] src, int srclen) { +	int[] jm := makejumpmap(src, srclen); +	char[] mem := new char[4088]; +	int ip := 0; +	int memp := 0; +	while (src[ip] != '\0') { +		// putint(ip); putc('\n'); +		char c := src[ip]; +		if (c == '+') {mem[memp] = mem[memp] + char(1);} +		if (c == '-') {mem[memp] = mem[memp] - char(1);} +		if (c == '>') {memp = memp + 1;} +		if (c == '<') {memp = memp - 1;} +		if (c == '.') {putc(mem[memp]);} +		if (c == ',') {mem[memp] = char(getc());} +		if (c == '[') { +			if (mem[memp] == '\0') { +				ip = jm[ip]; +			} +		} +		if (c == ']') { +			if (mem[memp] != '\0') { +				ip = jm[ip]; +			} +		} +		ip = ip + 1; +	} + +	memp = 0; +	while (memp < 10) { +		putint(int(mem[memp])); putc(' '); +		memp = memp + 1; +	} +	putc('\n'); +} + +func int main() { +	int bufsize := 4088; +	char[] source := new char[4088]; + +	int sourcelen := 0; +	int done := 0; +	while (done != 1) { +		int c := getc(); +		if (c < 0) { +			done = 1; +		} else { +			source[sourcelen] = char(c); +			sourcelen = sourcelen + 1; +			if (sourcelen >= bufsize - 1) { +				done = 1; +			} +		} +	} +	source[sourcelen] = '\0'; + +	interpret(source, sourcelen); +	return 0; +} diff --git a/chartest.lang b/chartest.lang new file mode 100644 index 0000000..208bcab --- /dev/null +++ b/chartest.lang @@ -0,0 +1,19 @@ +int glob := 42; + +func f(char x) { +	putc(x); +} + +func char g() { +	return '\n'; +} + +func int main() { +	char c := 'a'; +	f(c); +	putc('\n'); +	char d := g(); +	c = d + d + d + d + d + d + d + d + 'a'; +	putc(c); +	return 0; +} diff --git a/fibo.lang b/fibo.lang new file mode 100644 index 0000000..e1a7a4a --- /dev/null +++ b/fibo.lang @@ -0,0 +1,31 @@ +func printnum(int n) { +	if (n == 0) {putc('0'); return;} +	if (n < 0) {putc('-'); n = -n;} +	while (n > 0) { +		putc('0' + char(n % 10)); +		n = n / 10; +	} +} + +func int fibo(int n) { +	int a := 0; +	int b := 1; +	int i := 0; +	while (i < n) { +		int c := a + b; +		a = b; +		b = c; +		i = i + 1; +	} +	return b; +} + +func int main() { +	int i := 0; +	while (i <= 20) { +		printnum(fibo(i)); +		putc('\n'); +		i = i + 1; +	} +	return 0; +} diff --git a/graph.png b/graph.png Binary files differnew file mode 100644 index 0000000..0356b69 --- /dev/null +++ b/graph.png diff --git a/prologue.asm b/prologue.asm new file mode 100644 index 0000000..74d9de3 --- /dev/null +++ b/prologue.asm @@ -0,0 +1,134 @@ +; SYS_EXIT  equ 0x2000001 ;code +; SYS_FORK  equ 0x2000002 ;-- +; SYS_READ  equ 0x2000003 ;fd, buf, len +; SYS_WRITE equ 0x2000004 ;fd, buf, len + +global start +default rel + +section .text +start: +	call main +	mov rdi, rax +	mov eax, 0x2000001 +	syscall +	jmp $ + +putc: +	push rax +	push rdi +	push rsi +	push rdx +	push rcx +	push r11 +	mov eax, 0x2000004 +	mov edi, 1 +	lea rsi, [rsp+56] +	mov edx, edi +	syscall +	pop r11 +	pop rcx +	pop rdx +	pop rsi +	pop rdi +	pop rax +	ret + +putint: +	push rdi +	push rsi +	push rax +	push rbx +	push rcx +	push rdx +	push r11 +	mov rax, [rsp+64] +	mov ebx, 18 +	test rax, rax +	jz .numzero + +	mov ecx, 10 + +.strlp: +	xor edx, edx +	div rcx +	add dl, '0' +	dec rbx +	mov [rsp+rbx], dl +	test rax, rax +	jnz .strlp +	jmp .strdone + +.numzero: +	dec rbx +	mov byte [rsp+rbx], '0' +	jmp .strdone + +.strdone: +	mov eax, 0x2000004 +	mov edi, 1 +	lea rsi, [rsp+rbx] +	mov edx, 18 +	sub rdx, rbx +	syscall + +	pop r11 +	pop rdx +	pop rcx +	pop rbx +	pop rax +	pop rsi +	pop rdi +	ret + +getc: +	push rdi +	push rsi +	push rdx +	push rcx +	push r11 +	mov eax, 0x2000003 +	xor edi, edi +	mov rsi, rsp +	mov edx, 1 +	syscall +	cmp rax, 1 +	jne .fail +	mov rax, [rsp] +.finish: +	pop r11 +	pop rcx +	pop rdx +	pop rsi +	pop rdi +	ret +.fail: +	mov rax, -1 +	jmp .finish + +_builtin_malloc: +	push rdi +	push rsi +	push rdx +	push r8 +	push r9 +	push r10 +	push r11 +	push rcx +	xor edi, edi +	mov rsi, [rsp+72] +	mov edx, 0x03 +	mov r10d, 0x1001 +	mov r8d, -1 +	xor r9d, r9d +	mov eax, 0x20000C5 +	syscall +	pop rcx +	pop r11 +	pop r10 +	pop r9 +	pop r8 +	pop rdx +	pop rsi +	pop rdi +	ret diff --git a/putint.lang b/putint.lang new file mode 100644 index 0000000..198b4ee --- /dev/null +++ b/putint.lang @@ -0,0 +1,4 @@ +func int main() { +	putint(-12345678); putc('\n'); +	return 0; +} diff --git a/putstr.lang b/putstr.lang new file mode 100644 index 0000000..b13e2c5 --- /dev/null +++ b/putstr.lang @@ -0,0 +1,43 @@ +func putstr(char[] str) { +	int i := 0; +	while (str[i] != '\0') { +		putc(str[i]); +		i = i + 1; +	} +} + +func int main() {return 0;} + + + +/* +irfunc putstr(char[] str) +  {{{(0) +    mov t5Q <- 0Q +    jmp 7 +  }}} +  {{{(7) +    mov t15Q <- t5Q +    add t15Q, 8Q +    mov t13Q <- astrQ +    add t13Q, t15Q +    load t16B <- *t13Q +    neq t16B, 0B +    jne t16B, 0Q -> 9 | 6 +  }}} +  {{{(9) +    mov t25Q <- t5Q +    add t25Q, 8Q +    mov t23Q <- astrQ +    add t23Q, t25Q +    load t26B <- *t23Q +    call putc (t26B) +    mov t30Q <- t5Q +    add t30Q, 1Q +    mov t5Q <- t30Q +    jmp 7 +  }}} +  {{{(6) +    ret +  }}} +*/ diff --git a/test.lang b/test.lang new file mode 100644 index 0000000..f03b3e3 --- /dev/null +++ b/test.lang @@ -0,0 +1,20 @@ +int glob := 100; + +func f() { +	glob = glob + 10; +} + +func g(int x , int y) { +	glob = glob + x + 2 * y; +} + +func int main() { +	int i := 0; /* block /* comments */ nest! */ +	// int[4] arr := 2; +	i = i + 1; +	f(); +	if (i > 0 && i < 10) { +		int i := 20; +		g(i , 2 * i); +	} +}  | 
