module X64 where import Control.Monad import Data.Functor.Identity import Data.Char import Data.Int import Data.List import Data.Maybe type Offset = Int64 class Stringifiable a where stringify :: a -> String data Register = RAX | RBX | RCX | RDX | RSI | RDI | RSP | RBP | R8 | R9 | R10 | R11 | R12 | R13 | R14 | R15 deriving (Show, Eq, Ord, Enum, Bounded) data XRef = XReg Int Register | XMem Int (Maybe Register) (Int, Register) (Maybe String) Offset | XImm Offset deriving (Show, Eq) newtype Reg = Reg XRef deriving (Show, Eq) newtype Mem = Mem XRef deriving (Show, Eq) newtype Imm = Imm XRef deriving (Show, Eq) newtype RegMem = RegMem XRef deriving (Show, Eq) newtype RegMemImm = RegMemImm XRef deriving (Show, Eq) data CondCode = CCA | CCAE | CCB | CCBE | CCC | CCE | CCG | CCGE | CCL | CCLE | CCNA | CCNAE | CCNB | CCNBE | CCNC | CCNE | CCNG | CCNGE | CCNL | CCNLE deriving (Show, Eq) data Ins = MOV RegMem RegMemImm | MOVi Reg Imm | LEA Reg Mem | MOVSX Reg RegMem | ADD RegMem RegMemImm | SUB RegMem RegMemImm | AND RegMem RegMemImm | OR RegMem RegMemImm | XOR RegMem RegMemImm | IMULDA RegMem | IMUL Reg RegMem | IMUL3 Reg RegMem Imm | MULDA RegMem | IDIVDA RegMem | DIVDA RegMem | CMP RegMem RegMem | CMPi RegMem Imm | SETCC CondCode RegMem | CALL String | PUSH RegMemImm | POP RegMem | JMP String | JCC CondCode String | RET | INT3 deriving (Show, Eq) type Func = (String, [Ins]) data Asm = Asm [Func] deriving (Show, Eq) class XRefSub a where xref :: XRef -> a instance XRefSub Reg where xref x@(XReg _ _) = Reg x xref _ = undefined instance XRefSub Mem where xref x@(XMem _ _ _ _ _) = Mem x xref _ = undefined instance XRefSub Imm where xref x@(XImm _) = Imm x xref _ = undefined instance XRefSub RegMem where xref x@(XReg _ _) = RegMem x xref x@(XMem _ _ _ _ _) = RegMem x xref x = RegMem x -- xref _ = undefined instance XRefSub RegMemImm where xref x = RegMemImm x verify :: Asm -> Either String () verify (Asm funcs) = mapM_ (\(_, inss) -> mapM_ goI inss) funcs where goI :: Ins -> Either String () goI (MOV (RegMem a) (RegMemImm b)) = ckRegMem a >> ckRegMemImm b >> ck2mem a b >> ckSizes a b goI (MOVi (Reg a) (Imm b)) = ckReg a >> ckImm b >> ckSizes64 a b goI (LEA (Reg a) (Mem b)) = ckReg a >> ckMem b >> ckSizes a b goI (MOVSX (Reg a) (RegMem b)) = ckReg a >> ckRegMem b >> ckMovsx a b goI (ADD (RegMem a) (RegMemImm b)) = ckRegMem a >> ckRegMemImm b >> ck2mem a b >> ckSizes a b goI (SUB (RegMem a) (RegMemImm b)) = ckRegMem a >> ckRegMemImm b >> ck2mem a b >> ckSizes a b goI (AND (RegMem a) (RegMemImm b)) = ckRegMem a >> ckRegMemImm b >> ck2mem a b >> ckSizes a b goI (OR (RegMem a) (RegMemImm b)) = ckRegMem a >> ckRegMemImm b >> ck2mem a b >> ckSizes a b goI (XOR (RegMem a) (RegMemImm b)) = ckRegMem a >> ckRegMemImm b >> ck2mem a b >> ckSizes a b goI (IMULDA (RegMem a)) = ckRegMem a goI (IMUL (Reg a) (RegMem b)) = ckReg a >> ckRegMem b >> ck2mem a b >> ckSizes a b goI (IMUL3 (Reg a) (RegMem b) (Imm c)) = ckReg a >> ckRegMem b >> ckImm c >> ckSizes a b >> ckSizes a c goI (MULDA (RegMem a)) = ckRegMem a goI (IDIVDA (RegMem a)) = ckRegMem a goI (DIVDA (RegMem a)) = ckRegMem a goI (CMP (RegMem a) (RegMem b)) = ckRegMem a >> ckRegMem b >> ck2mem a b >> ckSizes a b goI (CMPi (RegMem a) (Imm b)) = ckRegMem a >> ckImm b >> ckSizes a b goI (SETCC _ (RegMem a)) = ckRegMem a >> ckSizeEq 1 a goI (CALL s) = when (null s) $ Left "Empty call target" goI (PUSH (RegMemImm a)) = ckRegMemImm a goI (POP (RegMem a)) = ckRegMem a goI (JMP s) = when (null s) $ Left "Empty jump target" goI (JCC _ s) = when (null s) $ Left "Empty jcc target" goI RET = return () goI INT3 = return () ckReg (XReg _ _) = return () ckReg _ = Left "Argument is not a Reg" ckMem (XMem _ _ _ _ _) = return () ckMem _ = Left "Argument is not a Mem" ckImm (XImm _) = return () ckImm _ = Left "Argument is not an Imm" ckRegMem (XReg _ _) = return () ckRegMem (XMem _ _ _ _ _) = return () ckRegMem _ = Left "Argument is not a Reg or a Mem" ckRegMemImm _ = return () ck2mem x@(XMem _ _ _ _ _) y@(XMem _ _ _ _ _) = Left $ "Invalid double-memory operands: " ++ show x ++ "; " ++ show y ck2mem _ _ = return () ckSizes64 x@(XReg a _) y@(XReg b _) = when (a /= b) $ Left $ "Inconsistent operand sizes: " ++ show x ++ "; " ++ show y ckSizes64 x@(XReg a _) y@(XMem b _ _ _ _) = when (a /= b) $ Left $ "Inconsistent operand sizes: " ++ show x ++ "; " ++ show y ckSizes64 x@(XMem a _ _ _ _) y@(XReg b _) = when (a /= b) $ Left $ "Inconsistent operand sizes: " ++ show x ++ "; " ++ show y ckSizes64 x@(XMem a _ _ _ _) y@(XMem b _ _ _ _) = when (a /= b) $ Left $ "Inconsistent operand sizes: " ++ show x ++ "; " ++ show y ckSizes64 x@(XReg a _) y@(XImm v) = when (not $ fitsIn v a) $ Left $ "Immediate too large: " ++ show x ++ "; " ++ show y ckSizes64 x@(XMem a _ _ _ _) y@(XImm v) = when (not $ fitsIn v a) $ Left $ "Immediate too large: " ++ show x ++ "; " ++ show y ckSizes64 _ _ = undefined ckSizes a b@(XImm v) = when (v >= 2 ^ (32::Int)) (Left "Immediate too large") >> ckSizes64 a b ckSizes a b = ckSizes64 a b ckMovsx (XReg a _) (XReg b _) = when (a <= b) $ Left "MOVSX with shrinking operands" ckMovsx (XReg a _) (XMem b _ _ _ _) = when (a <= b) $ Left "MOVSX with shrinking operands" ckMovsx (XMem a _ _ _ _) (XReg b _) = when (a <= b) $ Left "MOVSX with shrinking operands" ckMovsx (XMem a _ _ _ _) (XMem b _ _ _ _) = when (a <= b) $ Left "MOVSX with shrinking operands" ckMovsx _ _ = undefined ckSizeEq sz (XReg a _) = when (a /= sz) $ Left "Invalid operand size" ckSizeEq sz (XMem a _ _ _ _) = when (a /= sz) $ Left "Invalid operand size" ckSizeEq sz (XImm v) = when (v >= 2 ^ (8 * sz)) $ Left "Invalid operand size (immediate)" fitsIn v sz = (fromIntegral v :: Integer) < ((2 :: Integer) ^ (8 * sz)) instance Stringifiable XRef where stringify (XReg sz reg) = let n = fromEnum reg in case n of _ | n < 4 -> erPrefix $ lxSuffix $ [chr (n + ord 'a')] | n < 8 -> erPrefix $ lSuffix $ ["si", "di", "sp", "bp"] !! (n - 4) | otherwise -> bwdSuffix $ 'r' : show n where erPrefix s = case sz of 4 -> 'e' : s 8 -> 'r' : s _ -> s lxSuffix s = case sz of 1 -> s ++ "l" _ -> s ++ "x" lSuffix s = case sz of 1 -> s ++ "l" _ -> s bwdSuffix s = case sz of 1 -> s ++ "b" 2 -> s ++ "w" 4 -> s ++ "d" _ -> s stringify (XMem _ _ (mult, _) _ _) | not (mult `elem` [0,1,2,4,8]) = error $ "Register multiplier has invalid value " ++ show mult ++ " in XMem" stringify x@(XMem sz _ _ _ _) = szword sz ++ " " ++ stringify_only_xmem_brackets x where szword 1 = "byte" szword 2 = "word" szword 4 = "dword" szword 8 = "qword" szword s = error $ "Invalid (szword " ++ show s ++ ") in stringify XMem" stringify (XImm imm) = show imm stringify_only_xmem_brackets :: XRef -> String stringify_only_xmem_brackets (XMem _ mr pair lab off) = let res = intercalate "+" (catMaybes [goR1 mr, goPair pair, goLab lab]) ++ goOff off in if null res then "[0]" else "[" ++ res ++ "]" where goR1 Nothing = Nothing goR1 (Just r) = Just $ stringify (XReg 8 r) goPair (0, _) = Nothing goPair (mult, r) = Just $ show mult ++ "*" ++ stringify (XReg 8 r) goLab = fmap ('$' :) goOff o | o > 0 = '+' : show o | o < 0 = show o | otherwise = "" stringify_only_xmem_brackets _ = undefined instance Stringifiable Reg where stringify (Reg x) = stringify x instance Stringifiable Mem where stringify (Mem x) = stringify x instance Stringifiable Imm where stringify (Imm x) = stringify x instance Stringifiable RegMem where stringify (RegMem x) = stringify x instance Stringifiable RegMemImm where stringify (RegMemImm x) = stringify x instance Stringifiable CondCode where stringify CCA = "a" stringify CCAE = "ae" stringify CCB = "b" stringify CCBE = "be" stringify CCC = "c" stringify CCE = "e" stringify CCG = "g" stringify CCGE = "ge" stringify CCL = "l" stringify CCLE = "le" stringify CCNA = "na" stringify CCNAE = "nae" stringify CCNB = "nb" stringify CCNBE = "nbe" stringify CCNC = "nc" stringify CCNE = "ne" stringify CCNG = "ng" stringify CCNGE = "nge" stringify CCNL = "nl" stringify CCNLE = "nle" instance Stringifiable Ins where stringify (MOV a b) = "mov " ++ stringify a ++ ", " ++ stringify b stringify (MOVi a b) = "mov " ++ stringify a ++ ", " ++ stringify b stringify (LEA a (Mem b)) = "lea " ++ stringify a ++ ", " ++ stringify_only_xmem_brackets b stringify (MOVSX a b@(RegMem bx)) = case compare (xrefGetSize bx) 4 of EQ -> "movsxd " ++ stringify a ++ ", " ++ stringify b LT -> "movsx " ++ stringify a ++ ", " ++ stringify b GT -> undefined stringify (ADD a b) = "add " ++ stringify a ++ ", " ++ stringify b stringify (SUB a b) = "sub " ++ stringify a ++ ", " ++ stringify b stringify (AND a b) = "and " ++ stringify a ++ ", " ++ stringify b stringify (OR a b) = "or " ++ stringify a ++ ", " ++ stringify b stringify (XOR a b) = "xor " ++ stringify a ++ ", " ++ stringify b stringify (IMULDA a) = "imul " ++ stringify a stringify (IMUL a b) = "imul " ++ stringify a ++ ", " ++ stringify b stringify (IMUL3 a b c) = "imul " ++ stringify a ++ ", " ++ stringify b ++ ", " ++ stringify c stringify (MULDA a) = "mul " ++ stringify a stringify (IDIVDA a) = "idiv " ++ stringify a stringify (DIVDA a) = "div " ++ stringify a stringify (CMP a b) = "cmp " ++ stringify a ++ ", " ++ stringify b stringify (CMPi a b) = "cmp " ++ stringify a ++ ", " ++ stringify b stringify (SETCC cc a) = "set" ++ stringify cc ++ " " ++ stringify a stringify (CALL a) = "call $" ++ a stringify (PUSH a) = "push " ++ stringify a stringify (POP a) = "pop " ++ stringify a stringify (JMP s) = "jmp $" ++ s stringify (JCC cc s) = "j" ++ stringify cc ++ " $" ++ s stringify RET = "ret" stringify INT3 = "int3" instance Stringifiable Asm where stringify (Asm funcs) = intercalate "\n" $ map goF funcs where goF :: (String, [Ins]) -> String goF (name, inss) = "$" ++ name ++ ":\n" ++ unlines (map (('\t' :) . stringify) inss) xrefGetSize :: XRef -> Int xrefGetSize (XReg s _) = s xrefGetSize (XMem s _ _ _ _) = s xrefGetSize (XImm _) = undefined xrefSetSize :: Int -> XRef -> XRef xrefSetSize sz (XReg _ r) = XReg sz r xrefSetSize sz (XMem _ a b c d) = XMem sz a b c d xrefSetSize _ x@(XImm _) = x offsetXMem :: Offset -> XRef -> XRef offsetXMem off (XMem sz mr tup lbl o) = XMem sz mr tup lbl (o + off) offsetXMem _ _ = undefined isXReg :: XRef -> Bool isXReg (XReg _ _) = True isXReg _ = False isXMem :: XRef -> Bool isXMem (XMem _ _ _ _ _) = True isXMem _ = False isXImm :: XRef -> Bool isXImm (XImm _) = True isXImm _ = False xrefMapM :: Monad m => (XRef -> m XRef) -> Ins -> m Ins xrefMapM f (MOV (RegMem x) (RegMemImm y)) = MOV <$> (RegMem <$> f x) <*> (RegMemImm <$> f y) xrefMapM f (MOVi (Reg x) (Imm y)) = MOVi <$> (Reg <$> f x) <*> (Imm <$> f y) xrefMapM f (LEA (Reg x) (Mem y)) = LEA <$> (Reg <$> f x) <*> (Mem <$> f y) xrefMapM f (MOVSX (Reg x) (RegMem y)) = MOVSX <$> (Reg <$> f x) <*> (RegMem <$> f y) xrefMapM f (ADD (RegMem x) (RegMemImm y)) = ADD <$> (RegMem <$> f x) <*> (RegMemImm <$> f y) xrefMapM f (SUB (RegMem x) (RegMemImm y)) = SUB <$> (RegMem <$> f x) <*> (RegMemImm <$> f y) xrefMapM f (AND (RegMem x) (RegMemImm y)) = AND <$> (RegMem <$> f x) <*> (RegMemImm <$> f y) xrefMapM f (OR (RegMem x) (RegMemImm y)) = OR <$> (RegMem <$> f x) <*> (RegMemImm <$> f y) xrefMapM f (XOR (RegMem x) (RegMemImm y)) = XOR <$> (RegMem <$> f x) <*> (RegMemImm <$> f y) xrefMapM f (IMULDA (RegMem x)) = IMULDA <$> (RegMem <$> f x) xrefMapM f (IMUL (Reg x) (RegMem y)) = IMUL <$> (Reg <$> f x) <*> (RegMem <$> f y) xrefMapM f (IMUL3 (Reg x) (RegMem y) (Imm z)) = IMUL3<$>(Reg <$> f x)<*>(RegMem <$> f y)<*>(Imm <$> f z) xrefMapM f (MULDA (RegMem x)) = MULDA <$> (RegMem <$> f x) xrefMapM f (IDIVDA (RegMem x)) = IDIVDA <$> (RegMem <$> f x) xrefMapM f (DIVDA (RegMem x)) = DIVDA <$> (RegMem <$> f x) xrefMapM f (CMP (RegMem x) (RegMem y)) = CMP <$> (RegMem <$> f x) <*> (RegMem <$> f y) xrefMapM f (CMPi (RegMem x) (Imm y)) = CMPi <$> (RegMem <$> f x) <*> (Imm <$> f y) xrefMapM f (SETCC c (RegMem x)) = SETCC c <$> (RegMem <$> f x) xrefMapM _ i@(CALL _) = return i xrefMapM f (PUSH (RegMemImm x)) = PUSH <$> (RegMemImm <$> f x) xrefMapM f (POP (RegMem x)) = POP <$> (RegMem <$> f x) xrefMapM _ i@(JMP _) = return i xrefMapM _ i@(JCC _ _) = return i xrefMapM _ i@RET = return i xrefMapM _ i@INT3 = return i xrefMap :: (XRef -> XRef) -> Ins -> Ins xrefMap f i = runIdentity $ xrefMapM (return . f) i