From 3d5d85e00f2a81efb62bb17f8e5db63fe5a49a61 Mon Sep 17 00:00:00 2001 From: tomsmeding Date: Mon, 28 Aug 2017 14:34:24 +0200 Subject: Optimisations (and array shift) Array points at data, not size stack alignment cross-platform liblang fast strlen --- BuildIR.hs | 30 +++++------ CodeGen.hs | 10 +++- Main.hs | 2 - X64.hs | 29 ++++++----- X64Optimiser.hs | 63 +++++++++++++++++++++-- liblang.asm | 154 ++++++++++++++++++++++++++++++++++++++++++++++++++++---- 6 files changed, 245 insertions(+), 43 deletions(-) diff --git a/BuildIR.hs b/BuildIR.hs index 5b12dc2..9eb4d62 100644 --- a/BuildIR.hs +++ b/BuildIR.hs @@ -338,20 +338,20 @@ convertExpression (ESubscript arr sub t) nextnext = do subref <- convertExpression sub bl3 switchBlock bl3 offref <- genTemp (refSize subref) - off8ref <- genTemp (refSize subref) elemptr <- genTemp (refSize arrref) + arrszptr <- genTemp (sizeof TInt) arrsz <- genTemp (sizeof TInt) errbl <- gets errorBlock - addIns $ ILoad arrsz arrref + addIns $ IAri ASub arrszptr arrref (Constant (refSize arrref) (fromIntegral $ sizeof TInt)) + addIns $ ILoad arrsz arrszptr bl4 <- newBlockNoSwitch setTerm $ IJcc CUGeq subref arrsz errbl bl4 switchBlock bl4 addIns $ IAri AMul offref subref (Constant (refSize subref) (fromIntegral elemsz)) - addIns $ IAri AAdd off8ref offref (Constant (refSize subref) (fromIntegral $ sizeof TInt)) - addIns $ IAri AAdd elemptr arrref off8ref + addIns $ IAri AAdd elemptr arrref offref ref <- genTemp elemsz addIns $ ILoad ref elemptr setTerm $ IJmp nextnext @@ -375,13 +375,15 @@ convertExpression (ENew t sze) nextnext = do bl2 <- newBlockNoSwitch szref <- convertExpression sze bl2 switchBlock bl2 + ref' <- genTemp (sizeof $ TArr t Nothing) ref <- genTemp (sizeof $ TArr t Nothing) argref' <- genTemp (sizeof TInt) argref <- genTemp (sizeof TInt) addIns $ IAri AMul argref' szref (Constant (sizeof TInt) (fromIntegral $ sizeof t)) addIns $ IAri AAdd argref argref' (Constant (sizeof TInt) (fromIntegral $ sizeof TInt)) - addIns $ ICallr ref "_builtin_malloc" [argref] - addIns $ IStore ref szref + addIns $ ICallr ref' "_builtin_malloc" [argref] + addIns $ IStore ref' szref + addIns $ IAri AAdd ref ref' (Constant (refSize ref') (fromIntegral $ sizeof TInt)) setTerm $ IJmp nextnext return ref @@ -400,20 +402,20 @@ convertAsExpression (AESubscript ae2 expr mrt) valueref nextnext = do bl2 <- newBlockNoSwitch subref <- convertExpression expr bl2 switchBlock bl2 - offref' <- genTemp (sizeof TInt) offref <- genTemp (sizeof TInt) elemptr <- genTemp (sizeof TInt) + arrszptr <- genTemp (sizeof TInt) arrsz <- genTemp (sizeof TInt) errbl <- gets errorBlock - addIns $ ILoad arrsz ae2ref + addIns $ IAri ASub arrszptr ae2ref (Constant (refSize ae2ref) (fromIntegral $ sizeof TInt)) + addIns $ ILoad arrsz arrszptr bl3 <- newBlockNoSwitch setTerm $ IJcc CUGeq subref arrsz errbl bl3 switchBlock bl3 - addIns $ IAri AMul offref' subref (Constant (sizeof TInt) (fromIntegral elemsz)) - addIns $ IAri AAdd offref offref' (Constant (sizeof TInt) (fromIntegral $ sizeof TInt)) + addIns $ IAri AMul offref subref (Constant (refSize subref) (fromIntegral elemsz)) addIns $ IAri AAdd elemptr ae2ref offref addIns $ IStore elemptr valueref setTerm $ IJmp nextnext @@ -434,20 +436,20 @@ convertAsExpression (AESubscript ae2 expr mrt) valueref nextnext = do bl2 <- newBlockNoSwitch eref <- convertExpression expr' bl2 switchBlock bl2 - offref' <- genTemp (sizeof TInt) offref <- genTemp (sizeof TInt) elemptr <- genTemp (sizeof TInt) + arrszptr <- genTemp (sizeof TInt) arrsz <- genTemp (sizeof TInt) errbl <- gets errorBlock - addIns $ ILoad arrsz ref + addIns $ IAri ASub arrszptr ref (Constant (refSize ref) (fromIntegral $ sizeof TInt)) + addIns $ ILoad arrsz arrszptr bl3 <- newBlockNoSwitch setTerm $ IJcc CUGeq eref arrsz errbl bl3 switchBlock bl3 - addIns $ IAri AMul offref' eref (Constant (sizeof TInt) (fromIntegral elemsz)) - addIns $ IAri AAdd offref offref' (Constant (sizeof TInt) (fromIntegral $ sizeof TInt)) + addIns $ IAri AMul offref eref (Constant (sizeof TInt) (fromIntegral elemsz)) addIns $ IAri AAdd elemptr ref offref dstref <- genTemp elemsz addIns $ ILoad dstref elemptr diff --git a/CodeGen.hs b/CodeGen.hs index 8eee371..bf3e477 100644 --- a/CodeGen.hs +++ b/CodeGen.hs @@ -64,6 +64,7 @@ codegen (IRProgram vars funcs) = do x64 <- execCGMonad $ mapM_ codegenFunc funcs -- traceShowM x64 X64.verify x64 + -- traceM $ X64.stringify x64 varcg <- liftM unlines $ mapM codegenVar vars x64opt <- x64Optimise x64 return $ "extern putc, putint, getc, exit, _builtin_malloc, _builtin_outofbounds\n" ++ @@ -109,6 +110,7 @@ codegenFunc (IRFunc _ name al bbs sid) = do traceM $ "ALLOCATION: " ++ show allocation let nsaves = length usedregs + alignoff = if odd nsaves then 8 else 0 allocationXref = flip Map.mapWithKey allocation $ \ref alloc -> case alloc of AllocReg reg -> XReg (fromIntegral $ refSize ref) reg AllocMem -> XMem (fromIntegral $ refSize ref) @@ -117,7 +119,8 @@ codegenFunc (IRFunc _ name al bbs sid) = do allocmap = foldl inserter allocationXref (zip al [0::Int ..]) where inserter m ((t, n), i) = - let offset = fromIntegral spillsz + 8 * nsaves + 8 {- rbp -} + 8 {- ret addr -} + 8 * i + let offset = fromIntegral spillsz + alignoff + 8 * nsaves + + 8 {- rbp -} + 8 {- ret addr -} + 8 * i in Map.insert (Argument (sizeof t) n) (XMem (fromIntegral $ sizeof t) (Just RSP) (0, RAX) Nothing @@ -128,6 +131,7 @@ codegenFunc (IRFunc _ name al bbs sid) = do addIns $ PUSH (xref $ XReg 8 RBP) addIns $ MOV (xref $ XReg 8 RBP) (xref $ XReg 8 RSP) forM_ usedregs $ \reg -> addIns $ PUSH (xref $ XReg 8 reg) + when (odd $ length usedregs) $ addIns $ SUB (xref $ XReg 8 RSP) (xref $ XImm 8) when (spillsz /= 0) $ addIns $ SUB (xref $ XReg 8 RSP) (xref $ XImm $ fromIntegral spillsz) setRegsToRestore usedregs setSpillSize spillsz @@ -291,6 +295,7 @@ codegenIns m (IAri at d s1 s2) = case arithTypeToCondCode at of s1m = mkxref s1 m s2m = mkxref s2 m codegenIns m (ICall n rs) = do + when (odd $ length rs) $ addIns $ SUB (xref $ XReg 8 RSP) (xref $ XImm 8) forM_ (zip (reverse rs) [1::Int ..]) $ \(r, i) -> let sz = fromIntegral $ refSize r src = (mkxref r m) @@ -304,6 +309,7 @@ codegenIns m (ICall n rs) = do when (length rs > 0) $ addIns $ SUB (xref $ XReg 8 RSP) (xref $ XImm (fromIntegral $ 8 * length rs)) addIns $ CALL n when (length rs > 0) $ addIns $ ADD (xref $ XReg 8 RSP) (xref $ XImm (fromIntegral $ 8 * length rs)) + when (odd $ length rs) $ addIns $ ADD (xref $ XReg 8 RSP) (xref $ XImm 8) codegenIns m (ICallr d n rs) = do codegenIns m (ICall n rs) addIns $ mkmov (mkxref d m) (XReg (fromIntegral $ refSize d) RAX) @@ -380,6 +386,7 @@ codegenTerm _ IRet = do spillsz <- gets spillSize when (spillsz /= 0) $ addIns $ ADD (xref $ XReg 8 RSP) (xref $ XImm $ fromIntegral spillsz) usedregs <- gets regsToRestore + when (odd $ length usedregs) $ addIns $ ADD (xref $ XReg 8 RSP) (xref $ XImm 8) forM_ (reverse usedregs) $ \reg -> addIns $ POP (xref $ XReg 8 reg) addIns $ mkmov (XReg 8 RSP) (XReg 8 RBP) addIns $ POP (xref $ XReg 8 RBP) @@ -389,6 +396,7 @@ codegenTerm m (IRetr r) = do spillsz <- gets spillSize when (spillsz /= 0) $ addIns $ ADD (xref $ XReg 8 RSP) (xref $ XImm $ fromIntegral spillsz) usedregs <- gets regsToRestore + when (odd $ length usedregs) $ addIns $ ADD (xref $ XReg 8 RSP) (xref $ XImm 8) forM_ (reverse usedregs) $ \reg -> addIns $ POP (xref $ XReg 8 reg) addIns $ mkmov (XReg 8 RSP) (XReg 8 RBP) addIns $ POP (xref $ XReg 8 RBP) diff --git a/Main.hs b/Main.hs index fb557e8..b8c50e6 100644 --- a/Main.hs +++ b/Main.hs @@ -8,7 +8,6 @@ import Debug.Trace import BuildIR import CodeGen import Defs -import InternStrings import Optimiser import Pretty import ProgramParser @@ -34,7 +33,6 @@ performCompile source = do let eres = return source >>= parseProgram "Parse error" >>= typeCheck "Type error" - -- >>= internStrings "Error interning strings" >>= buildIR "IR building error" >>= optimise "Error while optimising" >>= return . traceShowId diff --git a/X64.hs b/X64.hs index b71012a..26c35dd 100644 --- a/X64.hs +++ b/X64.hs @@ -184,26 +184,31 @@ instance Stringifiable XRef where stringify (XMem _ _ (mult, _) _ _) | not (mult `elem` [0,1,2,4,8]) = error $ "Register multiplier has invalid value " ++ show mult ++ " in XMem" - stringify (XMem sz mr pair lab off) = - let res = intercalate "+" (catMaybes [goR1 mr, goPair pair, goLab lab]) ++ goOff off - in szword sz ++ " " ++ if null res then "[0]" else "[" ++ res ++ "]" + stringify x@(XMem sz _ _ _ _) = szword sz ++ " " ++ stringify_only_xmem_brackets x where szword 1 = "byte" szword 2 = "word" szword 4 = "dword" szword 8 = "qword" szword _ = undefined - goR1 Nothing = Nothing - goR1 (Just r) = Just $ stringify (XReg 8 r) - goPair (0, _) = Nothing - goPair (mult, r) = Just $ show mult ++ "*" ++ stringify (XReg 8 r) - goLab = id - goOff o | o > 0 = '+' : show o - | o < 0 = show o - | otherwise = "" stringify (XImm imm) = show imm +stringify_only_xmem_brackets :: XRef -> String +stringify_only_xmem_brackets (XMem _ mr pair lab off) = + let res = intercalate "+" (catMaybes [goR1 mr, goPair pair, goLab lab]) ++ goOff off + in if null res then "[0]" else "[" ++ res ++ "]" + where + goR1 Nothing = Nothing + goR1 (Just r) = Just $ stringify (XReg 8 r) + goPair (0, _) = Nothing + goPair (mult, r) = Just $ show mult ++ "*" ++ stringify (XReg 8 r) + goLab = id + goOff o | o > 0 = '+' : show o + | o < 0 = show o + | otherwise = "" +stringify_only_xmem_brackets _ = undefined + instance Stringifiable Reg where stringify (Reg x) = stringify x instance Stringifiable Mem where stringify (Mem x) = stringify x instance Stringifiable Imm where stringify (Imm x) = stringify x @@ -235,7 +240,7 @@ instance Stringifiable CondCode where instance Stringifiable Ins where stringify (MOV a b) = "mov " ++ stringify a ++ ", " ++ stringify b stringify (MOVi a b) = "mov " ++ stringify a ++ ", " ++ stringify b - stringify (LEA a b) = "lea " ++ stringify a ++ ", " ++ stringify b + stringify (LEA a (Mem b)) = "lea " ++ stringify a ++ ", " ++ stringify_only_xmem_brackets b stringify (MOVSX a b@(RegMem bx)) = case compare (xrefGetSize bx) 4 of EQ -> "movsxd " ++ stringify a ++ ", " ++ stringify b LT -> "movsx " ++ stringify a ++ ", " ++ stringify b diff --git a/X64Optimiser.hs b/X64Optimiser.hs index fa5d113..195389f 100644 --- a/X64Optimiser.hs +++ b/X64Optimiser.hs @@ -1,6 +1,7 @@ module X64Optimiser(x64Optimise) where import Data.List +import Data.Maybe import Defs import X64 @@ -9,7 +10,10 @@ import X64 x64Optimise :: Asm -> Error Asm x64Optimise asm = return $ - funcopt optCoalesceInstructions $ + funcopt optSimpleInstructions $ + funcopt optDoubleAdd $ + funcopt optMergeRSP $ + funcopt optMergeRSP $ -- #HACK (sometimes needed to eliminate all rsp arithmetic) optUnnecessaryJumps $ funcopt optSimpleInstructions $ asm @@ -35,10 +39,12 @@ optSimpleInstructions (name, inss) = (name, concat $ map goI inss) goI (MOVi (Reg (XReg 8 r)) (Imm (XImm 0))) = [XOR (RegMem (XReg 4 r)) (RegMemImm (XReg 4 r))] goI (MOVi (Reg a) (Imm (XImm 0))) = [XOR (RegMem a) (RegMemImm a)] goI (MOVSX (Reg a) (RegMem b)) | a == b = [] + goI (ADD _ (RegMemImm (XImm 0))) = [] + goI (SUB _ (RegMemImm (XImm 0))) = [] goI ins = [ins] -optCoalesceInstructions :: Func -> Func -optCoalesceInstructions (name, inss) = (name, go inss) +optMergeRSP :: Func -> Func +optMergeRSP (name, inss) = (name, go inss) where go :: [Ins] -> [Ins] go [] = [] @@ -76,3 +82,54 @@ optCoalesceInstructions (name, inss) = (name, go inss) XMem _ _ (_, RSP) _ _ -> Nothing x@(XReg _ _) -> Just x x@(XMem _ _ _ _ _) -> Just x + +optDoubleAdd :: Func -> Func +optDoubleAdd (name, inss) = (name, go inss) + where + go :: [Ins] -> [Ins] + go [] = [] + go (add@(ADD (RegMem xreg@(XReg _ xregReg)) (RegMemImm (XImm _))) : rest) = + let midx = flip findIndex rest $ \ins -> case ins of + ADD (RegMem xreg2@(XReg _ _)) (RegMemImm (XImm _)) | xreg == xreg2 -> True + SUB (RegMem xreg2@(XReg _ _)) (RegMemImm (XImm _)) | xreg == xreg2 -> True + _ -> False + in case midx of + Nothing -> add : go rest + Just idx -> if all (canSkip xregReg) (take idx rest) + then go $ merge add (rest !! idx) : take idx rest ++ drop (idx + 1) rest + else add : go rest + go (ins : rest) = ins : go rest + + canSkip :: Register -> Ins -> Bool + canSkip _ (CALL _) = False + canSkip _ (JMP _) = False + canSkip _ (JCC _ _) = False + canSkip _ RET = False + canSkip reg ins = + isJust $ xrefMapM (\y -> if y `containsReg` reg then Nothing else Just y) ins + + containsReg :: XRef -> Register -> Bool + containsReg (XReg _ r) reg | r == reg = True + containsReg (XMem _ (Just r) _ _ _) reg | r == reg = True + containsReg (XMem _ _ (s, r) _ _) reg | s /= 0 && r == reg = True + containsReg _ _ = False + + merge :: Ins -> Ins -> Ins + merge ins1 ins2 = + let e1 = effectOf ins1 + e2 = effectOf ins2 + dst1 = destOf ins1 + dst2 = destOf ins2 + in if dst1 == dst2 + then ADD (RegMem dst1) (RegMemImm $ XImm $ e1 + e2) + else undefined + + effectOf :: Ins -> Offset + effectOf (ADD _ (RegMemImm (XImm i))) = i + effectOf (SUB _ (RegMemImm (XImm i))) = -i + effectOf _ = undefined + + destOf :: Ins -> XRef + destOf (ADD (RegMem d) _) = d + destOf (SUB (RegMem d) _) = d + destOf _ = undefined diff --git a/liblang.asm b/liblang.asm index 3ae345d..9cd905c 100644 --- a/liblang.asm +++ b/liblang.asm @@ -1,16 +1,77 @@ -SYS_EXIT equ 0x2000001 ;code -SYS_FORK equ 0x2000002 ;-- -SYS_READ equ 0x2000003 ;fd, buf, len -SYS_WRITE equ 0x2000004 ;fd, buf, len -SYS_MMAP equ 0x20000C5 ;addr, len, prot, flags, fd, offset +; TAKEN FROM https://github.com/davidad/asm_concurrency/blob/master/os_dependent_stuff.asm +; syscalls +%ifidn __OUTPUT_FORMAT__,elf64 +; http://lxr.linux.no/linux+v3.13.5/arch/x86/syscalls/syscall_64.tbl + %define SYS_READ 0 + %define SYS_OPEN 2 + %define SYS_WRITE 1 + %define SYS_MMAP 9 + %define SYS_FTRUNCATE 77 + %define SYS_PWRITE 18 + %define SYS_FORK 57 + %define SYS_WAITID 247 + %define SYS_EXIT 60 +%elifidn __OUTPUT_FORMAT__,macho64 +; http://www.opensource.apple.com/source/xnu/xnu-1456.1.26/bsd/kern/syscalls.master + %define SYS_READ 0x2000003 + %define SYS_OPEN 0x2000005 + %define SYS_WRITE 0x2000004 + %define SYS_MMAP 0x20000C5 + %define SYS_FTRUNCATE 0x20000C9 + %define SYS_PWRITE 0x200009A + %define SYS_FORK 0x2000002 + %define SYS_WAITID 0x20000AD + %define SYS_EXIT 0x2000001 +%endif -global start, putc, putint, getc, exit, _builtin_malloc, _builtin_outofbounds +; mmap() and mprotect() flags +%ifidn __OUTPUT_FORMAT__,elf64 +; http://lxr.linux.no/linux+v3.13.5/include/uapi/asm-generic/mman-common.h + %define MAP_SHARED 0x01 + %define MAP_PRIVATE 0x02 + %define MAP_FIXED 0x10 + %define MAP_ANON 0x20 + %define PROT_NONE 0x0 + %define PROT_READ 0x1 + %define PROT_WRITE 0x2 + %define PROT_EXEC 0x4 + %define PROT_SEM 0x8 + %define PROT_GROWSDOWN 0x01000000 + %define PROT_GROWSUP 0x02000000 +%elifidn __OUTPUT_FORMAT__,macho64 +; http://www.opensource.apple.com/source/xnu/xnu-2050.18.24/bsd/sys/mman.h + %define MAP_SHARED 0x001 + %define MAP_PRIVATE 0x002 + %define MAP_FIXED 0x010 + %define MAP_RENAME 0x020 + %define MAP_NORESERVE 0x040 + %define MAP_INHERIT 0x080 + %define MAP_NOEXTEND 0x100 + %define MAP_SEMAPHORE 0x200 + %define MAP_NOCACHE 0x400 + %define MAP_JIT 0x800 + %define MAP_FILE 0x0000 + %define MAP_ANON 0x1000 + %define PROT_NONE 0x0 + %define PROT_READ 0x1 + %define PROT_WRITE 0x2 + %define PROT_EXEC 0x4 +%endif + +%define STDIN_FILENO 0 +%define STDOUT_FILENO 1 +%define STDERR_FILENO 2 + + +global start, _start, putc, putint, getc, exit, _builtin_malloc, _builtin_outofbounds default rel extern main section .text start: +_start: + and rsp, -16 ; make stack 16-byte aligned call main mov rdi, rax mov eax, SYS_EXIT @@ -25,7 +86,7 @@ putc: push rcx push r11 mov eax, SYS_WRITE - mov edi, 1 + mov edi, STDOUT_FILENO lea rsi, [rsp+56] mov edx, edi syscall @@ -91,7 +152,7 @@ getc: push rcx push r11 mov eax, SYS_READ - xor edi, edi + mov edi, STDIN_FILENO mov rsi, rsp mov edx, 1 syscall @@ -115,6 +176,73 @@ exit: syscall jmp $ +strlen: + push rbx + push rcx + push rdx + push rdi + push rsi + mov rsi, [rsp+8] ; string data pointer + mov rcx, [rsi-8] ; array size + xor edx, edx ; string walker + cmp rcx, 16 + jb .short + + movdqa [rsp], xmm1 + movdqa [rsp-16], xmm2 + + mov rax, rcx ; now array size + lea rbx, [rax-16] + pxor xmm2, xmm2 +.lp: + movdqu xmm1, [rsi+rdx] + pcmpistri xmm1, xmm2, 0x08 + js .nullfound + add rdx, 16 + cmp rdx, rbx + jbe .lp + + movdqa xmm2, [rsp-16] + movdqa xmm1, [rsp] + + cmp rdx, rax + je .nonull + + mov rcx, rax + sub rcx, rdx + +.short: + mov al, [rsi+rdx] + inc rdx + test al, al + loopnz .short + jnz .nonull + lea rax, [rdx-1] + pop rsi + pop rdi + pop rdx + pop rcx + pop rbx + ret + +.nullfound: + add rdx, rcx + mov rax, rdx + pop rsi + pop rdi + pop rdx + pop rcx + pop rbx + ret + +.nonull: + mov edi, STDERR_FILENO + lea rsi, [nonull_msg] + mov edx, nonull_msg.len + mov eax, SYS_WRITE + syscall + jmp exit255 + _builtin_malloc: push rdi push rsi @@ -126,12 +254,13 @@ _builtin_malloc: push rcx xor edi, edi mov rsi, [rsp+72] - mov edx, 0x03 - mov r10d, 0x1001 + mov edx, PROT_READ | PROT_WRITE + mov r10d, MAP_ANON | MAP_PRIVATE mov r8d, -1 xor r9d, r9d mov eax, SYS_MMAP syscall + add rax, 8 pop rcx pop r11 pop r10 @@ -143,11 +272,12 @@ _builtin_malloc: ret _builtin_outofbounds: - mov edi, 2 + mov edi, STDERR_FILENO lea rsi, [outofbounds_msg] mov rdx, outofbounds_msg.len mov eax, SYS_WRITE syscall +exit255: mov edi, 255 mov eax, SYS_EXIT syscall @@ -157,3 +287,5 @@ _builtin_outofbounds: section .data outofbounds_msg: db "Runtime Error: Out-of-bounds array access detected", 10 .len: equ $ - outofbounds_msg +nonull_msg: db "Runtime Error: No null byte found in string", 10 +.len: equ $ - nonull_msg -- cgit v1.2.3-54-g00ecf