From db29f6d193988ebef66ecabdead0fd8f2ed3087d Mon Sep 17 00:00:00 2001 From: tomsmeding Date: Tue, 26 Nov 2019 21:24:25 +0100 Subject: Use Sequence-based string in VM; 50% faster lispparser.lisp --- DString.hs | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ VM.hs | 25 +++++++++++---------- lisphs.cabal | 4 ++-- 3 files changed, 86 insertions(+), 14 deletions(-) create mode 100644 DString.hs diff --git a/DString.hs b/DString.hs new file mode 100644 index 0000000..042ed09 --- /dev/null +++ b/DString.hs @@ -0,0 +1,71 @@ +-- | Some kind of dynamic string, or dequeue string, if you like, that +-- supports efficient concatenation on both sides if you don't make your +-- chunks too weird. +module DString ( + DString, + pack, unpack, singleton, length, take, drop, null, head, concat, hPutStr +) where + +import Prelude hiding (length, take, drop, null, head, concat) + +import Data.Foldable (toList) +import qualified Data.Sequence as S +import Data.Sequence (ViewL((:<))) +import qualified Data.Text.Lazy as T +import qualified Data.Text.Lazy.IO as T +import System.IO (Handle) + + +newtype DString = DString (S.Seq T.Text) + deriving (Show) + +instance Eq DString where + DString s == DString s' = T.concat (toList s) == T.concat (toList s') + +pack :: String -> DString +pack s = DString (S.singleton (T.pack s)) + +unpack :: DString -> String +unpack (DString s) = T.unpack (T.concat (toList s)) + +singleton :: Char -> DString +singleton c = DString (S.singleton (T.singleton c)) + +length :: DString -> Int +length (DString s) = fromIntegral (sum (map T.length (toList s))) + +take :: Int -> DString -> DString +take n (DString s) = + let n' = fromIntegral n + lens = S.scanl (\acc t -> acc + T.length t) 0 s + ndigits = S.length (S.takeWhileL (< n') lens) + 1 + text = T.take n' (T.concat (toList (S.take ndigits s))) + in DString (S.singleton text) + +drop :: Int -> DString -> DString +drop n (DString s) + | n <= 0 = DString s + | otherwise = + let n' = fromIntegral n + lens = S.scanl (\acc text -> acc + T.length text) 0 s + ndigits = S.length (S.takeWhileL (< n') lens) - 1 + s' = S.drop ndigits s + remain = n' - (lens `S.index` ndigits) + s'' = case S.viewl s' of + S.EmptyL -> S.empty + text :< rest -> T.drop remain text S.<| rest + in DString s'' + +null :: DString -> Bool +null (DString s) = sum (map T.length (toList s)) == 0 + +head :: DString -> Char +head (DString s) = case S.viewl s of + S.EmptyL -> error "Empty DString in DString.head" + text :< rest -> if T.length text > 0 then T.head text else DString.head (DString rest) + +concat :: [DString] -> DString +concat strs = DString (mconcat [s | DString s <- strs]) + +hPutStr :: Handle -> DString -> IO () +hPutStr h (DString s) = mapM_ (T.hPutStr h) (toList s) diff --git a/VM.hs b/VM.hs index bed0596..7812afd 100644 --- a/VM.hs +++ b/VM.hs @@ -10,6 +10,7 @@ import qualified System.IO.Error as IO -- import Debug.Trace import AST +import qualified DString as DS import Intermediate @@ -33,7 +34,7 @@ data RunValue = RVClosure Name [RunValue] | RVList [RunValue] | RVNum Int - | RVString String + | RVString DS.DString | RVQuoted RunValue | RVName Name deriving Show @@ -142,7 +143,7 @@ vmRunBuiltin state "cons" [val, RVList l] = return (RVList (val : l), state) vmRunBuiltin state "sys-open-file" [RVNum modenum, RVString path] = do let mode = [ReadMode, WriteMode] !! modenum fid = sUniq state - handle <- openFile path mode + handle <- openFile (DS.unpack path) mode return (RVNum fid, state { sHandles = Map.insert fid handle (sHandles state), sUniq = fid + 1 }) vmRunBuiltin state "sys-close-file" [RVNum fid] = do hClose (sHandles state ! fid) @@ -152,9 +153,9 @@ vmRunBuiltin state "sys-get-char" [RVNum fid] = do eof <- hIsEOF h if eof then return (RVList [], state) - else hGetChar h >>= \ch -> return (RVString [ch], state) + else hGetChar h >>= \ch -> return (RVString (DS.singleton ch), state) vmRunBuiltin state "sys-put-string" [RVNum fid, RVString str] = do - hPutStr (sHandles state ! fid) str + DS.hPutStr (sHandles state ! fid) str return (RVList [], state) vmRunBuiltin state "sys-flush" [RVNum fid] = do hFlush (sHandles state ! fid) @@ -162,14 +163,14 @@ vmRunBuiltin state "sys-flush" [RVNum fid] = do vmRunBuiltin state "sys-stdin" [] = return (RVNum (-1), state) vmRunBuiltin state "sys-stdout" [] = return (RVNum (-2), state) vmRunBuiltin state "sys-stderr" [] = return (RVNum (-3), state) -vmRunBuiltin state "length" [RVString str] = return (RVNum (length str), state) +vmRunBuiltin state "length" [RVString str] = return (RVNum (DS.length str), state) vmRunBuiltin state "substr" [RVNum idx, RVNum len, RVString str] = - let s = (if len >= 0 then take len else id) (drop idx str) + let s = (if len >= 0 then DS.take len else id) (DS.drop idx str) in return (RVString s, state) -vmRunBuiltin state "ord" [RVString str] = return (RVNum (case str of { "" -> 0; c:_ -> ord c }), state) -vmRunBuiltin state "chr" [RVNum num] = return (RVString [chr num], state) +vmRunBuiltin state "ord" [RVString str] = return (RVNum (if DS.null str then 0 else ord (DS.head str)), state) +vmRunBuiltin state "chr" [RVNum num] = return (RVString (DS.singleton (chr num)), state) vmRunBuiltin state "concat" values - | Just strings <- sequence (map fromRVString values) = return (RVString (concat strings), state) + | Just strings <- sequence (map fromRVString values) = return (RVString ({-# SCC builtin_string_concat #-} DS.concat strings), state) | otherwise = throw "Non-string arguments to 'concat'" vmRunBuiltin state "type-list?" [value] = return (RVNum (case value of { RVList _ -> 1; _ -> 0 }), state) vmRunBuiltin state "type-number?" [value] = return (RVNum (case value of { RVNum _ -> 1; _ -> 0 }), state) @@ -192,7 +193,7 @@ equalOp (RVName n) (RVName m) = Right (n == m) equalOp _ _ = Right False printshow :: RunValue -> String -printshow (RVString str) = str +printshow (RVString str) = DS.unpack str printshow (RVList values) = "[" ++ intercalate "," (map printshow values) ++ "]" printshow (RVNum i) = show i printshow (RVQuoted value) = '\'' : printshow value @@ -206,7 +207,7 @@ truthy _ = True toRunValue :: Value -> RunValue toRunValue (VList values) = RVList (map toRunValue values) toRunValue (VNum i) = RVNum i -toRunValue (VString s) = RVString s +toRunValue (VString s) = RVString (DS.pack s) toRunValue (VQuoted value) = RVQuoted (toRunValue value) toRunValue (VName name) = RVName name toRunValue _ = undefined @@ -214,6 +215,6 @@ toRunValue _ = undefined throw :: String -> IO a throw s = hPutStrLn stderr s >> IO.ioError (IO.userError kErrorExit) -fromRVString :: RunValue -> Maybe String +fromRVString :: RunValue -> Maybe DS.DString fromRVString (RVString str) = Just str fromRVString _ = Nothing diff --git a/lisphs.cabal b/lisphs.cabal index b054466..640ceee 100644 --- a/lisphs.cabal +++ b/lisphs.cabal @@ -12,5 +12,5 @@ executable lisp default-language: Haskell2010 ghc-options: -Wall -O2 build-depends: base >= 4 && < 5, - containers, filepath, mtl, parsec - other-modules: AST, Compiler, CompilerMacros, Intermediate, Optimiser, Parser, VM + containers, filepath, mtl, parsec, text + other-modules: AST, Compiler, CompilerMacros, DString, Intermediate, Optimiser, Parser, VM -- cgit v1.2.3-54-g00ecf