From 42b8b5fbcbe02b02878f8f6e2b98aafc713204be Mon Sep 17 00:00:00 2001 From: Tom Smeding Date: Wed, 1 Apr 2026 13:59:42 +0200 Subject: Cache, drop chronos --- TODO.txt | 1 - aratamete-ircbrowse.cabal | 8 +-- src/Cache.hs | 122 +++++++++++++++------------------------------- src/Index.hs | 69 ++++++++++++++------------ src/Mmap.hs | 8 +-- src/Util.hs | 5 +- src/ZNC.hs | 61 ++++++++++++++++++----- 7 files changed, 139 insertions(+), 135 deletions(-) diff --git a/TODO.txt b/TODO.txt index 1f12c0a..f0caf31 100644 --- a/TODO.txt +++ b/TODO.txt @@ -3,6 +3,5 @@ - Implement calendar view - Ensure the server doesn't crash and burn when you `for i in (seq 1 1100); curl -s 'http://localhost:8000/log/fn-haskell?page='$i | head -1 &; end; wait` -- Check if we still need chronos or can drop it to drop aeson (potentially) - Reimplement mustache to drop aeson - Reimplement HTTP server (or switch to a different one) to drop half the universe diff --git a/aratamete-ircbrowse.cabal b/aratamete-ircbrowse.cabal index 2cef3f6..6b5f720 100644 --- a/aratamete-ircbrowse.cabal +++ b/aratamete-ircbrowse.cabal @@ -9,7 +9,7 @@ build-type: Simple executable aratamete-ircbrowse main-is: Main.hs other-modules: - -- Cache + Cache Config Index Mmap @@ -19,15 +19,15 @@ executable aratamete-ircbrowse base >= 4.21, attoparsec, bytestring, - chronos, + clock, containers, directory, filepath, http-types, mustache, - stm, + random, text >= 2.1.2, - torsor, + time, unix, vector, wai, diff --git a/src/Cache.hs b/src/Cache.hs index 53d2185..4694aa0 100644 --- a/src/Cache.hs +++ b/src/Cache.hs @@ -1,87 +1,43 @@ module Cache where -import Control.Concurrent.STM -import Control.Monad (forM_) -import Data.ByteString (ByteString) -import Data.ByteString qualified as BS +import Control.Monad (when) +import Data.IORef import Data.Map.Strict (Map) import Data.Map.Strict qualified as Map - -import ZNC - - --- value, previous (more recently accessed), next (less recently accessed) -data LRUNode a = LRUNode a (TVar (Maybe (LRUNode a))) (TVar (Maybe (LRUNode a))) - --- head (most recent), last (least recent) -data LRUChain a = LRUChain (TVar (Maybe (LRUNode a))) (TVar (Maybe (LRUNode a))) - -lruchainNew :: IO (LRUChain a) -lruchainNew = LRUChain <$> newTVarIO Nothing <*> newTVarIO Nothing - -lruchainAdd :: LRUChain a -> a -> STM (LRUNode a) -lruchainAdd (LRUChain hd tl) x = do - mhdnode <- readTVar hd - case mhdnode of - Nothing -> do -- chain empty - node <- LRUNode x <$> newTVar Nothing <*> newTVar Nothing - writeTVar hd (Just node) - writeTVar tl (Just node) - return node - Just hdnode -> do - node <- LRUNode x <$> newTVar Nothing <*> newTVar (Just hdnode) - writeTVar hd (Just node) - return node - -lruchainBump :: LRUChain a -> LRUNode a -> STM () -lruchainBump (LRUChain hd _) node@(LRUNode _ prevvar nextvar) = do - mprev <- readTVar prevvar - case mprev of - Nothing -> return () -- already most recent - Just (LRUNode _ _ prevnextvar) -> do - -- remove node from chain where it is now - mnext <- readTVar nextvar - writeTVar prevnextvar mnext - case mnext of - Nothing -> return () - Just (LRUNode _ nextprevvar _) -> writeTVar nextprevvar mprev - -- add node to chain at head - writeTVar nextvar =<< readTVar hd - writeTVar hd (Just node) - -lruchainEvict :: LRUChain a -> STM (Maybe a) -lruchainEvict (LRUChain _ tl) = do - mtlnode <- readTVar tl - case mtlnode of - Nothing -> return Nothing - Just (LRUNode x prevvar _) -> do - mprev <- readTVar prevvar - writeTVar tl mprev - case mprev of - Nothing -> return () - Just (LRUNode _ _ prevnextvar) -> writeTVar prevnextvar Nothing - return (Just x) - --- maxsize, elements in cache, linked list -data LRU a = LRU Int (TVar (Map a (LRUNode a))) (LRUChain a) - -lruNew :: Ord a => Int -> IO (LRU a) -lruNew maxsize = LRU maxsize <$> newTVarIO mempty <*> lruchainNew - --- | If a value got evicted, it is returned -lruBump :: Ord a => LRU a -> a -> IO (Maybe a) -lruBump (LRU maxsize mapvar chain) value = atomically $ do - mp <- readTVar mapvar - case Map.lookup value mp of - Nothing -> do - node <- lruchainAdd chain value - writeTVar mapvar $! Map.insert value node mp - if Map.size mp >= maxsize - then do - mret <- lruchainEvict chain - forM_ mret $ \ret -> writeTVar mapvar $! Map.delete ret mp - return mret - else return Nothing - Just node -> do - lruchainBump chain node - return Nothing +import System.Clock qualified as Clock +import System.Random + + +data Cache k v = Cache Int (IORef (Map k (v, Clock.TimeSpec))) (IORef StdGen) + +cacheNew :: Int -> IO (Cache k v) +cacheNew maxsize = Cache maxsize <$> newIORef Map.empty <*> (newIORef =<< initStdGen) + +cacheAdd :: (Ord k, Show k) => Cache k v -> k -> v -> IO () +cacheAdd cache@(Cache maxsize ref _) key val = do + now <- Clock.getTime Clock.Monotonic + sz <- atomicModifyIORef' ref $ \mp -> + case fst (Map.insertLookupWithKey (\_ _ _ -> (val, now)) key (val, now) mp) of + Nothing -> (Map.insert key (val, now) mp, Map.size mp + 1) + Just _ -> (Map.insert key (val, now) mp, 0) + when (sz > maxsize) $ + cachePrune cache + +cacheLookup :: Ord k => Cache k v -> k -> IO (Maybe v) +cacheLookup (Cache _ ref _) key = fmap fst . Map.lookup key <$> readIORef ref + +-- Uses 2-random LRU (https://danluu.com/2choices-eviction/) +cachePrune :: Ord k => Cache k v -> IO () +cachePrune (Cache maxsize ref genref) = do go =<< atomicModifyIORef' genref splitGen + where + go gen0 = do + (gen', sz') <- atomicModifyIORef' ref $ \mp -> + let sz = Map.size mp + (idx1, gen1) = uniformR (0, sz - 1) gen0 ; (_, (_, tm1)) = Map.elemAt idx1 mp + (idx2, gen2) = uniformR (0, sz - 1) gen1 ; (_, (_, tm2)) = Map.elemAt idx2 mp + oldestIdx = if tm1 < tm2 then idx1 else idx2 + in if sz > maxsize + then (Map.deleteAt oldestIdx mp, (gen2, sz - 1)) + else (mp, (gen0, sz)) + when (sz' > maxsize) $ + go gen' diff --git a/src/Index.hs b/src/Index.hs index 4de4c6c..5754e33 100644 --- a/src/Index.hs +++ b/src/Index.hs @@ -5,22 +5,22 @@ module Index ( indexNumEvents, ) where -import Chronos hiding (day) +import Data.Time.Calendar import Control.Monad (forM) import Data.ByteString qualified as BS +import Data.ByteString (ByteString) import Data.Char (isDigit) import Data.List (sort, scanl') import Data.Map.Strict (Map) import Data.Map.Strict qualified as Map import Data.Text qualified as T import Data.Vector.Storable qualified as VS +import Data.Word (Word32) import System.Directory import System.FilePath import Text.Read (readMaybe) -import Torsor (add, difference) - -import Debug.Trace +import Cache import Config (Channel(..)) import Mmap import Util @@ -34,13 +34,14 @@ data ChanIndex = ChanIndex , ciTotal :: Int } deriving (Show) -data Index = Index FilePath (Map Channel ChanIndex) - deriving (Show) +data Index = Index FilePath (Map Channel ChanIndex) (Cache (Channel, YMD) (ByteString, VS.Vector Word32)) -- init initIndex :: FilePath -> [Channel] -> IO Index initIndex basedir toimport = do + cache <- cacheNew 100 + items <- fmap concat . forM (map chanNetwork toimport) $ \nwT -> do let nw = T.unpack nwT @@ -51,10 +52,10 @@ initIndex basedir toimport = do let path = basedir nw ch fn -- putStrLn $ "Parsing " ++ path ++ " (" ++ show (parseFileName fn) ++ " -> " ++ show (dateToDay (parseFileName fn)) ++ ")" events <- parseLog <$> BS.readFile path - return (dateToDay (parseFileName fn), length events) + return (uncurry3 fromGregorian (parseFileName fn), length events) let minday = minimum (map fst days) maxday = maximum (map fst days) - ndays = difference maxday minday + 1 + ndays = fromIntegral @Integer @Int (diffDays maxday minday + 1) -- traceM $ nw ++ "/" ++ ch ++ ": days = " ++ show [(toFileName (dayToDate d), i) | (d, i) <- days] let countScan = VS.fromListN ndays (drop 1 $ scanl' (+) 0 (makeCounts [minday..maxday] days)) let ntotal = sum (map snd days) @@ -64,7 +65,7 @@ initIndex basedir toimport = do , ciEndDay = maxday , ciCountUntil = countScan , ciTotal = ntotal}) - return (Index basedir (Map.fromList items)) + return (Index basedir (Map.fromList items) cache) makeCounts :: [Day] -> [(Day, Int)] -> [Int] makeCounts [] [] = [] @@ -72,7 +73,7 @@ makeCounts [] _ = error "makeCounts: more entries than days in range" makeCounts (d:ds) ents@((d',n):ents') | d == d' = n : makeCounts ds ents' | d < d' = 0 : makeCounts ds ents - | otherwise = error $ "makeCounts: duplicate entry? " ++ show (d, dayToDate d, d', dayToDate d') + | otherwise = error $ "makeCounts: duplicate entry? " ++ show (d, toGregorian d, d', toGregorian d') makeCounts (_:ds) [] = 0 : makeCounts ds [] -- search @@ -80,13 +81,13 @@ makeCounts (_:ds) [] = 0 : makeCounts ds [] -- | Returns proper lazy list of events. Reading the files happens strictly, -- but parsing the events happens lazily. indexGetEventsLinear :: Index -> Channel -> Int -> Int -> IO [(YMDHMS, Event)] -indexGetEventsLinear (Index basedir mp) chan@(Channel network channel) from count +indexGetEventsLinear (Index basedir mp cache) chan@(Channel network channel) from count | from + count < 0 = return [] | from >= ciTotal ci = return [] | otherwise = do let scan = ciCountUntil ci day1idx = binSearch scan from - day1 = day1idx `add` ciStartDay ci + day1 = day1idx `addDays'` ciStartDay ci neventBeforeDay1 | day1idx == 0 = 0 | otherwise = scan VS.! (day1idx - 1) neventInclDay1 = scan VS.! day1idx @@ -99,9 +100,9 @@ indexGetEventsLinear (Index basedir mp) chan@(Channel network channel) from coun | otherwise = let loop day2idx nbefore nseen | nseen + nOnDay2 >= count = - (day2idx `add` ciStartDay ci, count - nseen) + (day2idx `addDays'` ciStartDay ci, count - nseen) | day2idx == VS.length scan - 1 = - (day2idx `add` ciStartDay ci, nOnDay2) + (day2idx `addDays'` ciStartDay ci, nOnDay2) | otherwise = loop (day2idx + 1) (scan VS.! day2idx) (nseen + nOnDay2) where @@ -126,14 +127,19 @@ indexGetEventsLinear (Index basedir mp) chan@(Channel network channel) from coun then parseLogRange (off1, Just (off2 - off1)) else parseLogRange (off1, Nothing) | day == day2 = parseLogRange (0, Just off2) - | otherwise = parseLog - Date (Year y) (Month monthm1) (DayOfMonth d) = dayToDate day - fixDate = map $ \(tod, ev) -> - (YMDHMS (YMD y (fromIntegral monthm1 + 1) (fromIntegral d)) tod - ,ev) + | otherwise = \_lineStarts -> parseLog + (y, month, d) = toGregorian day + ymd = YMD (fromIntegral y) (fromIntegral month) (fromIntegral d) + fixDate = map $ \(tod, ev) -> (YMDHMS ymd tod, ev) in if neventsOnDay > 0 - then fixDate . parse <$> - mapFile (basedir T.unpack network T.unpack channel toFileName (dayToDate day)) + then do (bs, lineStarts) <- cacheLookup cache (chan, ymd) >>= \case + Nothing -> do + bs <- mapFile (basedir T.unpack network T.unpack channel toFileName (toGregorian day)) + let lineStarts = preparseLog bs + cacheAdd cache (chan, ymd) (bs, lineStarts) + return (bs, lineStarts) + Just (bs, lineStarts) -> return (bs, lineStarts) + return (fixDate (parse lineStarts bs)) else return [] return (concat evs) @@ -166,20 +172,20 @@ binSearch vec needle -- other methods indexNumEvents :: Index -> Channel -> Int -indexNumEvents (Index _ mp) chan = ciTotal (mp Map.! chan) +indexNumEvents (Index _ mp _) chan = ciTotal (mp Map.! chan) -- utilities -parseFileName :: String -> Date +parseFileName :: String -> (Year, MonthOfYear, DayOfMonth) parseFileName name | (y, '-' : s1) <- splitAt 4 name , (m, '-' : s2) <- splitAt 2 s1 , (d, ".log") <- splitAt 2 s2 , all isDigit y, all isDigit m, all isDigit d - , let y' = read' y ; m' = read' m - 1 ; d' = read' d - , 0 <= m', m' < 12 - , 1 <= d', d' <= daysInMonth (isLeapYear (Year y')) (Month m') - = Date (Year y') (Month m') (DayOfMonth d') + , let y' = read' y ; m' = read' m ; d' = read' d + , 1 <= m', m' <= 12 + , 1 <= d', d' <= gregorianMonthLength y' m' + = (y', m', d') | otherwise = error $ "Invalid ZNC log file name: " ++ name where @@ -188,6 +194,9 @@ parseFileName name Just r -> r Nothing -> error $ "No read: " ++ show s -toFileName :: Date -> String -toFileName (Date (Year y) (Month m) (DayOfMonth d)) = - pad '0' 4 y ++ '-' : pad '0' 2 (m + 1) ++ '-' : pad '0' 2 d ++ ".log" +toFileName :: (Year, MonthOfYear, DayOfMonth) -> String +toFileName (y, m, d) = + pad '0' 4 y ++ '-' : pad '0' 2 m ++ '-' : pad '0' 2 d ++ ".log" + +addDays' :: Int -> Day -> Day +addDays' = addDays . fromIntegral diff --git a/src/Mmap.hs b/src/Mmap.hs index bfe6042..94f5c49 100644 --- a/src/Mmap.hs +++ b/src/Mmap.hs @@ -45,6 +45,8 @@ mapFile path = mask_ $ do -- fail (and no exceptions are coming from outside as we're masked) if addr == nullPtr then fail "mapFile: could not mmap" - else BS.unsafePackCStringFinalizer addr (fromIntegral @CSize @Int filelen) - (do -- putStrLn ("[munmap " ++ show addr ++ "]") - c_munmap addr filelen) + else do bs <- BS.unsafePackCStringFinalizer addr (fromIntegral @CSize @Int filelen) + (do -- putStrLn ("[munmap " ++ show addr ++ "]") + c_munmap addr filelen) + -- putStrLn ("[mmap " ++ show addr ++ "]") + return bs diff --git a/src/Util.hs b/src/Util.hs index ca31258..1e10eec 100644 --- a/src/Util.hs +++ b/src/Util.hs @@ -17,7 +17,7 @@ data YMDHMS = YMDHMS {-# UNPACK #-} !YMD data YMD = YMD {-# UNPACK #-} !Int {-# UNPACK #-} !Word8 -- ^ 1-based {-# UNPACK #-} !Word8 - deriving (Show) + deriving (Show, Eq, Ord) -- | Time-of-day in seconds, in unspecified time zone data HMS = HMS {-# UNPACK #-} !Word8 @@ -29,3 +29,6 @@ pad :: Show a => Char -> Int -> a -> String pad c w val = let s = show val in replicate (w - length s) c ++ s + +uncurry3 :: (a -> b -> c -> d) -> (a, b, c) -> d +uncurry3 f (x, y, z) = f x y z diff --git a/src/ZNC.hs b/src/ZNC.hs index 35eafe0..c23ffe2 100644 --- a/src/ZNC.hs +++ b/src/ZNC.hs @@ -2,6 +2,7 @@ module ZNC ( -- Log(..), Nick, Event(..), + preparseLog, parseLog, parseLogRange, ) where @@ -14,15 +15,13 @@ import Data.Char (ord) import Data.Either (fromRight) import Data.Text (Text) import Data.Text.Encoding qualified as TE --- import Data.Vector qualified as V --- import Data.Vector (Vector) -import Data.Word (Word8) +import Data.Vector.Storable qualified as VS +import Data.Word (Word8, Word32) -import Util +import Debug.Trace +import Util --- newtype Log = Log (Vector (TOD, Event)) --- deriving (Show) type Nick = Text @@ -41,20 +40,56 @@ data Event | ParseError deriving (Show) +preparseLog :: ByteString -> VS.Vector Word32 +preparseLog = VS.fromList . findLineStarts 0 + where + findLineStarts :: Int -> ByteString -> [Word32] + findLineStarts off bs = + case BS.findIndex (== 10) (BS.drop off bs) of + Nothing | BS.length bs == off -> [] + | otherwise -> [fromIntegral off] + Just i -> fromIntegral off : findLineStarts (off + i + 1) bs + -- these INLINE/NOINLINE pragmas are optimisation without testing or profiling, have fun {-# INLINE parseLog #-} parseLog :: ByteString -> [(HMS, Event)] -parseLog = parseLogRange (0, Nothing) +parseLog = map parseLogLine . BS8.lines -- (start line, number of lines (default to rest of file)) {-# INLINE parseLogRange #-} -parseLogRange :: (Int, Maybe Int) -> ByteString -> [(HMS, Event)] -parseLogRange (startln, mnumln) = - -- Log . V.fromList . - map go . maybe id take mnumln . drop startln . BS8.lines +parseLogRange :: (Int, Maybe Int) -> VS.Vector Word32 -> ByteString -> [(HMS, Event)] +parseLogRange (startln, mnumln) linestarts topbs = + let numln = maybe (VS.length linestarts - startln) id mnumln + splitted = splitWithLineStarts 0 (VS.slice startln numln linestarts) topbs + in -- traceShow ("pLR"::String, splitted) $ + map parseLogLine splitted where - {-# NOINLINE go #-} - go = fromRight (HMS 0 0 0, ParseError) . P.parseOnly parseLine + {-# INLINE splitWithLineStarts #-} + splitWithLineStarts :: Int -> VS.Vector Word32 -> ByteString -> [ByteString] + splitWithLineStarts idx starts bs + | idx >= VS.length starts = [] + | idx == VS.length starts - 1 = + [BS.takeWhile (\b -> b /= 13 && b /= 10) (BS.drop (at idx) bs)] + | otherwise = + trimCR (BS.drop (at idx) (BS.take (at (idx + 1) - 1) bs)) + : splitWithLineStarts (idx + 1) starts bs + where + at i = fromIntegral @Word32 @Int (starts VS.! i) + + trimCR :: ByteString -> ByteString + trimCR bs = case BS.unsnoc bs of + Just (bs', c) | c == 13 -> bs' + _ -> bs + +parseLogLine :: ByteString -> (HMS, Event) +parseLogLine bs = + case parseLogLine' bs of + res@(HMS 0 0 0, ParseError) -> traceShow ("PE" :: String, bs) res + res -> res + +{-# NOINLINE parseLogLine' #-} +parseLogLine' :: ByteString -> (HMS, Event) +parseLogLine' = fromRight (HMS 0 0 0, ParseError) . P.parseOnly parseLine parseLine :: P.Parser (HMS, Event) parseLine = (,) <$> parseTOD <*> parseEvent -- cgit v1.3