From 42b8b5fbcbe02b02878f8f6e2b98aafc713204be Mon Sep 17 00:00:00 2001 From: Tom Smeding Date: Wed, 1 Apr 2026 13:59:42 +0200 Subject: Cache, drop chronos --- src/Index.hs | 69 ++++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 39 insertions(+), 30 deletions(-) (limited to 'src/Index.hs') diff --git a/src/Index.hs b/src/Index.hs index 4de4c6c..5754e33 100644 --- a/src/Index.hs +++ b/src/Index.hs @@ -5,22 +5,22 @@ module Index ( indexNumEvents, ) where -import Chronos hiding (day) +import Data.Time.Calendar import Control.Monad (forM) import Data.ByteString qualified as BS +import Data.ByteString (ByteString) import Data.Char (isDigit) import Data.List (sort, scanl') import Data.Map.Strict (Map) import Data.Map.Strict qualified as Map import Data.Text qualified as T import Data.Vector.Storable qualified as VS +import Data.Word (Word32) import System.Directory import System.FilePath import Text.Read (readMaybe) -import Torsor (add, difference) - -import Debug.Trace +import Cache import Config (Channel(..)) import Mmap import Util @@ -34,13 +34,14 @@ data ChanIndex = ChanIndex , ciTotal :: Int } deriving (Show) -data Index = Index FilePath (Map Channel ChanIndex) - deriving (Show) +data Index = Index FilePath (Map Channel ChanIndex) (Cache (Channel, YMD) (ByteString, VS.Vector Word32)) -- init initIndex :: FilePath -> [Channel] -> IO Index initIndex basedir toimport = do + cache <- cacheNew 100 + items <- fmap concat . forM (map chanNetwork toimport) $ \nwT -> do let nw = T.unpack nwT @@ -51,10 +52,10 @@ initIndex basedir toimport = do let path = basedir nw ch fn -- putStrLn $ "Parsing " ++ path ++ " (" ++ show (parseFileName fn) ++ " -> " ++ show (dateToDay (parseFileName fn)) ++ ")" events <- parseLog <$> BS.readFile path - return (dateToDay (parseFileName fn), length events) + return (uncurry3 fromGregorian (parseFileName fn), length events) let minday = minimum (map fst days) maxday = maximum (map fst days) - ndays = difference maxday minday + 1 + ndays = fromIntegral @Integer @Int (diffDays maxday minday + 1) -- traceM $ nw ++ "/" ++ ch ++ ": days = " ++ show [(toFileName (dayToDate d), i) | (d, i) <- days] let countScan = VS.fromListN ndays (drop 1 $ scanl' (+) 0 (makeCounts [minday..maxday] days)) let ntotal = sum (map snd days) @@ -64,7 +65,7 @@ initIndex basedir toimport = do , ciEndDay = maxday , ciCountUntil = countScan , ciTotal = ntotal}) - return (Index basedir (Map.fromList items)) + return (Index basedir (Map.fromList items) cache) makeCounts :: [Day] -> [(Day, Int)] -> [Int] makeCounts [] [] = [] @@ -72,7 +73,7 @@ makeCounts [] _ = error "makeCounts: more entries than days in range" makeCounts (d:ds) ents@((d',n):ents') | d == d' = n : makeCounts ds ents' | d < d' = 0 : makeCounts ds ents - | otherwise = error $ "makeCounts: duplicate entry? " ++ show (d, dayToDate d, d', dayToDate d') + | otherwise = error $ "makeCounts: duplicate entry? " ++ show (d, toGregorian d, d', toGregorian d') makeCounts (_:ds) [] = 0 : makeCounts ds [] -- search @@ -80,13 +81,13 @@ makeCounts (_:ds) [] = 0 : makeCounts ds [] -- | Returns proper lazy list of events. Reading the files happens strictly, -- but parsing the events happens lazily. indexGetEventsLinear :: Index -> Channel -> Int -> Int -> IO [(YMDHMS, Event)] -indexGetEventsLinear (Index basedir mp) chan@(Channel network channel) from count +indexGetEventsLinear (Index basedir mp cache) chan@(Channel network channel) from count | from + count < 0 = return [] | from >= ciTotal ci = return [] | otherwise = do let scan = ciCountUntil ci day1idx = binSearch scan from - day1 = day1idx `add` ciStartDay ci + day1 = day1idx `addDays'` ciStartDay ci neventBeforeDay1 | day1idx == 0 = 0 | otherwise = scan VS.! (day1idx - 1) neventInclDay1 = scan VS.! day1idx @@ -99,9 +100,9 @@ indexGetEventsLinear (Index basedir mp) chan@(Channel network channel) from coun | otherwise = let loop day2idx nbefore nseen | nseen + nOnDay2 >= count = - (day2idx `add` ciStartDay ci, count - nseen) + (day2idx `addDays'` ciStartDay ci, count - nseen) | day2idx == VS.length scan - 1 = - (day2idx `add` ciStartDay ci, nOnDay2) + (day2idx `addDays'` ciStartDay ci, nOnDay2) | otherwise = loop (day2idx + 1) (scan VS.! day2idx) (nseen + nOnDay2) where @@ -126,14 +127,19 @@ indexGetEventsLinear (Index basedir mp) chan@(Channel network channel) from coun then parseLogRange (off1, Just (off2 - off1)) else parseLogRange (off1, Nothing) | day == day2 = parseLogRange (0, Just off2) - | otherwise = parseLog - Date (Year y) (Month monthm1) (DayOfMonth d) = dayToDate day - fixDate = map $ \(tod, ev) -> - (YMDHMS (YMD y (fromIntegral monthm1 + 1) (fromIntegral d)) tod - ,ev) + | otherwise = \_lineStarts -> parseLog + (y, month, d) = toGregorian day + ymd = YMD (fromIntegral y) (fromIntegral month) (fromIntegral d) + fixDate = map $ \(tod, ev) -> (YMDHMS ymd tod, ev) in if neventsOnDay > 0 - then fixDate . parse <$> - mapFile (basedir T.unpack network T.unpack channel toFileName (dayToDate day)) + then do (bs, lineStarts) <- cacheLookup cache (chan, ymd) >>= \case + Nothing -> do + bs <- mapFile (basedir T.unpack network T.unpack channel toFileName (toGregorian day)) + let lineStarts = preparseLog bs + cacheAdd cache (chan, ymd) (bs, lineStarts) + return (bs, lineStarts) + Just (bs, lineStarts) -> return (bs, lineStarts) + return (fixDate (parse lineStarts bs)) else return [] return (concat evs) @@ -166,20 +172,20 @@ binSearch vec needle -- other methods indexNumEvents :: Index -> Channel -> Int -indexNumEvents (Index _ mp) chan = ciTotal (mp Map.! chan) +indexNumEvents (Index _ mp _) chan = ciTotal (mp Map.! chan) -- utilities -parseFileName :: String -> Date +parseFileName :: String -> (Year, MonthOfYear, DayOfMonth) parseFileName name | (y, '-' : s1) <- splitAt 4 name , (m, '-' : s2) <- splitAt 2 s1 , (d, ".log") <- splitAt 2 s2 , all isDigit y, all isDigit m, all isDigit d - , let y' = read' y ; m' = read' m - 1 ; d' = read' d - , 0 <= m', m' < 12 - , 1 <= d', d' <= daysInMonth (isLeapYear (Year y')) (Month m') - = Date (Year y') (Month m') (DayOfMonth d') + , let y' = read' y ; m' = read' m ; d' = read' d + , 1 <= m', m' <= 12 + , 1 <= d', d' <= gregorianMonthLength y' m' + = (y', m', d') | otherwise = error $ "Invalid ZNC log file name: " ++ name where @@ -188,6 +194,9 @@ parseFileName name Just r -> r Nothing -> error $ "No read: " ++ show s -toFileName :: Date -> String -toFileName (Date (Year y) (Month m) (DayOfMonth d)) = - pad '0' 4 y ++ '-' : pad '0' 2 (m + 1) ++ '-' : pad '0' 2 d ++ ".log" +toFileName :: (Year, MonthOfYear, DayOfMonth) -> String +toFileName (y, m, d) = + pad '0' 4 y ++ '-' : pad '0' 2 m ++ '-' : pad '0' 2 d ++ ".log" + +addDays' :: Int -> Day -> Day +addDays' = addDays . fromIntegral -- cgit v1.3