module Index ( Index, initIndex, indexGetEventsLinear, indexNumEvents, ) where import Data.Time.Calendar import Control.Monad (forM) import Data.ByteString qualified as BS import Data.ByteString (ByteString) import Data.Char (isDigit) import Data.List (sort, scanl') import Data.Map.Strict (Map) import Data.Map.Strict qualified as Map import Data.Text qualified as T import Data.Vector.Storable qualified as VS import Data.Word (Word32) import System.Directory import System.FilePath import Text.Read (readMaybe) import Cache import Config (Channel(..)) import Mmap import Util import ZNC data ChanIndex = ChanIndex { ciStartDay :: Day , ciEndDay :: Day , ciCountUntil :: VS.Vector Int -- ^ number of events up to and /including/ this day , ciTotal :: Int } deriving (Show) data Index = Index FilePath (Map Channel ChanIndex) (Cache (Channel, YMD) (ByteString, VS.Vector Word32)) -- init initIndex :: FilePath -> [Channel] -> IO Index initIndex basedir toimport = do cache <- cacheNew 100 items <- fmap concat . forM (map chanNetwork toimport) $ \nwT -> do let nw = T.unpack nwT forM [ch | Channel nwT' ch <- toimport, nwT' == nwT] $ \chT -> do let ch = T.unpack chT files <- listDirectory (basedir nw ch) days <- fmap sort . forM files $ \fn -> do let path = basedir nw ch fn -- putStrLn $ "Parsing " ++ path ++ " (" ++ show (parseFileName fn) ++ " -> " ++ show (dateToDay (parseFileName fn)) ++ ")" events <- parseLog <$> BS.readFile path return (uncurry3 fromGregorian (parseFileName fn), length events) let minday = minimum (map fst days) maxday = maximum (map fst days) ndays = fromIntegral @Integer @Int (diffDays maxday minday + 1) -- traceM $ nw ++ "/" ++ ch ++ ": days = " ++ show [(toFileName (dayToDate d), i) | (d, i) <- days] let countScan = VS.fromListN ndays (drop 1 $ scanl' (+) 0 (makeCounts [minday..maxday] days)) let ntotal = sum (map snd days) return (Channel nwT chT ,ChanIndex { ciStartDay = minday , ciEndDay = maxday , ciCountUntil = countScan , ciTotal = ntotal}) return (Index basedir (Map.fromList items) cache) makeCounts :: [Day] -> [(Day, Int)] -> [Int] makeCounts [] [] = [] makeCounts [] _ = error "makeCounts: more entries than days in range" makeCounts (d:ds) ents@((d',n):ents') | d == d' = n : makeCounts ds ents' | d < d' = 0 : makeCounts ds ents | otherwise = error $ "makeCounts: duplicate entry? " ++ show (d, toGregorian d, d', toGregorian d') makeCounts (_:ds) [] = 0 : makeCounts ds [] -- search -- | Returns proper lazy list of events. Reading the files happens strictly, -- but parsing the events happens lazily. indexGetEventsLinear :: Index -> Channel -> Int -> Int -> IO [(YMDHMS, Event)] indexGetEventsLinear (Index basedir mp cache) chan@(Channel network channel) from count | from + count < 0 = return [] | from >= ciTotal ci = return [] | otherwise = do let scan = ciCountUntil ci day1idx = binSearch scan from day1 = day1idx `addDays'` ciStartDay ci neventBeforeDay1 | day1idx == 0 = 0 | otherwise = scan VS.! (day1idx - 1) neventInclDay1 = scan VS.! day1idx neventOnDay1 = neventInclDay1 - neventBeforeDay1 off1 = from - neventBeforeDay1 -- day2 is inclusive, off2 is exclusive (day2, off2) | from + count <= neventInclDay1 = (day1, off1 + count) | day1idx == VS.length scan - 1 = (day1, neventOnDay1) | otherwise = let loop day2idx nbefore nseen | nseen + nOnDay2 >= count = (day2idx `addDays'` ciStartDay ci, count - nseen) | day2idx == VS.length scan - 1 = (day2idx `addDays'` ciStartDay ci, nOnDay2) | otherwise = loop (day2idx + 1) (scan VS.! day2idx) (nseen + nOnDay2) where nOnDay2 = scan VS.! day2idx - nbefore in loop (day1idx + 1) (neventBeforeDay1 + neventOnDay1) (neventOnDay1 - off1) -- traceM ("ci = " ++ show ci) -- traceM ("binSearch " ++ show from ++ " =") -- traceM (" " ++ show day1idx) -- traceM ("day1 = " ++ show day1) -- traceM ("off1 = " ++ show off1) -- traceM ("neventOnDay1 = " ++ show neventOnDay1) -- traceM ("count = " ++ show count) -- traceM ("day2 = " ++ show day2) -- traceM ("off2 = " ++ show off2) evs <- forM (zip [day1 .. day2] [day1idx..]) $ \(day, dayidx) -> let neventsOnDay | dayidx == 0 = scan VS.! 0 | otherwise = scan VS.! dayidx - scan VS.! (dayidx - 1) parse | day == day1 = if day1 == day2 then parseLogRange (off1, Just (off2 - off1)) else parseLogRange (off1, Nothing) | day == day2 = parseLogRange (0, Just off2) | otherwise = \_lineStarts -> parseLog (y, month, d) = toGregorian day ymd = YMD (fromIntegral y) (fromIntegral month) (fromIntegral d) fixDate = map $ \(tod, ev) -> (YMDHMS ymd tod, ev) in if neventsOnDay > 0 then do (bs, lineStarts) <- cacheLookup cache (chan, ymd) >>= \case Nothing -> do bs <- mapFile (basedir T.unpack network T.unpack channel toFileName (toGregorian day)) let lineStarts = preparseLog bs cacheAdd cache (chan, ymd) (bs, lineStarts) return (bs, lineStarts) Just (bs, lineStarts) -> return (bs, lineStarts) return (fixDate (parse lineStarts bs)) else return [] return (concat evs) where ci = mp Map.! chan -- | The vector must be sorted. -- Returns index of the first element x such that needle < x. If there is no -- such element (i.e. needle is greater-equal the last element of vec), returns -- the length of vec. -- -- TODO: proportional binary search binSearch :: VS.Vector Int -> Int -> Int binSearch vec needle | veclen == 0 || vec VS.! (veclen - 1) < needle = veclen | needle < vec VS.! 0 = 0 | otherwise = go 0 (veclen - 1) where veclen = VS.length vec -- Invariant: vec[lo] <= needle < vec[hi] go :: Int -> Int -> Int go lo hi | lo + 1 == hi = hi | otherwise = let mid = lo + (hi - lo) `div` 2 in if vec VS.! mid <= needle then go mid hi else go lo mid -- other methods indexNumEvents :: Index -> Channel -> Int indexNumEvents (Index _ mp _) chan = ciTotal (mp Map.! chan) -- utilities parseFileName :: String -> (Year, MonthOfYear, DayOfMonth) parseFileName name | (y, '-' : s1) <- splitAt 4 name , (m, '-' : s2) <- splitAt 2 s1 , (d, ".log") <- splitAt 2 s2 , all isDigit y, all isDigit m, all isDigit d , let y' = read' y ; m' = read' m ; d' = read' d , 1 <= m', m' <= 12 , 1 <= d', d' <= gregorianMonthLength y' m' = (y', m', d') | otherwise = error $ "Invalid ZNC log file name: " ++ name where read' :: Read a => String -> a read' s = case readMaybe s of Just r -> r Nothing -> error $ "No read: " ++ show s toFileName :: (Year, MonthOfYear, DayOfMonth) -> String toFileName (y, m, d) = pad '0' 4 y ++ '-' : pad '0' 2 m ++ '-' : pad '0' 2 d ++ ".log" addDays' :: Int -> Day -> Day addDays' = addDays . fromIntegral