module Index ( Index, initIndex, indexGetEventsLinear, indexNumEvents, ) where import Chronos hiding (day) import Control.Monad (forM) import Data.ByteString qualified as BS import Data.Char (isDigit) import Data.List (sort, scanl') import Data.Map.Strict (Map) import Data.Map.Strict qualified as Map import Data.Text qualified as T import Data.Vector.Storable qualified as VS import System.Directory import System.FilePath import Text.Read (readMaybe) import Torsor (add, difference) import Debug.Trace import Config (Channel(..)) import Mmap import Util import ZNC data ChanIndex = ChanIndex { ciStartDay :: Day , ciEndDay :: Day , ciCountUntil :: VS.Vector Int -- ^ number of events up to and /including/ this day , ciTotal :: Int } deriving (Show) data Index = Index FilePath (Map Channel ChanIndex) deriving (Show) -- init initIndex :: FilePath -> [Channel] -> IO Index initIndex basedir toimport = do items <- fmap concat . forM (map chanNetwork toimport) $ \nwT -> do let nw = T.unpack nwT forM [ch | Channel nwT' ch <- toimport, nwT' == nwT] $ \chT -> do let ch = T.unpack chT files <- listDirectory (basedir nw ch) days <- fmap sort . forM files $ \fn -> do let path = basedir nw ch fn -- putStrLn $ "Parsing " ++ path ++ " (" ++ show (parseFileName fn) ++ " -> " ++ show (dateToDay (parseFileName fn)) ++ ")" events <- parseLog <$> BS.readFile path return (dateToDay (parseFileName fn), length events) let minday = minimum (map fst days) maxday = maximum (map fst days) ndays = difference maxday minday + 1 -- traceM $ nw ++ "/" ++ ch ++ ": days = " ++ show [(toFileName (dayToDate d), i) | (d, i) <- days] let countScan = VS.fromListN ndays (drop 1 $ scanl' (+) 0 (makeCounts [minday..maxday] days)) let ntotal = sum (map snd days) return (Channel nwT chT ,ChanIndex { ciStartDay = minday , ciEndDay = maxday , ciCountUntil = countScan , ciTotal = ntotal}) return (Index basedir (Map.fromList items)) makeCounts :: [Day] -> [(Day, Int)] -> [Int] makeCounts [] [] = [] makeCounts [] _ = error "makeCounts: more entries than days in range" makeCounts (d:ds) ents@((d',n):ents') | d == d' = n : makeCounts ds ents' | d < d' = 0 : makeCounts ds ents | otherwise = error $ "makeCounts: duplicate entry? " ++ show (d, dayToDate d, d', dayToDate d') makeCounts (_:ds) [] = 0 : makeCounts ds [] -- search -- | Returns proper lazy list of events. Reading the files happens strictly, -- but parsing the events happens lazily. indexGetEventsLinear :: Index -> Channel -> Int -> Int -> IO [(YMDHMS, Event)] indexGetEventsLinear (Index basedir mp) chan@(Channel network channel) from count | from + count < 0 = return [] | from >= ciTotal ci = return [] | otherwise = do let scan = ciCountUntil ci day1idx = binSearch scan from day1 = day1idx `add` ciStartDay ci neventBeforeDay1 | day1idx == 0 = 0 | otherwise = scan VS.! (day1idx - 1) neventInclDay1 = scan VS.! day1idx neventOnDay1 = neventInclDay1 - neventBeforeDay1 off1 = from - neventBeforeDay1 -- day2 is inclusive, off2 is exclusive (day2, off2) | from + count <= neventInclDay1 = (day1, off1 + count) | day1idx == VS.length scan - 1 = (day1, neventOnDay1) | otherwise = let loop day2idx nbefore nseen | nseen + nOnDay2 >= count = (day2idx `add` ciStartDay ci, count - nseen) | day2idx == VS.length scan - 1 = (day2idx `add` ciStartDay ci, nOnDay2) | otherwise = loop (day2idx + 1) (scan VS.! day2idx) (nseen + nOnDay2) where nOnDay2 = scan VS.! day2idx - nbefore in loop (day1idx + 1) (neventBeforeDay1 + neventOnDay1) (neventOnDay1 - off1) -- traceM ("ci = " ++ show ci) -- traceM ("binSearch " ++ show from ++ " =") -- traceM (" " ++ show day1idx) -- traceM ("day1 = " ++ show day1) -- traceM ("off1 = " ++ show off1) -- traceM ("neventOnDay1 = " ++ show neventOnDay1) -- traceM ("count = " ++ show count) -- traceM ("day2 = " ++ show day2) -- traceM ("off2 = " ++ show off2) evs <- forM (zip [day1 .. day2] [day1idx..]) $ \(day, dayidx) -> let neventsOnDay | dayidx == 0 = scan VS.! 0 | otherwise = scan VS.! dayidx - scan VS.! (dayidx - 1) parse | day == day1 = if day1 == day2 then parseLogRange (off1, Just (off2 - off1)) else parseLogRange (off1, Nothing) | day == day2 = parseLogRange (0, Just off2) | otherwise = parseLog Date (Year y) (Month monthm1) (DayOfMonth d) = dayToDate day fixDate = map $ \(tod, ev) -> (YMDHMS (YMD y (fromIntegral monthm1 + 1) (fromIntegral d)) tod ,ev) in if neventsOnDay > 0 then fixDate . parse <$> mapFile (basedir T.unpack network T.unpack channel toFileName (dayToDate day)) else return [] return (concat evs) where ci = mp Map.! chan -- | The vector must be sorted. -- Returns index of the first element x such that needle < x. If there is no -- such element (i.e. needle is greater-equal the last element of vec), returns -- the length of vec. -- -- TODO: proportional binary search binSearch :: VS.Vector Int -> Int -> Int binSearch vec needle | veclen == 0 || vec VS.! (veclen - 1) < needle = veclen | needle < vec VS.! 0 = 0 | otherwise = go 0 (veclen - 1) where veclen = VS.length vec -- Invariant: vec[lo] <= needle < vec[hi] go :: Int -> Int -> Int go lo hi | lo + 1 == hi = hi | otherwise = let mid = lo + (hi - lo) `div` 2 in if vec VS.! mid <= needle then go mid hi else go lo mid -- other methods indexNumEvents :: Index -> Channel -> Int indexNumEvents (Index _ mp) chan = ciTotal (mp Map.! chan) -- utilities parseFileName :: String -> Date parseFileName name | (y, '-' : s1) <- splitAt 4 name , (m, '-' : s2) <- splitAt 2 s1 , (d, ".log") <- splitAt 2 s2 , all isDigit y, all isDigit m, all isDigit d , let y' = read' y ; m' = read' m - 1 ; d' = read' d , 0 <= m', m' < 12 , 1 <= d', d' <= daysInMonth (isLeapYear (Year y')) (Month m') = Date (Year y') (Month m') (DayOfMonth d') | otherwise = error $ "Invalid ZNC log file name: " ++ name where read' :: Read a => String -> a read' s = case readMaybe s of Just r -> r Nothing -> error $ "No read: " ++ show s toFileName :: Date -> String toFileName (Date (Year y) (Month m) (DayOfMonth d)) = pad '0' 4 y ++ '-' : pad '0' 2 (m + 1) ++ '-' : pad '0' 2 d ++ ".log"