diff options
Diffstat (limited to 'src/Index.hs')
| -rw-r--r-- | src/Index.hs | 105 |
1 files changed, 98 insertions, 7 deletions
diff --git a/src/Index.hs b/src/Index.hs index 8c605d3..36b2374 100644 --- a/src/Index.hs +++ b/src/Index.hs @@ -1,8 +1,11 @@ +{-# LANGUAGE NumericUnderscores #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} module Index ( Index, initIndex, indexGetEventsLinear, + findEventIDLinear, indexGetEventsDay, indexNumEvents, indexCalendar, @@ -10,20 +13,27 @@ module Index ( import Prelude hiding (foldl') -- exported since GHC 9.10 (base 4.20) +import Control.Applicative (empty) import Control.Concurrent -import Control.Monad (forM, forM_, when) +import Control.Monad (forM, forM_, when, guard) +import Control.Monad.Trans.Class (lift) +import Control.Monad.Trans.Maybe import Data.ByteString qualified as BS import Data.ByteString (ByteString) -import Data.Char (isDigit) +import Data.Char (isDigit, chr, ord) +import Data.Functor ((<&>)) import Data.IORef -import Data.List (sort, scanl', foldl') +import Data.List (sort, scanl', foldl', minimumBy) import Data.Map.Strict (Map) import Data.Map.Strict qualified as Map import Data.Maybe (catMaybes) +import Data.Ord (comparing) import Data.Text qualified as T +import Data.Text (Text) import Data.Time import Data.Vector.Storable qualified as VS import Data.Word +import Numeric (showIntAtBase) import System.Clock qualified as Clock import System.Directory import System.FilePath @@ -64,6 +74,8 @@ data Index = Index !FilePath !(Map Channel (IORef ChanIndex)) !(Cache (Channel, YMD) (ByteString, VS.Vector Word32)) +type EventID = Text + -- init initIndex :: FilePath -> [Channel] -> IO Index @@ -195,7 +207,7 @@ indexUpdateImport index@(Index _ mp _) chan = do -- | Returns proper lazy list of events. Reading the files happens strictly, -- but parsing the events happens lazily. -indexGetEventsLinear :: Index -> Channel -> Int -> Int -> IO [(YMDHMS, Event)] +indexGetEventsLinear :: Index -> Channel -> Int -> Int -> IO [(YMDHMS, EventID, Event)] indexGetEventsLinear index@(Index _ mp _) chan from count = do ci <- readIORef (mp Map.! chan) if from + count < 0 || from >= ciTotal ci @@ -246,10 +258,12 @@ indexGetEventsLinear index@(Index _ mp _) chan from count = do | day == day2 = (0, Just off2) | otherwise = (0, Just neventsOnDay) ymd = ymdFromGregorian (toGregorian day) - fixDate = map $ \(tod, ev) -> (YMDHMS ymd tod, ev) in if neventsOnDay > 0 - then loadDay index chan ymd >>= \case - Just (bs, lineStarts) -> return (fixDate (parseLogRange range lineStarts bs)) + then loadDay index chan ymd <&> \case + Just (bs, lineStarts) -> + let events = parseLogRange range lineStarts bs + in zipWith (\(hms, ev) off -> (YMDHMS ymd hms, genEventID (YMDHMS ymd hms) off, ev)) + events [fst range ..] Nothing -> error $ "events on day " ++ show (dayToYMD day) ++ " but no file" else return [] @@ -278,6 +292,30 @@ binSearch vec needle in if vec IGV.! mid <= needle then go mid hi else go lo mid +-- | If ID is found, returns index in the linear list of events for this channel +findEventIDLinear :: Index -> Channel -> EventID -> IO (Maybe Int) +findEventIDLinear index@(Index _ mp _) chan eid = runMaybeT $ do + (YMDHMS ymd hms, idoff) <- hoistMaybe (parseEventID eid) + day <- hoistMaybe (uncurry3 fromGregorianValid (ymdToGregorian ymd)) + + ci <- lift $ readIORef (mp Map.! chan) + guard (ciStartDay ci <= day && day <= ciEndDay ci) + + (bs, lineStarts) <- MaybeT $ loadDay index chan ymd + let candidates = + map snd $ + takeWhile ((== hms) . fst) $ + dropWhile ((< hms) . fst) $ + zip (parseLogTimesOnly lineStarts bs) [0..] + case candidates of + [] -> empty + _ -> do + let dayidx = fromIntegral @Integer @Int (day `diffDays` ciStartDay ci) + eventsBeforeDay | dayidx == 0 = 0 + | otherwise = ciCountUntil ci IGV.! (dayidx - 1) + let dayoff = minimumBy (comparing (\off -> abs (off - idoff))) candidates + return (eventsBeforeDay + dayoff) + -- other methods indexNumEvents :: Index -> Channel -> IO Int @@ -316,6 +354,59 @@ loadDay (Index basedir _ cache) chan@(Channel network channel) ymd = do Nothing -> return Nothing -- file didn't exist Just (bs, lineStarts) -> return (Just (bs, lineStarts)) +-- | Takes the index of the event in the day's log (the "offset") in addition +-- to the timestamp, in order to disambiguate in case there are multiple events +-- with the same timestamp. +-- +-- >>> genEventID (YMDHMS (YMD 2026 4 7) (HMS 18 56 55)) 123 +-- "a5d9CtBVX" +genEventID :: YMDHMS -> Int -> EventID +genEventID (YMDHMS (YMD y m d) (HMS hh mm ss)) off + -- An event ID is a mixed-radix number. + -- Components: [offset, year, month, day, hour, minute, second] + -- Radix: [ --, 5000, 12, 31, 24, 60, 60] + -- Maximal offset is determined by: + -- > ceiling (2 ** (64 - logBase 2 (5000 * 12 * 31 * 24 * 60 * 60))) + -- 114787088 + -- to fit the ID number in a Word64. + -- Let's round that down converatively to 100_000_000, i.e. 100 million events per day max. + -- + -- The result number is encoded in base62, and an 'a' is prefixed as an ID version identifier. + | off >= 100_000_000 = error "Too many events per day" + | y >= 5000 = error "You should have better tech at this point" + | otherwise = + let cast :: Integral a => a -> Word64 ; cast = fromIntegral + num = (((((cast off * 5000 + cast y) * 12 + cast (m - 1)) * 31 + cast (d - 1)) + * 24 + cast hh) * 60 + cast mm) * 60 + cast ss + in T.pack ('a' : showIntAtBase 62 base62char num "") + where + base62char = chr . fromIntegral . (base62alphabet `BS.index`) + base62alphabet = BS.pack (map (fromIntegral . ord) (['0'..'9'] ++ ['A'..'Z'] ++ ['a'..'z'])) + +-- >>> parseEventID "a5d9CtBVX" +-- Just (YMDHMS (YMD 2026 4 7) (HMS 18 56 55),123) +parseEventID :: EventID -> Maybe (YMDHMS, Int) +parseEventID (T.uncons -> Just ('a', eid)) = do + num <- multiply <$> mapM (fmap (fromIntegral @Int @Word64) . unbase62char) (T.unpack eid) + let (num2, ss) = num `quotRem` 60 + let (num3, mm) = num2 `quotRem` 60 + let (num4, hh) = num3 `quotRem` 24 + let (num5, d') = num4 `quotRem` 31 + let (num6, m') = num5 `quotRem` 12 + let (off , y ) = num6 `quotRem` 5000 + let cast :: Num b => Word64 -> b ; cast = fromIntegral + return (YMDHMS (YMD (cast y) (cast m' + 1) (cast d' + 1)) + (HMS (cast hh) (cast mm) (cast ss)) + ,cast off) + where + multiply = sum . map (uncurry (*)) . zip (iterate (*62) 1) . reverse + unbase62char c + | '0' <= c, c <= '9' = Just (ord c - ord '0') + | 'A' <= c, c <= 'Z' = Just (ord c - ord 'A' + 10) + | 'a' <= c, c <= 'z' = Just (ord c - ord 'a' + 36) + | otherwise = Nothing +parseEventID _ = Nothing + parseFileName :: String -> Maybe YMD parseFileName name | (y, '-' : s1) <- splitAt 4 name |
