summaryrefslogtreecommitdiff
path: root/src/Index.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Index.hs')
-rw-r--r--src/Index.hs105
1 files changed, 98 insertions, 7 deletions
diff --git a/src/Index.hs b/src/Index.hs
index 8c605d3..36b2374 100644
--- a/src/Index.hs
+++ b/src/Index.hs
@@ -1,8 +1,11 @@
+{-# LANGUAGE NumericUnderscores #-}
{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ViewPatterns #-}
module Index (
Index,
initIndex,
indexGetEventsLinear,
+ findEventIDLinear,
indexGetEventsDay,
indexNumEvents,
indexCalendar,
@@ -10,20 +13,27 @@ module Index (
import Prelude hiding (foldl') -- exported since GHC 9.10 (base 4.20)
+import Control.Applicative (empty)
import Control.Concurrent
-import Control.Monad (forM, forM_, when)
+import Control.Monad (forM, forM_, when, guard)
+import Control.Monad.Trans.Class (lift)
+import Control.Monad.Trans.Maybe
import Data.ByteString qualified as BS
import Data.ByteString (ByteString)
-import Data.Char (isDigit)
+import Data.Char (isDigit, chr, ord)
+import Data.Functor ((<&>))
import Data.IORef
-import Data.List (sort, scanl', foldl')
+import Data.List (sort, scanl', foldl', minimumBy)
import Data.Map.Strict (Map)
import Data.Map.Strict qualified as Map
import Data.Maybe (catMaybes)
+import Data.Ord (comparing)
import Data.Text qualified as T
+import Data.Text (Text)
import Data.Time
import Data.Vector.Storable qualified as VS
import Data.Word
+import Numeric (showIntAtBase)
import System.Clock qualified as Clock
import System.Directory
import System.FilePath
@@ -64,6 +74,8 @@ data Index = Index !FilePath
!(Map Channel (IORef ChanIndex))
!(Cache (Channel, YMD) (ByteString, VS.Vector Word32))
+type EventID = Text
+
-- init
initIndex :: FilePath -> [Channel] -> IO Index
@@ -195,7 +207,7 @@ indexUpdateImport index@(Index _ mp _) chan = do
-- | Returns proper lazy list of events. Reading the files happens strictly,
-- but parsing the events happens lazily.
-indexGetEventsLinear :: Index -> Channel -> Int -> Int -> IO [(YMDHMS, Event)]
+indexGetEventsLinear :: Index -> Channel -> Int -> Int -> IO [(YMDHMS, EventID, Event)]
indexGetEventsLinear index@(Index _ mp _) chan from count = do
ci <- readIORef (mp Map.! chan)
if from + count < 0 || from >= ciTotal ci
@@ -246,10 +258,12 @@ indexGetEventsLinear index@(Index _ mp _) chan from count = do
| day == day2 = (0, Just off2)
| otherwise = (0, Just neventsOnDay)
ymd = ymdFromGregorian (toGregorian day)
- fixDate = map $ \(tod, ev) -> (YMDHMS ymd tod, ev)
in if neventsOnDay > 0
- then loadDay index chan ymd >>= \case
- Just (bs, lineStarts) -> return (fixDate (parseLogRange range lineStarts bs))
+ then loadDay index chan ymd <&> \case
+ Just (bs, lineStarts) ->
+ let events = parseLogRange range lineStarts bs
+ in zipWith (\(hms, ev) off -> (YMDHMS ymd hms, genEventID (YMDHMS ymd hms) off, ev))
+ events [fst range ..]
Nothing -> error $ "events on day " ++ show (dayToYMD day) ++ " but no file"
else return []
@@ -278,6 +292,30 @@ binSearch vec needle
in if vec IGV.! mid <= needle
then go mid hi else go lo mid
+-- | If ID is found, returns index in the linear list of events for this channel
+findEventIDLinear :: Index -> Channel -> EventID -> IO (Maybe Int)
+findEventIDLinear index@(Index _ mp _) chan eid = runMaybeT $ do
+ (YMDHMS ymd hms, idoff) <- hoistMaybe (parseEventID eid)
+ day <- hoistMaybe (uncurry3 fromGregorianValid (ymdToGregorian ymd))
+
+ ci <- lift $ readIORef (mp Map.! chan)
+ guard (ciStartDay ci <= day && day <= ciEndDay ci)
+
+ (bs, lineStarts) <- MaybeT $ loadDay index chan ymd
+ let candidates =
+ map snd $
+ takeWhile ((== hms) . fst) $
+ dropWhile ((< hms) . fst) $
+ zip (parseLogTimesOnly lineStarts bs) [0..]
+ case candidates of
+ [] -> empty
+ _ -> do
+ let dayidx = fromIntegral @Integer @Int (day `diffDays` ciStartDay ci)
+ eventsBeforeDay | dayidx == 0 = 0
+ | otherwise = ciCountUntil ci IGV.! (dayidx - 1)
+ let dayoff = minimumBy (comparing (\off -> abs (off - idoff))) candidates
+ return (eventsBeforeDay + dayoff)
+
-- other methods
indexNumEvents :: Index -> Channel -> IO Int
@@ -316,6 +354,59 @@ loadDay (Index basedir _ cache) chan@(Channel network channel) ymd = do
Nothing -> return Nothing -- file didn't exist
Just (bs, lineStarts) -> return (Just (bs, lineStarts))
+-- | Takes the index of the event in the day's log (the "offset") in addition
+-- to the timestamp, in order to disambiguate in case there are multiple events
+-- with the same timestamp.
+--
+-- >>> genEventID (YMDHMS (YMD 2026 4 7) (HMS 18 56 55)) 123
+-- "a5d9CtBVX"
+genEventID :: YMDHMS -> Int -> EventID
+genEventID (YMDHMS (YMD y m d) (HMS hh mm ss)) off
+ -- An event ID is a mixed-radix number.
+ -- Components: [offset, year, month, day, hour, minute, second]
+ -- Radix: [ --, 5000, 12, 31, 24, 60, 60]
+ -- Maximal offset is determined by:
+ -- > ceiling (2 ** (64 - logBase 2 (5000 * 12 * 31 * 24 * 60 * 60)))
+ -- 114787088
+ -- to fit the ID number in a Word64.
+ -- Let's round that down converatively to 100_000_000, i.e. 100 million events per day max.
+ --
+ -- The result number is encoded in base62, and an 'a' is prefixed as an ID version identifier.
+ | off >= 100_000_000 = error "Too many events per day"
+ | y >= 5000 = error "You should have better tech at this point"
+ | otherwise =
+ let cast :: Integral a => a -> Word64 ; cast = fromIntegral
+ num = (((((cast off * 5000 + cast y) * 12 + cast (m - 1)) * 31 + cast (d - 1))
+ * 24 + cast hh) * 60 + cast mm) * 60 + cast ss
+ in T.pack ('a' : showIntAtBase 62 base62char num "")
+ where
+ base62char = chr . fromIntegral . (base62alphabet `BS.index`)
+ base62alphabet = BS.pack (map (fromIntegral . ord) (['0'..'9'] ++ ['A'..'Z'] ++ ['a'..'z']))
+
+-- >>> parseEventID "a5d9CtBVX"
+-- Just (YMDHMS (YMD 2026 4 7) (HMS 18 56 55),123)
+parseEventID :: EventID -> Maybe (YMDHMS, Int)
+parseEventID (T.uncons -> Just ('a', eid)) = do
+ num <- multiply <$> mapM (fmap (fromIntegral @Int @Word64) . unbase62char) (T.unpack eid)
+ let (num2, ss) = num `quotRem` 60
+ let (num3, mm) = num2 `quotRem` 60
+ let (num4, hh) = num3 `quotRem` 24
+ let (num5, d') = num4 `quotRem` 31
+ let (num6, m') = num5 `quotRem` 12
+ let (off , y ) = num6 `quotRem` 5000
+ let cast :: Num b => Word64 -> b ; cast = fromIntegral
+ return (YMDHMS (YMD (cast y) (cast m' + 1) (cast d' + 1))
+ (HMS (cast hh) (cast mm) (cast ss))
+ ,cast off)
+ where
+ multiply = sum . map (uncurry (*)) . zip (iterate (*62) 1) . reverse
+ unbase62char c
+ | '0' <= c, c <= '9' = Just (ord c - ord '0')
+ | 'A' <= c, c <= 'Z' = Just (ord c - ord 'A' + 10)
+ | 'a' <= c, c <= 'z' = Just (ord c - ord 'a' + 36)
+ | otherwise = Nothing
+parseEventID _ = Nothing
+
parseFileName :: String -> Maybe YMD
parseFileName name
| (y, '-' : s1) <- splitAt 4 name