diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/Index.hs | 105 | ||||
| -rw-r--r-- | src/Main.hs | 85 | ||||
| -rw-r--r-- | src/Util.hs | 2 | ||||
| -rw-r--r-- | src/ZNC.hs | 47 |
4 files changed, 173 insertions, 66 deletions
diff --git a/src/Index.hs b/src/Index.hs index 8c605d3..36b2374 100644 --- a/src/Index.hs +++ b/src/Index.hs @@ -1,8 +1,11 @@ +{-# LANGUAGE NumericUnderscores #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} module Index ( Index, initIndex, indexGetEventsLinear, + findEventIDLinear, indexGetEventsDay, indexNumEvents, indexCalendar, @@ -10,20 +13,27 @@ module Index ( import Prelude hiding (foldl') -- exported since GHC 9.10 (base 4.20) +import Control.Applicative (empty) import Control.Concurrent -import Control.Monad (forM, forM_, when) +import Control.Monad (forM, forM_, when, guard) +import Control.Monad.Trans.Class (lift) +import Control.Monad.Trans.Maybe import Data.ByteString qualified as BS import Data.ByteString (ByteString) -import Data.Char (isDigit) +import Data.Char (isDigit, chr, ord) +import Data.Functor ((<&>)) import Data.IORef -import Data.List (sort, scanl', foldl') +import Data.List (sort, scanl', foldl', minimumBy) import Data.Map.Strict (Map) import Data.Map.Strict qualified as Map import Data.Maybe (catMaybes) +import Data.Ord (comparing) import Data.Text qualified as T +import Data.Text (Text) import Data.Time import Data.Vector.Storable qualified as VS import Data.Word +import Numeric (showIntAtBase) import System.Clock qualified as Clock import System.Directory import System.FilePath @@ -64,6 +74,8 @@ data Index = Index !FilePath !(Map Channel (IORef ChanIndex)) !(Cache (Channel, YMD) (ByteString, VS.Vector Word32)) +type EventID = Text + -- init initIndex :: FilePath -> [Channel] -> IO Index @@ -195,7 +207,7 @@ indexUpdateImport index@(Index _ mp _) chan = do -- | Returns proper lazy list of events. Reading the files happens strictly, -- but parsing the events happens lazily. -indexGetEventsLinear :: Index -> Channel -> Int -> Int -> IO [(YMDHMS, Event)] +indexGetEventsLinear :: Index -> Channel -> Int -> Int -> IO [(YMDHMS, EventID, Event)] indexGetEventsLinear index@(Index _ mp _) chan from count = do ci <- readIORef (mp Map.! chan) if from + count < 0 || from >= ciTotal ci @@ -246,10 +258,12 @@ indexGetEventsLinear index@(Index _ mp _) chan from count = do | day == day2 = (0, Just off2) | otherwise = (0, Just neventsOnDay) ymd = ymdFromGregorian (toGregorian day) - fixDate = map $ \(tod, ev) -> (YMDHMS ymd tod, ev) in if neventsOnDay > 0 - then loadDay index chan ymd >>= \case - Just (bs, lineStarts) -> return (fixDate (parseLogRange range lineStarts bs)) + then loadDay index chan ymd <&> \case + Just (bs, lineStarts) -> + let events = parseLogRange range lineStarts bs + in zipWith (\(hms, ev) off -> (YMDHMS ymd hms, genEventID (YMDHMS ymd hms) off, ev)) + events [fst range ..] Nothing -> error $ "events on day " ++ show (dayToYMD day) ++ " but no file" else return [] @@ -278,6 +292,30 @@ binSearch vec needle in if vec IGV.! mid <= needle then go mid hi else go lo mid +-- | If ID is found, returns index in the linear list of events for this channel +findEventIDLinear :: Index -> Channel -> EventID -> IO (Maybe Int) +findEventIDLinear index@(Index _ mp _) chan eid = runMaybeT $ do + (YMDHMS ymd hms, idoff) <- hoistMaybe (parseEventID eid) + day <- hoistMaybe (uncurry3 fromGregorianValid (ymdToGregorian ymd)) + + ci <- lift $ readIORef (mp Map.! chan) + guard (ciStartDay ci <= day && day <= ciEndDay ci) + + (bs, lineStarts) <- MaybeT $ loadDay index chan ymd + let candidates = + map snd $ + takeWhile ((== hms) . fst) $ + dropWhile ((< hms) . fst) $ + zip (parseLogTimesOnly lineStarts bs) [0..] + case candidates of + [] -> empty + _ -> do + let dayidx = fromIntegral @Integer @Int (day `diffDays` ciStartDay ci) + eventsBeforeDay | dayidx == 0 = 0 + | otherwise = ciCountUntil ci IGV.! (dayidx - 1) + let dayoff = minimumBy (comparing (\off -> abs (off - idoff))) candidates + return (eventsBeforeDay + dayoff) + -- other methods indexNumEvents :: Index -> Channel -> IO Int @@ -316,6 +354,59 @@ loadDay (Index basedir _ cache) chan@(Channel network channel) ymd = do Nothing -> return Nothing -- file didn't exist Just (bs, lineStarts) -> return (Just (bs, lineStarts)) +-- | Takes the index of the event in the day's log (the "offset") in addition +-- to the timestamp, in order to disambiguate in case there are multiple events +-- with the same timestamp. +-- +-- >>> genEventID (YMDHMS (YMD 2026 4 7) (HMS 18 56 55)) 123 +-- "a5d9CtBVX" +genEventID :: YMDHMS -> Int -> EventID +genEventID (YMDHMS (YMD y m d) (HMS hh mm ss)) off + -- An event ID is a mixed-radix number. + -- Components: [offset, year, month, day, hour, minute, second] + -- Radix: [ --, 5000, 12, 31, 24, 60, 60] + -- Maximal offset is determined by: + -- > ceiling (2 ** (64 - logBase 2 (5000 * 12 * 31 * 24 * 60 * 60))) + -- 114787088 + -- to fit the ID number in a Word64. + -- Let's round that down converatively to 100_000_000, i.e. 100 million events per day max. + -- + -- The result number is encoded in base62, and an 'a' is prefixed as an ID version identifier. + | off >= 100_000_000 = error "Too many events per day" + | y >= 5000 = error "You should have better tech at this point" + | otherwise = + let cast :: Integral a => a -> Word64 ; cast = fromIntegral + num = (((((cast off * 5000 + cast y) * 12 + cast (m - 1)) * 31 + cast (d - 1)) + * 24 + cast hh) * 60 + cast mm) * 60 + cast ss + in T.pack ('a' : showIntAtBase 62 base62char num "") + where + base62char = chr . fromIntegral . (base62alphabet `BS.index`) + base62alphabet = BS.pack (map (fromIntegral . ord) (['0'..'9'] ++ ['A'..'Z'] ++ ['a'..'z'])) + +-- >>> parseEventID "a5d9CtBVX" +-- Just (YMDHMS (YMD 2026 4 7) (HMS 18 56 55),123) +parseEventID :: EventID -> Maybe (YMDHMS, Int) +parseEventID (T.uncons -> Just ('a', eid)) = do + num <- multiply <$> mapM (fmap (fromIntegral @Int @Word64) . unbase62char) (T.unpack eid) + let (num2, ss) = num `quotRem` 60 + let (num3, mm) = num2 `quotRem` 60 + let (num4, hh) = num3 `quotRem` 24 + let (num5, d') = num4 `quotRem` 31 + let (num6, m') = num5 `quotRem` 12 + let (off , y ) = num6 `quotRem` 5000 + let cast :: Num b => Word64 -> b ; cast = fromIntegral + return (YMDHMS (YMD (cast y) (cast m' + 1) (cast d' + 1)) + (HMS (cast hh) (cast mm) (cast ss)) + ,cast off) + where + multiply = sum . map (uncurry (*)) . zip (iterate (*62) 1) . reverse + unbase62char c + | '0' <= c, c <= '9' = Just (ord c - ord '0') + | 'A' <= c, c <= 'Z' = Just (ord c - ord 'A' + 10) + | 'a' <= c, c <= 'z' = Just (ord c - ord 'a' + 36) + | otherwise = Nothing +parseEventID _ = Nothing + parseFileName :: String -> Maybe YMD parseFileName name | (y, '-' : s1) <- splitAt 4 name diff --git a/src/Main.hs b/src/Main.hs index c516c5f..380c6be 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -1,5 +1,6 @@ -{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE MultiWayIf #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RankNTypes #-} {-# LANGUAGE ViewPatterns #-} module Main (main) where @@ -77,39 +78,51 @@ pageLog conf pages index req alias = Just chan -> do numEvents <- indexNumEvents index chan let npages = (numEvents + numPerPage - 1) `div` numPerPage - curpage | Just (readMaybe . BS8.unpack -> Just pg) <- query "page" = min npages (max 1 pg) - | otherwise = npages - ntoleft = min 5 (curpage - 1) - ntoright = min 5 (npages - curpage) - -- traceShowM (indexNumEvents index chan, npages, curpage, ntoleft, ntoright) - events <- indexGetEventsLinear index chan ((curpage - 1) * numPerPage) numPerPage - return $ sendPage200 pages "log" $ M.object - ["network" ~> chanNetwork chan - ,"channel" ~> chanChannel chan - ,"alias" ~> alias - ,"totalevents" ~> renderLargeNumber numEvents - ,"picker" ~> M.object - ["prevpage" ~> if curpage > 1 then Just (curpage - 1) else Nothing - ,"nextpage" ~> if curpage < npages then Just (curpage + 1) else Nothing - ,"firstpage" ~> (curpage > 6) - ,"leftdots" ~> (curpage > 7) - ,"rightdots" ~> (curpage < npages - 6) - ,"lastpage" ~> (curpage < npages - 5) - ,"leftnums" ~> [curpage - ntoleft .. curpage - 1] - ,"curnum" ~> curpage - ,"rightnums" ~> [curpage + 1 .. curpage + ntoright] - ,"npages" ~> npages] - ,"events" ~> [M.object - ["classlist" ~> classlist - ,"datetime" ~> let YMDHMS (YMD y mo d) (HMS h mi s) = time - in pad '0' 4 y ++ '-' : pad '0' 2 mo ++ '-' : pad '0' 2 d ++ ' ' : - pad '0' 2 h ++ ':' : pad '0' 2 mi ++ ':' : pad '0' 2 s - ,"nickwrap1" ~> nickw1 - ,"nick" ~> nick - ,"nickwrap2" ~> nickw2 - ,"message" ~> msg] - | (time, ev) <- events - , let (classlist, (nickw1, nick, nickw2), msg) = renderEvent ev]] + (mcurpage, mpagehighlight) <- + if | Just (readMaybe . BS8.unpack -> Just pg) <- query "page" -> return (Just (min npages (max 1 pg)), Nothing) + | Just (TE.decodeASCII' -> Just eventID) <- query "eid" -> do + mevidx <- findEventIDLinear index chan eventID + case mevidx of + Just evidx -> return (Just (evidx `div` numPerPage + 1), Just (evidx `mod` numPerPage)) + Nothing -> return (Nothing, Nothing) + | otherwise -> return (Just npages, Nothing) + case mcurpage of + Nothing -> page404 "Event ID not found" + Just curpage -> do + let ntoleft = min 5 (curpage - 1) + ntoright = min 5 (npages - curpage) + -- traceShowM (indexNumEvents index chan, npages, curpage, ntoleft, ntoright) + events <- indexGetEventsLinear index chan ((curpage - 1) * numPerPage) numPerPage + return $ sendPage200 pages "log" $ M.object + ["network" ~> chanNetwork chan + ,"channel" ~> chanChannel chan + ,"alias" ~> alias + ,"totalevents" ~> renderLargeNumber numEvents + ,"picker" ~> M.object + ["prevpage" ~> if curpage > 1 then Just (curpage - 1) else Nothing + ,"nextpage" ~> if curpage < npages then Just (curpage + 1) else Nothing + ,"firstpage" ~> (curpage > 6) + ,"leftdots" ~> (curpage > 7) + ,"rightdots" ~> (curpage < npages - 6) + ,"lastpage" ~> (curpage < npages - 5) + ,"leftnums" ~> [curpage - ntoleft .. curpage - 1] + ,"curnum" ~> curpage + ,"rightnums" ~> [curpage + 1 .. curpage + ntoright] + ,"npages" ~> npages] + ,"events" ~> [M.object + ["classlist" ~> if mpagehighlight == Just dayidx + then Just (classlist `classListAdd` "highlight") + else classlist + ,"datetime" ~> let YMDHMS (YMD y mo d) (HMS h mi s) = time + in pad '0' 4 y ++ '-' : pad '0' 2 mo ++ '-' : pad '0' 2 d ++ ' ' : + pad '0' 2 h ++ ':' : pad '0' 2 mi ++ ':' : pad '0' 2 s + ,"linkid" ~> eid + ,"nickwrap1" ~> nickw1 + ,"nick" ~> nick + ,"nickwrap2" ~> nickw2 + ,"message" ~> msg] + | ((time, eid, ev), dayidx) <- zip events [0..] + , let (classlist, (nickw1, nick, nickw2), msg) = renderEvent ev]] where numPerPage = 100 @@ -125,6 +138,10 @@ pageLog conf pages index req alias = | n < 1000 = T.show n | otherwise = renderLargeNumber (n `div` 1000) <> "," <> T.pack (pad '0' 3 (n `mod` 1000)) + classListAdd :: Maybe Text -> Text -> Text + classListAdd Nothing t = t + classListAdd (Just l) t = l <> " " <> t + pageCalendarDay :: Config -> Pages -> Index -> Text -> Text -> IO Response pageCalendarDay conf pages index alias datestr = case (econfAlias2Chan conf Map.!? alias, parseDatestr datestr) of diff --git a/src/Util.hs b/src/Util.hs index 95cc4ce..4c09705 100644 --- a/src/Util.hs +++ b/src/Util.hs @@ -25,7 +25,7 @@ data YMD = YMD {-# UNPACK #-} !Int data HMS = HMS {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8 - deriving (Show) + deriving (Show, Eq, Ord) pad :: Show a => Char -> Int -> a -> String pad c w val = @@ -4,6 +4,7 @@ module ZNC ( Nick, Event(..), preparseLog, parseLog, parseLogRange, + parseLogTimesOnly, ) where import Control.Applicative @@ -19,8 +20,6 @@ import Data.Text.Encoding qualified as TE import Data.Vector.Storable qualified as VS import Data.Word (Word8, Word32) -import Debug.Trace - import Util @@ -58,6 +57,12 @@ preparseLog = VS.fromList . findLineStarts 0 parseLog :: ByteString -> [(HMS, Event)] parseLog = map parseLogLine . BS8.lines +{-# INLINE parseLogTimesOnly #-} +parseLogTimesOnly :: VS.Vector Word32 -> ByteString -> [HMS] +parseLogTimesOnly linestarts bs = + map (fromRight (HMS 0 0 0) . P.parseOnly parseTOD) $ + splitWithLineStarts 0 linestarts bs + -- (start line, number of lines (default to rest of file)) {-# INLINE parseLogRange #-} parseLogRange :: (Int, Maybe Int) -> VS.Vector Word32 -> ByteString -> [(HMS, Event)] @@ -66,33 +71,27 @@ parseLogRange (startln, mnumln) linestarts topbs = splitted = splitWithLineStarts 0 (VS.slice startln numln linestarts) topbs in -- traceShow ("pLR"::String, splitted) $ map parseLogLine splitted + +{-# INLINE splitWithLineStarts #-} +splitWithLineStarts :: Int -> VS.Vector Word32 -> ByteString -> [ByteString] +splitWithLineStarts idx starts bs + | idx >= VS.length starts = [] + | idx == VS.length starts - 1 = + [BS.takeWhile (\b -> b /= 13 && b /= 10) (BS.drop (at idx) bs)] + | otherwise = + trimCR (BS.drop (at idx) (BS.take (at (idx + 1) - 1) bs)) + : splitWithLineStarts (idx + 1) starts bs where - {-# INLINE splitWithLineStarts #-} - splitWithLineStarts :: Int -> VS.Vector Word32 -> ByteString -> [ByteString] - splitWithLineStarts idx starts bs - | idx >= VS.length starts = [] - | idx == VS.length starts - 1 = - [BS.takeWhile (\b -> b /= 13 && b /= 10) (BS.drop (at idx) bs)] - | otherwise = - trimCR (BS.drop (at idx) (BS.take (at (idx + 1) - 1) bs)) - : splitWithLineStarts (idx + 1) starts bs - where - at i = fromIntegral @Word32 @Int (starts VS.! i) + at i = fromIntegral @Word32 @Int (starts VS.! i) trimCR :: ByteString -> ByteString - trimCR bs = case BS.unsnoc bs of - Just (bs', c) | c == 13 -> bs' - _ -> bs + trimCR b = case BS.unsnoc b of + Just (b', c) | c == 13 -> b' + _ -> b +{-# NOINLINE parseLogLine #-} parseLogLine :: ByteString -> (HMS, Event) -parseLogLine bs = - case parseLogLine' bs of - res@(HMS 0 0 0, ParseError) -> traceShow ("PE" :: String, bs) res - res -> res - -{-# NOINLINE parseLogLine' #-} -parseLogLine' :: ByteString -> (HMS, Event) -parseLogLine' = fromRight (HMS 0 0 0, ParseError) . P.parseOnly parseLine +parseLogLine = fromRight (HMS 0 0 0, ParseError) . P.parseOnly parseLine parseLine :: P.Parser (HMS, Event) parseLine = (,) <$> parseTOD <*> parseEvent |
