diff options
| author | Tom Smeding <tom@tomsmeding.com> | 2026-04-06 23:35:05 +0200 |
|---|---|---|
| committer | Tom Smeding <tom@tomsmeding.com> | 2026-04-06 23:36:28 +0200 |
| commit | 287d9e5c4fc50bcca2474b9783148181d7ede872 (patch) | |
| tree | 81a80cc5f5aabb2d3cffd3874438782d32096cff | |
| parent | 875da72c83b20260ac5af2bdcc8b992d657fd97e (diff) | |
Log watching
| -rw-r--r-- | aratamete-ircbrowse.cabal | 6 | ||||
| -rw-r--r-- | src/AtomicPrint.hs | 32 | ||||
| -rw-r--r-- | src/Cache.hs | 4 | ||||
| -rw-r--r-- | src/Config.hs | 8 | ||||
| -rw-r--r-- | src/Debounce.hs | 75 | ||||
| -rw-r--r-- | src/ImmutGrowVector.hs | 43 | ||||
| -rw-r--r-- | src/Index.hs | 290 | ||||
| -rw-r--r-- | src/Main.hs | 12 | ||||
| -rw-r--r-- | src/Mmap.hs | 62 | ||||
| -rw-r--r-- | src/ZNC.hs | 2 |
10 files changed, 409 insertions, 125 deletions
diff --git a/aratamete-ircbrowse.cabal b/aratamete-ircbrowse.cabal index 9ef818d..a46cd08 100644 --- a/aratamete-ircbrowse.cabal +++ b/aratamete-ircbrowse.cabal @@ -9,21 +9,25 @@ build-type: Simple executable aratamete-ircbrowse main-is: Main.hs other-modules: + AtomicPrint Cache Calendar Config + Debounce + ImmutGrowVector Index Mmap Util ZNC build-depends: - base >= 4.21, + base >= 4.19, attoparsec, bytestring, clock, containers, directory, filepath, + fsnotify, http-types, mustache, random, diff --git a/src/AtomicPrint.hs b/src/AtomicPrint.hs new file mode 100644 index 0000000..c2367dd --- /dev/null +++ b/src/AtomicPrint.hs @@ -0,0 +1,32 @@ +{-# OPTIONS_GHC -fno-full-laziness -fno-cse #-} +module AtomicPrint ( + atomicPrint, atomicPrintS, + atomicPrintNoWait, atomicPrintNoWaitS, +) where + +import Control.Concurrent +import Control.Monad (void) +import Data.Text qualified as T +import Data.Text.IO.Utf8 qualified as T +import Data.Text (Text) +import System.IO.Unsafe (unsafePerformIO) + + +{-# NOINLINE mutex #-} +mutex :: MVar () +mutex = unsafePerformIO (newMVar ()) + +atomicPrintS :: String -> IO () +atomicPrintS = atomicPrint . T.pack + +atomicPrint :: Text -> IO () +atomicPrint text = + withMVar mutex $ \() -> + T.putStrLn text + +atomicPrintNoWaitS :: String -> IO () +atomicPrintNoWaitS = atomicPrintNoWait . T.pack + +-- | Does not block, so if you've masked exceptions, nothing will come through here +atomicPrintNoWait :: Text -> IO () +atomicPrintNoWait text = void $ forkIO $ atomicPrint text diff --git a/src/Cache.hs b/src/Cache.hs index 4694aa0..d272112 100644 --- a/src/Cache.hs +++ b/src/Cache.hs @@ -26,6 +26,10 @@ cacheAdd cache@(Cache maxsize ref _) key val = do cacheLookup :: Ord k => Cache k v -> k -> IO (Maybe v) cacheLookup (Cache _ ref _) key = fmap fst . Map.lookup key <$> readIORef ref +cacheInvalidate :: Ord k => Cache k v -> k -> IO () +cacheInvalidate (Cache _ ref _) key = + atomicModifyIORef' ref $ \mp -> (Map.delete key mp, ()) + -- Uses 2-random LRU (https://danluu.com/2choices-eviction/) cachePrune :: Ord k => Cache k v -> IO () cachePrune (Cache maxsize ref genref) = do go =<< atomicModifyIORef' genref splitGen diff --git a/src/Config.hs b/src/Config.hs index ee2909c..f50c8b6 100644 --- a/src/Config.hs +++ b/src/Config.hs @@ -5,11 +5,14 @@ {-# LANGUAGE TypeFamilies #-} module Config ( Config, Config'(..), ConfigStage(..), IfFull, - Channel(..), + Channel(..), prettyChannel, readConfig, enrichConfig, ) where +import Prelude hiding (foldl') -- exported since GHC 9.10 (base 4.20) + import Data.Char (isSpace) +import Data.List (foldl') import Data.Map.Strict (Map) import Data.Map.Strict qualified as Map import Data.Text (Text) @@ -70,6 +73,9 @@ type family IfFull stage a where data Channel = Channel { chanNetwork :: Text, chanChannel :: Text } deriving (Show, Eq, Ord) +prettyChannel :: Channel -> Text +prettyChannel (Channel nw ch) = nw <> T.pack "/" <> ch + readConfig :: FilePath -> IO (Config' User) readConfig path = foldl' parseLine initConfig . lines <$> readFile path diff --git a/src/Debounce.hs b/src/Debounce.hs new file mode 100644 index 0000000..7e9ccab --- /dev/null +++ b/src/Debounce.hs @@ -0,0 +1,75 @@ +{-# LANGUAGE OverloadedStrings #-} +module Debounce ( + Debounce, + makeDebounce, debounceClick, +) where + +import Control.Concurrent +import Control.Monad (when, void) +import Data.IORef +import Data.Text (Text) +-- import Data.Text qualified as T + +-- import AtomicPrint + + +-- delay in microseconds; current waiter +data Debounce = Debounce Text Int (IO ()) (IORef State) + +data State = Idle -- ^ no event recently + | Waiting -- ^ recent event, debouncing + | Running -- ^ delay expired, running action + | RunningRerun -- ^ still running action but another event appeared in the mean time + deriving (Show) + +-- State transition diagram: +-- +-- ,---------> IDLE +-- | | +-- | click |,-----. click +-- | V | +-- | WAITING >-' +-- | | ^.______________ +-- | threadDelay | \ (completed) +-- | V | +-- '---------< RUNNING -------> RUNNINGRERUN >-, +-- (completed) click ^.___/ click +-- +-- In Idle, no debounceWait thread is running. +-- In Waiting, debounceWait is in its threadDelay. +-- In Running & RunningRerun, debounceWait is in its 'action'. +-- There is always <=1 debounceWait thread at a time. + +makeDebounce :: Text -> Double -> IO () -> IO Debounce +makeDebounce description secs action = Debounce description (round (secs * 1e6)) action <$> newIORef Idle + +debounceClick :: Debounce -> IO () +debounceClick deb@(Debounce _descr _ _ ref) = do + (gowait, _origstate) <- atomicModifyIORef' ref $ \st -> case st of + Idle -> (Waiting, (True, st)) + Waiting -> (Waiting, (False, st)) + Running -> (RunningRerun, (False, st)) + RunningRerun -> (RunningRerun, (False, st)) + + -- atomicPrint $ "debounce[" <> _descr <> "] @ " <> T.show _origstate + + when gowait $ + debounceWait deb + +-- Precondition: current state is Waiting +debounceWait :: Debounce -> IO () +debounceWait deb@(Debounce _descr delay action ref) = + void . forkIO $ do + threadDelay delay + atomicModifyIORef' ref $ \case + Waiting -> (Running, ()) + st -> error $ "debounce: unexpected " ++ show st ++ ", should be Waiting" + -- atomicPrint $ "debounce[" <> _descr <> "] running" + action + gowait <- atomicModifyIORef' ref $ \case + Running -> (Idle, False) + RunningRerun -> (Waiting, True) + st -> error $ "debounce: unexpected " ++ show st ++ ", should be Running(Rerun)" + -- atomicPrint $ "debounce[" <> _descr <> "] done, " <> (if gowait then "again" else "now idle") + when gowait $ + debounceWait deb diff --git a/src/ImmutGrowVector.hs b/src/ImmutGrowVector.hs new file mode 100644 index 0000000..d36209d --- /dev/null +++ b/src/ImmutGrowVector.hs @@ -0,0 +1,43 @@ +module ImmutGrowVector where + +import Foreign.Storable +import Data.Vector.Storable qualified as VS + + +-- | Acts like an immutable storable vector, except that it's split in a long +-- prefix and a short suffix so that modifications at the end are cheap. If the +-- suffix gets longer (by appending elements), the suffix elements are promoted +-- to prefix elements once in a while, resulting in a big copy at those times. +data ImmutGrowVector a = ImmutGrowVector (VS.Vector a) (VS.Vector a) + deriving (Show) + +empty :: Storable a => ImmutGrowVector a +empty = ImmutGrowVector VS.empty VS.empty + +fromListN :: Storable a => Int -> [a] -> ImmutGrowVector a +fromListN n l + | n > 2 = + let (l1, l2) = splitAt (n - 2) l + in ImmutGrowVector (VS.fromListN (n - 2) l1) (VS.fromListN 2 l2) + | otherwise = + ImmutGrowVector VS.empty (VS.fromListN n l) + +(!) :: Storable a => ImmutGrowVector a -> Int -> a +ImmutGrowVector prefix suffix ! i + | i < VS.length prefix = prefix VS.! i + | otherwise = suffix VS.! (i - VS.length prefix) + +length :: Storable a => ImmutGrowVector a -> Int +length (ImmutGrowVector prefix suffix) = VS.length prefix + VS.length suffix + +set :: Storable a => ImmutGrowVector a -> Int -> a -> ImmutGrowVector a +set (ImmutGrowVector prefix suffix) idx value + | idx < VS.length prefix = error "ImmutGrowVector: mutation in slow part" + | otherwise = ImmutGrowVector prefix (suffix VS.// [(idx - VS.length prefix, value)]) + +append :: Storable a => ImmutGrowVector a -> a -> ImmutGrowVector a +append (ImmutGrowVector prefix suffix) value + | VS.length suffix < 8 = ImmutGrowVector prefix (suffix `VS.snoc` value) + | otherwise = + let n = VS.length suffix + in ImmutGrowVector (prefix <> VS.take (n - 1) suffix) (VS.drop (n - 1) suffix `VS.snoc` value) diff --git a/src/Index.hs b/src/Index.hs index 5486692..8c605d3 100644 --- a/src/Index.hs +++ b/src/Index.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} module Index ( Index, initIndex, @@ -7,24 +8,33 @@ module Index ( indexCalendar, ) where -import Data.Time.Calendar -import Control.Monad (forM) +import Prelude hiding (foldl') -- exported since GHC 9.10 (base 4.20) + +import Control.Concurrent +import Control.Monad (forM, forM_, when) import Data.ByteString qualified as BS import Data.ByteString (ByteString) import Data.Char (isDigit) -import Data.List (sort, scanl') +import Data.IORef +import Data.List (sort, scanl', foldl') import Data.Map.Strict (Map) import Data.Map.Strict qualified as Map +import Data.Maybe (catMaybes) import Data.Text qualified as T +import Data.Time import Data.Vector.Storable qualified as VS -import Data.Word (Word32) +import Data.Word import System.Clock qualified as Clock import System.Directory import System.FilePath +import System.FSNotify qualified as FN import Text.Read (readMaybe) +import AtomicPrint +import Debounce import Cache -import Config (Channel(..)) +import Config (Channel(..), prettyChannel) +import ImmutGrowVector qualified as IGV import Mmap import Util import ZNC @@ -32,13 +42,26 @@ import ZNC data ChanIndex = ChanIndex { ciStartDay :: !Day - , ciEndDay :: !Day -- ^ inclusive - , ciCountUntil :: !(VS.Vector Int) -- ^ number of events up to and /including/ this day + , ciCountUntil :: !(IGV.ImmutGrowVector Int) -- ^ number of events up to and /including/ this day , ciTotal :: !Int } deriving (Show) +-- | Inclusive. +ciEndDay :: ChanIndex -> Day +ciEndDay ci = + let ndays = IGV.length (ciCountUntil ci) + in if ndays > 0 + then (ndays - 1) `addDays'` ciStartDay ci + else error "ciEndDay: ChanIndex with zero days!" + +-- 1. The ChanIndex is contained in an IORef, which means that we can't have a +-- proper critical section around modifying it. This is fine because it's +-- all pure, so we just allocate a new ChanIndex each time. +-- 2. There being no critical section also means that having multiple writers +-- would be problematic. Fortunately, there is only one writer, so this +-- simple data structure is fine. data Index = Index !FilePath - !(Map Channel ChanIndex) + !(Map Channel (IORef ChanIndex)) !(Cache (Channel, YMD) (ByteString, VS.Vector Word32)) -- init @@ -49,33 +72,68 @@ initIndex basedir toimport = do c_start <- Clock.getTime Clock.Realtime items <- - fmap concat . forM (map chanNetwork toimport) $ \nwT -> do + forM toimport $ \(Channel nwT chT) -> do let nw = T.unpack nwT - forM [ch | Channel nwT' ch <- toimport, nwT' == nwT] $ \chT -> do - let ch = T.unpack chT - files <- listDirectory (basedir </> nw </> ch) - days <- fmap sort . forM files $ \fn -> do - let path = basedir </> nw </> ch </> fn - -- putStrLn $ "Parsing " ++ path ++ " (" ++ show (parseFileName fn) ++ " -> " ++ show (dateToDay (parseFileName fn)) ++ ")" - events <- parseLog <$> BS.readFile path - return (uncurry3 fromGregorian (parseFileName fn), length events) - let minday = minimum (map fst days) - maxday = maximum (map fst days) - ndays = fromIntegral @Integer @Int (diffDays maxday minday + 1) - -- traceM $ nw ++ "/" ++ ch ++ ": days = " ++ show [(toFileName (dayToYMD d), i) | (d, i) <- days] - let countScan = VS.fromListN ndays (drop 1 $ scanl' (+) 0 (makeCounts [minday..maxday] days)) - let ntotal = sum (map snd days) - return (Channel nwT chT - ,ChanIndex - { ciStartDay = minday - , ciEndDay = maxday + ch = T.unpack chT + files <- listDirectory (basedir </> nw </> ch) + days <- fmap sort . forM files $ \fn -> do + let path = basedir </> nw </> ch </> fn + date = case parseFileName fn of + Just ymd -> ymdToGregorian ymd + Nothing -> error $ "Log file with unexpected file name: " ++ path + -- atomicPrintS $ "Parsing " ++ path ++ " (" ++ show date ++ " -> " ++ show (dateToDay date) ++ ")" + events <- parseLog <$> BS.readFile path + return (uncurry3 fromGregorian date, length events) + let minday = minimum (map fst days) + maxday = maximum (map fst days) + ndays = fromIntegral @Integer @Int (diffDays maxday minday + 1) + -- traceM $ nw ++ "/" ++ ch ++ ": days = " ++ show [(toFileName (dayToYMD d), i) | (d, i) <- days] + let countScan = IGV.fromListN ndays (drop 1 $ scanl' (+) 0 (makeCounts [minday..maxday] days)) + ntotal = sum (map snd days) + chanindex <- newIORef $ + ChanIndex { ciStartDay = minday , ciCountUntil = countScan - , ciTotal = ntotal}) + , ciTotal = ntotal } + return (Channel nwT chT, chanindex) c_end <- Clock.getTime Clock.Realtime let timetakenSecs = fromIntegral @_ @Double (Clock.toNanoSecs (Clock.diffTimeSpec c_start c_end)) / 1e9 - putStrLn $ "Parsing/indexing logs in " ++ show basedir ++ " took " ++ show timetakenSecs ++ " secs" + atomicPrintS $ "Parsing/indexing logs in " ++ show basedir ++ " took " ++ show timetakenSecs ++ " secs" + + let index = Index basedir (Map.fromList items) cache + + _ <- forkIO $ do + FN.withManager $ \mgr -> do + let predicate FN.Added{} = True + predicate FN.Modified{} = True + predicate FN.Removed{} = True + predicate FN.WatchedDirectoryRemoved{} = True + predicate _ = False + + handle deb _ FN.Added{} = debounceClick deb + handle deb chan ev@FN.Modified{} = do + atomicPrintS $ "path = " ++ show (FN.eventPath ev) ++ " fn = " ++ takeFileName (FN.eventPath ev) + case parseFileName (takeFileName (FN.eventPath ev)) of + Just ymd -> do + cacheInvalidate cache (chan, ymd) + debounceClick deb + Nothing -> return () + handle _ chan ev@FN.Removed{} = do + atomicPrintS $ "path = " ++ show (FN.eventPath ev) ++ " fn = " ++ takeFileName (FN.eventPath ev) + case parseFileName (takeFileName (FN.eventPath ev)) of + Just ymd -> do + cacheInvalidate cache (chan, ymd) + Nothing -> return () + handle _ _ ev@FN.WatchedDirectoryRemoved{} = -- not sure what to do here... + atomicPrintS $ "Directory " ++ FN.eventPath ev ++ " removed!" + handle _ _ _ = undefined - return (Index basedir (Map.fromList items) cache) + forM_ toimport $ \chan@(Channel nw ch) -> do + deb <- makeDebounce (nw <> T.pack "/" <> ch) 5.0 (indexUpdateImport index chan) + _ <- FN.watchDir mgr (basedir </> T.unpack nw </> T.unpack ch) predicate (handle deb chan) + atomicPrintS $ "Watching " <> basedir </> T.unpack nw </> T.unpack ch + let loop = threadDelay (round (1e9 :: Double)) >> loop in loop + + return index makeCounts :: [Day] -> [(Day, Int)] -> [Int] makeCounts [] [] = [] @@ -86,37 +144,86 @@ makeCounts (d:ds) ents@((d',n):ents') | otherwise = error $ "makeCounts: duplicate entry? " ++ show (d, toGregorian d, d', toGregorian d') makeCounts (_:ds) [] = 0 : makeCounts ds [] +indexUpdateImport :: Index -> Channel -> IO () +indexUpdateImport index@(Index _ mp _) chan = do + let ciRef = mp Map.! chan + ci <- readIORef ciRef + today <- utctDay <$> getCurrentTime + let todayish = max today (ciEndDay ci) + + readCounts <- fmap catMaybes . forM [-1 .. 1] $ \increment -> do + let day = increment `addDays` todayish + ymd = dayToYMD day + dayidx = fromIntegral @Integer @Int (day `diffDays` ciStartDay ci) + + loadDay index chan ymd >>= \case + Just (_bs, lineStarts) -> return (Just (dayidx, VS.length lineStarts)) + Nothing -> return Nothing + + atomicPrint $ "Update import for " <> prettyChannel chan <> ": " <> T.show readCounts <> " (len = " <> T.show (IGV.length (ciCountUntil ci)) <> ")" + + when (not (null readCounts)) $ + atomicModifyIORef' ciRef $ \ci' -> + let ciRes = foldl' (\ci2 (dayidx, count) -> recordCount ci2 dayidx count) ci' readCounts + in (ciRes, ()) + where + -- | How many events do we already have on this day? + eventsOnDayIdx :: ChanIndex -> Int -> Int + eventsOnDayIdx ci dayidx + | dayidx == 0 = scan IGV.! 0 + | dayidx < IGV.length scan = scan IGV.! dayidx - scan IGV.! (dayidx - 1) + | otherwise = 0 + where scan = ciCountUntil ci + + recordCount :: ChanIndex -> Int -> Int -> ChanIndex + recordCount ci dayidx count + | count <= alreadyHave = ci + | otherwise = + let ciExt = -- Ensure that there is space in the scan vector for our record + let nExtraDays = dayidx - IGV.length (ciCountUntil ci) + 1 + lastCount | IGV.length (ciCountUntil ci) == 0 = 0 + | otherwise = ciCountUntil ci IGV.! (IGV.length (ciCountUntil ci) - 1) + in ci { ciCountUntil = iterate (`IGV.append` lastCount) (ciCountUntil ci) !! nExtraDays } + addFrom di n vec + | di < IGV.length vec = addFrom (di + 1) n (IGV.set vec di (vec IGV.! di + n)) + | otherwise = vec + in ciExt { ciCountUntil = addFrom dayidx (count - alreadyHave) (ciCountUntil ciExt) + , ciTotal = ciTotal ciExt + count - alreadyHave } + where alreadyHave = eventsOnDayIdx ci dayidx + -- search -- | Returns proper lazy list of events. Reading the files happens strictly, -- but parsing the events happens lazily. indexGetEventsLinear :: Index -> Channel -> Int -> Int -> IO [(YMDHMS, Event)] -indexGetEventsLinear index@(Index _ mp _) chan from count - | from + count < 0 = return [] - | from >= ciTotal ci = return [] - | otherwise = do - let scan = ciCountUntil ci - day1idx = binSearch scan from +indexGetEventsLinear index@(Index _ mp _) chan from count = do + ci <- readIORef (mp Map.! chan) + if from + count < 0 || from >= ciTotal ci + then return [] + else go ci (ciCountUntil ci) + where + go ci scan = do + let day1idx = binSearch scan from day1 = day1idx `addDays'` ciStartDay ci neventBeforeDay1 | day1idx == 0 = 0 - | otherwise = scan VS.! (day1idx - 1) - neventInclDay1 = scan VS.! day1idx + | otherwise = scan IGV.! (day1idx - 1) + neventInclDay1 = scan IGV.! day1idx neventOnDay1 = neventInclDay1 - neventBeforeDay1 off1 = from - neventBeforeDay1 -- day2 is inclusive, off2 is exclusive (day2, off2) | from + count <= neventInclDay1 = (day1, off1 + count) - | day1idx == VS.length scan - 1 = (day1, neventOnDay1) + | day1idx == IGV.length scan - 1 = (day1, neventOnDay1) | otherwise = let loop day2idx nbefore nseen | nseen + nOnDay2 >= count = (day2idx `addDays'` ciStartDay ci, count - nseen) - | day2idx == VS.length scan - 1 = + | day2idx == IGV.length scan - 1 = (day2idx `addDays'` ciStartDay ci, nOnDay2) | otherwise = - loop (day2idx + 1) (scan VS.! day2idx) (nseen + nOnDay2) + loop (day2idx + 1) (scan IGV.! day2idx) (nseen + nOnDay2) where - nOnDay2 = scan VS.! day2idx - nbefore + nOnDay2 = scan IGV.! day2idx - nbefore in loop (day1idx + 1) (neventBeforeDay1 + neventOnDay1) (neventOnDay1 - off1) -- traceM ("ci = " ++ show ci) @@ -129,26 +236,24 @@ indexGetEventsLinear index@(Index _ mp _) chan from count -- traceM ("day2 = " ++ show day2) -- traceM ("off2 = " ++ show off2) evs <- forM (zip [day1 .. day2] [day1idx..]) $ \(day, dayidx) -> - let neventsOnDay | dayidx == 0 = scan VS.! 0 - | otherwise = scan VS.! dayidx - scan VS.! (dayidx - 1) - parse + let neventsOnDay | dayidx == 0 = scan IGV.! 0 + | otherwise = scan IGV.! dayidx - scan IGV.! (dayidx - 1) + range | day == day1 = if day1 == day2 - then parseLogRange (off1, Just (off2 - off1)) - else parseLogRange (off1, Nothing) - | day == day2 = parseLogRange (0, Just off2) - | otherwise = \_lineStarts -> parseLog - (y, month, d) = toGregorian day - ymd = YMD (fromIntegral y) (fromIntegral month) (fromIntegral d) + then (off1, Just (off2 - off1)) + else (off1, Just (neventsOnDay - off1)) + | day == day2 = (0, Just off2) + | otherwise = (0, Just neventsOnDay) + ymd = ymdFromGregorian (toGregorian day) fixDate = map $ \(tod, ev) -> (YMDHMS ymd tod, ev) in if neventsOnDay > 0 - then do (bs, lineStarts) <- loadDay index chan ymd - return (fixDate (parse lineStarts bs)) + then loadDay index chan ymd >>= \case + Just (bs, lineStarts) -> return (fixDate (parseLogRange range lineStarts bs)) + Nothing -> error $ "events on day " ++ show (dayToYMD day) ++ " but no file" else return [] return (concat evs) - where - ci = mp Map.! chan -- | The vector must be sorted. -- Returns index of the first element x such that needle < x. If there is no @@ -156,13 +261,13 @@ indexGetEventsLinear index@(Index _ mp _) chan from count -- the length of vec. -- -- TODO: proportional binary search -binSearch :: VS.Vector Int -> Int -> Int +binSearch :: IGV.ImmutGrowVector Int -> Int -> Int binSearch vec needle - | veclen == 0 || vec VS.! (veclen - 1) < needle = veclen - | needle < vec VS.! 0 = 0 + | veclen == 0 || vec IGV.! (veclen - 1) < needle = veclen + | needle < vec IGV.! 0 = 0 | otherwise = go 0 (veclen - 1) where - veclen = VS.length vec + veclen = IGV.length vec -- Invariant: vec[lo] <= needle < vec[hi] go :: Int -> Int -> Int @@ -170,62 +275,71 @@ binSearch vec needle | lo + 1 == hi = hi | otherwise = let mid = lo + (hi - lo) `div` 2 - in if vec VS.! mid <= needle + in if vec IGV.! mid <= needle then go mid hi else go lo mid -- other methods -indexNumEvents :: Index -> Channel -> Int -indexNumEvents (Index _ mp _) chan = ciTotal (mp Map.! chan) +indexNumEvents :: Index -> Channel -> IO Int +indexNumEvents (Index _ mp _) chan = ciTotal <$> readIORef (mp Map.! chan) -indexCalendar :: Index -> Channel -> ((Day, Day), [Int]) -indexCalendar (Index _ mp _) chan = - let ci = mp Map.! chan - in ((ciStartDay ci, ciEndDay ci) - ,[if i == 0 - then ciCountUntil ci VS.! 0 - else ciCountUntil ci VS.! i - ciCountUntil ci VS.! (i - 1) - | i <- [0 .. VS.length (ciCountUntil ci) - 1]]) +indexCalendar :: Index -> Channel -> IO ((Day, Day), [Int]) +indexCalendar (Index _ mp _) chan = do + ci <- readIORef (mp Map.! chan) + let scan = ciCountUntil ci + return ((ciStartDay ci, ciEndDay ci) + ,[if i == 0 + then scan IGV.! 0 + else scan IGV.! i - scan IGV.! (i - 1) + | i <- [0 .. IGV.length scan - 1]]) indexGetEventsDay :: Index -> Channel -> Day -> IO [(HMS, Event)] -indexGetEventsDay index@(Index _ mp _) chan day - | day < ciStartDay ci || day > ciEndDay ci = return [] - | otherwise = do - (bs, _lineStarts) <- loadDay index chan (dayToYMD day) - return (parseLog bs) - where - ci = mp Map.! chan +indexGetEventsDay index@(Index _ mp _) chan day = do + ci <- readIORef (mp Map.! chan) + if day < ciStartDay ci || day > ciEndDay ci + then return [] + else loadDay index chan (dayToYMD day) >>= \case + Just (bs, _lineStarts) -> return (parseLog bs) + Nothing -> return [] -- if the file doesn't exist, there ain't no events -- utilities -loadDay :: Index -> Channel -> YMD -> IO (ByteString, VS.Vector Word32) +loadDay :: Index -> Channel -> YMD -> IO (Maybe (ByteString, VS.Vector Word32)) loadDay (Index basedir _ cache) chan@(Channel network channel) ymd = do cacheLookup cache (chan, ymd) >>= \case Nothing -> do - bs <- mapFile (basedir </> T.unpack network </> T.unpack channel </> toFileName ymd) - let lineStarts = preparseLog bs - cacheAdd cache (chan, ymd) (bs, lineStarts) - return (bs, lineStarts) - Just (bs, lineStarts) -> return (bs, lineStarts) + mapFile (basedir </> T.unpack network </> T.unpack channel </> toFileName ymd) >>= \case + Just bs -> do + let lineStarts = preparseLog bs + cacheAdd cache (chan, ymd) (bs, lineStarts) + return (Just (bs, lineStarts)) + Nothing -> return Nothing -- file didn't exist + Just (bs, lineStarts) -> return (Just (bs, lineStarts)) -parseFileName :: String -> (Year, MonthOfYear, DayOfMonth) +parseFileName :: String -> Maybe YMD parseFileName name | (y, '-' : s1) <- splitAt 4 name , (m, '-' : s2) <- splitAt 2 s1 , (d, ".log") <- splitAt 2 s2 , all isDigit y, all isDigit m, all isDigit d - , let y' = read' y ; m' = read' m ; d' = read' d + , let y' = read' @Int y ; m' = read' @Int m ; d' = read' @Int d , 1 <= m', m' <= 12 - , 1 <= d', d' <= gregorianMonthLength y' m' - = (y', m', d') + , 1 <= d', d' <= gregorianMonthLength (fromIntegral @Int @Integer y') m' + = Just (YMD y' (fromIntegral @Int @Word8 m') (fromIntegral @Int @Word8 d')) | otherwise - = error $ "Invalid ZNC log file name: " ++ name + = Nothing where read' :: Read a => String -> a read' s = case readMaybe s of Just r -> r Nothing -> error $ "No read: " ++ show s +ymdToGregorian :: YMD -> (Year, MonthOfYear, DayOfMonth) +ymdToGregorian (YMD y m d) = (fromIntegral y, fromIntegral m, fromIntegral d) + +ymdFromGregorian :: (Year, MonthOfYear, DayOfMonth) -> YMD +ymdFromGregorian (y, m, d) = YMD (fromIntegral y) (fromIntegral m) (fromIntegral d) + toFileName :: YMD -> String toFileName ymd = ymdToString ymd ++ ".log" diff --git a/src/Main.hs b/src/Main.hs index bb5bdf7..c516c5f 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -18,7 +18,7 @@ import Data.Map.Strict qualified as Map import Data.Text (Text) import Data.Text qualified as T import Data.Text.Encoding qualified as TE -import Data.Text.IO qualified as T +import Data.Text.IO.Utf8 qualified as T import Data.Time (Day, fromGregorianValid) import Network.Wai import Network.HTTP.Types @@ -34,6 +34,7 @@ import Text.Read (readMaybe) -- import Debug.Trace +import AtomicPrint import Calendar import Config import Index @@ -74,7 +75,8 @@ pageLog conf pages index req alias = case econfAlias2Chan conf Map.!? alias of Nothing -> page404 "Channel not found" Just chan -> do - let npages = (indexNumEvents index chan + numPerPage - 1) `div` numPerPage + numEvents <- indexNumEvents index chan + let npages = (numEvents + numPerPage - 1) `div` numPerPage curpage | Just (readMaybe . BS8.unpack -> Just pg) <- query "page" = min npages (max 1 pg) | otherwise = npages ntoleft = min 5 (curpage - 1) @@ -85,7 +87,7 @@ pageLog conf pages index req alias = ["network" ~> chanNetwork chan ,"channel" ~> chanChannel chan ,"alias" ~> alias - ,"totalevents" ~> renderLargeNumber (indexNumEvents index chan) + ,"totalevents" ~> renderLargeNumber numEvents ,"picker" ~> M.object ["prevpage" ~> if curpage > 1 then Just (curpage - 1) else Nothing ,"nextpage" ~> if curpage < npages then Just (curpage + 1) else Nothing @@ -181,7 +183,7 @@ pageCalendar conf pages index alias = case econfAlias2Chan conf Map.!? alias of Nothing -> page404 "Channel not found" Just chan -> do - let ((startDay, endDay), counts) = indexCalendar index chan + ((startDay, endDay), counts) <- indexCalendar index chan return $ sendPage200 pages "calendar" $ M.object ["network" ~> chanNetwork chan ,"channel" ~> chanChannel chan @@ -254,7 +256,7 @@ mainServe confpath = do settings = defaultSettings & setFork checkedFork & setPort (confPort config) - putStrLn $ "Listening on port " ++ show (confPort config) + atomicPrintS $ "Listening on port " ++ show (confPort config) runSettings settings $ \req respond -> case pathInfo req of [] -> respond =<< pageIndex config pages diff --git a/src/Mmap.hs b/src/Mmap.hs index 94f5c49..d62dcab 100644 --- a/src/Mmap.hs +++ b/src/Mmap.hs @@ -9,6 +9,9 @@ import Foreign.C.Types import System.Posix.IO import System.Posix.Types +import AtomicPrint + + foreign import ccall "ircbrowse_mmap" c_mmap :: CInt -> Ptr CSize -> IO (Ptr Word8) -- fd out length @@ -17,36 +20,35 @@ foreign import ccall "ircbrowse_munmap" c_munmap :: Ptr Word8 -> CSize -> IO () -- addr length -mapFile :: FilePath -> IO ByteString +-- | Returns Nothing if the open(2) fails. +mapFile :: FilePath -> IO (Maybe ByteString) mapFile path = mask_ $ do - -- open can fail without repercussions - Fd fd <- openFd path ReadOnly defaultFileFlags - - -- do the mmap; if it fails, close the file, ignoring exceptions there - (addr, filelen) <- - catchNoPropagate @SomeException - (alloca $ \lengthp -> do - addr <- c_mmap fd lengthp - lengthval <- peek lengthp - return (addr, lengthval)) - (\e -> do - catch @SomeException (closeFd (Fd fd)) - (\_ -> return ()) - rethrowIO e) + -- if open fails, we simply return Nothing, and nothing has happened yet so that's fine + try @IOException (openFd path ReadOnly defaultFileFlags) >>= \case + Left _ -> return Nothing + Right (Fd fd) -> do + -- do the mmap; if it fails, close the file, ignoring exceptions there + (addr, filelen) <- + onException + (alloca $ \lengthp -> do + addr <- c_mmap fd lengthp + lengthval <- peek lengthp + return (addr, lengthval)) + (catch @SomeException (closeFd (Fd fd)) + (\_ -> return ())) - -- mmap succeeded, close the file - catchNoPropagate @SomeException (closeFd (Fd fd)) - (\e -> do - -- putStrLn ("[munmap " ++ show addr ++ " as close(2) handler]") - c_munmap addr filelen - rethrowIO e) + -- mmap succeeded, close the file; if closing fails, something is badly wrong, so unmap, and if that throws an exception propagate that + onException + (closeFd (Fd fd)) + (do atomicPrintNoWaitS ("[munmap " ++ show addr ++ " as close(2) exception handler]") + c_munmap addr filelen) - -- close succeeded, we're safe now since bytestring construction will not - -- fail (and no exceptions are coming from outside as we're masked) - if addr == nullPtr - then fail "mapFile: could not mmap" - else do bs <- BS.unsafePackCStringFinalizer addr (fromIntegral @CSize @Int filelen) - (do -- putStrLn ("[munmap " ++ show addr ++ "]") - c_munmap addr filelen) - -- putStrLn ("[mmap " ++ show addr ++ "]") - return bs + -- close succeeded, we're safe now since bytestring construction will not + -- fail (and no exceptions are coming from outside as we're masked) + if addr == nullPtr + then fail "mapFile: could not mmap" + else do bs <- BS.unsafePackCStringFinalizer addr (fromIntegral @CSize @Int filelen) + (do atomicPrintNoWaitS ("[munmap " ++ show addr ++ "]") + c_munmap addr filelen) + atomicPrintNoWaitS ("[mmap " ++ show addr ++ "]") + return (Just bs) @@ -40,6 +40,8 @@ data Event | ParseError deriving (Show) +-- | Returned vector has one entry for each line in the file, excepting the +-- empty "line" after the final newline, if any. preparseLog :: ByteString -> VS.Vector Word32 preparseLog = VS.fromList . findLineStarts 0 where |
