summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Cache.hs110
-rw-r--r--src/Index.hs69
-rw-r--r--src/Mmap.hs8
-rw-r--r--src/Util.hs5
-rw-r--r--src/ZNC.hs61
5 files changed, 129 insertions, 124 deletions
diff --git a/src/Cache.hs b/src/Cache.hs
index 53d2185..4694aa0 100644
--- a/src/Cache.hs
+++ b/src/Cache.hs
@@ -1,87 +1,43 @@
module Cache where
-import Control.Concurrent.STM
-import Control.Monad (forM_)
-import Data.ByteString (ByteString)
-import Data.ByteString qualified as BS
+import Control.Monad (when)
+import Data.IORef
import Data.Map.Strict (Map)
import Data.Map.Strict qualified as Map
+import System.Clock qualified as Clock
+import System.Random
-import ZNC
+data Cache k v = Cache Int (IORef (Map k (v, Clock.TimeSpec))) (IORef StdGen)
--- value, previous (more recently accessed), next (less recently accessed)
-data LRUNode a = LRUNode a (TVar (Maybe (LRUNode a))) (TVar (Maybe (LRUNode a)))
+cacheNew :: Int -> IO (Cache k v)
+cacheNew maxsize = Cache maxsize <$> newIORef Map.empty <*> (newIORef =<< initStdGen)
--- head (most recent), last (least recent)
-data LRUChain a = LRUChain (TVar (Maybe (LRUNode a))) (TVar (Maybe (LRUNode a)))
+cacheAdd :: (Ord k, Show k) => Cache k v -> k -> v -> IO ()
+cacheAdd cache@(Cache maxsize ref _) key val = do
+ now <- Clock.getTime Clock.Monotonic
+ sz <- atomicModifyIORef' ref $ \mp ->
+ case fst (Map.insertLookupWithKey (\_ _ _ -> (val, now)) key (val, now) mp) of
+ Nothing -> (Map.insert key (val, now) mp, Map.size mp + 1)
+ Just _ -> (Map.insert key (val, now) mp, 0)
+ when (sz > maxsize) $
+ cachePrune cache
-lruchainNew :: IO (LRUChain a)
-lruchainNew = LRUChain <$> newTVarIO Nothing <*> newTVarIO Nothing
+cacheLookup :: Ord k => Cache k v -> k -> IO (Maybe v)
+cacheLookup (Cache _ ref _) key = fmap fst . Map.lookup key <$> readIORef ref
-lruchainAdd :: LRUChain a -> a -> STM (LRUNode a)
-lruchainAdd (LRUChain hd tl) x = do
- mhdnode <- readTVar hd
- case mhdnode of
- Nothing -> do -- chain empty
- node <- LRUNode x <$> newTVar Nothing <*> newTVar Nothing
- writeTVar hd (Just node)
- writeTVar tl (Just node)
- return node
- Just hdnode -> do
- node <- LRUNode x <$> newTVar Nothing <*> newTVar (Just hdnode)
- writeTVar hd (Just node)
- return node
-
-lruchainBump :: LRUChain a -> LRUNode a -> STM ()
-lruchainBump (LRUChain hd _) node@(LRUNode _ prevvar nextvar) = do
- mprev <- readTVar prevvar
- case mprev of
- Nothing -> return () -- already most recent
- Just (LRUNode _ _ prevnextvar) -> do
- -- remove node from chain where it is now
- mnext <- readTVar nextvar
- writeTVar prevnextvar mnext
- case mnext of
- Nothing -> return ()
- Just (LRUNode _ nextprevvar _) -> writeTVar nextprevvar mprev
- -- add node to chain at head
- writeTVar nextvar =<< readTVar hd
- writeTVar hd (Just node)
-
-lruchainEvict :: LRUChain a -> STM (Maybe a)
-lruchainEvict (LRUChain _ tl) = do
- mtlnode <- readTVar tl
- case mtlnode of
- Nothing -> return Nothing
- Just (LRUNode x prevvar _) -> do
- mprev <- readTVar prevvar
- writeTVar tl mprev
- case mprev of
- Nothing -> return ()
- Just (LRUNode _ _ prevnextvar) -> writeTVar prevnextvar Nothing
- return (Just x)
-
--- maxsize, elements in cache, linked list
-data LRU a = LRU Int (TVar (Map a (LRUNode a))) (LRUChain a)
-
-lruNew :: Ord a => Int -> IO (LRU a)
-lruNew maxsize = LRU maxsize <$> newTVarIO mempty <*> lruchainNew
-
--- | If a value got evicted, it is returned
-lruBump :: Ord a => LRU a -> a -> IO (Maybe a)
-lruBump (LRU maxsize mapvar chain) value = atomically $ do
- mp <- readTVar mapvar
- case Map.lookup value mp of
- Nothing -> do
- node <- lruchainAdd chain value
- writeTVar mapvar $! Map.insert value node mp
- if Map.size mp >= maxsize
- then do
- mret <- lruchainEvict chain
- forM_ mret $ \ret -> writeTVar mapvar $! Map.delete ret mp
- return mret
- else return Nothing
- Just node -> do
- lruchainBump chain node
- return Nothing
+-- Uses 2-random LRU (https://danluu.com/2choices-eviction/)
+cachePrune :: Ord k => Cache k v -> IO ()
+cachePrune (Cache maxsize ref genref) = do go =<< atomicModifyIORef' genref splitGen
+ where
+ go gen0 = do
+ (gen', sz') <- atomicModifyIORef' ref $ \mp ->
+ let sz = Map.size mp
+ (idx1, gen1) = uniformR (0, sz - 1) gen0 ; (_, (_, tm1)) = Map.elemAt idx1 mp
+ (idx2, gen2) = uniformR (0, sz - 1) gen1 ; (_, (_, tm2)) = Map.elemAt idx2 mp
+ oldestIdx = if tm1 < tm2 then idx1 else idx2
+ in if sz > maxsize
+ then (Map.deleteAt oldestIdx mp, (gen2, sz - 1))
+ else (mp, (gen0, sz))
+ when (sz' > maxsize) $
+ go gen'
diff --git a/src/Index.hs b/src/Index.hs
index 4de4c6c..5754e33 100644
--- a/src/Index.hs
+++ b/src/Index.hs
@@ -5,22 +5,22 @@ module Index (
indexNumEvents,
) where
-import Chronos hiding (day)
+import Data.Time.Calendar
import Control.Monad (forM)
import Data.ByteString qualified as BS
+import Data.ByteString (ByteString)
import Data.Char (isDigit)
import Data.List (sort, scanl')
import Data.Map.Strict (Map)
import Data.Map.Strict qualified as Map
import Data.Text qualified as T
import Data.Vector.Storable qualified as VS
+import Data.Word (Word32)
import System.Directory
import System.FilePath
import Text.Read (readMaybe)
-import Torsor (add, difference)
-
-import Debug.Trace
+import Cache
import Config (Channel(..))
import Mmap
import Util
@@ -34,13 +34,14 @@ data ChanIndex = ChanIndex
, ciTotal :: Int }
deriving (Show)
-data Index = Index FilePath (Map Channel ChanIndex)
- deriving (Show)
+data Index = Index FilePath (Map Channel ChanIndex) (Cache (Channel, YMD) (ByteString, VS.Vector Word32))
-- init
initIndex :: FilePath -> [Channel] -> IO Index
initIndex basedir toimport = do
+ cache <- cacheNew 100
+
items <-
fmap concat . forM (map chanNetwork toimport) $ \nwT -> do
let nw = T.unpack nwT
@@ -51,10 +52,10 @@ initIndex basedir toimport = do
let path = basedir </> nw </> ch </> fn
-- putStrLn $ "Parsing " ++ path ++ " (" ++ show (parseFileName fn) ++ " -> " ++ show (dateToDay (parseFileName fn)) ++ ")"
events <- parseLog <$> BS.readFile path
- return (dateToDay (parseFileName fn), length events)
+ return (uncurry3 fromGregorian (parseFileName fn), length events)
let minday = minimum (map fst days)
maxday = maximum (map fst days)
- ndays = difference maxday minday + 1
+ ndays = fromIntegral @Integer @Int (diffDays maxday minday + 1)
-- traceM $ nw ++ "/" ++ ch ++ ": days = " ++ show [(toFileName (dayToDate d), i) | (d, i) <- days]
let countScan = VS.fromListN ndays (drop 1 $ scanl' (+) 0 (makeCounts [minday..maxday] days))
let ntotal = sum (map snd days)
@@ -64,7 +65,7 @@ initIndex basedir toimport = do
, ciEndDay = maxday
, ciCountUntil = countScan
, ciTotal = ntotal})
- return (Index basedir (Map.fromList items))
+ return (Index basedir (Map.fromList items) cache)
makeCounts :: [Day] -> [(Day, Int)] -> [Int]
makeCounts [] [] = []
@@ -72,7 +73,7 @@ makeCounts [] _ = error "makeCounts: more entries than days in range"
makeCounts (d:ds) ents@((d',n):ents')
| d == d' = n : makeCounts ds ents'
| d < d' = 0 : makeCounts ds ents
- | otherwise = error $ "makeCounts: duplicate entry? " ++ show (d, dayToDate d, d', dayToDate d')
+ | otherwise = error $ "makeCounts: duplicate entry? " ++ show (d, toGregorian d, d', toGregorian d')
makeCounts (_:ds) [] = 0 : makeCounts ds []
-- search
@@ -80,13 +81,13 @@ makeCounts (_:ds) [] = 0 : makeCounts ds []
-- | Returns proper lazy list of events. Reading the files happens strictly,
-- but parsing the events happens lazily.
indexGetEventsLinear :: Index -> Channel -> Int -> Int -> IO [(YMDHMS, Event)]
-indexGetEventsLinear (Index basedir mp) chan@(Channel network channel) from count
+indexGetEventsLinear (Index basedir mp cache) chan@(Channel network channel) from count
| from + count < 0 = return []
| from >= ciTotal ci = return []
| otherwise = do
let scan = ciCountUntil ci
day1idx = binSearch scan from
- day1 = day1idx `add` ciStartDay ci
+ day1 = day1idx `addDays'` ciStartDay ci
neventBeforeDay1 | day1idx == 0 = 0
| otherwise = scan VS.! (day1idx - 1)
neventInclDay1 = scan VS.! day1idx
@@ -99,9 +100,9 @@ indexGetEventsLinear (Index basedir mp) chan@(Channel network channel) from coun
| otherwise =
let loop day2idx nbefore nseen
| nseen + nOnDay2 >= count =
- (day2idx `add` ciStartDay ci, count - nseen)
+ (day2idx `addDays'` ciStartDay ci, count - nseen)
| day2idx == VS.length scan - 1 =
- (day2idx `add` ciStartDay ci, nOnDay2)
+ (day2idx `addDays'` ciStartDay ci, nOnDay2)
| otherwise =
loop (day2idx + 1) (scan VS.! day2idx) (nseen + nOnDay2)
where
@@ -126,14 +127,19 @@ indexGetEventsLinear (Index basedir mp) chan@(Channel network channel) from coun
then parseLogRange (off1, Just (off2 - off1))
else parseLogRange (off1, Nothing)
| day == day2 = parseLogRange (0, Just off2)
- | otherwise = parseLog
- Date (Year y) (Month monthm1) (DayOfMonth d) = dayToDate day
- fixDate = map $ \(tod, ev) ->
- (YMDHMS (YMD y (fromIntegral monthm1 + 1) (fromIntegral d)) tod
- ,ev)
+ | otherwise = \_lineStarts -> parseLog
+ (y, month, d) = toGregorian day
+ ymd = YMD (fromIntegral y) (fromIntegral month) (fromIntegral d)
+ fixDate = map $ \(tod, ev) -> (YMDHMS ymd tod, ev)
in if neventsOnDay > 0
- then fixDate . parse <$>
- mapFile (basedir </> T.unpack network </> T.unpack channel </> toFileName (dayToDate day))
+ then do (bs, lineStarts) <- cacheLookup cache (chan, ymd) >>= \case
+ Nothing -> do
+ bs <- mapFile (basedir </> T.unpack network </> T.unpack channel </> toFileName (toGregorian day))
+ let lineStarts = preparseLog bs
+ cacheAdd cache (chan, ymd) (bs, lineStarts)
+ return (bs, lineStarts)
+ Just (bs, lineStarts) -> return (bs, lineStarts)
+ return (fixDate (parse lineStarts bs))
else return []
return (concat evs)
@@ -166,20 +172,20 @@ binSearch vec needle
-- other methods
indexNumEvents :: Index -> Channel -> Int
-indexNumEvents (Index _ mp) chan = ciTotal (mp Map.! chan)
+indexNumEvents (Index _ mp _) chan = ciTotal (mp Map.! chan)
-- utilities
-parseFileName :: String -> Date
+parseFileName :: String -> (Year, MonthOfYear, DayOfMonth)
parseFileName name
| (y, '-' : s1) <- splitAt 4 name
, (m, '-' : s2) <- splitAt 2 s1
, (d, ".log") <- splitAt 2 s2
, all isDigit y, all isDigit m, all isDigit d
- , let y' = read' y ; m' = read' m - 1 ; d' = read' d
- , 0 <= m', m' < 12
- , 1 <= d', d' <= daysInMonth (isLeapYear (Year y')) (Month m')
- = Date (Year y') (Month m') (DayOfMonth d')
+ , let y' = read' y ; m' = read' m ; d' = read' d
+ , 1 <= m', m' <= 12
+ , 1 <= d', d' <= gregorianMonthLength y' m'
+ = (y', m', d')
| otherwise
= error $ "Invalid ZNC log file name: " ++ name
where
@@ -188,6 +194,9 @@ parseFileName name
Just r -> r
Nothing -> error $ "No read: " ++ show s
-toFileName :: Date -> String
-toFileName (Date (Year y) (Month m) (DayOfMonth d)) =
- pad '0' 4 y ++ '-' : pad '0' 2 (m + 1) ++ '-' : pad '0' 2 d ++ ".log"
+toFileName :: (Year, MonthOfYear, DayOfMonth) -> String
+toFileName (y, m, d) =
+ pad '0' 4 y ++ '-' : pad '0' 2 m ++ '-' : pad '0' 2 d ++ ".log"
+
+addDays' :: Int -> Day -> Day
+addDays' = addDays . fromIntegral
diff --git a/src/Mmap.hs b/src/Mmap.hs
index bfe6042..94f5c49 100644
--- a/src/Mmap.hs
+++ b/src/Mmap.hs
@@ -45,6 +45,8 @@ mapFile path = mask_ $ do
-- fail (and no exceptions are coming from outside as we're masked)
if addr == nullPtr
then fail "mapFile: could not mmap"
- else BS.unsafePackCStringFinalizer addr (fromIntegral @CSize @Int filelen)
- (do -- putStrLn ("[munmap " ++ show addr ++ "]")
- c_munmap addr filelen)
+ else do bs <- BS.unsafePackCStringFinalizer addr (fromIntegral @CSize @Int filelen)
+ (do -- putStrLn ("[munmap " ++ show addr ++ "]")
+ c_munmap addr filelen)
+ -- putStrLn ("[mmap " ++ show addr ++ "]")
+ return bs
diff --git a/src/Util.hs b/src/Util.hs
index ca31258..1e10eec 100644
--- a/src/Util.hs
+++ b/src/Util.hs
@@ -17,7 +17,7 @@ data YMDHMS = YMDHMS {-# UNPACK #-} !YMD
data YMD = YMD {-# UNPACK #-} !Int
{-# UNPACK #-} !Word8 -- ^ 1-based
{-# UNPACK #-} !Word8
- deriving (Show)
+ deriving (Show, Eq, Ord)
-- | Time-of-day in seconds, in unspecified time zone
data HMS = HMS {-# UNPACK #-} !Word8
@@ -29,3 +29,6 @@ pad :: Show a => Char -> Int -> a -> String
pad c w val =
let s = show val
in replicate (w - length s) c ++ s
+
+uncurry3 :: (a -> b -> c -> d) -> (a, b, c) -> d
+uncurry3 f (x, y, z) = f x y z
diff --git a/src/ZNC.hs b/src/ZNC.hs
index 35eafe0..c23ffe2 100644
--- a/src/ZNC.hs
+++ b/src/ZNC.hs
@@ -2,6 +2,7 @@
module ZNC (
-- Log(..),
Nick, Event(..),
+ preparseLog,
parseLog, parseLogRange,
) where
@@ -14,15 +15,13 @@ import Data.Char (ord)
import Data.Either (fromRight)
import Data.Text (Text)
import Data.Text.Encoding qualified as TE
--- import Data.Vector qualified as V
--- import Data.Vector (Vector)
-import Data.Word (Word8)
+import Data.Vector.Storable qualified as VS
+import Data.Word (Word8, Word32)
-import Util
+import Debug.Trace
+import Util
--- newtype Log = Log (Vector (TOD, Event))
--- deriving (Show)
type Nick = Text
@@ -41,20 +40,56 @@ data Event
| ParseError
deriving (Show)
+preparseLog :: ByteString -> VS.Vector Word32
+preparseLog = VS.fromList . findLineStarts 0
+ where
+ findLineStarts :: Int -> ByteString -> [Word32]
+ findLineStarts off bs =
+ case BS.findIndex (== 10) (BS.drop off bs) of
+ Nothing | BS.length bs == off -> []
+ | otherwise -> [fromIntegral off]
+ Just i -> fromIntegral off : findLineStarts (off + i + 1) bs
+
-- these INLINE/NOINLINE pragmas are optimisation without testing or profiling, have fun
{-# INLINE parseLog #-}
parseLog :: ByteString -> [(HMS, Event)]
-parseLog = parseLogRange (0, Nothing)
+parseLog = map parseLogLine . BS8.lines
-- (start line, number of lines (default to rest of file))
{-# INLINE parseLogRange #-}
-parseLogRange :: (Int, Maybe Int) -> ByteString -> [(HMS, Event)]
-parseLogRange (startln, mnumln) =
- -- Log . V.fromList .
- map go . maybe id take mnumln . drop startln . BS8.lines
+parseLogRange :: (Int, Maybe Int) -> VS.Vector Word32 -> ByteString -> [(HMS, Event)]
+parseLogRange (startln, mnumln) linestarts topbs =
+ let numln = maybe (VS.length linestarts - startln) id mnumln
+ splitted = splitWithLineStarts 0 (VS.slice startln numln linestarts) topbs
+ in -- traceShow ("pLR"::String, splitted) $
+ map parseLogLine splitted
where
- {-# NOINLINE go #-}
- go = fromRight (HMS 0 0 0, ParseError) . P.parseOnly parseLine
+ {-# INLINE splitWithLineStarts #-}
+ splitWithLineStarts :: Int -> VS.Vector Word32 -> ByteString -> [ByteString]
+ splitWithLineStarts idx starts bs
+ | idx >= VS.length starts = []
+ | idx == VS.length starts - 1 =
+ [BS.takeWhile (\b -> b /= 13 && b /= 10) (BS.drop (at idx) bs)]
+ | otherwise =
+ trimCR (BS.drop (at idx) (BS.take (at (idx + 1) - 1) bs))
+ : splitWithLineStarts (idx + 1) starts bs
+ where
+ at i = fromIntegral @Word32 @Int (starts VS.! i)
+
+ trimCR :: ByteString -> ByteString
+ trimCR bs = case BS.unsnoc bs of
+ Just (bs', c) | c == 13 -> bs'
+ _ -> bs
+
+parseLogLine :: ByteString -> (HMS, Event)
+parseLogLine bs =
+ case parseLogLine' bs of
+ res@(HMS 0 0 0, ParseError) -> traceShow ("PE" :: String, bs) res
+ res -> res
+
+{-# NOINLINE parseLogLine' #-}
+parseLogLine' :: ByteString -> (HMS, Event)
+parseLogLine' = fromRight (HMS 0 0 0, ParseError) . P.parseOnly parseLine
parseLine :: P.Parser (HMS, Event)
parseLine = (,) <$> parseTOD <*> parseEvent