5 files changed, 129 insertions, 124 deletions
diff --git a/src/Cache.hs b/src/Cache.hs
index 53d2185..4694aa0 100644
--- a/src/Cache.hs
+++ b/src/Cache.hs
@@ -1,87 +1,43 @@
 module Cache where
 
-import Control.Concurrent.STM
-import Control.Monad (forM_)
-import Data.ByteString (ByteString)
-import Data.ByteString qualified as BS
+import Control.Monad (when)
+import Data.IORef
 import Data.Map.Strict (Map)
 import Data.Map.Strict qualified as Map
+import System.Clock qualified as Clock
+import System.Random
 
-import ZNC
 
+data Cache k v = Cache Int (IORef (Map k (v, Clock.TimeSpec))) (IORef StdGen)
 
--- value, previous (more recently accessed), next (less recently accessed)
-data LRUNode a = LRUNode a (TVar (Maybe (LRUNode a))) (TVar (Maybe (LRUNode a)))
+cacheNew :: Int -> IO (Cache k v)
+cacheNew maxsize = Cache maxsize <$> newIORef Map.empty <*> (newIORef =<< initStdGen)
 
--- head (most recent), last (least recent)
-data LRUChain a = LRUChain (TVar (Maybe (LRUNode a))) (TVar (Maybe (LRUNode a)))
+cacheAdd :: (Ord k, Show k) => Cache k v -> k -> v -> IO ()
+cacheAdd cache@(Cache maxsize ref _) key val = do
+  now <- Clock.getTime Clock.Monotonic
+  sz <- atomicModifyIORef' ref $ \mp ->
+    case fst (Map.insertLookupWithKey (\_ _ _ -> (val, now)) key (val, now) mp) of
+      Nothing -> (Map.insert key (val, now) mp, Map.size mp + 1)
+      Just _ -> (Map.insert key (val, now) mp, 0)
+  when (sz > maxsize) $
+    cachePrune cache
 
-lruchainNew :: IO (LRUChain a)
-lruchainNew = LRUChain <$> newTVarIO Nothing <*> newTVarIO Nothing
+cacheLookup :: Ord k => Cache k v -> k -> IO (Maybe v)
+cacheLookup (Cache _ ref _) key = fmap fst . Map.lookup key <$> readIORef ref
 
-lruchainAdd :: LRUChain a -> a -> STM (LRUNode a)
-lruchainAdd (LRUChain hd tl) x = do
-  mhdnode <- readTVar hd
-  case mhdnode of
-    Nothing -> do  -- chain empty
-      node <- LRUNode x <$> newTVar Nothing <*> newTVar Nothing
-      writeTVar hd (Just node)
-      writeTVar tl (Just node)
-      return node
-    Just hdnode -> do
-      node <- LRUNode x <$> newTVar Nothing <*> newTVar (Just hdnode)
-      writeTVar hd (Just node)
-      return node
-
-lruchainBump :: LRUChain a -> LRUNode a -> STM ()
-lruchainBump (LRUChain hd _) node@(LRUNode _ prevvar nextvar) = do
-  mprev <- readTVar prevvar
-  case mprev of
-    Nothing -> return ()  -- already most recent
-    Just (LRUNode _ _ prevnextvar) -> do
-      -- remove node from chain where it is now
-      mnext <- readTVar nextvar
-      writeTVar prevnextvar mnext
-      case mnext of
-        Nothing -> return ()
-        Just (LRUNode _ nextprevvar _) -> writeTVar nextprevvar mprev
-      -- add node to chain at head
-      writeTVar nextvar =<< readTVar hd
-      writeTVar hd (Just node)
-
-lruchainEvict :: LRUChain a -> STM (Maybe a)
-lruchainEvict (LRUChain _ tl) = do
-  mtlnode <- readTVar tl
-  case mtlnode of
-    Nothing -> return Nothing
-    Just (LRUNode x prevvar _) -> do
-      mprev <- readTVar prevvar
-      writeTVar tl mprev
-      case mprev of
-        Nothing -> return ()
-        Just (LRUNode _ _ prevnextvar) -> writeTVar prevnextvar Nothing
-      return (Just x)
-
--- maxsize, elements in cache, linked list
-data LRU a = LRU Int (TVar (Map a (LRUNode a))) (LRUChain a)
-
-lruNew :: Ord a => Int -> IO (LRU a)
-lruNew maxsize = LRU maxsize <$> newTVarIO mempty <*> lruchainNew
-
--- | If a value got evicted, it is returned
-lruBump :: Ord a => LRU a -> a -> IO (Maybe a)
-lruBump (LRU maxsize mapvar chain) value = atomically $ do
-  mp <- readTVar mapvar
-  case Map.lookup value mp of
-    Nothing -> do
-      node <- lruchainAdd chain value
-      writeTVar mapvar $! Map.insert value node mp
-      if Map.size mp >= maxsize
-        then do
-          mret <- lruchainEvict chain
-          forM_ mret $ \ret -> writeTVar mapvar $! Map.delete ret mp
-          return mret
-        else return Nothing
-    Just node -> do
-      lruchainBump chain node
-      return Nothing
+-- Uses 2-random LRU (https://danluu.com/2choices-eviction/)
+cachePrune :: Ord k => Cache k v -> IO ()
+cachePrune (Cache maxsize ref genref) = do go =<< atomicModifyIORef' genref splitGen
+  where
+    go gen0 = do
+      (gen', sz') <- atomicModifyIORef' ref $ \mp ->
+        let sz = Map.size mp
+            (idx1, gen1) = uniformR (0, sz - 1) gen0 ; (_, (_, tm1)) = Map.elemAt idx1 mp
+            (idx2, gen2) = uniformR (0, sz - 1) gen1 ; (_, (_, tm2)) = Map.elemAt idx2 mp
+            oldestIdx = if tm1 < tm2 then idx1 else idx2
+        in if sz > maxsize
+             then (Map.deleteAt oldestIdx mp, (gen2, sz - 1))
+             else (mp, (gen0, sz))
+      when (sz' > maxsize) $
+        go gen'
diff --git a/src/Index.hs b/src/Index.hs
index 4de4c6c..5754e33 100644
--- a/src/Index.hs
+++ b/src/Index.hs
@@ -5,22 +5,22 @@ module Index (
   indexNumEvents,
 ) where
 
-import Chronos hiding (day)
+import Data.Time.Calendar
 import Control.Monad (forM)
 import Data.ByteString qualified as BS
+import Data.ByteString (ByteString)
 import Data.Char (isDigit)
 import Data.List (sort, scanl')
 import Data.Map.Strict (Map)
 import Data.Map.Strict qualified as Map
 import Data.Text qualified as T
 import Data.Vector.Storable qualified as VS
+import Data.Word (Word32)
 import System.Directory
 import System.FilePath
 import Text.Read (readMaybe)
-import Torsor (add, difference)
-
-import Debug.Trace
 
+import Cache
 import Config (Channel(..))
 import Mmap
 import Util
@@ -34,13 +34,14 @@ data ChanIndex = ChanIndex
   , ciTotal :: Int }
   deriving (Show)
 
-data Index = Index FilePath (Map Channel ChanIndex)
-  deriving (Show)
+data Index = Index FilePath (Map Channel ChanIndex) (Cache (Channel, YMD) (ByteString, VS.Vector Word32))
 
 -- init
 
 initIndex :: FilePath -> [Channel] -> IO Index
 initIndex basedir toimport = do
+  cache <- cacheNew 100
+
   items <-
     fmap concat . forM (map chanNetwork toimport) $ \nwT -> do
       let nw = T.unpack nwT
@@ -51,10 +52,10 @@ initIndex basedir toimport = do
           let path = basedir </> nw </> ch </> fn
           -- putStrLn $ "Parsing " ++ path ++ " (" ++ show (parseFileName fn) ++ " -> " ++ show (dateToDay (parseFileName fn)) ++ ")"
           events <- parseLog <$> BS.readFile path
-          return (dateToDay (parseFileName fn), length events)
+          return (uncurry3 fromGregorian (parseFileName fn), length events)
         let minday = minimum (map fst days)
             maxday = maximum (map fst days)
-            ndays = difference maxday minday + 1
+            ndays = fromIntegral @Integer @Int (diffDays maxday minday + 1)
         -- traceM $ nw ++ "/" ++ ch ++ ": days = " ++ show [(toFileName (dayToDate d), i) | (d, i) <- days]
         let countScan = VS.fromListN ndays (drop 1 $ scanl' (+) 0 (makeCounts [minday..maxday] days))
         let ntotal = sum (map snd days)
@@ -64,7 +65,7 @@ initIndex basedir toimport = do
                   , ciEndDay = maxday
                   , ciCountUntil = countScan
                   , ciTotal = ntotal})
-  return (Index basedir (Map.fromList items))
+  return (Index basedir (Map.fromList items) cache)
 
 makeCounts :: [Day] -> [(Day, Int)] -> [Int]
 makeCounts [] [] = []
@@ -72,7 +73,7 @@ makeCounts [] _ = error "makeCounts: more entries than days in range"
 makeCounts (d:ds) ents@((d',n):ents')
   | d == d' = n : makeCounts ds ents'
   | d < d'  = 0 : makeCounts ds ents
-  | otherwise = error $ "makeCounts: duplicate entry? " ++ show (d, dayToDate d, d', dayToDate d')
+  | otherwise = error $ "makeCounts: duplicate entry? " ++ show (d, toGregorian d, d', toGregorian d')
 makeCounts (_:ds) [] = 0 : makeCounts ds []
 
 -- search
@@ -80,13 +81,13 @@ makeCounts (_:ds) [] = 0 : makeCounts ds []
 -- | Returns proper lazy list of events. Reading the files happens strictly,
 -- but parsing the events happens lazily.
 indexGetEventsLinear :: Index -> Channel -> Int -> Int -> IO [(YMDHMS, Event)]
-indexGetEventsLinear (Index basedir mp) chan@(Channel network channel) from count
+indexGetEventsLinear (Index basedir mp cache) chan@(Channel network channel) from count
   | from + count < 0 = return []
   | from >= ciTotal ci = return []
   | otherwise = do
       let scan = ciCountUntil ci
           day1idx = binSearch scan from
-          day1 = day1idx `add` ciStartDay ci
+          day1 = day1idx `addDays'` ciStartDay ci
           neventBeforeDay1 | day1idx == 0 = 0
                            | otherwise = scan VS.! (day1idx - 1)
           neventInclDay1 = scan VS.! day1idx
@@ -99,9 +100,9 @@ indexGetEventsLinear (Index basedir mp) chan@(Channel network channel) from coun
             | otherwise =
                 let loop day2idx nbefore nseen 
                       | nseen + nOnDay2 >= count =
-                          (day2idx `add` ciStartDay ci, count - nseen)
+                          (day2idx `addDays'` ciStartDay ci, count - nseen)
                       | day2idx == VS.length scan - 1 =
-                          (day2idx `add` ciStartDay ci, nOnDay2)
+                          (day2idx `addDays'` ciStartDay ci, nOnDay2)
                       | otherwise =
                           loop (day2idx + 1) (scan VS.! day2idx) (nseen + nOnDay2)
                       where
@@ -126,14 +127,19 @@ indexGetEventsLinear (Index basedir mp) chan@(Channel network channel) from coun
                            then parseLogRange (off1, Just (off2 - off1))
                            else parseLogRange (off1, Nothing)
                      | day == day2 = parseLogRange (0, Just off2)
-                     | otherwise = parseLog
-                   Date (Year y) (Month monthm1) (DayOfMonth d) = dayToDate day
-                   fixDate = map $ \(tod, ev) ->
-                               (YMDHMS (YMD y (fromIntegral monthm1 + 1) (fromIntegral d)) tod
-                               ,ev)
+                     | otherwise = \_lineStarts -> parseLog
+                   (y, month, d) = toGregorian day
+                   ymd = YMD (fromIntegral y) (fromIntegral month) (fromIntegral d)
+                   fixDate = map $ \(tod, ev) -> (YMDHMS ymd tod, ev)
                in if neventsOnDay > 0
-                    then fixDate . parse <$>
-                           mapFile (basedir </> T.unpack network </> T.unpack channel </> toFileName (dayToDate day))
+                    then do (bs, lineStarts) <- cacheLookup cache (chan, ymd) >>= \case
+                              Nothing -> do
+                                bs <- mapFile (basedir </> T.unpack network </> T.unpack channel </> toFileName (toGregorian day))
+                                let lineStarts = preparseLog bs
+                                cacheAdd cache (chan, ymd) (bs, lineStarts)
+                                return (bs, lineStarts)
+                              Just (bs, lineStarts) -> return (bs, lineStarts)
+                            return (fixDate (parse lineStarts bs))
                     else return []
 
       return (concat evs)
@@ -166,20 +172,20 @@ binSearch vec needle
 -- other methods
 
 indexNumEvents :: Index -> Channel -> Int
-indexNumEvents (Index _ mp) chan = ciTotal (mp Map.! chan)
+indexNumEvents (Index _ mp _) chan = ciTotal (mp Map.! chan)
 
 -- utilities
 
-parseFileName :: String -> Date
+parseFileName :: String -> (Year, MonthOfYear, DayOfMonth)
 parseFileName name
   | (y, '-' : s1) <- splitAt 4 name
   , (m, '-' : s2) <- splitAt 2 s1
   , (d, ".log") <- splitAt 2 s2
   , all isDigit y, all isDigit m, all isDigit d
-  , let y' = read' y ; m' = read' m - 1 ; d' = read' d
-  , 0 <= m', m' < 12
-  , 1 <= d', d' <= daysInMonth (isLeapYear (Year y')) (Month m')
-  = Date (Year y') (Month m') (DayOfMonth d')
+  , let y' = read' y ; m' = read' m ; d' = read' d
+  , 1 <= m', m' <= 12
+  , 1 <= d', d' <= gregorianMonthLength y' m'
+  = (y', m', d')
   | otherwise
   = error $ "Invalid ZNC log file name: " ++ name
   where
@@ -188,6 +194,9 @@ parseFileName name
                 Just r -> r
                 Nothing -> error $ "No read: " ++ show s
 
-toFileName :: Date -> String
-toFileName (Date (Year y) (Month m) (DayOfMonth d)) =
-  pad '0' 4 y ++ '-' : pad '0' 2 (m + 1) ++ '-' : pad '0' 2 d ++ ".log"
+toFileName :: (Year, MonthOfYear, DayOfMonth) -> String
+toFileName (y, m, d) =
+  pad '0' 4 y ++ '-' : pad '0' 2 m ++ '-' : pad '0' 2 d ++ ".log"
+
+addDays' :: Int -> Day -> Day
+addDays' = addDays . fromIntegral
diff --git a/src/Mmap.hs b/src/Mmap.hs
index bfe6042..94f5c49 100644
--- a/src/Mmap.hs
+++ b/src/Mmap.hs
@@ -45,6 +45,8 @@ mapFile path = mask_ $ do
   -- fail (and no exceptions are coming from outside as we're masked)
   if addr == nullPtr
     then fail "mapFile: could not mmap"
-    else BS.unsafePackCStringFinalizer addr (fromIntegral @CSize @Int filelen)
-            (do -- putStrLn ("[munmap " ++ show addr ++ "]")
-                c_munmap addr filelen)
+    else do bs <- BS.unsafePackCStringFinalizer addr (fromIntegral @CSize @Int filelen)
+                    (do -- putStrLn ("[munmap " ++ show addr ++ "]")
+                        c_munmap addr filelen)
+            -- putStrLn ("[mmap " ++ show addr ++ "]")
+            return bs
diff --git a/src/Util.hs b/src/Util.hs
index ca31258..1e10eec 100644
--- a/src/Util.hs
+++ b/src/Util.hs
@@ -17,7 +17,7 @@ data YMDHMS = YMDHMS {-# UNPACK #-} !YMD
 data YMD = YMD {-# UNPACK #-} !Int
                {-# UNPACK #-} !Word8  -- ^ 1-based
                {-# UNPACK #-} !Word8
-  deriving (Show)
+  deriving (Show, Eq, Ord)
 
 -- | Time-of-day in seconds, in unspecified time zone
 data HMS = HMS {-# UNPACK #-} !Word8
@@ -29,3 +29,6 @@ pad :: Show a => Char -> Int -> a -> String
 pad c w val =
   let s = show val
   in replicate (w - length s) c ++ s
+
+uncurry3 :: (a -> b -> c -> d) -> (a, b, c) -> d
+uncurry3 f (x, y, z) = f x y z
diff --git a/src/ZNC.hs b/src/ZNC.hs
index 35eafe0..c23ffe2 100644
--- a/src/ZNC.hs
+++ b/src/ZNC.hs
@@ -2,6 +2,7 @@
 module ZNC (
   -- Log(..),
   Nick, Event(..),
+  preparseLog,
   parseLog, parseLogRange,
 ) where
 
@@ -14,15 +15,13 @@ import Data.Char (ord)
 import Data.Either (fromRight)
 import Data.Text (Text)
 import Data.Text.Encoding qualified as TE
--- import Data.Vector qualified as V
--- import Data.Vector (Vector)
-import Data.Word (Word8)
+import Data.Vector.Storable qualified as VS
+import Data.Word (Word8, Word32)
 
-import Util
+import Debug.Trace
 
+import Util
 
--- newtype Log = Log (Vector (TOD, Event))
---   deriving (Show)
 
 type Nick = Text
 
@@ -41,20 +40,56 @@ data Event
   | ParseError
   deriving (Show)
 
+preparseLog :: ByteString -> VS.Vector Word32
+preparseLog = VS.fromList . findLineStarts 0
+  where
+    findLineStarts :: Int -> ByteString -> [Word32]
+    findLineStarts off bs =
+      case BS.findIndex (== 10) (BS.drop off bs) of
+        Nothing | BS.length bs == off -> []
+                | otherwise -> [fromIntegral off]
+        Just i -> fromIntegral off : findLineStarts (off + i + 1) bs
+
 -- these INLINE/NOINLINE pragmas are optimisation without testing or profiling, have fun
 {-# INLINE parseLog #-}
 parseLog :: ByteString -> [(HMS, Event)]
-parseLog = parseLogRange (0, Nothing)
+parseLog = map parseLogLine . BS8.lines
 
 -- (start line, number of lines (default to rest of file))
 {-# INLINE parseLogRange #-}
-parseLogRange :: (Int, Maybe Int) -> ByteString -> [(HMS, Event)]
-parseLogRange (startln, mnumln) =
-  -- Log . V.fromList .
-  map go . maybe id take mnumln . drop startln . BS8.lines
+parseLogRange :: (Int, Maybe Int) -> VS.Vector Word32 -> ByteString -> [(HMS, Event)]
+parseLogRange (startln, mnumln) linestarts topbs =
+  let numln = maybe (VS.length linestarts - startln) id mnumln
+      splitted = splitWithLineStarts 0 (VS.slice startln numln linestarts) topbs
+  in -- traceShow ("pLR"::String, splitted) $
+     map parseLogLine splitted
   where
-    {-# NOINLINE go #-}
-    go = fromRight (HMS 0 0 0, ParseError) . P.parseOnly parseLine
+    {-# INLINE splitWithLineStarts #-}
+    splitWithLineStarts :: Int -> VS.Vector Word32 -> ByteString -> [ByteString]
+    splitWithLineStarts idx starts bs
+      | idx >= VS.length starts = []
+      | idx == VS.length starts - 1 =
+          [BS.takeWhile (\b -> b /= 13 && b /= 10) (BS.drop (at idx) bs)]
+      | otherwise =
+          trimCR (BS.drop (at idx) (BS.take (at (idx + 1) - 1) bs))
+          : splitWithLineStarts (idx + 1) starts bs
+      where
+        at i = fromIntegral @Word32 @Int (starts VS.! i)
+
+    trimCR :: ByteString -> ByteString
+    trimCR bs = case BS.unsnoc bs of
+                  Just (bs', c) | c == 13 -> bs'
+                  _ -> bs
+
+parseLogLine :: ByteString -> (HMS, Event)
+parseLogLine bs =
+  case parseLogLine' bs of
+    res@(HMS 0 0 0, ParseError) -> traceShow ("PE" :: String, bs) res
+    res -> res
+
+{-# NOINLINE parseLogLine' #-}
+parseLogLine' :: ByteString -> (HMS, Event)
+parseLogLine' = fromRight (HMS 0 0 0, ParseError) . P.parseOnly parseLine
 
 parseLine :: P.Parser (HMS, Event)
 parseLine = (,) <$> parseTOD <*> parseEvent