1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
|
module Index (
Index,
initIndex,
indexGetEventsLinear,
indexNumEvents,
) where
import Chronos hiding (day)
import Control.Monad (forM)
import Data.ByteString qualified as BS
import Data.Char (isDigit)
import Data.List (sort, scanl')
import Data.Map.Strict (Map)
import Data.Map.Strict qualified as Map
import Data.Text qualified as T
import Data.Vector.Storable qualified as VS
import System.Directory
import System.FilePath
import Text.Read (readMaybe)
import Torsor (add, difference)
import Debug.Trace
import Config (Channel(..))
import Mmap
import Util
import ZNC
data ChanIndex = ChanIndex
{ ciStartDay :: Day
, ciEndDay :: Day
, ciCountUntil :: VS.Vector Int -- ^ number of events up to and /including/ this day
, ciTotal :: Int }
deriving (Show)
data Index = Index FilePath (Map Channel ChanIndex)
deriving (Show)
-- init
initIndex :: FilePath -> [Channel] -> IO Index
initIndex basedir toimport = do
items <-
fmap concat . forM (map chanNetwork toimport) $ \nwT -> do
let nw = T.unpack nwT
forM [ch | Channel nwT' ch <- toimport, nwT' == nwT] $ \chT -> do
let ch = T.unpack chT
files <- listDirectory (basedir </> nw </> ch)
days <- fmap sort . forM files $ \fn -> do
let path = basedir </> nw </> ch </> fn
-- putStrLn $ "Parsing " ++ path ++ " (" ++ show (parseFileName fn) ++ " -> " ++ show (dateToDay (parseFileName fn)) ++ ")"
events <- parseLog <$> BS.readFile path
return (dateToDay (parseFileName fn), length events)
let minday = minimum (map fst days)
maxday = maximum (map fst days)
ndays = difference maxday minday + 1
-- traceM $ nw ++ "/" ++ ch ++ ": days = " ++ show [(toFileName (dayToDate d), i) | (d, i) <- days]
let countScan = VS.fromListN ndays (drop 1 $ scanl' (+) 0 (makeCounts [minday..maxday] days))
let ntotal = sum (map snd days)
return (Channel nwT chT
,ChanIndex
{ ciStartDay = minday
, ciEndDay = maxday
, ciCountUntil = countScan
, ciTotal = ntotal})
return (Index basedir (Map.fromList items))
makeCounts :: [Day] -> [(Day, Int)] -> [Int]
makeCounts [] [] = []
makeCounts [] _ = error "makeCounts: more entries than days in range"
makeCounts (d:ds) ents@((d',n):ents')
| d == d' = n : makeCounts ds ents'
| d < d' = 0 : makeCounts ds ents
| otherwise = error $ "makeCounts: duplicate entry? " ++ show (d, dayToDate d, d', dayToDate d')
makeCounts (_:ds) [] = 0 : makeCounts ds []
-- search
-- | Returns proper lazy list of events. Reading the files happens strictly,
-- but parsing the events happens lazily.
indexGetEventsLinear :: Index -> Channel -> Int -> Int -> IO [(YMDHMS, Event)]
indexGetEventsLinear (Index basedir mp) chan@(Channel network channel) from count
| from + count < 0 = return []
| from >= ciTotal ci = return []
| otherwise = do
let scan = ciCountUntil ci
day1idx = binSearch scan from
day1 = day1idx `add` ciStartDay ci
neventBeforeDay1 | day1idx == 0 = 0
| otherwise = scan VS.! (day1idx - 1)
neventInclDay1 = scan VS.! day1idx
neventOnDay1 = neventInclDay1 - neventBeforeDay1
off1 = from - neventBeforeDay1
-- day2 is inclusive, off2 is exclusive
(day2, off2)
| from + count <= neventInclDay1 = (day1, off1 + count)
| day1idx == VS.length scan - 1 = (day1, neventOnDay1)
| otherwise =
let loop day2idx nbefore nseen
| nseen + nOnDay2 >= count =
(day2idx `add` ciStartDay ci, count - nseen)
| day2idx == VS.length scan - 1 =
(day2idx `add` ciStartDay ci, nOnDay2)
| otherwise =
loop (day2idx + 1) (scan VS.! day2idx) (nseen + nOnDay2)
where
nOnDay2 = scan VS.! day2idx - nbefore
in loop (day1idx + 1) (neventBeforeDay1 + neventOnDay1) (neventOnDay1 - off1)
-- traceM ("ci = " ++ show ci)
-- traceM ("binSearch " ++ show from ++ " =")
-- traceM (" " ++ show day1idx)
-- traceM ("day1 = " ++ show day1)
-- traceM ("off1 = " ++ show off1)
-- traceM ("neventOnDay1 = " ++ show neventOnDay1)
-- traceM ("count = " ++ show count)
-- traceM ("day2 = " ++ show day2)
-- traceM ("off2 = " ++ show off2)
evs <- forM (zip [day1 .. day2] [day1idx..]) $ \(day, dayidx) ->
let neventsOnDay | dayidx == 0 = scan VS.! 0
| otherwise = scan VS.! dayidx - scan VS.! (dayidx - 1)
parse
| day == day1 =
if day1 == day2
then parseLogRange (off1, Just (off2 - off1))
else parseLogRange (off1, Nothing)
| day == day2 = parseLogRange (0, Just off2)
| otherwise = parseLog
Date (Year y) (Month monthm1) (DayOfMonth d) = dayToDate day
fixDate = map $ \(tod, ev) ->
(YMDHMS (YMD y (fromIntegral monthm1 + 1) (fromIntegral d)) tod
,ev)
in if neventsOnDay > 0
then fixDate . parse <$>
mapFile (basedir </> T.unpack network </> T.unpack channel </> toFileName (dayToDate day))
else return []
return (concat evs)
where
ci = mp Map.! chan
-- | The vector must be sorted.
-- Returns index of the first element x such that needle < x. If there is no
-- such element (i.e. needle is greater-equal the last element of vec), returns
-- the length of vec.
--
-- TODO: proportional binary search
binSearch :: VS.Vector Int -> Int -> Int
binSearch vec needle
| veclen == 0 || vec VS.! (veclen - 1) < needle = veclen
| needle < vec VS.! 0 = 0
| otherwise = go 0 (veclen - 1)
where
veclen = VS.length vec
-- Invariant: vec[lo] <= needle < vec[hi]
go :: Int -> Int -> Int
go lo hi
| lo + 1 == hi = hi
| otherwise =
let mid = lo + (hi - lo) `div` 2
in if vec VS.! mid <= needle
then go mid hi else go lo mid
-- other methods
indexNumEvents :: Index -> Channel -> Int
indexNumEvents (Index _ mp) chan = ciTotal (mp Map.! chan)
-- utilities
parseFileName :: String -> Date
parseFileName name
| (y, '-' : s1) <- splitAt 4 name
, (m, '-' : s2) <- splitAt 2 s1
, (d, ".log") <- splitAt 2 s2
, all isDigit y, all isDigit m, all isDigit d
, let y' = read' y ; m' = read' m - 1 ; d' = read' d
, 0 <= m', m' < 12
, 1 <= d', d' <= daysInMonth (isLeapYear (Year y')) (Month m')
= Date (Year y') (Month m') (DayOfMonth d')
| otherwise
= error $ "Invalid ZNC log file name: " ++ name
where
read' :: Read a => String -> a
read' s = case readMaybe s of
Just r -> r
Nothing -> error $ "No read: " ++ show s
toFileName :: Date -> String
toFileName (Date (Year y) (Month m) (DayOfMonth d)) =
pad '0' 4 y ++ '-' : pad '0' 2 (m + 1) ++ '-' : pad '0' 2 d ++ ".log"
|