1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
|
{-# LANGUAGE OverloadedStrings #-}
module ZNC (
-- Log(..),
Nick, Event(..),
preparseLog,
parseLog, parseLogRange,
) where
import Control.Applicative
import Data.Attoparsec.ByteString.Char8 qualified as P
import Data.ByteString (ByteString)
import Data.ByteString qualified as BS
import Data.ByteString.Char8 qualified as BS8
import Data.Char (ord)
import Data.Either (fromRight)
import Data.Text (Text)
import Data.Text.Encoding qualified as TE
import Data.Vector.Storable qualified as VS
import Data.Word (Word8, Word32)
import Debug.Trace
import Util
type Nick = Text
-- Adapted from clogparse by Keegan McAllister (BSD3) (https://hackage.haskell.org/package/clogparse).
data Event
= Join Nick Text -- ^ User joined.
| Part Nick Text Text -- ^ User left the channel. (address, reason)
| Quit Nick Text Text -- ^ User quit the server. (address, reason)
| ReNick Nick Nick -- ^ User changed from one to another nick.
| Talk Nick Text -- ^ User spoke (@PRIVMSG@).
| Notice Nick Text -- ^ User spoke (@NOTICE@).
| Act Nick Text -- ^ User acted (@CTCP ACTION@).
| Kick Nick Nick Text -- ^ User was kicked by user. (kicked, kicker, reason)
| Mode Nick Text -- ^ User set mode on the channel.
| Topic Nick Text -- ^ Topic change.
| ParseError
deriving (Show)
preparseLog :: ByteString -> VS.Vector Word32
preparseLog = VS.fromList . findLineStarts 0
where
findLineStarts :: Int -> ByteString -> [Word32]
findLineStarts off bs =
case BS.findIndex (== 10) (BS.drop off bs) of
Nothing | BS.length bs == off -> []
| otherwise -> [fromIntegral off]
Just i -> fromIntegral off : findLineStarts (off + i + 1) bs
-- these INLINE/NOINLINE pragmas are optimisation without testing or profiling, have fun
{-# INLINE parseLog #-}
parseLog :: ByteString -> [(HMS, Event)]
parseLog = map parseLogLine . BS8.lines
-- (start line, number of lines (default to rest of file))
{-# INLINE parseLogRange #-}
parseLogRange :: (Int, Maybe Int) -> VS.Vector Word32 -> ByteString -> [(HMS, Event)]
parseLogRange (startln, mnumln) linestarts topbs =
let numln = maybe (VS.length linestarts - startln) id mnumln
splitted = splitWithLineStarts 0 (VS.slice startln numln linestarts) topbs
in -- traceShow ("pLR"::String, splitted) $
map parseLogLine splitted
where
{-# INLINE splitWithLineStarts #-}
splitWithLineStarts :: Int -> VS.Vector Word32 -> ByteString -> [ByteString]
splitWithLineStarts idx starts bs
| idx >= VS.length starts = []
| idx == VS.length starts - 1 =
[BS.takeWhile (\b -> b /= 13 && b /= 10) (BS.drop (at idx) bs)]
| otherwise =
trimCR (BS.drop (at idx) (BS.take (at (idx + 1) - 1) bs))
: splitWithLineStarts (idx + 1) starts bs
where
at i = fromIntegral @Word32 @Int (starts VS.! i)
trimCR :: ByteString -> ByteString
trimCR bs = case BS.unsnoc bs of
Just (bs', c) | c == 13 -> bs'
_ -> bs
parseLogLine :: ByteString -> (HMS, Event)
parseLogLine bs =
case parseLogLine' bs of
res@(HMS 0 0 0, ParseError) -> traceShow ("PE" :: String, bs) res
res -> res
{-# NOINLINE parseLogLine' #-}
parseLogLine' :: ByteString -> (HMS, Event)
parseLogLine' = fromRight (HMS 0 0 0, ParseError) . P.parseOnly parseLine
parseLine :: P.Parser (HMS, Event)
parseLine = (,) <$> parseTOD <*> parseEvent
parseTOD :: P.Parser HMS
parseTOD = do
_ <- P.char '['
tod <- HMS <$> pTwoDigs <*> (P.char ':' >> pTwoDigs) <*> (P.char ':' >> pTwoDigs)
_ <- P.string "] "
return tod
-- Adapted from clogparse by Keegan McAllister (BSD3) (https://hackage.haskell.org/package/clogparse).
parseEvent :: P.Parser Event
parseEvent = asum
[ P.string "*** " *> asum
[ userAct Join "Joins: "
, userAct' Part "Parts: "
, userAct' Quit "Quits: "
, ReNick <$> nick <*> (P.string " is now known as " *> nick)
, Mode <$> nick <*> (P.string " sets mode: " *> remaining)
, Kick <$> (nick <* P.string " was kicked by ") <*> nick <* P.char ' ' <*> encloseTail '(' ')'
, Topic <$> (nick <* P.string " changes topic to ") <*> encloseTail '\'' '\''
]
, Talk <$ P.char '<' <*> nick <* P.string "> " <*> remaining
, Notice <$ P.char '-' <*> nick <*> remaining -- FIXME: parse host
, Act <$ P.string "* " <*> nick <* P.char ' ' <*> remaining
] where
nick = utf8 <$> P.takeWhile (not . P.inClass " \n\r\t\v<>")
userAct f x = f <$ P.string x <*> nick <* P.char ' ' <*> parens
userAct' f x = f <$ P.string x <*> nick <* P.char ' ' <*> parens <* P.char ' ' <*> encloseTail '(' ')'
parens = P.char '(' >> (utf8 <$> P.takeWhile (/= ')')) <* P.char ')'
encloseTail c1 c2 = do _ <- P.char c1
bs <- P.takeByteString
case BS.unsnoc bs of
Just (s, c) | c == fromIntegral (ord c2) -> return (utf8 s)
_ -> fail "Wrong end char"
utf8 = TE.decodeUtf8Lenient
remaining = utf8 <$> P.takeByteString
pTwoDigs :: P.Parser Word8
pTwoDigs = do
let digit = do
c <- P.satisfy (\c -> '0' <= c && c <= '9')
return (fromIntegral (ord c - ord '0'))
c1 <- digit
c2 <- digit
return (10 * c1 + c2)
|