{-# LANGUAGE OverloadedStrings #-} module ZNC ( -- Log(..), Nick, Event(..), preparseLog, parseLog, parseLogRange, ) where import Control.Applicative import Data.Attoparsec.ByteString.Char8 qualified as P import Data.ByteString (ByteString) import Data.ByteString qualified as BS import Data.ByteString.Char8 qualified as BS8 import Data.Char (ord) import Data.Either (fromRight) import Data.Text (Text) import Data.Text.Encoding qualified as TE import Data.Vector.Storable qualified as VS import Data.Word (Word8, Word32) import Debug.Trace import Util type Nick = Text -- Adapted from clogparse by Keegan McAllister (BSD3) (https://hackage.haskell.org/package/clogparse). data Event = Join Nick Text -- ^ User joined. | Part Nick Text Text -- ^ User left the channel. (address, reason) | Quit Nick Text Text -- ^ User quit the server. (address, reason) | ReNick Nick Nick -- ^ User changed from one to another nick. | Talk Nick Text -- ^ User spoke (@PRIVMSG@). | Notice Nick Text -- ^ User spoke (@NOTICE@). | Act Nick Text -- ^ User acted (@CTCP ACTION@). | Kick Nick Nick Text -- ^ User was kicked by user. (kicked, kicker, reason) | Mode Nick Text -- ^ User set mode on the channel. | Topic Nick Text -- ^ Topic change. | ParseError deriving (Show) preparseLog :: ByteString -> VS.Vector Word32 preparseLog = VS.fromList . findLineStarts 0 where findLineStarts :: Int -> ByteString -> [Word32] findLineStarts off bs = case BS.findIndex (== 10) (BS.drop off bs) of Nothing | BS.length bs == off -> [] | otherwise -> [fromIntegral off] Just i -> fromIntegral off : findLineStarts (off + i + 1) bs -- these INLINE/NOINLINE pragmas are optimisation without testing or profiling, have fun {-# INLINE parseLog #-} parseLog :: ByteString -> [(HMS, Event)] parseLog = map parseLogLine . BS8.lines -- (start line, number of lines (default to rest of file)) {-# INLINE parseLogRange #-} parseLogRange :: (Int, Maybe Int) -> VS.Vector Word32 -> ByteString -> [(HMS, Event)] parseLogRange (startln, mnumln) linestarts topbs = let numln = maybe (VS.length linestarts - startln) id mnumln splitted = splitWithLineStarts 0 (VS.slice startln numln linestarts) topbs in -- traceShow ("pLR"::String, splitted) $ map parseLogLine splitted where {-# INLINE splitWithLineStarts #-} splitWithLineStarts :: Int -> VS.Vector Word32 -> ByteString -> [ByteString] splitWithLineStarts idx starts bs | idx >= VS.length starts = [] | idx == VS.length starts - 1 = [BS.takeWhile (\b -> b /= 13 && b /= 10) (BS.drop (at idx) bs)] | otherwise = trimCR (BS.drop (at idx) (BS.take (at (idx + 1) - 1) bs)) : splitWithLineStarts (idx + 1) starts bs where at i = fromIntegral @Word32 @Int (starts VS.! i) trimCR :: ByteString -> ByteString trimCR bs = case BS.unsnoc bs of Just (bs', c) | c == 13 -> bs' _ -> bs parseLogLine :: ByteString -> (HMS, Event) parseLogLine bs = case parseLogLine' bs of res@(HMS 0 0 0, ParseError) -> traceShow ("PE" :: String, bs) res res -> res {-# NOINLINE parseLogLine' #-} parseLogLine' :: ByteString -> (HMS, Event) parseLogLine' = fromRight (HMS 0 0 0, ParseError) . P.parseOnly parseLine parseLine :: P.Parser (HMS, Event) parseLine = (,) <$> parseTOD <*> parseEvent parseTOD :: P.Parser HMS parseTOD = do _ <- P.char '[' tod <- HMS <$> pTwoDigs <*> (P.char ':' >> pTwoDigs) <*> (P.char ':' >> pTwoDigs) _ <- P.string "] " return tod -- Adapted from clogparse by Keegan McAllister (BSD3) (https://hackage.haskell.org/package/clogparse). parseEvent :: P.Parser Event parseEvent = asum [ P.string "*** " *> asum [ userAct Join "Joins: " , userAct' Part "Parts: " , userAct' Quit "Quits: " , ReNick <$> nick <*> (P.string " is now known as " *> nick) , Mode <$> nick <*> (P.string " sets mode: " *> remaining) , Kick <$> (nick <* P.string " was kicked by ") <*> nick <* P.char ' ' <*> encloseTail '(' ')' , Topic <$> (nick <* P.string " changes topic to ") <*> encloseTail '\'' '\'' ] , Talk <$ P.char '<' <*> nick <* P.string "> " <*> remaining , Notice <$ P.char '-' <*> nick <*> remaining -- FIXME: parse host , Act <$ P.string "* " <*> nick <* P.char ' ' <*> remaining ] where nick = utf8 <$> P.takeWhile (not . P.inClass " \n\r\t\v<>") userAct f x = f <$ P.string x <*> nick <* P.char ' ' <*> parens userAct' f x = f <$ P.string x <*> nick <* P.char ' ' <*> parens <* P.char ' ' <*> encloseTail '(' ')' parens = P.char '(' >> (utf8 <$> P.takeWhile (/= ')')) <* P.char ')' encloseTail c1 c2 = do _ <- P.char c1 bs <- P.takeByteString case BS.unsnoc bs of Just (s, c) | c == fromIntegral (ord c2) -> return (utf8 s) _ -> fail "Wrong end char" utf8 = TE.decodeUtf8Lenient remaining = utf8 <$> P.takeByteString pTwoDigs :: P.Parser Word8 pTwoDigs = do let digit = do c <- P.satisfy (\c -> '0' <= c && c <= '9') return (fromIntegral (ord c - ord '0')) c1 <- digit c2 <- digit return (10 * c1 + c2)