{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TypeApplications #-} module Network.HTTP.Server.Mini.Parser where import Control.Monad (guard) import Control.Monad.Trans.Maybe import Data.ByteString (ByteString) import Data.ByteString qualified as BS import Data.ByteString.Short (ShortByteString) import Data.ByteString.Short qualified as SBS import Data.Char import Data.Word import FlatParse.Basic qualified as P import Network.HTTP.Server.Mini.Types import Network.HTTP.Server.Mini.URI import Network.HTTP.Server.Mini.Util readRequest :: Monad m => Int -> (Int -> m ByteString) -> m (Maybe Request) readRequest maxsize recv = runMaybeT $ do bs <- MaybeT $ readUntilTerminator maxsize recv [] hoistMaybe (parseRequest bs) parseRequest :: ByteString -> Maybe Request parseRequest bs = case P.runParser pRequest bs of P.OK req rest | BS.null rest -> Just req _ -> Nothing pRequest :: P.Parser e Request pRequest = do (method, uri) <- pFirstLine headers <- P.many pHeader let (path, query) = parseURI uri return Request { reqMethod = method , reqURI = uri , reqHeaders = headers , reqPath = path , reqQuery = query } pFirstLine :: P.Parser e (ShortByteString, ByteString) pFirstLine = do method <- pTakeUntil (ord8 ' ') guard (BS.length method <= 32) -- will put it in an SBS later P.word8 32 uri <- pTakeUntil (ord8 ' ') P.word8 32 $(P.bytes (map (fromIntegral @Int @Word . ord) "HTTP/1.")) version <- P.anyWord8 guard (version == ord8 '0' || version == ord8 '1') pNewline return (SBS.pack (map toAsciiUpperCase (BS.unpack method)), uri) pHeader :: P.Parser e (Header, ByteString) pHeader = do key <- pTakeUntil (ord8 ':') guard (BS.length key <= 128) -- will put it in an SBS later P.word8 (ord8 ':') P.skipMany (P.satisfy isLinearSpace) value <- pValue [] return (SBS.pack (map toAsciiLowerCase (BS.unpack key)), value) where pValue revlines = do line <- trimCR <$> pTakeUntil 10 P.word8 10 P.withOption (P.lookahead (P.skipSatisfyAscii isLinearSpace)) (\_ -> pValue (line : revlines)) (return (BS.concat (reverse (line : revlines)))) trimCR bs | len >= 1, bs `BS.index` (len - 1) == 13 = BS.take (len - 1) bs | otherwise = bs where len = BS.length bs isLinearSpace :: Char -> Bool isLinearSpace c = c == ' ' || c == '\t' pTakeUntil :: Word8 -> P.Parser e ByteString pTakeUntil term = do midx <- P.lookahead (BS.elemIndex term <$> P.takeRest) case midx of Just idx -> P.take idx Nothing -> P.failed pNewline :: P.Parser e () pNewline = P.withAnyWord8 $ \case 10 -> return () 13 -> P.word8 10 _ -> P.failed readUntilTerminator :: Monad m => Int -> (Int -> m ByteString) -> [ByteString] -> m (Maybe ByteString) readUntilTerminator 0 _ _ = return Nothing readUntilTerminator maxsize recv toprevprefix = do text <- recv maxsize if BS.length text == 0 then return Nothing else goFind toprevprefix text where goFind revprefix text = case findTerminator text of NoTerm -> readUntilTerminator (maxsize - BS.length text) recv (text : revprefix) IncompleteTerm haveCR | maxsize == BS.length text -> return Nothing | otherwise -> do text2 <- recv (maxsize - BS.length text) text2' <- case BS.length text2 of 1 -> (text2 <>) <$> recv (maxsize - BS.length text - 1) -- premature concat but rare _ -> return text2 -- also for length 0 (i.e. EOF)! -- now text2' has length at least 2 if available before EOF case (haveCR, BS.length text2') of (_, 0) -> return Nothing (False, _) | text2' `BS.index` 0 == 10 -> return (Just (BS.concat (reverse revprefix))) (False, n) | n >= 2, text2' `BS.index` 0 == 13, text2' `BS.index` 0 == 10 -> return (Just (BS.concat (reverse revprefix))) (True, _) | text2' `BS.index` 0 == 10 -> return (Just (BS.init (BS.concat (reverse revprefix)))) (_, 1) -> return Nothing -- second recv returned EOF and no terminator found before that _ -> goFind (text : revprefix) text2' FoundTerm i -> return (Just (BS.concat (reverse (BS.take i text : revprefix)))) data FoundTerminator = NoTerm | IncompleteTerm {-# UNPACK #-} !Bool -- ^ Bool indicates "have CR". -- If False, then LF at end of segment, requires LF or CRLF at start of next. -- If True, then LF CR at end of segment, requires LF at start of next. | FoundTerm Int -- ^ Index right after the first [CR]LF sequence in the terminator deriving (Show) findTerminator :: ByteString -> FoundTerminator findTerminator bs = -- We're looking for [CR] LF [CR] LF. Look for the first LF, and see if it's -- followed by CRLF or LF. go (BS.elemIndices 10 bs) where len = BS.length bs go [] = NoTerm go (i:is) | i == len - 1 = IncompleteTerm False | i == len - 2 = case bs `BS.index` (i+1) of 10 -> FoundTerm (i+1) 13 -> IncompleteTerm True _ -> NoTerm -- now certainly two bytes after this LF | otherwise = case bs `BS.index` (i+1) of 10 -> FoundTerm (i+1) -- [i+2] is read twice if this check fails, but LF CR (!=LF) will be very rare so it's fine 13 | bs `BS.index` (i+2) == 10 -> FoundTerm (i+1) _ -> go is toAsciiUpperCase :: Word8 -> Word8 toAsciiUpperCase c | ord8 'a' <= c, c <= ord8 'z' = c - 32 | otherwise = c toAsciiLowerCase :: Word8 -> Word8 toAsciiLowerCase c | ord8 'A' <= c, c <= ord8 'Z' = c + 32 | otherwise = c