diff options
Diffstat (limited to 'mini-http-server/Network/HTTP/Server/Mini/Internal/Parser.hs')
| -rw-r--r-- | mini-http-server/Network/HTTP/Server/Mini/Internal/Parser.hs | 162 |
1 files changed, 162 insertions, 0 deletions
diff --git a/mini-http-server/Network/HTTP/Server/Mini/Internal/Parser.hs b/mini-http-server/Network/HTTP/Server/Mini/Internal/Parser.hs new file mode 100644 index 0000000..6278326 --- /dev/null +++ b/mini-http-server/Network/HTTP/Server/Mini/Internal/Parser.hs @@ -0,0 +1,162 @@ +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE TypeApplications #-} +module Network.HTTP.Server.Mini.Internal.Parser where + +import Control.Monad (guard) +import Control.Monad.Trans.Maybe +import Data.ByteString (ByteString) +import Data.ByteString qualified as BS +import Data.ByteString.Short (ShortByteString) +import Data.ByteString.Short qualified as SBS +import Data.Char +import Data.Word +import FlatParse.Basic qualified as P + +import Network.HTTP.Server.Mini.Types +import Network.HTTP.Server.Mini.URI +import Network.HTTP.Server.Mini.Util + + +readRequest :: Monad m => Int -> (Int -> m ByteString) -> m (Maybe Request) +readRequest maxsize recv = runMaybeT $ do + bs <- MaybeT $ readUntilTerminator maxsize recv [] + hoistMaybe (parseRequest bs) + +parseRequest :: ByteString -> Maybe Request +parseRequest bs = + case P.runParser pRequest bs of + P.OK req rest | BS.null rest -> Just req + _ -> Nothing + +pRequest :: P.Parser e Request +pRequest = do + (method, uri) <- pFirstLine + headers <- P.many pHeader + let (path, query) = parseURI uri + return Request { reqMethod = method + , reqURI = uri + , reqHeaders = headers + , reqPath = path + , reqQuery = query } + +pFirstLine :: P.Parser e (ShortByteString, ByteString) +pFirstLine = do + method <- pTakeUntil (ord8 ' ') + guard (BS.length method <= 32) -- will put it in an SBS later + P.word8 32 + uri <- pTakeUntil (ord8 ' ') + P.word8 32 + $(P.bytes (map (fromIntegral @Int @Word . ord) "HTTP/1.")) + version <- P.anyWord8 + guard (version == ord8 '0' || version == ord8 '1') + pNewline + return (SBS.pack (map toAsciiUpperCase (BS.unpack method)), uri) + +pHeader :: P.Parser e (Header, ByteString) +pHeader = do + key <- pTakeUntil (ord8 ':') + guard (BS.length key <= 128) -- will put it in an SBS later + P.word8 (ord8 ':') + P.skipMany (P.satisfy isLinearSpace) + value <- pValue [] + return (SBS.pack (map toAsciiLowerCase (BS.unpack key)), value) + where + pValue revlines = do + line <- trimCR <$> pTakeUntil 10 + P.word8 10 + P.withOption (P.lookahead (P.skipSatisfyAscii isLinearSpace)) + (\_ -> pValue (line : revlines)) + (return (BS.concat (reverse (line : revlines)))) + + trimCR bs + | len >= 1, bs `BS.index` (len - 1) == 13 = BS.take (len - 1) bs + | otherwise = bs + where len = BS.length bs + +isLinearSpace :: Char -> Bool +isLinearSpace c = c == ' ' || c == '\t' + +pTakeUntil :: Word8 -> P.Parser e ByteString +pTakeUntil term = do + midx <- P.lookahead (BS.elemIndex term <$> P.takeRest) + case midx of + Just idx -> P.take idx + Nothing -> P.failed + +pNewline :: P.Parser e () +pNewline = P.withAnyWord8 $ \case + 10 -> return () + 13 -> P.word8 10 + _ -> P.failed + +readUntilTerminator :: Monad m => Int -> (Int -> m ByteString) -> [ByteString] -> m (Maybe ByteString) +readUntilTerminator 0 _ _ = return Nothing +readUntilTerminator maxsize recv toprevprefix = do + text <- recv maxsize + if BS.length text == 0 + then return Nothing + else goFind toprevprefix text + where + goFind revprefix text = + case findTerminator text of + NoTerm -> readUntilTerminator (maxsize - BS.length text) recv (text : revprefix) + IncompleteTerm haveCR + | maxsize == BS.length text -> return Nothing + | otherwise -> do + text2 <- recv (maxsize - BS.length text) + text2' <- case BS.length text2 of + 1 -> (text2 <>) <$> recv (maxsize - BS.length text - 1) -- premature concat but rare + _ -> return text2 -- also for length 0 (i.e. EOF)! + -- now text2' has length at least 2 if available before EOF + case (haveCR, BS.length text2') of + (_, 0) -> return Nothing + (False, _) | text2' `BS.index` 0 == 10 -> return (Just (BS.concat (reverse (text : revprefix)))) + (False, n) | n >= 2, text2' `BS.index` 0 == 13, text2' `BS.index` 1 == 10 -> + return (Just (BS.concat (reverse (text : revprefix)))) + (True, _) | text2' `BS.index` 0 == 10 -> return (Just (BS.init (BS.concat (reverse (text : revprefix))))) + (_, 1) -> return Nothing -- second recv returned EOF and no terminator found before that + _ -> goFind (text : revprefix) text2' + FoundTerm i -> return (Just (BS.concat (reverse (BS.take i text : revprefix)))) + +data FoundTerminator + = NoTerm + | IncompleteTerm {-# UNPACK #-} !Bool + -- ^ Bool indicates "have CR". + -- If False, then LF at end of segment, requires LF or CRLF at start of next. + -- If True, then LF CR at end of segment, requires LF at start of next. + | FoundTerm Int -- ^ Index right after the first [CR]LF sequence in the terminator + deriving (Show) + +findTerminator :: ByteString -> FoundTerminator +findTerminator bs = + -- We're looking for [CR] LF [CR] LF. Look for the first LF, and see if it's + -- followed by CRLF or LF. + go (BS.elemIndices 10 bs) + where + len = BS.length bs + + go [] = NoTerm + go (i:is) + | i == len - 1 = IncompleteTerm False + | i == len - 2 = + case bs `BS.index` (i+1) of + 10 -> FoundTerm (i+1) + 13 -> IncompleteTerm True + _ -> NoTerm + -- now certainly two bytes after this LF + | otherwise = + case bs `BS.index` (i+1) of + 10 -> FoundTerm (i+1) + -- [i+2] is read twice if this check fails, but LF CR (!=LF) will be very rare so it's fine + 13 | bs `BS.index` (i+2) == 10 -> FoundTerm (i+1) + _ -> go is + +toAsciiUpperCase :: Word8 -> Word8 +toAsciiUpperCase c + | ord8 'a' <= c, c <= ord8 'z' = c - 32 + | otherwise = c + +toAsciiLowerCase :: Word8 -> Word8 +toAsciiLowerCase c + | ord8 'A' <= c, c <= ord8 'Z' = c + 32 + | otherwise = c |
