1 files changed, 162 insertions, 0 deletions
diff --git a/mini-http-server/Network/HTTP/Server/Mini/Internal/Parser.hs b/mini-http-server/Network/HTTP/Server/Mini/Internal/Parser.hs
new file mode 100644
index 0000000..6278326
--- /dev/null
+++ b/mini-http-server/Network/HTTP/Server/Mini/Internal/Parser.hs
@@ -0,0 +1,162 @@
+{-# LANGUAGE TemplateHaskell #-}
+{-# LANGUAGE TypeApplications #-}
+module Network.HTTP.Server.Mini.Internal.Parser where
+
+import Control.Monad (guard)
+import Control.Monad.Trans.Maybe
+import Data.ByteString (ByteString)
+import Data.ByteString qualified as BS
+import Data.ByteString.Short (ShortByteString)
+import Data.ByteString.Short qualified as SBS
+import Data.Char
+import Data.Word
+import FlatParse.Basic qualified as P
+
+import Network.HTTP.Server.Mini.Types
+import Network.HTTP.Server.Mini.URI
+import Network.HTTP.Server.Mini.Util
+
+
+readRequest :: Monad m => Int -> (Int -> m ByteString) -> m (Maybe Request)
+readRequest maxsize recv = runMaybeT $ do
+  bs <- MaybeT $ readUntilTerminator maxsize recv []
+  hoistMaybe (parseRequest bs)
+
+parseRequest :: ByteString -> Maybe Request
+parseRequest bs =
+  case P.runParser pRequest bs of
+    P.OK req rest | BS.null rest -> Just req
+    _ -> Nothing
+
+pRequest :: P.Parser e Request
+pRequest = do
+  (method, uri) <- pFirstLine
+  headers <- P.many pHeader
+  let (path, query) = parseURI uri
+  return Request { reqMethod = method
+                 , reqURI = uri
+                 , reqHeaders = headers
+                 , reqPath = path
+                 , reqQuery = query }
+
+pFirstLine :: P.Parser e (ShortByteString, ByteString)
+pFirstLine = do
+  method <- pTakeUntil (ord8 ' ')
+  guard (BS.length method <= 32)  -- will put it in an SBS later
+  P.word8 32
+  uri <- pTakeUntil (ord8 ' ')
+  P.word8 32
+  $(P.bytes (map (fromIntegral @Int @Word . ord) "HTTP/1."))
+  version <- P.anyWord8
+  guard (version == ord8 '0' || version == ord8 '1')
+  pNewline
+  return (SBS.pack (map toAsciiUpperCase (BS.unpack method)), uri)
+
+pHeader :: P.Parser e (Header, ByteString)
+pHeader = do
+  key <- pTakeUntil (ord8 ':')
+  guard (BS.length key <= 128)  -- will put it in an SBS later
+  P.word8 (ord8 ':')
+  P.skipMany (P.satisfy isLinearSpace)
+  value <- pValue []
+  return (SBS.pack (map toAsciiLowerCase (BS.unpack key)), value)
+  where
+    pValue revlines = do
+      line <- trimCR <$> pTakeUntil 10
+      P.word8 10
+      P.withOption (P.lookahead (P.skipSatisfyAscii isLinearSpace))
+        (\_ -> pValue (line : revlines))
+        (return (BS.concat (reverse (line : revlines))))
+
+    trimCR bs
+      | len >= 1, bs `BS.index` (len - 1) == 13 = BS.take (len - 1) bs
+      | otherwise = bs
+      where len = BS.length bs
+
+isLinearSpace :: Char -> Bool
+isLinearSpace c = c == ' ' || c == '\t'
+
+pTakeUntil :: Word8 -> P.Parser e ByteString
+pTakeUntil term = do
+  midx <- P.lookahead (BS.elemIndex term <$> P.takeRest)
+  case midx of
+    Just idx -> P.take idx
+    Nothing -> P.failed
+
+pNewline :: P.Parser e ()
+pNewline = P.withAnyWord8 $ \case
+  10 -> return ()
+  13 -> P.word8 10
+  _ -> P.failed
+
+readUntilTerminator :: Monad m => Int -> (Int -> m ByteString) -> [ByteString] -> m (Maybe ByteString)
+readUntilTerminator 0 _ _ = return Nothing
+readUntilTerminator maxsize recv toprevprefix = do
+  text <- recv maxsize
+  if BS.length text == 0
+    then return Nothing
+    else goFind toprevprefix text
+  where
+    goFind revprefix text =
+      case findTerminator text of
+        NoTerm -> readUntilTerminator (maxsize - BS.length text) recv (text : revprefix)
+        IncompleteTerm haveCR
+          | maxsize == BS.length text -> return Nothing
+          | otherwise -> do
+              text2 <- recv (maxsize - BS.length text)
+              text2' <- case BS.length text2 of
+                1 -> (text2 <>) <$> recv (maxsize - BS.length text - 1)  -- premature concat but rare
+                _ -> return text2  -- also for length 0 (i.e. EOF)!
+              -- now text2' has length at least 2 if available before EOF
+              case (haveCR, BS.length text2') of
+                (_, 0) -> return Nothing
+                (False, _) | text2' `BS.index` 0 == 10 -> return (Just (BS.concat (reverse (text : revprefix))))
+                (False, n) | n >= 2, text2' `BS.index` 0 == 13, text2' `BS.index` 1 == 10 ->
+                  return (Just (BS.concat (reverse (text : revprefix))))
+                (True, _) | text2' `BS.index` 0 == 10 -> return (Just (BS.init (BS.concat (reverse (text : revprefix)))))
+                (_, 1) -> return Nothing  -- second recv returned EOF and no terminator found before that
+                _ -> goFind (text : revprefix) text2'
+        FoundTerm i -> return (Just (BS.concat (reverse (BS.take i text : revprefix))))
+
+data FoundTerminator
+  = NoTerm
+  | IncompleteTerm {-# UNPACK #-} !Bool
+      -- ^ Bool indicates "have CR".
+      -- If False, then LF at end of segment, requires LF or CRLF at start of next.
+      -- If True, then LF CR at end of segment, requires LF at start of next.
+  | FoundTerm Int  -- ^ Index right after the first [CR]LF sequence in the terminator
+  deriving (Show)
+
+findTerminator :: ByteString -> FoundTerminator
+findTerminator bs =
+  -- We're looking for [CR] LF [CR] LF. Look for the first LF, and see if it's
+  -- followed by CRLF or LF.
+  go (BS.elemIndices 10 bs)
+  where
+    len = BS.length bs
+
+    go [] = NoTerm
+    go (i:is)
+      | i == len - 1 = IncompleteTerm False
+      | i == len - 2 =
+          case bs `BS.index` (i+1) of
+            10 -> FoundTerm (i+1)
+            13 -> IncompleteTerm True
+            _ -> NoTerm
+      -- now certainly two bytes after this LF
+      | otherwise =
+          case bs `BS.index` (i+1) of
+            10 -> FoundTerm (i+1)
+            -- [i+2] is read twice if this check fails, but LF CR (!=LF) will be very rare so it's fine
+            13 | bs `BS.index` (i+2) == 10 -> FoundTerm (i+1)
+            _ -> go is
+
+toAsciiUpperCase :: Word8 -> Word8
+toAsciiUpperCase c
+  | ord8 'a' <= c, c <= ord8 'z' = c - 32
+  | otherwise = c
+
+toAsciiLowerCase :: Word8 -> Word8
+toAsciiLowerCase c
+  | ord8 'A' <= c, c <= ord8 'Z' = c + 32
+  | otherwise = c