mini-http/Network/HTTP/Server/Mini/Parser.hs


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162

{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
module Network.HTTP.Server.Mini.Parser where

import Control.Monad (guard)
import Control.Monad.Trans.Maybe
import Data.ByteString (ByteString)
import Data.ByteString qualified as BS
import Data.ByteString.Short (ShortByteString)
import Data.ByteString.Short qualified as SBS
import Data.Char
import Data.Word
import FlatParse.Basic qualified as P

import Network.HTTP.Server.Mini.Types
import Network.HTTP.Server.Mini.URI
import Network.HTTP.Server.Mini.Util


readRequest :: Monad m => Int -> (Int -> m ByteString) -> m (Maybe Request)
readRequest maxsize recv = runMaybeT $ do
  bs <- MaybeT $ readUntilTerminator maxsize recv []
  hoistMaybe (parseRequest bs)

parseRequest :: ByteString -> Maybe Request
parseRequest bs =
  case P.runParser pRequest bs of
    P.OK req rest | BS.null rest -> Just req
    _ -> Nothing

pRequest :: P.Parser e Request
pRequest = do
  (method, uri) <- pFirstLine
  headers <- P.many pHeader
  let (path, query) = parseURI uri
  return Request { reqMethod = method
                 , reqURI = uri
                 , reqHeaders = headers
                 , reqPath = path
                 , reqQuery = query }

pFirstLine :: P.Parser e (ShortByteString, ByteString)
pFirstLine = do
  method <- pTakeUntil (ord8 ' ')
  guard (BS.length method <= 32)  -- will put it in an SBS later
  P.word8 32
  uri <- pTakeUntil (ord8 ' ')
  P.word8 32
  $(P.bytes (map (fromIntegral @Int @Word . ord) "HTTP/1."))
  version <- P.anyWord8
  guard (version == ord8 '0' || version == ord8 '1')
  pNewline
  return (SBS.pack (map toAsciiUpperCase (BS.unpack method)), uri)

pHeader :: P.Parser e (Header, ByteString)
pHeader = do
  key <- pTakeUntil (ord8 ':')
  guard (BS.length key <= 128)  -- will put it in an SBS later
  P.word8 (ord8 ':')
  P.skipMany (P.satisfy isLinearSpace)
  value <- pValue []
  return (SBS.pack (map toAsciiLowerCase (BS.unpack key)), value)
  where
    pValue revlines = do
      line <- trimCR <$> pTakeUntil 10
      P.word8 10
      P.withOption (P.lookahead (P.skipSatisfyAscii isLinearSpace))
        (\_ -> pValue (line : revlines))
        (return (BS.concat (reverse (line : revlines))))

    trimCR bs
      | len >= 1, bs `BS.index` (len - 1) == 13 = BS.take (len - 1) bs
      | otherwise = bs
      where len = BS.length bs

isLinearSpace :: Char -> Bool
isLinearSpace c = c == ' ' || c == '\t'

pTakeUntil :: Word8 -> P.Parser e ByteString
pTakeUntil term = do
  midx <- P.lookahead (BS.elemIndex term <$> P.takeRest)
  case midx of
    Just idx -> P.take idx
    Nothing -> P.failed

pNewline :: P.Parser e ()
pNewline = P.withAnyWord8 $ \case
  10 -> return ()
  13 -> P.word8 10
  _ -> P.failed

readUntilTerminator :: Monad m => Int -> (Int -> m ByteString) -> [ByteString] -> m (Maybe ByteString)
readUntilTerminator 0 _ _ = return Nothing
readUntilTerminator maxsize recv toprevprefix = do
  text <- recv maxsize
  if BS.length text == 0
    then return Nothing
    else goFind toprevprefix text
  where
    goFind revprefix text =
      case findTerminator text of
        NoTerm -> readUntilTerminator (maxsize - BS.length text) recv (text : revprefix)
        IncompleteTerm haveCR
          | maxsize == BS.length text -> return Nothing
          | otherwise -> do
              text2 <- recv (maxsize - BS.length text)
              text2' <- case BS.length text2 of
                1 -> (text2 <>) <$> recv (maxsize - BS.length text - 1)  -- premature concat but rare
                _ -> return text2  -- also for length 0 (i.e. EOF)!
              -- now text2' has length at least 2 if available before EOF
              case (haveCR, BS.length text2') of
                (_, 0) -> return Nothing
                (False, _) | text2' `BS.index` 0 == 10 -> return (Just (BS.concat (reverse revprefix)))
                (False, n) | n >= 2, text2' `BS.index` 0 == 13, text2' `BS.index` 0 == 10 ->
                  return (Just (BS.concat (reverse revprefix)))
                (True, _) | text2' `BS.index` 0 == 10 -> return (Just (BS.init (BS.concat (reverse revprefix))))
                (_, 1) -> return Nothing  -- second recv returned EOF and no terminator found before that
                _ -> goFind (text : revprefix) text2'
        FoundTerm i -> return (Just (BS.concat (reverse (BS.take i text : revprefix))))

data FoundTerminator
  = NoTerm
  | IncompleteTerm {-# UNPACK #-} !Bool
      -- ^ Bool indicates "have CR".
      -- If False, then LF at end of segment, requires LF or CRLF at start of next.
      -- If True, then LF CR at end of segment, requires LF at start of next.
  | FoundTerm Int  -- ^ Index right after the first [CR]LF sequence in the terminator
  deriving (Show)

findTerminator :: ByteString -> FoundTerminator
findTerminator bs =
  -- We're looking for [CR] LF [CR] LF. Look for the first LF, and see if it's
  -- followed by CRLF or LF.
  go (BS.elemIndices 10 bs)
  where
    len = BS.length bs

    go [] = NoTerm
    go (i:is)
      | i == len - 1 = IncompleteTerm False
      | i == len - 2 =
          case bs `BS.index` (i+1) of
            10 -> FoundTerm (i+1)
            13 -> IncompleteTerm True
            _ -> NoTerm
      -- now certainly two bytes after this LF
      | otherwise =
          case bs `BS.index` (i+1) of
            10 -> FoundTerm (i+1)
            -- [i+2] is read twice if this check fails, but LF CR (!=LF) will be very rare so it's fine
            13 | bs `BS.index` (i+2) == 10 -> FoundTerm (i+1)
            _ -> go is

toAsciiUpperCase :: Word8 -> Word8
toAsciiUpperCase c
  | ord8 'a' <= c, c <= ord8 'z' = c - 32
  | otherwise = c

toAsciiLowerCase :: Word8 -> Word8
toAsciiLowerCase c
  | ord8 'A' <= c, c <= ord8 'Z' = c + 32
  | otherwise = c