aboutsummaryrefslogtreecommitdiff
path: root/src/Parser.hs
diff options
context:
space:
mode:
authorTom Smeding <tom@tomsmeding.com>2024-02-17 09:22:49 +0100
committerTom Smeding <tom@tomsmeding.com>2024-02-17 09:22:49 +0100
commit3faca807fe96f2cefa50023fe373d8bcf1430121 (patch)
tree00ee8524bb93fb589468edea0502e626f7ff3df2 /src/Parser.hs
parent3ef786673ff8298124cd3b5ef50c35dbb23f77e2 (diff)
Move to src/, working HLS in examples/
Diffstat (limited to 'src/Parser.hs')
-rw-r--r--src/Parser.hs824
1 files changed, 824 insertions, 0 deletions
diff --git a/src/Parser.hs b/src/Parser.hs
new file mode 100644
index 0000000..0f0bd0c
--- /dev/null
+++ b/src/Parser.hs
@@ -0,0 +1,824 @@
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE StandaloneDeriving #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE RankNTypes #-}
+{-# LANGUAGE TypeOperators #-}
+-- I don't want a warning for 'head' and 'tail' in this file. But I also don't
+-- want GHCs before 9.8 to complain that they don't know the x-partial warning.
+{-# OPTIONS_GHC -Wno-unrecognised-warning-flags -Wno-x-partial #-}
+module Parser (
+ parse,
+ printErrMsg,
+ -- * Re-exports
+ These(..),
+) where
+
+import Control.Applicative
+import Control.Monad
+import Control.Monad.Chronicle
+import Control.Monad.Reader
+import Control.Monad.State.Lazy
+import Data.Bifunctor (first)
+import Data.Char
+import Data.Either (partitionEithers)
+import Data.List.NonEmpty (NonEmpty(..))
+import Data.These
+import Data.Tuple (swap)
+
+-- import Debug.Trace
+
+import AST
+
+
+-- Positions are zero-based in both dimensions.
+-- See 'isInsideBlock' and 'isAtBlockLeft' for the two relevant "inside the
+-- block" conditions.
+data PS = PS
+ { psBlkLine :: Int -- ^ Start line of current layout block
+ , psBlkCol :: Int -- ^ Start column of current layout block
+ , psLine :: Int -- ^ Current line
+ , psCol :: Int -- ^ Current column
+ , psRest :: String -- ^ Rest of the input
+ }
+ deriving (Show)
+
+data Context = Context
+ { ctxFile :: FilePath
+ , ctxStack :: [String] -- ^ Stack of syntax scopes, for error reporting
+ }
+ deriving (Show)
+
+-- ReaderT Context (ChronicleT [ErrMsg] (State PS) a)
+-- Context -> ChronicleT [ErrMsg] (State PS) a
+-- Context -> State PS (These [ErrMsg] a)
+-- Context -> PS -> Identity (These [ErrMsg] a, PS)
+-- Context -> PS -> (These [ErrMsg] a, PS)
+-- whereas I want:
+-- Context -> PS -> These [ErrMsg] (a, PS)
+-- which is not any transformer stack, but a new monad.
+newtype Parser a = Parser { runParser :: Context -> PS -> These [ErrMsg] (PS, a) }
+
+instance Functor Parser where
+ fmap f (Parser g) = Parser (\ctx ps -> fmap (fmap f) (g ctx ps))
+
+instance Applicative Parser where
+ pure x = Parser (\_ ps -> That (ps, x))
+ (<*>) = ap
+
+instance Monad Parser where
+ Parser g >>= f = Parser $ \ctx ps ->
+ case g ctx ps of
+ This errs -> This errs
+ That (ps', x) -> runParser (f x) ctx ps'
+ These errs (ps', x) -> case runParser (f x) ctx ps' of
+ This errs' -> This (errs <> errs')
+ That res -> These errs res
+ These errs' res -> These (errs <> errs') res
+
+instance Alternative Parser where
+ empty = Parser (\_ _ -> This [])
+ Parser f <|> Parser g = Parser $ \ctx ps ->
+ case f ctx ps of
+ This _ -> g ctx ps
+ success -> success
+
+instance MonadState PS Parser where
+ state f = Parser $ \_ ps -> That (swap (f ps))
+
+instance MonadReader Context Parser where
+ reader f = Parser $ \ctx ps -> That (ps, f ctx)
+ local f (Parser g) = Parser (g . f)
+
+instance MonadChronicle [ErrMsg] Parser where
+ dictate errs = Parser (\_ ps -> These errs (ps, ()))
+ confess errs = Parser (\_ _ -> This errs)
+ memento (Parser f) = Parser (\ctx ps -> case f ctx ps of
+ This errs -> That (ps, Left errs)
+ That res -> That (Right <$> res)
+ These errs res -> These errs (Right <$> res))
+ absolve def (Parser f) = Parser (\ctx ps -> case f ctx ps of
+ This _ -> That (ps, def)
+ success -> success)
+ condemn (Parser f) = Parser (\ctx ps -> case f ctx ps of
+ These errs _ -> This errs
+ res -> res)
+ retcon g (Parser f) = Parser (\ctx ps -> first g (f ctx ps))
+ chronicle th = Parser (\_ ps -> (ps,) <$> th)
+
+-- Positions are zero-based in both dimensions
+data ErrMsg = ErrMsg
+ { errFile :: FilePath
+ , errStk :: [String]
+ , errLine :: Int
+ , errCol :: Int
+ , errMsg :: String }
+ deriving (Show)
+
+printErrMsg :: ErrMsg -> String
+printErrMsg (ErrMsg fp stk y x s) =
+ unlines (map (\descr -> "In " ++ descr ++ ":") (reverse stk)) ++
+ fp ++ ":" ++ show (y + 1) ++ ":" ++ show (x + 1) ++ ": " ++ s
+
+parse :: FilePath -> String -> These [ErrMsg] (Program ())
+parse fp source = fmap snd $ runParser pProgram (Context fp []) (PS 0 0 0 0 source)
+
+pProgram :: Parser (Program ())
+pProgram = do
+ defs <- many pTopDef
+ let (datadefs, fundefs) = partitionEithers defs
+ skipWhiteComment
+ assertEOF Error
+ return (Program datadefs fundefs)
+
+pTopDef :: Parser (Either DataDef (FunDef ()))
+pTopDef = do
+ skipWhiteComment
+ Left <$> pDataDef0 <|> Right <$> pFunDef0
+
+pDataDef0 :: Parser DataDef
+pDataDef0 = do
+ pKeyword "data"
+ inlineWhite
+ name <- pIdentifier0 InBlock Uppercase
+ params <- many (inlineWhite >> pIdentifier0 InBlock Lowercase)
+ cons <- pDatacons "="
+ return (DataDef name params cons)
+ where
+ pDatacons :: String -> Parser [(Name, [Type])]
+ pDatacons leader = do
+ inlineWhite
+ pKeySym leader
+ inlineWhite
+ name <- pIdentifier0 InBlock Uppercase
+ fields <- many pTypeAtom
+ rest <- pDatacons "|" <|> return []
+ return ((name, fields) : rest)
+
+pFunDef0 :: Parser (FunDef ())
+pFunDef0 = do
+ mtypesig <- optional pStandaloneTypesig0
+ let mname = fst <$> mtypesig
+ mtype = snd <$> mtypesig
+ (clauses, name) <- someClauses mname
+ return (FunDef name mtype clauses)
+ where
+ someClauses :: Maybe Name -> Parser (NonEmpty (FunEq ()), Name)
+ someClauses Nothing = do
+ clause@(FunEq name _ _) <- pFunEq Nothing
+ (,name) . (clause :|) <$> many (pFunEq (Just name))
+ someClauses (Just name) = (,name) <$> someNE (pFunEq (Just name))
+
+-- | Given the name of the type signature, if any.
+pFunEq :: Maybe Name -> Parser (FunEq ())
+pFunEq mCheckName = do
+ skipWhiteComment
+ assertAtBlockLeft Fatal "Expected function clause, found indented stuff"
+
+ name <- pIdentifier0 AtLeft Lowercase
+ case mCheckName of
+ Just checkName | name /= checkName ->
+ raise Fatal "Name of function clause does not correspond with type signature"
+ _ -> return ()
+
+ pats <- many (pPattern 11)
+ rhs <- pRHS "="
+ return (FunEq name pats rhs)
+
+-- | Pass "=" for function definitions and "->" for case clauses.
+pRHS :: String -> Parser (RHS ())
+pRHS sepsym = do
+ -- TODO: parse guards
+ inlineWhite
+ pKeySym sepsym
+ Plain <$> pExpr
+
+pPattern :: Int -> Parser (Pattern ())
+pPattern d = inlineWhite >> pPattern0 d
+
+pPattern0 :: Int -> Parser (Pattern ())
+pPattern0 d = do
+ asum [pPatWildcard0
+ ,pPatVarOrAs0
+ ,pPatCon0
+ ,pPatList0
+ ,pPatParens0]
+ where
+ pPatWildcard0 = pKeySym "_" >> return (PWildcard ())
+ pPatVarOrAs0 = do
+ var <- pIdentifier0 InBlock Lowercase
+ asum [do inlineWhite
+ pKeySym "@"
+ p <- pPattern 11
+ return (PAs () var p)
+ ,return (PVar () var)]
+ pPatCon0 = do
+ con <- pIdentifier0 InBlock Uppercase
+ if d > 0
+ then return (PCon () con [])
+ else do args <- many (pPattern 11)
+ return (PCon () con args)
+ pPatList0 = do
+ char '[' -- special syntax, no need for pKeySym
+ ps <- pPattern 0 `sepBy` (inlineWhite >> char ',')
+ inlineWhite
+ char ']'
+ return (PList () ps)
+ pPatParens0 = do
+ char '('
+ inlineWhite
+ asum [do char ')'
+ return (PTup () [])
+ ,do p <- pPattern0 0
+ inlineWhite
+ asum [do char ')'
+ return p
+ ,do char ','
+ ps <- pPattern 0 `sepBy1` (inlineWhite >> char ',')
+ return (PTup () (p : ps))]]
+
+pExpr :: Parser (Expr ())
+pExpr = do
+ inlineWhite
+ -- basics: lit, list, var, con, tup
+ -- expression atom: application of basics
+ -- expression parser: op
+ -- around: let, case, if
+ asum [pELet0
+ ,pECase0
+ ,pEIf0
+ ,pExprOpExpr0 0]
+
+pELet0 :: Parser (Expr ())
+pELet0 = do
+ pKeyword "let"
+ inlineWhite
+ startLayoutBlock $ do
+ -- The first occurrence is also going to skip whitespace in front,
+ -- which is redundant -- but not harmful.
+ defs <- many $ do
+ skipWhiteComment
+ -- Note: now not necessarily in the indented block. Which is
+ -- precisely what we need here. If we see "in", let the 'many'
+ -- choice fail so that the defs loop ends. But let it fail outside
+ -- this asum so that it is the many that picks it up, not this
+ -- asum.
+ res <- asum [Nothing <$ lookAhead (pKeyword "in")
+ ,Just <$> pFunDef0]
+ case res of
+ Nothing -> empty
+ Just def -> return def
+ inlineWhite
+ body <- pExpr
+ return (ELet () defs body)
+
+pECase0 :: Parser (Expr ())
+pECase0 = do
+ pKeyword "case"
+ e <- pExpr
+ inlineWhite
+ pKeyword "of"
+ inlineWhite
+ startLayoutBlock $ do
+ -- The first clause is going to skip whitespace, but that's harmless
+ -- (though redundant).
+ let pClause = do
+ skipWhiteComment
+ whenM (not <$> isInsideBlock) (() <$ empty)
+ pat <- pPattern0 0
+ rhs <- pRHS "->"
+ return (pat, rhs)
+ clauses <- many pClause
+ return (ECase () e clauses)
+
+pEIf0 :: Parser (Expr ())
+pEIf0 = do
+ pKeyword "if"
+ e1 <- pExpr
+ inlineWhite
+ pKeyword "then"
+ e2 <- pExpr
+ inlineWhite
+ pKeyword "else"
+ e3 <- pExpr
+ return (EIf () e1 e2 e3)
+
+pExprOpExpr :: Int -> Parser (Expr ())
+pExprOpExpr d = inlineWhite >> pExprOpExpr0 d
+
+pExprOpExpr0 :: Int -> Parser (Expr ())
+pExprOpExpr0 d = do
+ e0 <- pEApp0
+ climbRight e0 Nothing
+ where
+ climbRight :: Expr () -> Maybe ParsedOperator -> Parser (Expr ())
+ climbRight lhs mlhsop = do
+ asum [do paop@(PaOp op d2 a2) <- pInfixOp
+ guard (d2 >= d) -- respect global minimum precedence
+ case mlhsop of -- check operator compatibility
+ Just (PaOp _ d1 a1) ->
+ guard (d1 > d2 || (d1 == d2 && a1 == a2 && a1 /= AssocNone))
+ Nothing ->
+ return ()
+ let oprhsd = case a2 of AssocRight -> d2 ; _ -> d2 + 1
+ rhs <- pExprOpExpr oprhsd
+ climbRight (EOp () lhs op rhs) (Just paop)
+ ,return lhs]
+
+pEApp0 :: Parser (Expr ())
+pEApp0 = do
+ e1 <- pEAtom0
+ es <- many (inlineWhite >> pEAtom0)
+ case es of
+ [] -> return e1
+ _ -> return (EApp () e1 es)
+
+pEAtom0 :: Parser (Expr ())
+pEAtom0 = (ELit () <$> pLiteral0) <|> pEList0 <|> pEVar0 <|> pECon0 <|> pEParens0
+
+pLiteral0 :: Parser Literal
+pLiteral0 = asum
+ [do as <- some (satisfy isDigit)
+ let a = read as :: Integer
+ asum
+ [do char '.'
+ bs <- some (satisfy isDigit)
+ let b = read bs :: Integer
+ cs <- optional $ do
+ char 'e'
+ cs <- some (satisfy isDigit)
+ return cs
+ let c = maybe 0 read cs :: Integer
+ return (LFloat ((fromIntegral a + fromIntegral b / 10 ^ length bs) * 10 ^ c))
+ ,return (LInt a)]
+ ,do char '\''
+ c <- pStringChar
+ char '\''
+ return (LChar c)
+ ,do char '"'
+ s <- many pStringChar
+ char '"'
+ return (LString s)]
+
+pStringChar :: Parser Char
+pStringChar = asum
+ [do char '\\'
+ char 'x'
+ let hexdig = do
+ c <- satisfy $ \c' ->
+ let c = toLower c'
+ in 'a' <= c && c <= 'f' || '0' <= c && c <= '9'
+ return $ if 'a' <= c then 10 + ord c - ord 'a'
+ else ord c - ord '0'
+ digs <- some hexdig
+ return (chr (sum (zipWith (*) (reverse digs) (iterate (*16) 1))))
+ ,do char '\\'
+ satisfy (const True) >>= \case
+ 'n' -> return '\n'
+ 'r' -> return '\r'
+ 't' -> return '\t'
+ 'a' -> return '\a'
+ 'b' -> return '\b'
+ '\'' -> return '\''
+ '\"' -> return '\"'
+ '0' -> return '\0'
+ c -> do raise Error $ "Invalid escape sequence: \\" ++ [c]
+ return '?'
+ ,do satisfy (\c -> c `notElem` "\n\r\\\'")]
+
+pEList0 :: Parser (Expr ())
+pEList0 = do
+ char '[' -- special syntax, no need for pKeySym
+ es <- sepBy pExpr (inlineWhite >> char ',')
+ inlineWhite
+ char ']'
+ return (EList () es)
+
+pEVar0 :: Parser (Expr ())
+pEVar0 = EVar () <$> pIdentifier0 InBlock Lowercase
+
+pECon0 :: Parser (Expr ())
+pECon0 = ECon () <$> pIdentifier0 InBlock Uppercase
+
+pEParens0 :: Parser (Expr ())
+pEParens0 = do
+ char '('
+ e <- pExpr
+ inlineWhite
+ char ')'
+ return e
+
+data Associativity = AssocLeft | AssocRight | AssocNone
+ deriving (Show, Eq)
+
+data ParsedOperator = PaOp Operator Int Associativity
+ deriving (Show)
+
+pInfixOp :: Parser ParsedOperator
+pInfixOp = do
+ inlineWhite
+ asum [PaOp OEqu 4 AssocNone <$ pKeySym "=="
+ ,PaOp OAdd 6 AssocLeft <$ pKeySym "+"
+ ,PaOp OSub 6 AssocLeft <$ pKeySym "-"
+ ,PaOp OMul 7 AssocLeft <$ pKeySym "*"
+ ,PaOp ODiv 7 AssocLeft <$ pKeySym "/"
+ ,PaOp OMod 7 AssocLeft <$ pKeySym "%"
+ ,PaOp OPow 8 AssocRight <$ pKeySym "^"
+ ]
+
+pStandaloneTypesig0 :: Parser (Name, Type)
+pStandaloneTypesig0 = do
+ assertAtBlockLeft Fatal "Expected top-level definition, found indented stuff"
+ name@(Name namestr) <- pIdentifier0 AtLeft Lowercase
+ inlineWhite
+ pKeySym "::"
+ pushContext ("type signature for '" ++ namestr ++ "'") $ do
+ ty <- pType
+ return (name, ty)
+
+pType :: Parser Type
+pType = do
+ ty1 <- pTypeApp
+ asum [do inlineWhite
+ pKeySym "->"
+ ty2 <- pType
+ return (TFun ty1 ty2)
+ ,return ty1]
+
+pTypeApp :: Parser Type
+pTypeApp = many pTypeAtom >>= \case
+ [] -> raise Fatal "Expected type"
+ [t] -> return t
+ t:ts -> return (TApp t ts)
+
+pTypeAtom :: Parser Type
+pTypeAtom = pTypeParens <|> pTypeList <|> pTypeName
+ where
+ pTypeParens = do
+ inlineWhite
+ char '('
+ asum [do inlineWhite
+ char ')'
+ return (TTup [])
+ ,do ty1 <- pType
+ ty2s <- many $ do
+ inlineWhite
+ char ','
+ pType
+ inlineWhite
+ char ')'
+ case ty2s of
+ [] -> return ty1
+ _ -> return (TTup (ty1 : ty2s))]
+
+ pTypeList = do
+ inlineWhite
+ char '['
+ ty <- pType
+ char ']'
+ return (TList ty)
+
+ pTypeName = do
+ inlineWhite
+ (cs, name) <- pIdentifier0 InBlock Don'tCare
+ case cs of
+ Uppercase -> return (TCon name)
+ Lowercase -> return (TVar name)
+
+-- | Parse the given name-like keyword, ensuring that it is the entire word.
+pKeyword :: String -> Parser ()
+pKeyword s = do
+ string s
+ notFollowedBy (() <$ satisfy isNameContChar)
+
+-- | Parse the given symbol-like keyword, ensuring that it is the entire symbol.
+pKeySym :: String -> Parser ()
+pKeySym s = do
+ string s
+ notFollowedBy (() <$ satisfy isSymbolChar)
+
+data Case care where
+ Uppercase :: Case 'True
+ Lowercase :: Case 'True
+ Don'tCare :: Case 'False
+deriving instance Show (Case care)
+
+type family WithCaseOutput care a where
+ WithCaseOutput 'True a = a
+ WithCaseOutput 'False a = (Case 'True, a)
+
+-- | Consumes an identifier (word or parenthesised operator) at the current
+-- position. The `var` production in Haskell2010.
+-- var -> varid | "(" varsym ")"
+pIdentifier0 :: BlockPos -> Case care -> Parser (WithCaseOutput care Name)
+pIdentifier0 bpos cs = pAlphaName0 bpos cs <|> pParens0 (pSymbol0 bpos cs)
+ where
+ -- | Parser between parens, with the opening paren at the current position.
+ pParens0 :: Parser a -> Parser a
+ pParens0 p = do
+ char '('
+ inlineWhite
+ res <- p
+ inlineWhite
+ char ')'
+ return res
+
+-- | Consumes a word-like name at the current position with the given case. The
+-- `varid` production in Haskell2010 for 'Lowercase', `conid' for 'Uppercase'.
+--
+-- > varid -> (small {small | large | digit | "'"}) without reservedid
+pAlphaName0 :: BlockPos -> Case care -> Parser (WithCaseOutput care Name)
+pAlphaName0 bpos cs = do
+ (_, s) <- readToken
+ bpos
+ (\atfst mc -> case (atfst, mc) of
+ (True , Just c) | isNameHeadChar c -> Just (Right False)
+ (True , _ ) -> Nothing
+ (False, Just c) | isNameContChar c -> Just (Right False)
+ (False, _ ) -> Just (Left ()))
+ True
+ (name, adjoin) <- case cs of
+ Uppercase
+ | isLower (head s) -> do
+ raise Error "Unexpected uppercase word at this position, assuming typo"
+ return (toUpper (head s) : tail s, id)
+ | otherwise -> return (s, id)
+ Lowercase
+ | isUpper (head s) -> do
+ raise Error "Unexpected lowercase word at this position, assuming typo"
+ return (toLower (head s) : tail s, id)
+ | otherwise -> return (s, id)
+ Don'tCare
+ | isLower (head s) -> return (s, (Lowercase,))
+ | otherwise -> return (s, (Lowercase,))
+ guard (name `notElem` ["case", "class", "data", "default", "deriving", "do", "else"
+ ,"foreign", "if", "import", "in", "infix", "infixl"
+ ,"infixr", "instance", "let", "module", "newtype", "of"
+ ,"then", "type", "where", "_"])
+ return (adjoin (Name name))
+
+isNameHeadChar :: Char -> Bool
+isNameHeadChar c = isLetter c || c == '_'
+
+isNameContChar :: Char -> Bool
+isNameContChar c = isNameHeadChar c || isDigit c || c == '\''
+
+-- | Consumes a symbol at the current position. The `varsym` production in
+-- Haskell2010 for 'Lowercase', `consym` otherwise, and either if 'Don'tCare'.
+--
+-- > varsym -> ((symbol without ":") {symbol}) without (reservedop | dashes)
+-- > consym -> (":" {symbol}) without reservedop
+-- > symbol -> ascSymbol | uniSymbol without (special | "_" | "\"" | "'")
+-- > ascSymbol -> ```!#$%&⋆+./<=>?@^|-~:```
+-- > uniSymbol -> unicode symbol or punctuation
+-- > dashes -> "--" {"-"}
+-- > special -> ```(),;[]`{}```
+-- > reservedop -> ".." | ":" | "::" | "=" | "\" | "|" | "<-" | "->" | "@" | "~" | "=>"
+pSymbol0 :: BlockPos -> Case care -> Parser (WithCaseOutput care Name)
+pSymbol0 bpos cs = do
+ case bpos of
+ AtLeft -> assertAtBlockLeft Fatal "Expected symbol, but found indented expression"
+ InBlock -> assertInsideBlock Fatal "Expected symbol, but found end of indented expression"
+ (c1, adjoin) <-
+ case cs of Lowercase -> (,id) <$> satisfy (\c -> isSymbolChar c && c /= ':')
+ Uppercase -> (,id) <$> satisfy (== ':')
+ Don'tCare -> do c1 <- satisfy (\c -> isSymbolChar c)
+ return (c1, if c1 == ':' then (Uppercase,) else (Lowercase,))
+ crest <- many (satisfy isSymbolChar)
+ let name = c1 : crest
+ guard (name `notElem` ["..", ":", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"])
+ guard (take 2 name /= "--")
+ return (adjoin (Name name))
+
+isSymbolChar :: Char -> Bool
+isSymbolChar c = (isAscSymbol || isUniSymbol) && not isSpecialExt
+ where
+ isSpecialExt = c `elem` "(),;[]`{}_\"'"
+ isAscSymbol = c `elem` "!#$%&⋆+./<=>?@^|-~:"
+ isUniSymbol = ord c > 127 && (isSymbol c || isPunctuation c)
+
+
+sepBy1 :: Parser a -> Parser sep -> Parser [a]
+sepBy1 p psep = do
+ x1 <- p
+ (psep >> (x1 :) <$> sepBy1 p psep) <|> return [x1]
+
+sepBy :: Parser a -> Parser sep -> Parser [a]
+sepBy p psep = sepBy1 p psep <|> return []
+
+-- | Start a new layout block at the current position. The old layout block is
+-- restored after completion of this subparser.
+startLayoutBlock :: Parser a -> Parser a
+startLayoutBlock p = do
+ ps0 <- get
+ put (ps0 { psBlkLine = psLine ps0
+ , psBlkCol = psCol ps0 })
+ res <- p
+ modify (\ps -> ps { psBlkLine = psBlkLine ps0
+ , psBlkCol = psBlkCol ps0 })
+ return res
+
+data Fatality fatal where
+ Error :: Fatality 'False
+ Fatal :: Fatality 'True
+deriving instance Show (Fatality fatal)
+
+type family FatalCtx fatal a where
+ FatalCtx 'False a = a ~ ()
+ FatalCtx 'True a = ()
+
+-- | Raise an error with the given fatality and description.
+raise :: FatalCtx fatal a => Fatality fatal -> String -> Parser a
+raise fat msg = do
+ Context { ctxFile = fp , ctxStack = stk } <- ask
+ PS { psLine = line, psCol = col } <- get
+ let fun = case fat of
+ Error -> dictate . pure
+ Fatal -> confess . pure
+ fun (ErrMsg fp stk line col msg)
+
+raise' :: Fatality fatal -> String -> Parser ()
+raise' Error = raise Error
+raise' Fatal = raise Fatal
+
+-- | Registers a scope description on the stack for error reporting.
+pushContext :: String -> Parser a -> Parser a
+pushContext descr = local (\c -> c { ctxStack = descr : ctxStack c })
+
+data BlockPos = AtLeft | InBlock
+ deriving (Show)
+
+-- | Consumes a token at the current position, asserting that we are
+-- in the position indicated by the 'BlockPos' argument. The token is defined
+-- by a pure stateful parser. If encountering a newline or EOF, the parser is
+-- run on this character ('Nothing' for EOF); if this produces a result, the
+-- result is returned; otherwise, the parser fails. The newline is not consumed.
+readToken :: BlockPos -> (s -> Maybe Char -> Maybe (Either r s)) -> s -> Parser (r, String)
+readToken bpos f s0 = do
+ case bpos of
+ AtLeft -> assertAtBlockLeft Fatal "Expected token, but found indented expression"
+ InBlock -> assertInsideBlock Fatal "Expected token, but found end of indented expression"
+ let loop :: (s -> Maybe Char -> Maybe (Either r s)) -> s -> Parser (r, String)
+ loop f' st = do
+ ps <- get
+ case psRest ps of
+ [] | Just (Left res) <- f' st Nothing -> return (res, "")
+ | otherwise -> empty
+ '\n' : _ | Just (Left res) <- f' st (Just '\n') -> return (res, "")
+ c : cs -> case f' st (Just c) of
+ Nothing -> empty
+ Just (Left res) -> return (res, "")
+ Just (Right st') -> do
+ put (ps { psCol = psCol ps + 1, psRest = cs })
+ fmap (c :) <$> loop f' st'
+ loop f s0
+
+-- | Consumes all whitespace and comments (including newlines), but only if
+-- this then leaves the parser inside the current block. If not, this fails.
+inlineWhite :: Parser ()
+inlineWhite = do
+ skipWhiteComment
+ whenM (not <$> isInsideBlock) empty
+
+-- | Consumes all whitespace and comments (including newlines). Note: this may
+-- end outside the current block.
+skipWhiteComment :: Parser ()
+skipWhiteComment = do
+ inlineSpaces
+ _ <- many (blockComment >> inlineSpaces)
+ optional_ lineComment
+ optional_ (consumeNewline >> skipWhiteComment)
+ where
+ -- | Consumes some inline whitespace. Stops before newlines.
+ inlineSpaces :: Parser ()
+ inlineSpaces = readWhileInline isSpace
+
+-- | Consumes an delimited comment including both end markers. Note: this may
+-- end outside the current block.
+blockComment :: Parser ()
+blockComment = do
+ string "{-" -- no need for pKeySym here
+ let loop = do
+ readWhileInline (`notElem` "{-") -- "-}" also starts with '-'
+ asum [string "-}"
+ ,eof >> raise Error "Unfinished {- -} comment at end of file"
+ ,blockComment >> loop
+ ,consumeNewline >> loop]
+ loop
+
+-- | Consumes a line comment marker and the rest of the line, excluding
+-- newline.
+lineComment :: Parser ()
+lineComment = do
+ -- '--!' is an operator, so we need to parse a whole symbol here.
+ pKeySym "--"
+ readWhileInline (const True)
+
+-- | Raises an error if we're not currently at the given column.
+assertAtBlockLeft :: Fatality fatal -> String -> Parser ()
+assertAtBlockLeft fat msg = whenM (not <$> isAtBlockLeft) $ raise' fat msg
+
+-- | Raises an error if psCol is not greater than psRefCol.
+assertInsideBlock :: Fatality fatal -> String -> Parser ()
+assertInsideBlock fat msg = whenM (not <$> isInsideBlock) $ raise' fat msg
+
+-- | Raises an error if we're not currently at EOF.
+assertEOF :: Fatality fatal -> Parser ()
+assertEOF fat = gets psRest >>= \case
+ [] -> return ()
+ _ -> raise' fat "Unexpected stuff"
+
+-- | Returns whether the current position is _within_ the current block, for
+-- soft-wrapping content. This means that col > blkCol.
+isInsideBlock :: Parser Bool
+isInsideBlock = do
+ ps <- get
+ return $ psLine ps >= psBlkLine ps && psCol ps > psBlkCol ps
+
+-- | Returns whether the current position is at the left border of the block;
+-- this is for list content such as function definitions or let bindings. This
+-- means that col == blkCol.
+isAtBlockLeft :: Parser Bool
+isAtBlockLeft = do
+ ps <- get
+ return $ psLine ps >= psBlkLine ps && psCol ps == psBlkCol ps
+
+-- | Consumes characters while the predicate holds or until (and excluding)
+-- a newline, whichever comes first.
+readWhileInline :: (Char -> Bool) -> Parser ()
+readWhileInline p = do
+ (taken, rest) <- span (\c -> p c && c /= '\n') <$> gets psRest
+ modify (\ps -> ps { psCol = psCol ps + length taken
+ , psRest = rest })
+
+-- | Consumes exactly one newline at the current position.
+consumeNewline :: Parser ()
+consumeNewline = gets psRest >>= \case
+ '\n' : rest -> modify (\ps -> ps { psLine = psLine ps + 1
+ , psCol = 0
+ , psRest = rest })
+ _ -> empty
+
+-- | Consumes exactly one character, unequal to newline, at the current position.
+satisfy :: (Char -> Bool) -> Parser Char
+satisfy p = do
+ -- traceM "entering satisfy"
+ r <- gets psRest
+ -- traceM "got rest"
+ r `seq` return ()
+ -- traceM "seqd rest"
+ -- traceM ("rest is " ++ show r)
+ case r of
+ c : rest | c /= '\n', p c -> do
+ modify (\ps -> ps { psCol = psCol ps + 1
+ , psRest = rest })
+ return c
+ _ -> empty
+
+-- | Consumes exactly this character at the current position. Must not be a
+-- newline.
+char :: Char -> Parser ()
+char c = string [c]
+
+-- | Consumes exactly this string at the current position. The string must not
+-- contain a newline.
+string :: String -> Parser ()
+string s | any (== '\n') s = error "Newline in 'string' argument"
+string s = do
+ ps <- get
+ if take (length s) (psRest ps) == s
+ then put (ps { psCol = psCol ps + length s
+ , psRest = drop (length s) (psRest ps) })
+ else empty
+
+lookAhead :: Parser () -> Parser ()
+lookAhead p = do
+ ps <- get
+ success <- absolve False (True <$ p)
+ put ps -- restore state, as if nothing happened
+ when (not success) empty
+
+notFollowedBy :: Parser () -> Parser ()
+notFollowedBy p = do
+ ps <- get
+ success <- absolve True (False <$ p)
+ put ps -- restore state, as if nothing happened
+ when (not success) empty
+
+-- | Only succeeds at EOF.
+eof :: Parser ()
+eof = gets psRest >>= \case [] -> return ()
+ _ -> empty
+
+whenM :: (Monad m, Monoid a) => m Bool -> m a -> m a
+whenM mb mx = mb >>= \b -> if b then mx else return mempty
+
+optional_ :: Alternative f => f a -> f ()
+optional_ a = (() <$ a) <|> pure ()
+
+someNE :: Alternative f => f a -> f (NonEmpty a)
+someNE a = (:|) <$> a <*> many a