aboutsummaryrefslogtreecommitdiff
path: root/Parser.hs
diff options
context:
space:
mode:
authorTom Smeding <tom@tomsmeding.com>2024-02-17 09:22:49 +0100
committerTom Smeding <tom@tomsmeding.com>2024-02-17 09:22:49 +0100
commit3faca807fe96f2cefa50023fe373d8bcf1430121 (patch)
tree00ee8524bb93fb589468edea0502e626f7ff3df2 /Parser.hs
parent3ef786673ff8298124cd3b5ef50c35dbb23f77e2 (diff)
Move to src/, working HLS in examples/
Diffstat (limited to 'Parser.hs')
-rw-r--r--Parser.hs824
1 files changed, 0 insertions, 824 deletions
diff --git a/Parser.hs b/Parser.hs
deleted file mode 100644
index 0f0bd0c..0000000
--- a/Parser.hs
+++ /dev/null
@@ -1,824 +0,0 @@
-{-# LANGUAGE LambdaCase #-}
-{-# LANGUAGE FlexibleInstances #-}
-{-# LANGUAGE MultiParamTypeClasses #-}
-{-# LANGUAGE TupleSections #-}
-{-# LANGUAGE DataKinds #-}
-{-# LANGUAGE GADTs #-}
-{-# LANGUAGE StandaloneDeriving #-}
-{-# LANGUAGE TypeFamilies #-}
-{-# LANGUAGE RankNTypes #-}
-{-# LANGUAGE TypeOperators #-}
--- I don't want a warning for 'head' and 'tail' in this file. But I also don't
--- want GHCs before 9.8 to complain that they don't know the x-partial warning.
-{-# OPTIONS_GHC -Wno-unrecognised-warning-flags -Wno-x-partial #-}
-module Parser (
- parse,
- printErrMsg,
- -- * Re-exports
- These(..),
-) where
-
-import Control.Applicative
-import Control.Monad
-import Control.Monad.Chronicle
-import Control.Monad.Reader
-import Control.Monad.State.Lazy
-import Data.Bifunctor (first)
-import Data.Char
-import Data.Either (partitionEithers)
-import Data.List.NonEmpty (NonEmpty(..))
-import Data.These
-import Data.Tuple (swap)
-
--- import Debug.Trace
-
-import AST
-
-
--- Positions are zero-based in both dimensions.
--- See 'isInsideBlock' and 'isAtBlockLeft' for the two relevant "inside the
--- block" conditions.
-data PS = PS
- { psBlkLine :: Int -- ^ Start line of current layout block
- , psBlkCol :: Int -- ^ Start column of current layout block
- , psLine :: Int -- ^ Current line
- , psCol :: Int -- ^ Current column
- , psRest :: String -- ^ Rest of the input
- }
- deriving (Show)
-
-data Context = Context
- { ctxFile :: FilePath
- , ctxStack :: [String] -- ^ Stack of syntax scopes, for error reporting
- }
- deriving (Show)
-
--- ReaderT Context (ChronicleT [ErrMsg] (State PS) a)
--- Context -> ChronicleT [ErrMsg] (State PS) a
--- Context -> State PS (These [ErrMsg] a)
--- Context -> PS -> Identity (These [ErrMsg] a, PS)
--- Context -> PS -> (These [ErrMsg] a, PS)
--- whereas I want:
--- Context -> PS -> These [ErrMsg] (a, PS)
--- which is not any transformer stack, but a new monad.
-newtype Parser a = Parser { runParser :: Context -> PS -> These [ErrMsg] (PS, a) }
-
-instance Functor Parser where
- fmap f (Parser g) = Parser (\ctx ps -> fmap (fmap f) (g ctx ps))
-
-instance Applicative Parser where
- pure x = Parser (\_ ps -> That (ps, x))
- (<*>) = ap
-
-instance Monad Parser where
- Parser g >>= f = Parser $ \ctx ps ->
- case g ctx ps of
- This errs -> This errs
- That (ps', x) -> runParser (f x) ctx ps'
- These errs (ps', x) -> case runParser (f x) ctx ps' of
- This errs' -> This (errs <> errs')
- That res -> These errs res
- These errs' res -> These (errs <> errs') res
-
-instance Alternative Parser where
- empty = Parser (\_ _ -> This [])
- Parser f <|> Parser g = Parser $ \ctx ps ->
- case f ctx ps of
- This _ -> g ctx ps
- success -> success
-
-instance MonadState PS Parser where
- state f = Parser $ \_ ps -> That (swap (f ps))
-
-instance MonadReader Context Parser where
- reader f = Parser $ \ctx ps -> That (ps, f ctx)
- local f (Parser g) = Parser (g . f)
-
-instance MonadChronicle [ErrMsg] Parser where
- dictate errs = Parser (\_ ps -> These errs (ps, ()))
- confess errs = Parser (\_ _ -> This errs)
- memento (Parser f) = Parser (\ctx ps -> case f ctx ps of
- This errs -> That (ps, Left errs)
- That res -> That (Right <$> res)
- These errs res -> These errs (Right <$> res))
- absolve def (Parser f) = Parser (\ctx ps -> case f ctx ps of
- This _ -> That (ps, def)
- success -> success)
- condemn (Parser f) = Parser (\ctx ps -> case f ctx ps of
- These errs _ -> This errs
- res -> res)
- retcon g (Parser f) = Parser (\ctx ps -> first g (f ctx ps))
- chronicle th = Parser (\_ ps -> (ps,) <$> th)
-
--- Positions are zero-based in both dimensions
-data ErrMsg = ErrMsg
- { errFile :: FilePath
- , errStk :: [String]
- , errLine :: Int
- , errCol :: Int
- , errMsg :: String }
- deriving (Show)
-
-printErrMsg :: ErrMsg -> String
-printErrMsg (ErrMsg fp stk y x s) =
- unlines (map (\descr -> "In " ++ descr ++ ":") (reverse stk)) ++
- fp ++ ":" ++ show (y + 1) ++ ":" ++ show (x + 1) ++ ": " ++ s
-
-parse :: FilePath -> String -> These [ErrMsg] (Program ())
-parse fp source = fmap snd $ runParser pProgram (Context fp []) (PS 0 0 0 0 source)
-
-pProgram :: Parser (Program ())
-pProgram = do
- defs <- many pTopDef
- let (datadefs, fundefs) = partitionEithers defs
- skipWhiteComment
- assertEOF Error
- return (Program datadefs fundefs)
-
-pTopDef :: Parser (Either DataDef (FunDef ()))
-pTopDef = do
- skipWhiteComment
- Left <$> pDataDef0 <|> Right <$> pFunDef0
-
-pDataDef0 :: Parser DataDef
-pDataDef0 = do
- pKeyword "data"
- inlineWhite
- name <- pIdentifier0 InBlock Uppercase
- params <- many (inlineWhite >> pIdentifier0 InBlock Lowercase)
- cons <- pDatacons "="
- return (DataDef name params cons)
- where
- pDatacons :: String -> Parser [(Name, [Type])]
- pDatacons leader = do
- inlineWhite
- pKeySym leader
- inlineWhite
- name <- pIdentifier0 InBlock Uppercase
- fields <- many pTypeAtom
- rest <- pDatacons "|" <|> return []
- return ((name, fields) : rest)
-
-pFunDef0 :: Parser (FunDef ())
-pFunDef0 = do
- mtypesig <- optional pStandaloneTypesig0
- let mname = fst <$> mtypesig
- mtype = snd <$> mtypesig
- (clauses, name) <- someClauses mname
- return (FunDef name mtype clauses)
- where
- someClauses :: Maybe Name -> Parser (NonEmpty (FunEq ()), Name)
- someClauses Nothing = do
- clause@(FunEq name _ _) <- pFunEq Nothing
- (,name) . (clause :|) <$> many (pFunEq (Just name))
- someClauses (Just name) = (,name) <$> someNE (pFunEq (Just name))
-
--- | Given the name of the type signature, if any.
-pFunEq :: Maybe Name -> Parser (FunEq ())
-pFunEq mCheckName = do
- skipWhiteComment
- assertAtBlockLeft Fatal "Expected function clause, found indented stuff"
-
- name <- pIdentifier0 AtLeft Lowercase
- case mCheckName of
- Just checkName | name /= checkName ->
- raise Fatal "Name of function clause does not correspond with type signature"
- _ -> return ()
-
- pats <- many (pPattern 11)
- rhs <- pRHS "="
- return (FunEq name pats rhs)
-
--- | Pass "=" for function definitions and "->" for case clauses.
-pRHS :: String -> Parser (RHS ())
-pRHS sepsym = do
- -- TODO: parse guards
- inlineWhite
- pKeySym sepsym
- Plain <$> pExpr
-
-pPattern :: Int -> Parser (Pattern ())
-pPattern d = inlineWhite >> pPattern0 d
-
-pPattern0 :: Int -> Parser (Pattern ())
-pPattern0 d = do
- asum [pPatWildcard0
- ,pPatVarOrAs0
- ,pPatCon0
- ,pPatList0
- ,pPatParens0]
- where
- pPatWildcard0 = pKeySym "_" >> return (PWildcard ())
- pPatVarOrAs0 = do
- var <- pIdentifier0 InBlock Lowercase
- asum [do inlineWhite
- pKeySym "@"
- p <- pPattern 11
- return (PAs () var p)
- ,return (PVar () var)]
- pPatCon0 = do
- con <- pIdentifier0 InBlock Uppercase
- if d > 0
- then return (PCon () con [])
- else do args <- many (pPattern 11)
- return (PCon () con args)
- pPatList0 = do
- char '[' -- special syntax, no need for pKeySym
- ps <- pPattern 0 `sepBy` (inlineWhite >> char ',')
- inlineWhite
- char ']'
- return (PList () ps)
- pPatParens0 = do
- char '('
- inlineWhite
- asum [do char ')'
- return (PTup () [])
- ,do p <- pPattern0 0
- inlineWhite
- asum [do char ')'
- return p
- ,do char ','
- ps <- pPattern 0 `sepBy1` (inlineWhite >> char ',')
- return (PTup () (p : ps))]]
-
-pExpr :: Parser (Expr ())
-pExpr = do
- inlineWhite
- -- basics: lit, list, var, con, tup
- -- expression atom: application of basics
- -- expression parser: op
- -- around: let, case, if
- asum [pELet0
- ,pECase0
- ,pEIf0
- ,pExprOpExpr0 0]
-
-pELet0 :: Parser (Expr ())
-pELet0 = do
- pKeyword "let"
- inlineWhite
- startLayoutBlock $ do
- -- The first occurrence is also going to skip whitespace in front,
- -- which is redundant -- but not harmful.
- defs <- many $ do
- skipWhiteComment
- -- Note: now not necessarily in the indented block. Which is
- -- precisely what we need here. If we see "in", let the 'many'
- -- choice fail so that the defs loop ends. But let it fail outside
- -- this asum so that it is the many that picks it up, not this
- -- asum.
- res <- asum [Nothing <$ lookAhead (pKeyword "in")
- ,Just <$> pFunDef0]
- case res of
- Nothing -> empty
- Just def -> return def
- inlineWhite
- body <- pExpr
- return (ELet () defs body)
-
-pECase0 :: Parser (Expr ())
-pECase0 = do
- pKeyword "case"
- e <- pExpr
- inlineWhite
- pKeyword "of"
- inlineWhite
- startLayoutBlock $ do
- -- The first clause is going to skip whitespace, but that's harmless
- -- (though redundant).
- let pClause = do
- skipWhiteComment
- whenM (not <$> isInsideBlock) (() <$ empty)
- pat <- pPattern0 0
- rhs <- pRHS "->"
- return (pat, rhs)
- clauses <- many pClause
- return (ECase () e clauses)
-
-pEIf0 :: Parser (Expr ())
-pEIf0 = do
- pKeyword "if"
- e1 <- pExpr
- inlineWhite
- pKeyword "then"
- e2 <- pExpr
- inlineWhite
- pKeyword "else"
- e3 <- pExpr
- return (EIf () e1 e2 e3)
-
-pExprOpExpr :: Int -> Parser (Expr ())
-pExprOpExpr d = inlineWhite >> pExprOpExpr0 d
-
-pExprOpExpr0 :: Int -> Parser (Expr ())
-pExprOpExpr0 d = do
- e0 <- pEApp0
- climbRight e0 Nothing
- where
- climbRight :: Expr () -> Maybe ParsedOperator -> Parser (Expr ())
- climbRight lhs mlhsop = do
- asum [do paop@(PaOp op d2 a2) <- pInfixOp
- guard (d2 >= d) -- respect global minimum precedence
- case mlhsop of -- check operator compatibility
- Just (PaOp _ d1 a1) ->
- guard (d1 > d2 || (d1 == d2 && a1 == a2 && a1 /= AssocNone))
- Nothing ->
- return ()
- let oprhsd = case a2 of AssocRight -> d2 ; _ -> d2 + 1
- rhs <- pExprOpExpr oprhsd
- climbRight (EOp () lhs op rhs) (Just paop)
- ,return lhs]
-
-pEApp0 :: Parser (Expr ())
-pEApp0 = do
- e1 <- pEAtom0
- es <- many (inlineWhite >> pEAtom0)
- case es of
- [] -> return e1
- _ -> return (EApp () e1 es)
-
-pEAtom0 :: Parser (Expr ())
-pEAtom0 = (ELit () <$> pLiteral0) <|> pEList0 <|> pEVar0 <|> pECon0 <|> pEParens0
-
-pLiteral0 :: Parser Literal
-pLiteral0 = asum
- [do as <- some (satisfy isDigit)
- let a = read as :: Integer
- asum
- [do char '.'
- bs <- some (satisfy isDigit)
- let b = read bs :: Integer
- cs <- optional $ do
- char 'e'
- cs <- some (satisfy isDigit)
- return cs
- let c = maybe 0 read cs :: Integer
- return (LFloat ((fromIntegral a + fromIntegral b / 10 ^ length bs) * 10 ^ c))
- ,return (LInt a)]
- ,do char '\''
- c <- pStringChar
- char '\''
- return (LChar c)
- ,do char '"'
- s <- many pStringChar
- char '"'
- return (LString s)]
-
-pStringChar :: Parser Char
-pStringChar = asum
- [do char '\\'
- char 'x'
- let hexdig = do
- c <- satisfy $ \c' ->
- let c = toLower c'
- in 'a' <= c && c <= 'f' || '0' <= c && c <= '9'
- return $ if 'a' <= c then 10 + ord c - ord 'a'
- else ord c - ord '0'
- digs <- some hexdig
- return (chr (sum (zipWith (*) (reverse digs) (iterate (*16) 1))))
- ,do char '\\'
- satisfy (const True) >>= \case
- 'n' -> return '\n'
- 'r' -> return '\r'
- 't' -> return '\t'
- 'a' -> return '\a'
- 'b' -> return '\b'
- '\'' -> return '\''
- '\"' -> return '\"'
- '0' -> return '\0'
- c -> do raise Error $ "Invalid escape sequence: \\" ++ [c]
- return '?'
- ,do satisfy (\c -> c `notElem` "\n\r\\\'")]
-
-pEList0 :: Parser (Expr ())
-pEList0 = do
- char '[' -- special syntax, no need for pKeySym
- es <- sepBy pExpr (inlineWhite >> char ',')
- inlineWhite
- char ']'
- return (EList () es)
-
-pEVar0 :: Parser (Expr ())
-pEVar0 = EVar () <$> pIdentifier0 InBlock Lowercase
-
-pECon0 :: Parser (Expr ())
-pECon0 = ECon () <$> pIdentifier0 InBlock Uppercase
-
-pEParens0 :: Parser (Expr ())
-pEParens0 = do
- char '('
- e <- pExpr
- inlineWhite
- char ')'
- return e
-
-data Associativity = AssocLeft | AssocRight | AssocNone
- deriving (Show, Eq)
-
-data ParsedOperator = PaOp Operator Int Associativity
- deriving (Show)
-
-pInfixOp :: Parser ParsedOperator
-pInfixOp = do
- inlineWhite
- asum [PaOp OEqu 4 AssocNone <$ pKeySym "=="
- ,PaOp OAdd 6 AssocLeft <$ pKeySym "+"
- ,PaOp OSub 6 AssocLeft <$ pKeySym "-"
- ,PaOp OMul 7 AssocLeft <$ pKeySym "*"
- ,PaOp ODiv 7 AssocLeft <$ pKeySym "/"
- ,PaOp OMod 7 AssocLeft <$ pKeySym "%"
- ,PaOp OPow 8 AssocRight <$ pKeySym "^"
- ]
-
-pStandaloneTypesig0 :: Parser (Name, Type)
-pStandaloneTypesig0 = do
- assertAtBlockLeft Fatal "Expected top-level definition, found indented stuff"
- name@(Name namestr) <- pIdentifier0 AtLeft Lowercase
- inlineWhite
- pKeySym "::"
- pushContext ("type signature for '" ++ namestr ++ "'") $ do
- ty <- pType
- return (name, ty)
-
-pType :: Parser Type
-pType = do
- ty1 <- pTypeApp
- asum [do inlineWhite
- pKeySym "->"
- ty2 <- pType
- return (TFun ty1 ty2)
- ,return ty1]
-
-pTypeApp :: Parser Type
-pTypeApp = many pTypeAtom >>= \case
- [] -> raise Fatal "Expected type"
- [t] -> return t
- t:ts -> return (TApp t ts)
-
-pTypeAtom :: Parser Type
-pTypeAtom = pTypeParens <|> pTypeList <|> pTypeName
- where
- pTypeParens = do
- inlineWhite
- char '('
- asum [do inlineWhite
- char ')'
- return (TTup [])
- ,do ty1 <- pType
- ty2s <- many $ do
- inlineWhite
- char ','
- pType
- inlineWhite
- char ')'
- case ty2s of
- [] -> return ty1
- _ -> return (TTup (ty1 : ty2s))]
-
- pTypeList = do
- inlineWhite
- char '['
- ty <- pType
- char ']'
- return (TList ty)
-
- pTypeName = do
- inlineWhite
- (cs, name) <- pIdentifier0 InBlock Don'tCare
- case cs of
- Uppercase -> return (TCon name)
- Lowercase -> return (TVar name)
-
--- | Parse the given name-like keyword, ensuring that it is the entire word.
-pKeyword :: String -> Parser ()
-pKeyword s = do
- string s
- notFollowedBy (() <$ satisfy isNameContChar)
-
--- | Parse the given symbol-like keyword, ensuring that it is the entire symbol.
-pKeySym :: String -> Parser ()
-pKeySym s = do
- string s
- notFollowedBy (() <$ satisfy isSymbolChar)
-
-data Case care where
- Uppercase :: Case 'True
- Lowercase :: Case 'True
- Don'tCare :: Case 'False
-deriving instance Show (Case care)
-
-type family WithCaseOutput care a where
- WithCaseOutput 'True a = a
- WithCaseOutput 'False a = (Case 'True, a)
-
--- | Consumes an identifier (word or parenthesised operator) at the current
--- position. The `var` production in Haskell2010.
--- var -> varid | "(" varsym ")"
-pIdentifier0 :: BlockPos -> Case care -> Parser (WithCaseOutput care Name)
-pIdentifier0 bpos cs = pAlphaName0 bpos cs <|> pParens0 (pSymbol0 bpos cs)
- where
- -- | Parser between parens, with the opening paren at the current position.
- pParens0 :: Parser a -> Parser a
- pParens0 p = do
- char '('
- inlineWhite
- res <- p
- inlineWhite
- char ')'
- return res
-
--- | Consumes a word-like name at the current position with the given case. The
--- `varid` production in Haskell2010 for 'Lowercase', `conid' for 'Uppercase'.
---
--- > varid -> (small {small | large | digit | "'"}) without reservedid
-pAlphaName0 :: BlockPos -> Case care -> Parser (WithCaseOutput care Name)
-pAlphaName0 bpos cs = do
- (_, s) <- readToken
- bpos
- (\atfst mc -> case (atfst, mc) of
- (True , Just c) | isNameHeadChar c -> Just (Right False)
- (True , _ ) -> Nothing
- (False, Just c) | isNameContChar c -> Just (Right False)
- (False, _ ) -> Just (Left ()))
- True
- (name, adjoin) <- case cs of
- Uppercase
- | isLower (head s) -> do
- raise Error "Unexpected uppercase word at this position, assuming typo"
- return (toUpper (head s) : tail s, id)
- | otherwise -> return (s, id)
- Lowercase
- | isUpper (head s) -> do
- raise Error "Unexpected lowercase word at this position, assuming typo"
- return (toLower (head s) : tail s, id)
- | otherwise -> return (s, id)
- Don'tCare
- | isLower (head s) -> return (s, (Lowercase,))
- | otherwise -> return (s, (Lowercase,))
- guard (name `notElem` ["case", "class", "data", "default", "deriving", "do", "else"
- ,"foreign", "if", "import", "in", "infix", "infixl"
- ,"infixr", "instance", "let", "module", "newtype", "of"
- ,"then", "type", "where", "_"])
- return (adjoin (Name name))
-
-isNameHeadChar :: Char -> Bool
-isNameHeadChar c = isLetter c || c == '_'
-
-isNameContChar :: Char -> Bool
-isNameContChar c = isNameHeadChar c || isDigit c || c == '\''
-
--- | Consumes a symbol at the current position. The `varsym` production in
--- Haskell2010 for 'Lowercase', `consym` otherwise, and either if 'Don'tCare'.
---
--- > varsym -> ((symbol without ":") {symbol}) without (reservedop | dashes)
--- > consym -> (":" {symbol}) without reservedop
--- > symbol -> ascSymbol | uniSymbol without (special | "_" | "\"" | "'")
--- > ascSymbol -> ```!#$%&⋆+./<=>?@^|-~:```
--- > uniSymbol -> unicode symbol or punctuation
--- > dashes -> "--" {"-"}
--- > special -> ```(),;[]`{}```
--- > reservedop -> ".." | ":" | "::" | "=" | "\" | "|" | "<-" | "->" | "@" | "~" | "=>"
-pSymbol0 :: BlockPos -> Case care -> Parser (WithCaseOutput care Name)
-pSymbol0 bpos cs = do
- case bpos of
- AtLeft -> assertAtBlockLeft Fatal "Expected symbol, but found indented expression"
- InBlock -> assertInsideBlock Fatal "Expected symbol, but found end of indented expression"
- (c1, adjoin) <-
- case cs of Lowercase -> (,id) <$> satisfy (\c -> isSymbolChar c && c /= ':')
- Uppercase -> (,id) <$> satisfy (== ':')
- Don'tCare -> do c1 <- satisfy (\c -> isSymbolChar c)
- return (c1, if c1 == ':' then (Uppercase,) else (Lowercase,))
- crest <- many (satisfy isSymbolChar)
- let name = c1 : crest
- guard (name `notElem` ["..", ":", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"])
- guard (take 2 name /= "--")
- return (adjoin (Name name))
-
-isSymbolChar :: Char -> Bool
-isSymbolChar c = (isAscSymbol || isUniSymbol) && not isSpecialExt
- where
- isSpecialExt = c `elem` "(),;[]`{}_\"'"
- isAscSymbol = c `elem` "!#$%&⋆+./<=>?@^|-~:"
- isUniSymbol = ord c > 127 && (isSymbol c || isPunctuation c)
-
-
-sepBy1 :: Parser a -> Parser sep -> Parser [a]
-sepBy1 p psep = do
- x1 <- p
- (psep >> (x1 :) <$> sepBy1 p psep) <|> return [x1]
-
-sepBy :: Parser a -> Parser sep -> Parser [a]
-sepBy p psep = sepBy1 p psep <|> return []
-
--- | Start a new layout block at the current position. The old layout block is
--- restored after completion of this subparser.
-startLayoutBlock :: Parser a -> Parser a
-startLayoutBlock p = do
- ps0 <- get
- put (ps0 { psBlkLine = psLine ps0
- , psBlkCol = psCol ps0 })
- res <- p
- modify (\ps -> ps { psBlkLine = psBlkLine ps0
- , psBlkCol = psBlkCol ps0 })
- return res
-
-data Fatality fatal where
- Error :: Fatality 'False
- Fatal :: Fatality 'True
-deriving instance Show (Fatality fatal)
-
-type family FatalCtx fatal a where
- FatalCtx 'False a = a ~ ()
- FatalCtx 'True a = ()
-
--- | Raise an error with the given fatality and description.
-raise :: FatalCtx fatal a => Fatality fatal -> String -> Parser a
-raise fat msg = do
- Context { ctxFile = fp , ctxStack = stk } <- ask
- PS { psLine = line, psCol = col } <- get
- let fun = case fat of
- Error -> dictate . pure
- Fatal -> confess . pure
- fun (ErrMsg fp stk line col msg)
-
-raise' :: Fatality fatal -> String -> Parser ()
-raise' Error = raise Error
-raise' Fatal = raise Fatal
-
--- | Registers a scope description on the stack for error reporting.
-pushContext :: String -> Parser a -> Parser a
-pushContext descr = local (\c -> c { ctxStack = descr : ctxStack c })
-
-data BlockPos = AtLeft | InBlock
- deriving (Show)
-
--- | Consumes a token at the current position, asserting that we are
--- in the position indicated by the 'BlockPos' argument. The token is defined
--- by a pure stateful parser. If encountering a newline or EOF, the parser is
--- run on this character ('Nothing' for EOF); if this produces a result, the
--- result is returned; otherwise, the parser fails. The newline is not consumed.
-readToken :: BlockPos -> (s -> Maybe Char -> Maybe (Either r s)) -> s -> Parser (r, String)
-readToken bpos f s0 = do
- case bpos of
- AtLeft -> assertAtBlockLeft Fatal "Expected token, but found indented expression"
- InBlock -> assertInsideBlock Fatal "Expected token, but found end of indented expression"
- let loop :: (s -> Maybe Char -> Maybe (Either r s)) -> s -> Parser (r, String)
- loop f' st = do
- ps <- get
- case psRest ps of
- [] | Just (Left res) <- f' st Nothing -> return (res, "")
- | otherwise -> empty
- '\n' : _ | Just (Left res) <- f' st (Just '\n') -> return (res, "")
- c : cs -> case f' st (Just c) of
- Nothing -> empty
- Just (Left res) -> return (res, "")
- Just (Right st') -> do
- put (ps { psCol = psCol ps + 1, psRest = cs })
- fmap (c :) <$> loop f' st'
- loop f s0
-
--- | Consumes all whitespace and comments (including newlines), but only if
--- this then leaves the parser inside the current block. If not, this fails.
-inlineWhite :: Parser ()
-inlineWhite = do
- skipWhiteComment
- whenM (not <$> isInsideBlock) empty
-
--- | Consumes all whitespace and comments (including newlines). Note: this may
--- end outside the current block.
-skipWhiteComment :: Parser ()
-skipWhiteComment = do
- inlineSpaces
- _ <- many (blockComment >> inlineSpaces)
- optional_ lineComment
- optional_ (consumeNewline >> skipWhiteComment)
- where
- -- | Consumes some inline whitespace. Stops before newlines.
- inlineSpaces :: Parser ()
- inlineSpaces = readWhileInline isSpace
-
--- | Consumes an delimited comment including both end markers. Note: this may
--- end outside the current block.
-blockComment :: Parser ()
-blockComment = do
- string "{-" -- no need for pKeySym here
- let loop = do
- readWhileInline (`notElem` "{-") -- "-}" also starts with '-'
- asum [string "-}"
- ,eof >> raise Error "Unfinished {- -} comment at end of file"
- ,blockComment >> loop
- ,consumeNewline >> loop]
- loop
-
--- | Consumes a line comment marker and the rest of the line, excluding
--- newline.
-lineComment :: Parser ()
-lineComment = do
- -- '--!' is an operator, so we need to parse a whole symbol here.
- pKeySym "--"
- readWhileInline (const True)
-
--- | Raises an error if we're not currently at the given column.
-assertAtBlockLeft :: Fatality fatal -> String -> Parser ()
-assertAtBlockLeft fat msg = whenM (not <$> isAtBlockLeft) $ raise' fat msg
-
--- | Raises an error if psCol is not greater than psRefCol.
-assertInsideBlock :: Fatality fatal -> String -> Parser ()
-assertInsideBlock fat msg = whenM (not <$> isInsideBlock) $ raise' fat msg
-
--- | Raises an error if we're not currently at EOF.
-assertEOF :: Fatality fatal -> Parser ()
-assertEOF fat = gets psRest >>= \case
- [] -> return ()
- _ -> raise' fat "Unexpected stuff"
-
--- | Returns whether the current position is _within_ the current block, for
--- soft-wrapping content. This means that col > blkCol.
-isInsideBlock :: Parser Bool
-isInsideBlock = do
- ps <- get
- return $ psLine ps >= psBlkLine ps && psCol ps > psBlkCol ps
-
--- | Returns whether the current position is at the left border of the block;
--- this is for list content such as function definitions or let bindings. This
--- means that col == blkCol.
-isAtBlockLeft :: Parser Bool
-isAtBlockLeft = do
- ps <- get
- return $ psLine ps >= psBlkLine ps && psCol ps == psBlkCol ps
-
--- | Consumes characters while the predicate holds or until (and excluding)
--- a newline, whichever comes first.
-readWhileInline :: (Char -> Bool) -> Parser ()
-readWhileInline p = do
- (taken, rest) <- span (\c -> p c && c /= '\n') <$> gets psRest
- modify (\ps -> ps { psCol = psCol ps + length taken
- , psRest = rest })
-
--- | Consumes exactly one newline at the current position.
-consumeNewline :: Parser ()
-consumeNewline = gets psRest >>= \case
- '\n' : rest -> modify (\ps -> ps { psLine = psLine ps + 1
- , psCol = 0
- , psRest = rest })
- _ -> empty
-
--- | Consumes exactly one character, unequal to newline, at the current position.
-satisfy :: (Char -> Bool) -> Parser Char
-satisfy p = do
- -- traceM "entering satisfy"
- r <- gets psRest
- -- traceM "got rest"
- r `seq` return ()
- -- traceM "seqd rest"
- -- traceM ("rest is " ++ show r)
- case r of
- c : rest | c /= '\n', p c -> do
- modify (\ps -> ps { psCol = psCol ps + 1
- , psRest = rest })
- return c
- _ -> empty
-
--- | Consumes exactly this character at the current position. Must not be a
--- newline.
-char :: Char -> Parser ()
-char c = string [c]
-
--- | Consumes exactly this string at the current position. The string must not
--- contain a newline.
-string :: String -> Parser ()
-string s | any (== '\n') s = error "Newline in 'string' argument"
-string s = do
- ps <- get
- if take (length s) (psRest ps) == s
- then put (ps { psCol = psCol ps + length s
- , psRest = drop (length s) (psRest ps) })
- else empty
-
-lookAhead :: Parser () -> Parser ()
-lookAhead p = do
- ps <- get
- success <- absolve False (True <$ p)
- put ps -- restore state, as if nothing happened
- when (not success) empty
-
-notFollowedBy :: Parser () -> Parser ()
-notFollowedBy p = do
- ps <- get
- success <- absolve True (False <$ p)
- put ps -- restore state, as if nothing happened
- when (not success) empty
-
--- | Only succeeds at EOF.
-eof :: Parser ()
-eof = gets psRest >>= \case [] -> return ()
- _ -> empty
-
-whenM :: (Monad m, Monoid a) => m Bool -> m a -> m a
-whenM mb mx = mb >>= \b -> if b then mx else return mempty
-
-optional_ :: Alternative f => f a -> f ()
-optional_ a = (() <$ a) <|> pure ()
-
-someNE :: Alternative f => f a -> f (NonEmpty a)
-someNE a = (:|) <$> a <*> many a