diff options
author | Tom Smeding <tom@tomsmeding.com> | 2024-02-17 09:22:49 +0100 |
---|---|---|
committer | Tom Smeding <tom@tomsmeding.com> | 2024-02-17 09:22:49 +0100 |
commit | 3faca807fe96f2cefa50023fe373d8bcf1430121 (patch) | |
tree | 00ee8524bb93fb589468edea0502e626f7ff3df2 /Parser.hs | |
parent | 3ef786673ff8298124cd3b5ef50c35dbb23f77e2 (diff) |
Move to src/, working HLS in examples/
Diffstat (limited to 'Parser.hs')
-rw-r--r-- | Parser.hs | 824 |
1 files changed, 0 insertions, 824 deletions
diff --git a/Parser.hs b/Parser.hs deleted file mode 100644 index 0f0bd0c..0000000 --- a/Parser.hs +++ /dev/null @@ -1,824 +0,0 @@ -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE MultiParamTypeClasses #-} -{-# LANGUAGE TupleSections #-} -{-# LANGUAGE DataKinds #-} -{-# LANGUAGE GADTs #-} -{-# LANGUAGE StandaloneDeriving #-} -{-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE RankNTypes #-} -{-# LANGUAGE TypeOperators #-} --- I don't want a warning for 'head' and 'tail' in this file. But I also don't --- want GHCs before 9.8 to complain that they don't know the x-partial warning. -{-# OPTIONS_GHC -Wno-unrecognised-warning-flags -Wno-x-partial #-} -module Parser ( - parse, - printErrMsg, - -- * Re-exports - These(..), -) where - -import Control.Applicative -import Control.Monad -import Control.Monad.Chronicle -import Control.Monad.Reader -import Control.Monad.State.Lazy -import Data.Bifunctor (first) -import Data.Char -import Data.Either (partitionEithers) -import Data.List.NonEmpty (NonEmpty(..)) -import Data.These -import Data.Tuple (swap) - --- import Debug.Trace - -import AST - - --- Positions are zero-based in both dimensions. --- See 'isInsideBlock' and 'isAtBlockLeft' for the two relevant "inside the --- block" conditions. -data PS = PS - { psBlkLine :: Int -- ^ Start line of current layout block - , psBlkCol :: Int -- ^ Start column of current layout block - , psLine :: Int -- ^ Current line - , psCol :: Int -- ^ Current column - , psRest :: String -- ^ Rest of the input - } - deriving (Show) - -data Context = Context - { ctxFile :: FilePath - , ctxStack :: [String] -- ^ Stack of syntax scopes, for error reporting - } - deriving (Show) - --- ReaderT Context (ChronicleT [ErrMsg] (State PS) a) --- Context -> ChronicleT [ErrMsg] (State PS) a --- Context -> State PS (These [ErrMsg] a) --- Context -> PS -> Identity (These [ErrMsg] a, PS) --- Context -> PS -> (These [ErrMsg] a, PS) --- whereas I want: --- Context -> PS -> These [ErrMsg] (a, PS) --- which is not any transformer stack, but a new monad. -newtype Parser a = Parser { runParser :: Context -> PS -> These [ErrMsg] (PS, a) } - -instance Functor Parser where - fmap f (Parser g) = Parser (\ctx ps -> fmap (fmap f) (g ctx ps)) - -instance Applicative Parser where - pure x = Parser (\_ ps -> That (ps, x)) - (<*>) = ap - -instance Monad Parser where - Parser g >>= f = Parser $ \ctx ps -> - case g ctx ps of - This errs -> This errs - That (ps', x) -> runParser (f x) ctx ps' - These errs (ps', x) -> case runParser (f x) ctx ps' of - This errs' -> This (errs <> errs') - That res -> These errs res - These errs' res -> These (errs <> errs') res - -instance Alternative Parser where - empty = Parser (\_ _ -> This []) - Parser f <|> Parser g = Parser $ \ctx ps -> - case f ctx ps of - This _ -> g ctx ps - success -> success - -instance MonadState PS Parser where - state f = Parser $ \_ ps -> That (swap (f ps)) - -instance MonadReader Context Parser where - reader f = Parser $ \ctx ps -> That (ps, f ctx) - local f (Parser g) = Parser (g . f) - -instance MonadChronicle [ErrMsg] Parser where - dictate errs = Parser (\_ ps -> These errs (ps, ())) - confess errs = Parser (\_ _ -> This errs) - memento (Parser f) = Parser (\ctx ps -> case f ctx ps of - This errs -> That (ps, Left errs) - That res -> That (Right <$> res) - These errs res -> These errs (Right <$> res)) - absolve def (Parser f) = Parser (\ctx ps -> case f ctx ps of - This _ -> That (ps, def) - success -> success) - condemn (Parser f) = Parser (\ctx ps -> case f ctx ps of - These errs _ -> This errs - res -> res) - retcon g (Parser f) = Parser (\ctx ps -> first g (f ctx ps)) - chronicle th = Parser (\_ ps -> (ps,) <$> th) - --- Positions are zero-based in both dimensions -data ErrMsg = ErrMsg - { errFile :: FilePath - , errStk :: [String] - , errLine :: Int - , errCol :: Int - , errMsg :: String } - deriving (Show) - -printErrMsg :: ErrMsg -> String -printErrMsg (ErrMsg fp stk y x s) = - unlines (map (\descr -> "In " ++ descr ++ ":") (reverse stk)) ++ - fp ++ ":" ++ show (y + 1) ++ ":" ++ show (x + 1) ++ ": " ++ s - -parse :: FilePath -> String -> These [ErrMsg] (Program ()) -parse fp source = fmap snd $ runParser pProgram (Context fp []) (PS 0 0 0 0 source) - -pProgram :: Parser (Program ()) -pProgram = do - defs <- many pTopDef - let (datadefs, fundefs) = partitionEithers defs - skipWhiteComment - assertEOF Error - return (Program datadefs fundefs) - -pTopDef :: Parser (Either DataDef (FunDef ())) -pTopDef = do - skipWhiteComment - Left <$> pDataDef0 <|> Right <$> pFunDef0 - -pDataDef0 :: Parser DataDef -pDataDef0 = do - pKeyword "data" - inlineWhite - name <- pIdentifier0 InBlock Uppercase - params <- many (inlineWhite >> pIdentifier0 InBlock Lowercase) - cons <- pDatacons "=" - return (DataDef name params cons) - where - pDatacons :: String -> Parser [(Name, [Type])] - pDatacons leader = do - inlineWhite - pKeySym leader - inlineWhite - name <- pIdentifier0 InBlock Uppercase - fields <- many pTypeAtom - rest <- pDatacons "|" <|> return [] - return ((name, fields) : rest) - -pFunDef0 :: Parser (FunDef ()) -pFunDef0 = do - mtypesig <- optional pStandaloneTypesig0 - let mname = fst <$> mtypesig - mtype = snd <$> mtypesig - (clauses, name) <- someClauses mname - return (FunDef name mtype clauses) - where - someClauses :: Maybe Name -> Parser (NonEmpty (FunEq ()), Name) - someClauses Nothing = do - clause@(FunEq name _ _) <- pFunEq Nothing - (,name) . (clause :|) <$> many (pFunEq (Just name)) - someClauses (Just name) = (,name) <$> someNE (pFunEq (Just name)) - --- | Given the name of the type signature, if any. -pFunEq :: Maybe Name -> Parser (FunEq ()) -pFunEq mCheckName = do - skipWhiteComment - assertAtBlockLeft Fatal "Expected function clause, found indented stuff" - - name <- pIdentifier0 AtLeft Lowercase - case mCheckName of - Just checkName | name /= checkName -> - raise Fatal "Name of function clause does not correspond with type signature" - _ -> return () - - pats <- many (pPattern 11) - rhs <- pRHS "=" - return (FunEq name pats rhs) - --- | Pass "=" for function definitions and "->" for case clauses. -pRHS :: String -> Parser (RHS ()) -pRHS sepsym = do - -- TODO: parse guards - inlineWhite - pKeySym sepsym - Plain <$> pExpr - -pPattern :: Int -> Parser (Pattern ()) -pPattern d = inlineWhite >> pPattern0 d - -pPattern0 :: Int -> Parser (Pattern ()) -pPattern0 d = do - asum [pPatWildcard0 - ,pPatVarOrAs0 - ,pPatCon0 - ,pPatList0 - ,pPatParens0] - where - pPatWildcard0 = pKeySym "_" >> return (PWildcard ()) - pPatVarOrAs0 = do - var <- pIdentifier0 InBlock Lowercase - asum [do inlineWhite - pKeySym "@" - p <- pPattern 11 - return (PAs () var p) - ,return (PVar () var)] - pPatCon0 = do - con <- pIdentifier0 InBlock Uppercase - if d > 0 - then return (PCon () con []) - else do args <- many (pPattern 11) - return (PCon () con args) - pPatList0 = do - char '[' -- special syntax, no need for pKeySym - ps <- pPattern 0 `sepBy` (inlineWhite >> char ',') - inlineWhite - char ']' - return (PList () ps) - pPatParens0 = do - char '(' - inlineWhite - asum [do char ')' - return (PTup () []) - ,do p <- pPattern0 0 - inlineWhite - asum [do char ')' - return p - ,do char ',' - ps <- pPattern 0 `sepBy1` (inlineWhite >> char ',') - return (PTup () (p : ps))]] - -pExpr :: Parser (Expr ()) -pExpr = do - inlineWhite - -- basics: lit, list, var, con, tup - -- expression atom: application of basics - -- expression parser: op - -- around: let, case, if - asum [pELet0 - ,pECase0 - ,pEIf0 - ,pExprOpExpr0 0] - -pELet0 :: Parser (Expr ()) -pELet0 = do - pKeyword "let" - inlineWhite - startLayoutBlock $ do - -- The first occurrence is also going to skip whitespace in front, - -- which is redundant -- but not harmful. - defs <- many $ do - skipWhiteComment - -- Note: now not necessarily in the indented block. Which is - -- precisely what we need here. If we see "in", let the 'many' - -- choice fail so that the defs loop ends. But let it fail outside - -- this asum so that it is the many that picks it up, not this - -- asum. - res <- asum [Nothing <$ lookAhead (pKeyword "in") - ,Just <$> pFunDef0] - case res of - Nothing -> empty - Just def -> return def - inlineWhite - body <- pExpr - return (ELet () defs body) - -pECase0 :: Parser (Expr ()) -pECase0 = do - pKeyword "case" - e <- pExpr - inlineWhite - pKeyword "of" - inlineWhite - startLayoutBlock $ do - -- The first clause is going to skip whitespace, but that's harmless - -- (though redundant). - let pClause = do - skipWhiteComment - whenM (not <$> isInsideBlock) (() <$ empty) - pat <- pPattern0 0 - rhs <- pRHS "->" - return (pat, rhs) - clauses <- many pClause - return (ECase () e clauses) - -pEIf0 :: Parser (Expr ()) -pEIf0 = do - pKeyword "if" - e1 <- pExpr - inlineWhite - pKeyword "then" - e2 <- pExpr - inlineWhite - pKeyword "else" - e3 <- pExpr - return (EIf () e1 e2 e3) - -pExprOpExpr :: Int -> Parser (Expr ()) -pExprOpExpr d = inlineWhite >> pExprOpExpr0 d - -pExprOpExpr0 :: Int -> Parser (Expr ()) -pExprOpExpr0 d = do - e0 <- pEApp0 - climbRight e0 Nothing - where - climbRight :: Expr () -> Maybe ParsedOperator -> Parser (Expr ()) - climbRight lhs mlhsop = do - asum [do paop@(PaOp op d2 a2) <- pInfixOp - guard (d2 >= d) -- respect global minimum precedence - case mlhsop of -- check operator compatibility - Just (PaOp _ d1 a1) -> - guard (d1 > d2 || (d1 == d2 && a1 == a2 && a1 /= AssocNone)) - Nothing -> - return () - let oprhsd = case a2 of AssocRight -> d2 ; _ -> d2 + 1 - rhs <- pExprOpExpr oprhsd - climbRight (EOp () lhs op rhs) (Just paop) - ,return lhs] - -pEApp0 :: Parser (Expr ()) -pEApp0 = do - e1 <- pEAtom0 - es <- many (inlineWhite >> pEAtom0) - case es of - [] -> return e1 - _ -> return (EApp () e1 es) - -pEAtom0 :: Parser (Expr ()) -pEAtom0 = (ELit () <$> pLiteral0) <|> pEList0 <|> pEVar0 <|> pECon0 <|> pEParens0 - -pLiteral0 :: Parser Literal -pLiteral0 = asum - [do as <- some (satisfy isDigit) - let a = read as :: Integer - asum - [do char '.' - bs <- some (satisfy isDigit) - let b = read bs :: Integer - cs <- optional $ do - char 'e' - cs <- some (satisfy isDigit) - return cs - let c = maybe 0 read cs :: Integer - return (LFloat ((fromIntegral a + fromIntegral b / 10 ^ length bs) * 10 ^ c)) - ,return (LInt a)] - ,do char '\'' - c <- pStringChar - char '\'' - return (LChar c) - ,do char '"' - s <- many pStringChar - char '"' - return (LString s)] - -pStringChar :: Parser Char -pStringChar = asum - [do char '\\' - char 'x' - let hexdig = do - c <- satisfy $ \c' -> - let c = toLower c' - in 'a' <= c && c <= 'f' || '0' <= c && c <= '9' - return $ if 'a' <= c then 10 + ord c - ord 'a' - else ord c - ord '0' - digs <- some hexdig - return (chr (sum (zipWith (*) (reverse digs) (iterate (*16) 1)))) - ,do char '\\' - satisfy (const True) >>= \case - 'n' -> return '\n' - 'r' -> return '\r' - 't' -> return '\t' - 'a' -> return '\a' - 'b' -> return '\b' - '\'' -> return '\'' - '\"' -> return '\"' - '0' -> return '\0' - c -> do raise Error $ "Invalid escape sequence: \\" ++ [c] - return '?' - ,do satisfy (\c -> c `notElem` "\n\r\\\'")] - -pEList0 :: Parser (Expr ()) -pEList0 = do - char '[' -- special syntax, no need for pKeySym - es <- sepBy pExpr (inlineWhite >> char ',') - inlineWhite - char ']' - return (EList () es) - -pEVar0 :: Parser (Expr ()) -pEVar0 = EVar () <$> pIdentifier0 InBlock Lowercase - -pECon0 :: Parser (Expr ()) -pECon0 = ECon () <$> pIdentifier0 InBlock Uppercase - -pEParens0 :: Parser (Expr ()) -pEParens0 = do - char '(' - e <- pExpr - inlineWhite - char ')' - return e - -data Associativity = AssocLeft | AssocRight | AssocNone - deriving (Show, Eq) - -data ParsedOperator = PaOp Operator Int Associativity - deriving (Show) - -pInfixOp :: Parser ParsedOperator -pInfixOp = do - inlineWhite - asum [PaOp OEqu 4 AssocNone <$ pKeySym "==" - ,PaOp OAdd 6 AssocLeft <$ pKeySym "+" - ,PaOp OSub 6 AssocLeft <$ pKeySym "-" - ,PaOp OMul 7 AssocLeft <$ pKeySym "*" - ,PaOp ODiv 7 AssocLeft <$ pKeySym "/" - ,PaOp OMod 7 AssocLeft <$ pKeySym "%" - ,PaOp OPow 8 AssocRight <$ pKeySym "^" - ] - -pStandaloneTypesig0 :: Parser (Name, Type) -pStandaloneTypesig0 = do - assertAtBlockLeft Fatal "Expected top-level definition, found indented stuff" - name@(Name namestr) <- pIdentifier0 AtLeft Lowercase - inlineWhite - pKeySym "::" - pushContext ("type signature for '" ++ namestr ++ "'") $ do - ty <- pType - return (name, ty) - -pType :: Parser Type -pType = do - ty1 <- pTypeApp - asum [do inlineWhite - pKeySym "->" - ty2 <- pType - return (TFun ty1 ty2) - ,return ty1] - -pTypeApp :: Parser Type -pTypeApp = many pTypeAtom >>= \case - [] -> raise Fatal "Expected type" - [t] -> return t - t:ts -> return (TApp t ts) - -pTypeAtom :: Parser Type -pTypeAtom = pTypeParens <|> pTypeList <|> pTypeName - where - pTypeParens = do - inlineWhite - char '(' - asum [do inlineWhite - char ')' - return (TTup []) - ,do ty1 <- pType - ty2s <- many $ do - inlineWhite - char ',' - pType - inlineWhite - char ')' - case ty2s of - [] -> return ty1 - _ -> return (TTup (ty1 : ty2s))] - - pTypeList = do - inlineWhite - char '[' - ty <- pType - char ']' - return (TList ty) - - pTypeName = do - inlineWhite - (cs, name) <- pIdentifier0 InBlock Don'tCare - case cs of - Uppercase -> return (TCon name) - Lowercase -> return (TVar name) - --- | Parse the given name-like keyword, ensuring that it is the entire word. -pKeyword :: String -> Parser () -pKeyword s = do - string s - notFollowedBy (() <$ satisfy isNameContChar) - --- | Parse the given symbol-like keyword, ensuring that it is the entire symbol. -pKeySym :: String -> Parser () -pKeySym s = do - string s - notFollowedBy (() <$ satisfy isSymbolChar) - -data Case care where - Uppercase :: Case 'True - Lowercase :: Case 'True - Don'tCare :: Case 'False -deriving instance Show (Case care) - -type family WithCaseOutput care a where - WithCaseOutput 'True a = a - WithCaseOutput 'False a = (Case 'True, a) - --- | Consumes an identifier (word or parenthesised operator) at the current --- position. The `var` production in Haskell2010. --- var -> varid | "(" varsym ")" -pIdentifier0 :: BlockPos -> Case care -> Parser (WithCaseOutput care Name) -pIdentifier0 bpos cs = pAlphaName0 bpos cs <|> pParens0 (pSymbol0 bpos cs) - where - -- | Parser between parens, with the opening paren at the current position. - pParens0 :: Parser a -> Parser a - pParens0 p = do - char '(' - inlineWhite - res <- p - inlineWhite - char ')' - return res - --- | Consumes a word-like name at the current position with the given case. The --- `varid` production in Haskell2010 for 'Lowercase', `conid' for 'Uppercase'. --- --- > varid -> (small {small | large | digit | "'"}) without reservedid -pAlphaName0 :: BlockPos -> Case care -> Parser (WithCaseOutput care Name) -pAlphaName0 bpos cs = do - (_, s) <- readToken - bpos - (\atfst mc -> case (atfst, mc) of - (True , Just c) | isNameHeadChar c -> Just (Right False) - (True , _ ) -> Nothing - (False, Just c) | isNameContChar c -> Just (Right False) - (False, _ ) -> Just (Left ())) - True - (name, adjoin) <- case cs of - Uppercase - | isLower (head s) -> do - raise Error "Unexpected uppercase word at this position, assuming typo" - return (toUpper (head s) : tail s, id) - | otherwise -> return (s, id) - Lowercase - | isUpper (head s) -> do - raise Error "Unexpected lowercase word at this position, assuming typo" - return (toLower (head s) : tail s, id) - | otherwise -> return (s, id) - Don'tCare - | isLower (head s) -> return (s, (Lowercase,)) - | otherwise -> return (s, (Lowercase,)) - guard (name `notElem` ["case", "class", "data", "default", "deriving", "do", "else" - ,"foreign", "if", "import", "in", "infix", "infixl" - ,"infixr", "instance", "let", "module", "newtype", "of" - ,"then", "type", "where", "_"]) - return (adjoin (Name name)) - -isNameHeadChar :: Char -> Bool -isNameHeadChar c = isLetter c || c == '_' - -isNameContChar :: Char -> Bool -isNameContChar c = isNameHeadChar c || isDigit c || c == '\'' - --- | Consumes a symbol at the current position. The `varsym` production in --- Haskell2010 for 'Lowercase', `consym` otherwise, and either if 'Don'tCare'. --- --- > varsym -> ((symbol without ":") {symbol}) without (reservedop | dashes) --- > consym -> (":" {symbol}) without reservedop --- > symbol -> ascSymbol | uniSymbol without (special | "_" | "\"" | "'") --- > ascSymbol -> ```!#$%&⋆+./<=>?@^|-~:``` --- > uniSymbol -> unicode symbol or punctuation --- > dashes -> "--" {"-"} --- > special -> ```(),;[]`{}``` --- > reservedop -> ".." | ":" | "::" | "=" | "\" | "|" | "<-" | "->" | "@" | "~" | "=>" -pSymbol0 :: BlockPos -> Case care -> Parser (WithCaseOutput care Name) -pSymbol0 bpos cs = do - case bpos of - AtLeft -> assertAtBlockLeft Fatal "Expected symbol, but found indented expression" - InBlock -> assertInsideBlock Fatal "Expected symbol, but found end of indented expression" - (c1, adjoin) <- - case cs of Lowercase -> (,id) <$> satisfy (\c -> isSymbolChar c && c /= ':') - Uppercase -> (,id) <$> satisfy (== ':') - Don'tCare -> do c1 <- satisfy (\c -> isSymbolChar c) - return (c1, if c1 == ':' then (Uppercase,) else (Lowercase,)) - crest <- many (satisfy isSymbolChar) - let name = c1 : crest - guard (name `notElem` ["..", ":", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"]) - guard (take 2 name /= "--") - return (adjoin (Name name)) - -isSymbolChar :: Char -> Bool -isSymbolChar c = (isAscSymbol || isUniSymbol) && not isSpecialExt - where - isSpecialExt = c `elem` "(),;[]`{}_\"'" - isAscSymbol = c `elem` "!#$%&⋆+./<=>?@^|-~:" - isUniSymbol = ord c > 127 && (isSymbol c || isPunctuation c) - - -sepBy1 :: Parser a -> Parser sep -> Parser [a] -sepBy1 p psep = do - x1 <- p - (psep >> (x1 :) <$> sepBy1 p psep) <|> return [x1] - -sepBy :: Parser a -> Parser sep -> Parser [a] -sepBy p psep = sepBy1 p psep <|> return [] - --- | Start a new layout block at the current position. The old layout block is --- restored after completion of this subparser. -startLayoutBlock :: Parser a -> Parser a -startLayoutBlock p = do - ps0 <- get - put (ps0 { psBlkLine = psLine ps0 - , psBlkCol = psCol ps0 }) - res <- p - modify (\ps -> ps { psBlkLine = psBlkLine ps0 - , psBlkCol = psBlkCol ps0 }) - return res - -data Fatality fatal where - Error :: Fatality 'False - Fatal :: Fatality 'True -deriving instance Show (Fatality fatal) - -type family FatalCtx fatal a where - FatalCtx 'False a = a ~ () - FatalCtx 'True a = () - --- | Raise an error with the given fatality and description. -raise :: FatalCtx fatal a => Fatality fatal -> String -> Parser a -raise fat msg = do - Context { ctxFile = fp , ctxStack = stk } <- ask - PS { psLine = line, psCol = col } <- get - let fun = case fat of - Error -> dictate . pure - Fatal -> confess . pure - fun (ErrMsg fp stk line col msg) - -raise' :: Fatality fatal -> String -> Parser () -raise' Error = raise Error -raise' Fatal = raise Fatal - --- | Registers a scope description on the stack for error reporting. -pushContext :: String -> Parser a -> Parser a -pushContext descr = local (\c -> c { ctxStack = descr : ctxStack c }) - -data BlockPos = AtLeft | InBlock - deriving (Show) - --- | Consumes a token at the current position, asserting that we are --- in the position indicated by the 'BlockPos' argument. The token is defined --- by a pure stateful parser. If encountering a newline or EOF, the parser is --- run on this character ('Nothing' for EOF); if this produces a result, the --- result is returned; otherwise, the parser fails. The newline is not consumed. -readToken :: BlockPos -> (s -> Maybe Char -> Maybe (Either r s)) -> s -> Parser (r, String) -readToken bpos f s0 = do - case bpos of - AtLeft -> assertAtBlockLeft Fatal "Expected token, but found indented expression" - InBlock -> assertInsideBlock Fatal "Expected token, but found end of indented expression" - let loop :: (s -> Maybe Char -> Maybe (Either r s)) -> s -> Parser (r, String) - loop f' st = do - ps <- get - case psRest ps of - [] | Just (Left res) <- f' st Nothing -> return (res, "") - | otherwise -> empty - '\n' : _ | Just (Left res) <- f' st (Just '\n') -> return (res, "") - c : cs -> case f' st (Just c) of - Nothing -> empty - Just (Left res) -> return (res, "") - Just (Right st') -> do - put (ps { psCol = psCol ps + 1, psRest = cs }) - fmap (c :) <$> loop f' st' - loop f s0 - --- | Consumes all whitespace and comments (including newlines), but only if --- this then leaves the parser inside the current block. If not, this fails. -inlineWhite :: Parser () -inlineWhite = do - skipWhiteComment - whenM (not <$> isInsideBlock) empty - --- | Consumes all whitespace and comments (including newlines). Note: this may --- end outside the current block. -skipWhiteComment :: Parser () -skipWhiteComment = do - inlineSpaces - _ <- many (blockComment >> inlineSpaces) - optional_ lineComment - optional_ (consumeNewline >> skipWhiteComment) - where - -- | Consumes some inline whitespace. Stops before newlines. - inlineSpaces :: Parser () - inlineSpaces = readWhileInline isSpace - --- | Consumes an delimited comment including both end markers. Note: this may --- end outside the current block. -blockComment :: Parser () -blockComment = do - string "{-" -- no need for pKeySym here - let loop = do - readWhileInline (`notElem` "{-") -- "-}" also starts with '-' - asum [string "-}" - ,eof >> raise Error "Unfinished {- -} comment at end of file" - ,blockComment >> loop - ,consumeNewline >> loop] - loop - --- | Consumes a line comment marker and the rest of the line, excluding --- newline. -lineComment :: Parser () -lineComment = do - -- '--!' is an operator, so we need to parse a whole symbol here. - pKeySym "--" - readWhileInline (const True) - --- | Raises an error if we're not currently at the given column. -assertAtBlockLeft :: Fatality fatal -> String -> Parser () -assertAtBlockLeft fat msg = whenM (not <$> isAtBlockLeft) $ raise' fat msg - --- | Raises an error if psCol is not greater than psRefCol. -assertInsideBlock :: Fatality fatal -> String -> Parser () -assertInsideBlock fat msg = whenM (not <$> isInsideBlock) $ raise' fat msg - --- | Raises an error if we're not currently at EOF. -assertEOF :: Fatality fatal -> Parser () -assertEOF fat = gets psRest >>= \case - [] -> return () - _ -> raise' fat "Unexpected stuff" - --- | Returns whether the current position is _within_ the current block, for --- soft-wrapping content. This means that col > blkCol. -isInsideBlock :: Parser Bool -isInsideBlock = do - ps <- get - return $ psLine ps >= psBlkLine ps && psCol ps > psBlkCol ps - --- | Returns whether the current position is at the left border of the block; --- this is for list content such as function definitions or let bindings. This --- means that col == blkCol. -isAtBlockLeft :: Parser Bool -isAtBlockLeft = do - ps <- get - return $ psLine ps >= psBlkLine ps && psCol ps == psBlkCol ps - --- | Consumes characters while the predicate holds or until (and excluding) --- a newline, whichever comes first. -readWhileInline :: (Char -> Bool) -> Parser () -readWhileInline p = do - (taken, rest) <- span (\c -> p c && c /= '\n') <$> gets psRest - modify (\ps -> ps { psCol = psCol ps + length taken - , psRest = rest }) - --- | Consumes exactly one newline at the current position. -consumeNewline :: Parser () -consumeNewline = gets psRest >>= \case - '\n' : rest -> modify (\ps -> ps { psLine = psLine ps + 1 - , psCol = 0 - , psRest = rest }) - _ -> empty - --- | Consumes exactly one character, unequal to newline, at the current position. -satisfy :: (Char -> Bool) -> Parser Char -satisfy p = do - -- traceM "entering satisfy" - r <- gets psRest - -- traceM "got rest" - r `seq` return () - -- traceM "seqd rest" - -- traceM ("rest is " ++ show r) - case r of - c : rest | c /= '\n', p c -> do - modify (\ps -> ps { psCol = psCol ps + 1 - , psRest = rest }) - return c - _ -> empty - --- | Consumes exactly this character at the current position. Must not be a --- newline. -char :: Char -> Parser () -char c = string [c] - --- | Consumes exactly this string at the current position. The string must not --- contain a newline. -string :: String -> Parser () -string s | any (== '\n') s = error "Newline in 'string' argument" -string s = do - ps <- get - if take (length s) (psRest ps) == s - then put (ps { psCol = psCol ps + length s - , psRest = drop (length s) (psRest ps) }) - else empty - -lookAhead :: Parser () -> Parser () -lookAhead p = do - ps <- get - success <- absolve False (True <$ p) - put ps -- restore state, as if nothing happened - when (not success) empty - -notFollowedBy :: Parser () -> Parser () -notFollowedBy p = do - ps <- get - success <- absolve True (False <$ p) - put ps -- restore state, as if nothing happened - when (not success) empty - --- | Only succeeds at EOF. -eof :: Parser () -eof = gets psRest >>= \case [] -> return () - _ -> empty - -whenM :: (Monad m, Monoid a) => m Bool -> m a -> m a -whenM mb mx = mb >>= \b -> if b then mx else return mempty - -optional_ :: Alternative f => f a -> f () -optional_ a = (() <$ a) <|> pure () - -someNE :: Alternative f => f a -> f (NonEmpty a) -someNE a = (:|) <$> a <*> many a |