{-# LANGUAGE LambdaCase #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE TypeOperators #-} -- I don't want a warning for 'head' and 'tail' in this file. But I also don't -- want GHCs before 9.8 to complain that they don't know the x-partial warning. {-# OPTIONS_GHC -Wno-unrecognised-warning-flags -Wno-x-partial #-} module Parser ( parse, printErrMsg, -- * Re-exports These(..), ) where import Control.Applicative import Control.Monad import Control.Monad.Chronicle import Control.Monad.Reader import Control.Monad.State.Lazy import Data.Char import Data.Either (partitionEithers) import Data.List.NonEmpty (NonEmpty(..)) import Data.These import Debug.Trace import AST data Pos = Pos { posLine :: Int -- ^ zero-based , posCol :: Int -- ^ zero-based } deriving (Show) -- Positions are zero-based in both dimensions. -- See 'isInsideBlock' and 'isAtBlockLeft' for the two relevant "inside the -- block" conditions. data PS = PS { psBlk :: Pos -- ^ Start of current layout block , psCur :: Pos -- ^ Current parsing position , psRest :: String -- ^ Rest of the input } deriving (Show) data Context = Context { ctxFile :: FilePath , ctxStack :: [String] -- ^ Stack of syntax scopes, for error reporting } deriving (Show) newtype Parser a = Parser { runParser :: forall r. Context -> PS -> (PS -> [ErrMsg] -> a -> r) -- ^ OK: some diagnostics, but parsing succeeded -> ([ErrMsg] -> r) -- ^ Fatal: error that prevented parsing from proceeding -> r -- ^ Backtrack: alternative was exhausted without success -> r } instance Functor Parser where fmap f (Parser g) = Parser (\ctx ps kok kfat kbt -> g ctx ps (\ps' errs x -> kok ps' errs (f x)) kfat kbt) instance Applicative Parser where pure x = Parser (\_ ps kok _ _ -> kok ps [] x) (<*>) = ap instance Monad Parser where Parser g >>= f = Parser $ \ctx ps kok kfat kbt -> g ctx ps (\ps1 errs x -> x `seq` runParser (f x) ctx ps1 (\ps2 errs' y -> kok ps2 (errs <> errs') y) (\errs' -> kfat (errs <> errs')) kbt) (\errs -> kfat errs) kbt instance Alternative Parser where empty = Parser (\_ _ _ _ kbt -> kbt) Parser f <|> Parser g = Parser $ \ctx ps kok kfat kbt -> f ctx ps kok kfat (g ctx ps kok kfat kbt) instance MonadState PS Parser where state f = Parser $ \_ ps kok _ _ -> let (x, ps') = f ps in kok ps' [] x instance MonadReader Context Parser where reader f = Parser $ \ctx ps kok _ _ -> kok ps [] (f ctx) local f (Parser g) = Parser (\ctx -> g (f ctx)) instance MonadChronicle [ErrMsg] Parser where dictate errs = Parser $ \_ ps kok _ _ -> kok ps errs () confess errs = Parser $ \_ _ _ kfat _ -> kfat errs memento (Parser f) = Parser $ \ctx ps kok _ kbt -> f ctx ps (\ps' errs x -> kok ps' errs (Right x)) (\errs -> kok ps [] (Left errs)) kbt absolve def (Parser f) = Parser $ \ctx ps kok _ _ -> f ctx ps kok (\_ -> kok ps [] def) (kok ps [] def) condemn (Parser f) = Parser $ \ctx ps kok kfat kbt -> f ctx ps (\ps' errs x -> case errs of [] -> kok ps' [] x _ -> kfat errs) kfat kbt retcon g (Parser f) = Parser $ \ctx ps kok kfat kbt -> f ctx ps (\ps' errs x -> kok ps' (g errs) x) (\errs -> kfat (g errs)) kbt chronicle th = case th of This errs -> Parser (\_ _ _ kfat _ -> kfat errs) That res -> Parser (\_ ps kok _ _ -> kok ps [] res) These errs res -> Parser (\_ ps kok _ _ -> kok ps errs res) -- Positions are zero-based in both dimensions data ErrMsg = ErrMsg { errFile :: FilePath , errStk :: [String] , errPos :: Pos , errMsg :: String } deriving (Show) printErrMsg :: ErrMsg -> String printErrMsg (ErrMsg fp stk (Pos y x) s) = unlines (map (\descr -> "In " ++ descr ++ ":") (reverse stk)) ++ fp ++ ":" ++ show (y + 1) ++ ":" ++ show (x + 1) ++ ": " ++ s parse :: FilePath -> String -> These [ErrMsg] (Program ()) parse fp source = runParser pProgram (Context fp []) (PS (Pos 0 0) (Pos 0 0) source) (\_ errs res -> case errs of [] -> That res _ -> These errs res) (\errs -> This errs) (This [ErrMsg fp [] (Pos 0 0) "Parse error, no grammar alternatives match your source"]) pProgram :: Parser (Program ()) pProgram = do defs <- many pTopDef let (datadefs, fundefs) = partitionEithers defs skipWhiteComment assertEOF Error return (Program datadefs fundefs) pTopDef :: Parser (Either DataDef (FunDef ())) pTopDef = do skipWhiteComment isAtBlockLeft >>= \case True -> Left <$> pDataDef0 <|> Right <$> pFunDef0 False -> do raise Error "Skipping unparseable content" readWhileInline (const True) pTopDef pDataDef0 :: Parser DataDef pDataDef0 = do pKeyword "data" inlineWhite name <- pIdentifier0 InBlock Uppercase params <- many (inlineWhite >> pIdentifier0 InBlock Lowercase) cons <- pDatacons "=" return (DataDef name params cons) where pDatacons :: String -> Parser [(Name, [Type])] pDatacons leader = do inlineWhite pKeySym leader inlineWhite name <- pIdentifier0 InBlock Uppercase fields <- many pTypeAtom rest <- pDatacons "|" <|> return [] return ((name, fields) : rest) data FunEqContext = FirstLine | TypeSig Name | Continue Name deriving (Show) pFunDef0 :: Parser (FunDef ()) pFunDef0 = do mtypesig <- optional pStandaloneTypesig0 let mname = fst <$> mtypesig mtype = snd <$> mtypesig clause@(FunEq name _ _) <- pFunEq (maybe FirstLine TypeSig mname) clauses <- many (pFunEq (Continue name)) return (FunDef name mtype (clause :| clauses)) -- | Given the name of the type signature, if any. pFunEq :: FunEqContext -> Parser (FunEq ()) pFunEq fectx = do skipWhiteComment pushLocatedContext "funeq" $ do isAtBlockLeft >>= guard -- assertAtBlockLeft Fatal "Expected function clause, found indented stuff" name <- pIdentifier0 AtLeft Lowercase case fectx of FirstLine -> return () TypeSig checkName -> when (name /= checkName) $ raise Fatal "Name of function clause does not correspond with type signature" Continue checkName -> guard (name == checkName) pats <- many (pPattern 11) rhs <- pRHS "=" return (FunEq name pats rhs) -- | Pass "=" for function definitions and "->" for case clauses. pRHS :: String -> Parser (RHS ()) pRHS sepsym = do -- TODO: parse guards inlineWhite pKeySym sepsym <|> raise Error ("Expected " ++ show sepsym) expr <- pExpr <|> (raise Error "Expected expression" >> return (ETup () [])) return (Plain expr) pPattern :: Int -> Parser (Pattern ()) pPattern d = inlineWhite >> pPattern0 d pPattern0 :: Int -> Parser (Pattern ()) pPattern0 d = do p0 <- pPatExprAtom0 (max 10 d) climbRight pPattern (pInfixOp Uppercase) (POp ()) d p0 Nothing pPatExprAtom0 :: Int -> Parser (Pattern ()) pPatExprAtom0 d = asum [pPatWildcard0 ,pPatVarOrAs0 ,pPatCon0 ,pPatList0 ,pPatParens0] where pPatWildcard0 = pKeySym "_" >> return (PWildcard ()) pPatVarOrAs0 = do var <- pIdentifier0 InBlock Lowercase asum [do inlineWhite pKeySym "@" p <- pPattern 11 return (PAs () var p) ,return (PVar () var)] pPatCon0 = do con <- pIdentifier0 InBlock Uppercase if d > 10 then return (PCon () con []) else do args <- many (pPattern 11) return (PCon () con args) pPatList0 = do char '[' -- special syntax, no need for pKeySym ps <- pPattern 0 `sepBy` (inlineWhite >> char ',') inlineWhite char ']' return (PList () ps) pPatParens0 = do char '(' inlineWhite asum [do char ')' return (PTup () []) ,do p <- pPattern0 0 inlineWhite asum [do char ')' return p ,do char ',' ps <- pPattern 0 `sepBy1` (inlineWhite >> char ',') return (PTup () (p : ps))]] pExpr :: Parser (Expr ()) pExpr = do inlineWhite -- basics: lit, list, var, con, tup -- expression atom: application of basics -- expression parser: op -- around: let, case, if pushLocatedContext "expression" $ do asum [pELet0 ,pECase0 ,pEIf0 ,pExprOpExpr0 0] pELet0 :: Parser (Expr ()) pELet0 = do pKeyword "let" inlineWhite defs <- startLayoutBlock $ do -- The first occurrence is also going to skip whitespace in front, -- which is redundant -- but not harmful. many $ do skipWhiteComment -- Note: now not necessarily in the indented block. Which is -- precisely what we need here. If we see "in", let the 'many' -- choice fail so that the defs loop ends. But let it fail outside -- this asum so that it is the many that picks it up, not this -- asum. res <- asum [Nothing <$ pKeyword "in" -- note: will be dropped due to the empty backtrack ,Just <$> pFunDef0] case res of Nothing -> empty Just def -> return def inlineWhite asum [do pKeyword "in" inlineWhite body <- pExpr <|> (raise Error "Expected expression" >> return (ETup () [])) return (ELet () defs body) ,do raise Error "Expected 'in' after 'let'" return (ELet () defs (ETup () []))] pECase0 :: Parser (Expr ()) pECase0 = do pKeyword "case" e <- pExpr inlineWhite pKeyword "of" inlineWhite startLayoutBlock $ do -- The first clause is going to skip whitespace, but that's harmless -- (though redundant). let pClause = do skipWhiteComment whenM (not <$> isInsideBlock) (() <$ empty) pat <- pPattern0 0 rhs <- pRHS "->" return (pat, rhs) clauses <- many pClause return (ECase () e clauses) pEIf0 :: Parser (Expr ()) pEIf0 = do pKeyword "if" e1 <- pExpr inlineWhite pKeyword "then" e2 <- pExpr inlineWhite pKeyword "else" e3 <- pExpr return (EIf () e1 e2 e3) pExprOpExpr :: Int -> Parser (Expr ()) pExprOpExpr d = inlineWhite >> pExprOpExpr0 d pExprOpExpr0 :: Int -> Parser (Expr ()) pExprOpExpr0 d = do e0 <- pEApp0 climbRight pExprOpExpr (snd <$> pInfixOp Don'tCare) (EOp ()) d e0 Nothing climbRight :: (Int -> Parser e) -- ^ Parse an expression at the given precedence level -> Parser ParsedOperator -- ^ Parse an operator -> (e -> Operator -> e -> e) -- ^ Build an operator application experssion -> Int -- ^ Ambient precedence level: minimum precedence of top-level operator in result -> e -- ^ lhs: Initial non-operator expression already parsed -> Maybe ParsedOperator -- ^ Top-level operator in lhs (initialise with Nothing) -> Parser e climbRight pExpr' pOper makeOp d lhs mlhsop = asum [do paop@(PaOp op d2 a2) <- pOper guard (d2 >= d) -- respect global minimum precedence case mlhsop of -- check operator compatibility Just (PaOp _ d1 a1) -> guard (d1 > d2 || (d1 == d2 && a1 == a2 && a1 /= AssocNone)) Nothing -> return () let oprhsd = case a2 of AssocRight -> d2 ; _ -> d2 + 1 rhs <- pExpr' oprhsd climbRight pExpr' pOper makeOp d (makeOp lhs op rhs) (Just paop) ,return lhs] pEApp0 :: Parser (Expr ()) pEApp0 = do e1 <- pEAtom0 es <- many (inlineWhite >> pEAtom0) case es of [] -> return e1 _ -> return (EApp () e1 es) pEAtom0 :: Parser (Expr ()) pEAtom0 = (ELit () <$> pLiteral0) <|> pEList0 <|> pEVar0 <|> pECon0 <|> pEParens0 pLiteral0 :: Parser Literal pLiteral0 = asum [do as <- some (satisfy isDigit) let a = read as :: Integer asum [do char '.' bs <- some (satisfy isDigit) let b = read bs :: Integer cs <- optional $ do char 'e' cs <- some (satisfy isDigit) return cs let c = maybe 0 read cs :: Integer return (LFloat ((fromIntegral a + fromIntegral b / 10 ^ length bs) * 10 ^ c)) ,return (LInt a)] ,do char '\'' c <- pStringChar char '\'' return (LChar c) ,do char '"' s <- many pStringChar char '"' return (LString s)] pStringChar :: Parser Char pStringChar = asum [do char '\\' char 'x' let hexdig = do c <- satisfy $ \c' -> let c = toLower c' in 'a' <= c && c <= 'f' || '0' <= c && c <= '9' return $ if 'a' <= c then 10 + ord c - ord 'a' else ord c - ord '0' digs <- some hexdig return (chr (sum (zipWith (*) (reverse digs) (iterate (*16) 1)))) ,do char '\\' satisfy (const True) >>= \case 'n' -> return '\n' 'r' -> return '\r' 't' -> return '\t' 'a' -> return '\a' 'b' -> return '\b' '\'' -> return '\'' '\"' -> return '\"' '0' -> return '\0' c -> do raise Error $ "Invalid escape sequence: \\" ++ [c] return '?' ,do satisfy (\c -> c `notElem` "\n\r\\\'")] pEList0 :: Parser (Expr ()) pEList0 = do char '[' -- special syntax, no need for pKeySym es <- sepBy pExpr (inlineWhite >> char ',') inlineWhite char ']' return (EList () es) pEVar0 :: Parser (Expr ()) pEVar0 = EVar () <$> pIdentifier0 InBlock Lowercase pECon0 :: Parser (Expr ()) pECon0 = ECon () <$> pIdentifier0 InBlock Uppercase pEParens0 :: Parser (Expr ()) pEParens0 = do char '(' e <- pExpr inlineWhite char ')' return e data Associativity = AssocLeft | AssocRight | AssocNone deriving (Show, Eq) data ParsedOperator = PaOp Operator Int Associativity deriving (Show) pInfixOp :: Case care -> Parser (WithCaseOutput care ParsedOperator) pInfixOp cs = do inlineWhite case cs of Lowercase -> pLowerInfixOp0 Uppercase -> pUpperInfixOp0 Don'tCare -> asum [(Lowercase,) <$> pLowerInfixOp0 ,(Uppercase,) <$> pUpperInfixOp0] pLowerInfixOp0 :: Parser ParsedOperator pLowerInfixOp0 = asum [PaOp OEqu 4 AssocNone <$ pKeySym "==" ,PaOp OAdd 6 AssocLeft <$ pKeySym "+" ,PaOp OSub 6 AssocLeft <$ pKeySym "-" ,PaOp OMul 7 AssocLeft <$ pKeySym "*" ,PaOp ODiv 7 AssocLeft <$ pKeySym "/" ,PaOp OMod 7 AssocLeft <$ pKeySym "%" ,PaOp OPow 8 AssocRight <$ pKeySym "^" ] pUpperInfixOp0 :: Parser ParsedOperator pUpperInfixOp0 = asum [PaOp OCons 5 AssocRight <$ pKeySym ":"] pStandaloneTypesig0 :: Parser (Name, Type) pStandaloneTypesig0 = do isAtBlockLeft >>= guard -- assertAtBlockLeft Fatal "Expected definition, found indented stuff" name@(Name namestr) <- pIdentifier0 AtLeft Lowercase inlineWhite pKeySym "::" pushContext ("type signature for '" ++ namestr ++ "'") $ do ty <- pType return (name, ty) pType :: Parser Type pType = do ty1 <- pTypeApp asum [do inlineWhite pKeySym "->" ty2 <- pType return (TFun ty1 ty2) ,return ty1] pTypeApp :: Parser Type pTypeApp = many pTypeAtom >>= \case [] -> raise Fatal "Expected type" [t] -> return t t:ts -> return (TApp t ts) pTypeAtom :: Parser Type pTypeAtom = pTypeParens <|> pTypeList <|> pTypeName where pTypeParens = do inlineWhite char '(' asum [do inlineWhite char ')' return (TTup []) ,do ty1 <- pType ty2s <- many $ do inlineWhite char ',' pType inlineWhite char ')' case ty2s of [] -> return ty1 _ -> return (TTup (ty1 : ty2s))] pTypeList = do inlineWhite char '[' ty <- pType char ']' return (TList ty) pTypeName = do inlineWhite (cs, name) <- pIdentifier0 InBlock Don'tCare case cs of Uppercase -> return (TCon name) Lowercase -> return (TVar name) -- | Parse the given name-like keyword, ensuring that it is the entire word. pKeyword :: String -> Parser () pKeyword s = do string s -- traceM $ "pKeyword: parsed " ++ show s notFollowedBy (() <$ satisfy isNameContChar) -- | Parse the given symbol-like keyword, ensuring that it is the entire symbol. pKeySym :: String -> Parser () pKeySym s = do string s notFollowedBy (() <$ satisfy isSymbolChar) data Case care where Uppercase :: Case 'True Lowercase :: Case 'True Don'tCare :: Case 'False deriving instance Show (Case care) type family WithCaseOutput care a where WithCaseOutput 'True a = a WithCaseOutput 'False a = (Case 'True, a) -- | Consumes an identifier (word or parenthesised operator) at the current -- position. The `var` production in Haskell2010. -- var -> varid | "(" varsym ")" pIdentifier0 :: BlockPos -> Case care -> Parser (WithCaseOutput care Name) pIdentifier0 bpos cs = pAlphaName0 bpos cs <|> pParens0 (pSymbol0 bpos cs) where -- | Parser between parens, with the opening paren at the current position. pParens0 :: Parser a -> Parser a pParens0 p = do char '(' inlineWhite res <- p inlineWhite char ')' return res -- | Consumes a word-like name at the current position with the given case. The -- `varid` production in Haskell2010 for 'Lowercase', `conid' for 'Uppercase'. -- -- > varid -> (small {small | large | digit | "'"}) without reservedid pAlphaName0 :: BlockPos -> Case care -> Parser (WithCaseOutput care Name) pAlphaName0 bpos cs = do startPos <- gets psCur (_, s) <- readToken bpos (\atfst mc -> case (atfst, mc) of (True , Just c) | isNameHeadChar c -> Just (Right False) (True , _ ) -> Nothing (False, Just c) | isNameContChar c -> Just (Right False) (False, _ ) -> Just (Left ())) True guard (s `notElem` ["case", "class", "data", "default", "deriving", "do", "else" ,"foreign", "if", "import", "in", "infix", "infixl" ,"infixr", "instance", "let", "module", "newtype", "of" ,"then", "type", "where", "_"]) (name, adjoin) <- case cs of Uppercase | isLower (head s) -> do raiseAt startPos Error "Unexpected uppercase word at this position, assuming typo" return (toUpper (head s) : tail s, id) | otherwise -> return (s, id) Lowercase | isUpper (head s) -> do raiseAt startPos Error "Unexpected lowercase word at this position, assuming typo" return (toLower (head s) : tail s, id) | otherwise -> return (s, id) Don'tCare | isLower (head s) -> return (s, (Lowercase,)) | otherwise -> return (s, (Uppercase,)) return (adjoin (Name name)) isNameHeadChar :: Char -> Bool isNameHeadChar c = isLetter c || c == '_' isNameContChar :: Char -> Bool isNameContChar c = isNameHeadChar c || isDigit c || c == '\'' -- | Consumes a symbol at the current position. The `varsym` production in -- Haskell2010 for 'Lowercase', `consym` otherwise, and either if 'Don'tCare'. -- -- > varsym -> ((symbol without ":") {symbol}) without (reservedop | dashes) -- > consym -> (":" {symbol}) without reservedop -- > symbol -> ascSymbol | uniSymbol without (special | "_" | "\"" | "'") -- > ascSymbol -> ```!#$%&⋆+./<=>?@^|-~:``` -- > uniSymbol -> unicode symbol or punctuation -- > dashes -> "--" {"-"} -- > special -> ```(),;[]`{}``` -- > reservedop -> ".." | ":" | "::" | "=" | "\" | "|" | "<-" | "->" | "@" | "~" | "=>" pSymbol0 :: BlockPos -> Case care -> Parser (WithCaseOutput care Name) pSymbol0 bpos cs = do case bpos of AtLeft -> assertAtBlockLeft Fatal "Expected symbol, but found indented expression" InBlock -> assertInsideBlock Fatal "Expected symbol, but found end of indented expression" (c1, adjoin) <- case cs of Lowercase -> (,id) <$> satisfy (\c -> isSymbolChar c && c /= ':') Uppercase -> (,id) <$> satisfy (== ':') Don'tCare -> do c1 <- satisfy (\c -> isSymbolChar c) return (c1, if c1 == ':' then (Uppercase,) else (Lowercase,)) crest <- many (satisfy isSymbolChar) let name = c1 : crest guard (name `notElem` ["..", ":", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"]) guard (take 2 name /= "--") return (adjoin (Name name)) isSymbolChar :: Char -> Bool isSymbolChar c = (isAscSymbol || isUniSymbol) && not isSpecialExt where isSpecialExt = c `elem` "(),;[]`{}_\"'" isAscSymbol = c `elem` "!#$%&⋆+./<=>?@^|-~:" isUniSymbol = ord c > 127 && (isSymbol c || isPunctuation c) sepBy1 :: Parser a -> Parser sep -> Parser [a] sepBy1 p psep = do x1 <- p (psep >> (x1 :) <$> sepBy1 p psep) <|> return [x1] sepBy :: Parser a -> Parser sep -> Parser [a] sepBy p psep = sepBy1 p psep <|> return [] -- | Start a new layout block at the current position. The old layout block is -- restored after completion of this subparser. startLayoutBlock :: Parser a -> Parser a startLayoutBlock p = do ps0 <- get put (ps0 { psBlk = psCur ps0 }) res <- p modify (\ps -> ps { psBlk = psBlk ps0 }) return res data Fatality fatal where Error :: Fatality 'False Fatal :: Fatality 'True deriving instance Show (Fatality fatal) type family FatalCtx fatal a where FatalCtx 'False a = a ~ () FatalCtx 'True a = () raise' :: Fatality fatal -> String -> Parser () raise' Error = raise Error raise' Fatal = raise Fatal raise :: FatalCtx fatal a => Fatality fatal -> String -> Parser a raise fat msg = gets psCur >>= \pos -> raiseAt pos fat msg -- | Raise an error with the given fatality and description. raiseAt :: FatalCtx fatal a => Pos -> Fatality fatal -> String -> Parser a raiseAt pos fat msg = do Context { ctxFile = fp , ctxStack = stk } <- ask let err = ErrMsg fp stk pos msg case fat of Error -> dictate (pure err) Fatal -> confess (pure err) describeLocation :: Parser String describeLocation = do fp <- asks ctxFile cur <- gets psCur return $ fp ++ ":" ++ show (posLine cur + 1) ++ ":" ++ show (posCol cur + 1) -- | Registers a scope description on the stack for error reporting. pushContext :: String -> Parser a -> Parser a pushContext descr = local (\c -> c { ctxStack = descr : ctxStack c }) -- | Registers a scope description on the stack for error reporting, suffixed -- with the current parsing location. pushLocatedContext :: String -> Parser a -> Parser a pushLocatedContext descr p = do loc <- describeLocation pushContext (descr ++ " at " ++ loc) p data BlockPos = AtLeft | InBlock deriving (Show) -- | Consumes a token at the current position, asserting that we are -- in the position indicated by the 'BlockPos' argument. The token is defined -- by a pure stateful parser. If encountering a newline or EOF, the parser is -- run on this character ('Nothing' for EOF); if this produces a result, the -- result is returned; otherwise, the parser fails. The newline is not consumed. readToken :: BlockPos -> (s -> Maybe Char -> Maybe (Either r s)) -> s -> Parser (r, String) readToken bpos f s0 = do case bpos of AtLeft -> assertAtBlockLeft Fatal "Expected token, but found indented expression" InBlock -> assertInsideBlock Fatal "Expected token, but found end of indented expression" let loop :: (s -> Maybe Char -> Maybe (Either r s)) -> s -> Parser (r, String) loop f' st = do ps <- get case psRest ps of [] | Just (Left res) <- f' st Nothing -> return (res, "") | otherwise -> empty '\n' : _ | Just (Left res) <- f' st (Just '\n') -> return (res, "") c : cs -> case f' st (Just c) of Nothing -> empty Just (Left res) -> return (res, "") Just (Right st') -> do let Pos line col = psCur ps put (ps { psCur = Pos line (col + 1), psRest = cs }) fmap (c :) <$> loop f' st' loop f s0 -- | Consumes all whitespace and comments (including newlines), but only if -- this then leaves the parser inside the current block. If not, this fails. inlineWhite :: Parser () inlineWhite = do skipWhiteComment whenM (not <$> isInsideBlock) empty -- | Consumes all whitespace and comments (including newlines). Note: this may -- end outside the current block. skipWhiteComment :: Parser () skipWhiteComment = do inlineSpaces _ <- many (blockComment >> inlineSpaces) optional_ lineComment optional_ (consumeNewline >> skipWhiteComment) where -- | Consumes some inline whitespace. Stops before newlines. inlineSpaces :: Parser () inlineSpaces = readWhileInline isSpace -- | Consumes an delimited comment including both end markers. Note: this may -- end outside the current block. blockComment :: Parser () blockComment = do string "{-" -- no need for pKeySym here let loop = do readWhileInline (`notElem` "{-") -- "-}" also starts with '-' asum [string "-}" ,eof >> raise Error "Unfinished {- -} comment at end of file" ,blockComment >> loop ,consumeNewline >> loop] loop -- | Consumes a line comment marker and the rest of the line, excluding -- newline. lineComment :: Parser () lineComment = do -- '--!' is an operator, so we need to parse a whole symbol here. pKeySym "--" readWhileInline (const True) -- | Raises an error if we're not currently at the given column. assertAtBlockLeft :: Fatality fatal -> String -> Parser () assertAtBlockLeft fat msg = whenM (not <$> isAtBlockLeft) $ raise' fat msg -- | Raises an error if psCol is not greater than psRefCol. assertInsideBlock :: Fatality fatal -> String -> Parser () assertInsideBlock fat msg = whenM (not <$> isInsideBlock) $ raise' fat msg -- | Raises an error if we're not currently at EOF. assertEOF :: Fatality fatal -> Parser () assertEOF fat = gets psRest >>= \case [] -> return () _ -> raise' fat "Unexpected stuff" -- | Returns whether the current position is _within_ the current block, for -- soft-wrapping content. This means that col > blkCol. isInsideBlock :: Parser Bool isInsideBlock = do PS { psCur = cur, psBlk = blk } <- get return $ posLine cur >= posLine blk && posCol cur > posCol blk -- | Returns whether the current position is at the left border of the block; -- this is for list content such as function definitions or let bindings. This -- means that col == blkCol. isAtBlockLeft :: Parser Bool isAtBlockLeft = do PS { psCur = cur, psBlk = blk } <- get return $ posLine cur >= posLine blk && posCol cur == posCol blk -- | Consumes characters while the predicate holds or until (and excluding) -- a newline, whichever comes first. readWhileInline :: (Char -> Bool) -> Parser () readWhileInline p = do (taken, rest) <- span (\c -> p c && c /= '\n') <$> gets psRest modify (\ps -> ps { psCur = let Pos line col = psCur ps in Pos line (col + length taken) , psRest = rest }) -- | Consumes exactly one newline at the current position. consumeNewline :: Parser () consumeNewline = gets psRest >>= \case '\n' : rest -> modify (\ps -> ps { psCur = Pos (posLine (psCur ps) + 1) 0 , psRest = rest }) _ -> empty -- | Consumes exactly one character, unequal to newline, at the current position. satisfy :: (Char -> Bool) -> Parser Char satisfy p = do -- traceM "entering satisfy" r <- gets psRest -- traceM "got rest" r `seq` return () -- traceM "seqd rest" -- traceM ("rest is " ++ show r) case r of c : rest | c /= '\n', p c -> do modify (\ps -> let Pos line col = psCur ps in ps { psCur = Pos line (col + 1) , psRest = rest }) return c _ -> empty -- | Consumes exactly this character at the current position. Must not be a -- newline. char :: Char -> Parser () char c = string [c] -- | Consumes exactly this string at the current position. The string must not -- contain a newline. string :: String -> Parser () string s | any (== '\n') s = error "Newline in 'string' argument" string s = do ps <- get let Pos line col = psCur ps if take (length s) (psRest ps) == s then put (ps { psCur = Pos line (col + length s) , psRest = drop (length s) (psRest ps) }) else empty lookAhead :: Parser a -> Parser a lookAhead p = do ps <- get success <- absolve Nothing (Just <$> p) put ps -- restore state, as if nothing happened case success of Nothing -> empty Just x -> return x notFollowedBy :: Parser () -> Parser () notFollowedBy p = do ps <- get success <- absolve True (False <$ p) put ps -- restore state, as if nothing happened when (not success) empty -- | Only succeeds at EOF. eof :: Parser () eof = gets psRest >>= \case [] -> return () _ -> empty whenM :: (Monad m, Monoid a) => m Bool -> m a -> m a whenM mb mx = mb >>= \b -> if b then mx else return mempty optional_ :: Alternative f => f a -> f () optional_ a = (() <$ a) <|> pure ()