{-# LANGUAGE LambdaCase #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE TypeOperators #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE ViewPatterns #-} {-# LANGUAGE InstanceSigs #-} -- I don't want a warning for 'head' and 'tail' in this file. But I also don't -- want GHCs before 9.8 to complain that they don't know the x-partial warning. {-# OPTIONS_GHC -Wno-unrecognised-warning-flags -Wno-x-partial #-} module Parser ( parse, printErrMsg, -- * Re-exports These(..), ) where -- import Control.Applicative import Control.Monad import Control.Monad.Chronicle import Control.Monad.Reader import Control.Monad.State.Lazy import Data.Char import Data.Either (partitionEithers) import Data.List.NonEmpty (NonEmpty(..)) import Data.These import Debug.Trace import AST import Control.FAlternative import Pretty data Pos = Pos { posLine :: Int -- ^ zero-based , posCol :: Int -- ^ zero-based } deriving (Show) -- Positions are zero-based in both dimensions. -- See 'isInsideBlock' and 'isAtBlockLeft' for the two relevant "inside the -- block" conditions. data PS = PS { psBlk :: Pos -- ^ Start of current layout block , psCur :: Pos -- ^ Current parsing position , psRest :: String -- ^ Rest of the input } deriving (Show) data Context = Context { ctxFile :: FilePath , ctxStack :: [String] -- ^ Stack of syntax scopes, for error reporting } deriving (Show) type family BacktrackPath fail r where BacktrackPath 'Fallible r = r BacktrackPath 'Infallible r = () newtype Parser fail a = Parser { runParser :: forall r. Context -> PS -> (PS -> [ErrMsg] -> a -> r) -- ^ OK: some diagnostics, but parsing succeeded -> ([ErrMsg] -> r) -- ^ Fatal: error that prevented parsing from proceeding -> BacktrackPath fail r -- ^ Backtrack: alternative was exhausted without success -> r } type IParser = Parser 'Infallible type FParser = Parser 'Fallible instance Functor (Parser fail) where fmap f (Parser g) = Parser (\ctx ps kok kfat kbt -> g ctx ps (\ps' errs x -> kok ps' errs (f x)) kfat kbt) instance Applicative (Parser fail) where pure x = Parser (\_ ps kok _ _ -> kok ps [] x) (<*>) = ap instance Monad (Parser fail) where Parser g >>= f = Parser $ \ctx ps kok kfat kbt -> g ctx ps (\ps1 errs x -> x `seq` runParser (f x) ctx ps1 (\ps2 errs' y -> kok ps2 (errs <> errs') y) (\errs' -> kfat (errs <> errs')) kbt) (\errs -> kfat errs) kbt instance FAlternative Parser where faempty = Parser (\_ _ _ _ kbt -> kbt) Parser f <|>> Parser g = Parser $ \ctx ps kok kfat kbt -> f ctx ps kok kfat (g ctx ps kok kfat kbt) noFail (Parser f) = Parser $ \ctx ps kok kfat _ -> f ctx ps kok kfat () toFallible :: forall fail a. KnownFallible fail => Parser fail a -> Parser 'Fallible a toFallible (Parser f) = Parser $ \ctx ps kok kfat kbt -> f ctx ps kok kfat (case knownFallible @fail of SFallible -> kbt SInfallible -> ()) instance MonadState PS (Parser fail) where state f = Parser $ \_ ps kok _ _ -> let (x, ps') = f ps in kok ps' [] x instance MonadReader Context (Parser fail) where reader f = Parser $ \ctx ps kok _ _ -> kok ps [] (f ctx) local f (Parser g) = Parser (\ctx -> g (f ctx)) instance KnownFallible fail => MonadChronicle [ErrMsg] (Parser fail) where dictate errs = Parser $ \_ ps kok _ _ -> kok ps errs () confess errs = Parser $ \_ _ _ kfat _ -> kfat errs memento (Parser f) = Parser $ \ctx ps kok _ kbt -> f ctx ps (\ps' errs x -> kok ps' errs (Right x)) (\errs -> kok ps [] (Left errs)) kbt absolve def (toFallible -> Parser f) = Parser $ \ctx ps kok _ _ -> f ctx ps kok (\_ -> kok ps [] def) (kok ps [] def) condemn (Parser f) = Parser $ \ctx ps kok kfat kbt -> f ctx ps (\ps' errs x -> case errs of [] -> kok ps' [] x _ -> kfat errs) kfat kbt retcon g (Parser f) = Parser $ \ctx ps kok kfat kbt -> f ctx ps (\ps' errs x -> kok ps' (g errs) x) (\errs -> kfat (g errs)) kbt chronicle th = case th of This errs -> Parser (\_ _ _ kfat _ -> kfat errs) That res -> Parser (\_ ps kok _ _ -> kok ps [] res) These errs res -> Parser (\_ ps kok _ _ -> kok ps errs res) -- Positions are zero-based in both dimensions data ErrMsg = ErrMsg { errFile :: FilePath , errStk :: [String] , errPos :: Pos , errMsg :: String } deriving (Show) printErrMsg :: ErrMsg -> String printErrMsg (ErrMsg fp stk (Pos y x) s) = unlines (map (\descr -> "In " ++ descr ++ ":") (reverse stk)) ++ fp ++ ":" ++ show (y + 1) ++ ":" ++ show (x + 1) ++ ": " ++ s parse :: FilePath -> String -> These [ErrMsg] (Program ()) parse fp source = runParser pProgram (Context fp []) (PS (Pos 0 0) (Pos 0 0) source) (\_ errs res -> case errs of [] -> That res _ -> These errs res) (\errs -> This errs) () -- (This [ErrMsg fp [] (Pos 0 0) "Parse error, no grammar alternatives match your source"]) pProgram :: IParser (Program ()) pProgram = do defs <- pTopDefs let (datadefs, fundefs) = partitionEithers defs skipWhiteComment assertEOF Error return (Program datadefs fundefs) pTopDefs :: IParser [Either DataDef (FunDef ())] pTopDefs = do faoptional pTopDef >>= \case Nothing -> do skipWhiteComment faoptional eof >>= \case Nothing -> do raise Error "Unparseable content" readWhileInline (const True) pTopDefs -- will skip the possible newline Just () -> return [] Just defs -> do defs2 <- pTopDefs return (defs ++ defs2) pTopDef :: FParser [Either DataDef (FunDef ())] pTopDef = do noFail skipWhiteComment noFail isAtBlockLeft >>= \case True -> map Left <$> pDataDef0 <|>> map Right <$> pFunDef0 False -> do noFail $ raise Error "Skipping unparseable content" noFail $ readWhileInline (const True) pTopDef pDataDef0 :: FParser [DataDef] pDataDef0 = do pKeyword "data" noFail $ do inlineWhite faoptional (pIdentifier0 InBlock Uppercase WCAssume) >>= \case Nothing -> do raise Error "Expected data declaration after 'data'" return [] Just name -> do params <- famany (inlineWhite >> pIdentifier0 InBlock Lowercase WCBacktrack) cons <- pDatacons "=" return [DataDef name params cons] where pDatacons :: String -> IParser [(Name, [Type])] pDatacons leader = do inlineWhite facatch (return []) $ do pKeySym leader inlineWhite name <- pIdentifier0 InBlock Uppercase WCAssume fields <- noFail $ famany pTypeAtom rest <- noFail $ pDatacons "|" return ((name, fields) : rest) data FunEqContext = FirstLine | TypeSig Name | Continue Name deriving (Show) pFunDef0 :: FParser [FunDef ()] pFunDef0 = faasum' [do (name, typ) <- pStandaloneTypesig0 noFail $ do faoptional (pFunEq (TypeSig name)) >>= \case Nothing -> do raise Error $ "Expected function equation for " ++ pretty name ++ " after type signature" return [] Just [] -> return [FunDef name (Just typ) (FunEq name [] (Plain (ETup () [])) :| [])] Just (clause1 : clauses1) -> do clauses <- concat <$> famany (pFunEq (Continue name)) return [FunDef name (Just typ) (clause1 :| clauses1 ++ clauses)] ,do clause1@(FunEq name _ _) : clauses1 <- pFunEq FirstLine noFail $ do clauses <- concat <$> famany (pFunEq (Continue name)) return [FunDef name Nothing (clause1 :| clauses1 ++ clauses)]] -- | Given the name from the type signature or a previous clause, if any. pFunEq :: FunEqContext -> FParser [FunEq ()] pFunEq fectx = do noFail skipWhiteComment faguardM isAtBlockLeft pushLocatedContext "function equation" $ do name <- pIdentifier0 AtLeft Lowercase WCAssume -- We want to do various checks with what came before, and there are -- multiple branches where we decide to continue parsing this equation. To -- avoid code duplication or an early exit monad, we use a boolean here. success <- case fectx of FirstLine -> return True TypeSig checkName | name == checkName -> return True | otherwise -> noFail $ do raise Error $ "Name of function clause does not correspond with type signature: " ++ pretty checkName return False Continue checkName -> do faguard (name == checkName) -- this can still backtrack out of pFunEq return True noFail $ if success then do pats <- famany (pPattern 11) rhs <- pRHS "=" return [FunEq name pats rhs] else return [] -- | Pass "=" for function definitions and "->" for case clauses. pRHS :: String -> IParser (RHS ()) pRHS sepsym = do -- TODO: parse guards inlineWhite pKeySym sepsym <|>> raise Error ("Expected " ++ show sepsym) expr <- pExpr <|>> (raise Error "Expected expression" >> return (ETup () [])) return (Plain expr) pPattern :: Int -> FParser (Pattern ()) pPattern d = inlineWhite >> pPattern0 d pPattern0 :: Int -> FParser (Pattern ()) pPattern0 d = do p0 <- pPatExprAtom0 (max 10 d) climbRight pPattern (pInfixOp Uppercase) (POp ()) d p0 Nothing pExpr :: FParser (Expr ()) pExpr = do inlineWhite -- basics: lit, list, var, con, tup -- expression atom: application of basics -- expression parser: op -- around: let, case, if pushLocatedContext "expression" $ do faasum' [pELet0 ,pECase0 ,pEIf0 ,pExprOpExpr0 0] pPatExprAtom0 :: Int -> FParser (Pattern ()) pPatExprAtom0 d = faasum' [pPatWildcard0 ,pPatVarOrAs0 ,pPatCon0 ,pPatList0 ,pPatParens0] where pPatWildcard0 = pKeySym "_" >> return (PWildcard ()) pPatVarOrAs0 = do var <- pIdentifier0 InBlock Lowercase WCBacktrack facatch (return (PVar () var)) $ do inlineWhite pKeySym "@" noFail $ do p <- pPattern 11 <|>> (raise Error "Expected pattern after '@'" >> return (PWildcard ())) return (PAs () var p) pPatCon0 = do con <- pIdentifier0 InBlock Uppercase WCBacktrack noFail $ if d > 10 then return (PCon () con []) else do args <- famany (pPattern 11) return (PCon () con args) pPatList0 = do char '[' -- special syntax, no need for pKeySym noFail $ do ps <- pPattern 0 `sepBy` (inlineWhite >> char ',') inlineWhite char ']' <|>> raise Error "Expected ']'" return (PList () ps) pPatParens0 = do char '(' inlineWhite faasum' [do char ')' return (PTup () []) ,do p <- pPattern0 0 inlineWhite faasum' [do char ')' return p ,do char ',' ps <- pPattern 0 `sepBy1` (inlineWhite >> char ',') return (PTup () (p : ps))]] {- pELet0 :: IParser (Expr ()) pELet0 = do pKeyword "let" inlineWhite defs <- startLayoutBlock $ do -- The first occurrence is also going to skip whitespace in front, -- which is redundant -- but not harmful. many $ do skipWhiteComment -- Note: now not necessarily in the indented block. Which is -- precisely what we need here. If we see "in", let the 'many' -- choice fail so that the defs loop ends. But let it fail outside -- this asum so that it is the many that picks it up, not this -- asum. res <- asum [Nothing <$ pKeyword "in" -- note: will be dropped due to the empty backtrack ,Just <$> pFunDef0] case res of Nothing -> empty Just def -> return def inlineWhite asum [do pKeyword "in" inlineWhite body <- pExpr <|> (raise Error "Expected expression" >> return (ETup () [])) return (ELet () defs body) ,do raise Error "Expected 'in' after 'let'" return (ELet () defs (ETup () []))] pECase0 :: IParser (Expr ()) pECase0 = do pKeyword "case" e <- pExpr inlineWhite pKeyword "of" inlineWhite startLayoutBlock $ do -- The first clause is going to skip whitespace, but that's harmless -- (though redundant). let pClause = do skipWhiteComment whenM (not <$> isInsideBlock) (() <$ empty) pat <- pPattern0 0 rhs <- pRHS "->" return (pat, rhs) clauses <- many pClause return (ECase () e clauses) pEIf0 :: IParser (Expr ()) pEIf0 = do pKeyword "if" e1 <- pExpr inlineWhite pKeyword "then" e2 <- pExpr inlineWhite pKeyword "else" e3 <- pExpr return (EIf () e1 e2 e3) pExprOpExpr :: Int -> IParser (Expr ()) pExprOpExpr d = inlineWhite >> pExprOpExpr0 d pExprOpExpr0 :: Int -> IParser (Expr ()) pExprOpExpr0 d = do e0 <- pEApp0 climbRight pExprOpExpr (snd <$> pInfixOp Don'tCare) (EOp ()) d e0 Nothing -} climbRight :: (Int -> FParser e) -- ^ Parse an expression at the given precedence level -> FParser ParsedOperator -- ^ Parse an operator -> (e -> Operator -> e -> e) -- ^ Build an operator application experssion -> Int -- ^ Ambient precedence level: minimum precedence of top-level operator in result -> e -- ^ lhs: Initial non-operator expression already parsed -> Maybe ParsedOperator -- ^ Top-level operator in lhs (initialise with Nothing) -> FParser e climbRight pExpr' pOper makeOp d lhs mlhsop = facatch (return lhs) $ do paop@(PaOp op d2 a2) <- pOper faguard (d2 >= d) -- respect global minimum precedence case mlhsop of -- check operator compatibility Just (PaOp _ d1 a1) -> faguard (d1 > d2 || (d1 == d2 && a1 == a2 && a1 /= AssocNone)) Nothing -> return () let oprhsd = case a2 of AssocRight -> d2 ; _ -> d2 + 1 rhs <- pExpr' oprhsd climbRight pExpr' pOper makeOp d (makeOp lhs op rhs) (Just paop) {- pEApp0 :: IParser (Expr ()) pEApp0 = do e1 <- pEAtom0 es <- many (inlineWhite >> pEAtom0) case es of [] -> return e1 _ -> return (EApp () e1 es) pEAtom0 :: IParser (Expr ()) pEAtom0 = (ELit () <$> pLiteral0) <|> pEList0 <|> pEVar0 <|> pECon0 <|> pEParens0 pLiteral0 :: IParser Literal pLiteral0 = asum [do as <- some (satisfy isDigit) let a = read as :: Integer asum [do char '.' bs <- some (satisfy isDigit) let b = read bs :: Integer cs <- optional $ do char 'e' cs <- some (satisfy isDigit) return cs let c = maybe 0 read cs :: Integer return (LFloat ((fromIntegral a + fromIntegral b / 10 ^ length bs) * 10 ^ c)) ,return (LInt a)] ,do char '\'' c <- pStringChar char '\'' return (LChar c) ,do char '"' s <- many pStringChar char '"' return (LString s)] pStringChar :: IParser Char pStringChar = asum [do char '\\' char 'x' let hexdig = do c <- satisfy $ \c' -> let c = toLower c' in 'a' <= c && c <= 'f' || '0' <= c && c <= '9' return $ if 'a' <= c then 10 + ord c - ord 'a' else ord c - ord '0' digs <- some hexdig return (chr (sum (zipWith (*) (reverse digs) (iterate (*16) 1)))) ,do char '\\' satisfy (const True) >>= \case 'n' -> return '\n' 'r' -> return '\r' 't' -> return '\t' 'a' -> return '\a' 'b' -> return '\b' '\'' -> return '\'' '\"' -> return '\"' '0' -> return '\0' c -> do raise Error $ "Invalid escape sequence: \\" ++ [c] return '?' ,do satisfy (\c -> c `notElem` "\n\r\\\'")] pEList0 :: IParser (Expr ()) pEList0 = do char '[' -- special syntax, no need for pKeySym es <- sepBy pExpr (inlineWhite >> char ',') inlineWhite char ']' return (EList () es) pEVar0 :: IParser (Expr ()) pEVar0 = EVar () <$> pIdentifier0 InBlock Lowercase pECon0 :: IParser (Expr ()) pECon0 = ECon () <$> pIdentifier0 InBlock Uppercase pEParens0 :: IParser (Expr ()) pEParens0 = do char '(' e <- pExpr inlineWhite char ')' return e -} data Associativity = AssocLeft | AssocRight | AssocNone deriving (Show, Eq) data ParsedOperator = PaOp Operator Int Associativity deriving (Show) pInfixOp :: Case care -> FParser (WithCaseOutput care ParsedOperator) pInfixOp cs = do inlineWhite case cs of Lowercase -> pLowerInfixOp0 Uppercase -> pUpperInfixOp0 Don'tCare -> faasum' [(Lowercase,) <$> pLowerInfixOp0 ,(Uppercase,) <$> pUpperInfixOp0] pLowerInfixOp0 :: FParser ParsedOperator pLowerInfixOp0 = faasum' [PaOp OEqu 4 AssocNone <$ pKeySym "==" ,PaOp OAdd 6 AssocLeft <$ pKeySym "+" ,PaOp OSub 6 AssocLeft <$ pKeySym "-" ,PaOp OMul 7 AssocLeft <$ pKeySym "*" ,PaOp ODiv 7 AssocLeft <$ pKeySym "/" ,PaOp OMod 7 AssocLeft <$ pKeySym "%" ,PaOp OPow 8 AssocRight <$ pKeySym "^" ] pUpperInfixOp0 :: FParser ParsedOperator pUpperInfixOp0 = faasum' [PaOp OCons 5 AssocRight <$ pKeySym ":"] pStandaloneTypesig0 :: FParser (Name, Type) pStandaloneTypesig0 = do name <- pIdentifier0 AtLeft Lowercase WCBacktrack inlineWhite pKeySym "::" noFail $ pushContext ("type signature for " ++ pretty name) $ do ty <- pType <|>> (raise Error "Expected type" >> return (TTup [])) return (name, ty) pType :: FParser Type pType = do ty1 <- pTypeApp facatch (return ty1) $ do inlineWhite pKeySym "->" noFail $ do ty2 <- pType <|>> (raise Error "Expected type" >> return (TTup [])) return (TFun ty1 ty2) pTypeApp :: FParser Type pTypeApp = fasome pTypeAtom >>= \case t :| [] -> return t t :| ts -> return (TApp t ts) pTypeAtom :: FParser Type pTypeAtom = faasum' [pTypeParens, pTypeList, pTypeName] where pTypeParens = do inlineWhite char '(' faasum' [do inlineWhite char ')' return (TTup []) ,do ty1 <- pType noFail $ do ty2s <- famany $ do inlineWhite char ',' noFail $ pType <|>> (raise Error "Expected type" >> return (TTup [])) inlineWhite char ')' <|>> raise Error "Expected closing ')'" case ty2s of [] -> return ty1 _ -> return (TTup (ty1 : ty2s))] pTypeList = do inlineWhite char '[' ty <- pType noFail $ char ']' <|>> raise Error "Expecte closing ']'" return (TList ty) pTypeName = do inlineWhite (cs, name) <- pIdentifier0 InBlock Don'tCare () case cs of Uppercase -> return (TCon name) Lowercase -> return (TVar name) -- | Parse the given name-like keyword, ensuring that it is the entire word. pKeyword :: String -> FParser () pKeyword s = do string s -- traceM $ "pKeyword: parsed " ++ show s notFollowedBy (() <$ satisfy isNameContChar) -- | Parse the given symbol-like keyword, ensuring that it is the entire symbol. pKeySym :: String -> FParser () pKeySym s = do string s notFollowedBy (() <$ satisfy isSymbolChar) data Case care where Uppercase :: Case 'True Lowercase :: Case 'True Don'tCare :: Case 'False deriving instance Show (Case care) type family WithCaseOutput care a where WithCaseOutput 'True a = a WithCaseOutput 'False a = (Case 'True, a) type family If c a b where If 'True a b = a If 'False a b = b data WrongCaseBacktrack = WCBacktrack -- ^ If a word was found but it had the wrong case, fail and backtrack. | WCAssume -- ^ Be certain that this case is expected here, and assume incorrect -- case is a typo. deriving (Show) -- | Consumes an identifier (word or parenthesised operator) at the current -- position. The `var` production in Haskell2010. -- var -> varid | "(" varsym ")" pIdentifier0 :: BlockPos -> Case care -> If care WrongCaseBacktrack () -> FParser (WithCaseOutput care Name) pIdentifier0 bpos cs wrongcase = pAlphaName0 bpos cs wrongcase <|>> pParens0 (pSymbol0 bpos cs) where -- | Parser between parens, with the opening paren at the current position. pParens0 :: FParser a -> FParser a pParens0 p = do char '(' inlineWhite res <- p inlineWhite char ')' return res -- | Consumes a word-like name at the current position with the given case. The -- `varid` production in Haskell2010 for 'Lowercase', `conid' for 'Uppercase'. -- -- > varid -> (small {small | large | digit | "'"}) without reservedid pAlphaName0 :: BlockPos -> Case care -> If care WrongCaseBacktrack () -> FParser (WithCaseOutput care Name) pAlphaName0 bpos cs wrongcase = do startPos <- gets psCur (_, s) <- readToken bpos (\atfst mc -> case (atfst, mc) of (True , Just c) | isNameHeadChar c -> Just (Right False) (True , _ ) -> Nothing (False, Just c) | isNameContChar c -> Just (Right False) (False, _ ) -> Just (Left ())) True faguard (s `notElem` ["case", "class", "data", "default", "deriving", "do", "else" ,"foreign", "if", "import", "in", "infix", "infixl" ,"infixr", "instance", "let", "module", "newtype", "of" ,"then", "type", "where", "_"]) (name, adjoin) <- case cs of Uppercase | isLower (head s) -> case wrongcase of WCBacktrack -> faempty WCAssume -> noFail $ do raiseAt startPos Error "Unexpected uppercase word at this position, assuming typo" return (toUpper (head s) : tail s, id) | otherwise -> return (s, id) Lowercase | isUpper (head s) -> case wrongcase of WCBacktrack -> faempty WCAssume -> noFail $ do raiseAt startPos Error "Unexpected lowercase word at this position, assuming typo" return (toLower (head s) : tail s, id) | otherwise -> return (s, id) Don'tCare | isLower (head s) -> return (s, (Lowercase,)) | otherwise -> return (s, (Uppercase,)) return (adjoin (Name name)) isNameHeadChar :: Char -> Bool isNameHeadChar c = isLetter c || c == '_' isNameContChar :: Char -> Bool isNameContChar c = isNameHeadChar c || isDigit c || c == '\'' -- | Consumes a symbol at the current position. The `varsym` production in -- Haskell2010 for 'Lowercase', `consym` otherwise, and either if 'Don'tCare'. -- -- > varsym -> ((symbol without ":") {symbol}) without (reservedop | dashes) -- > consym -> (":" {symbol}) without reservedop -- > symbol -> ascSymbol | uniSymbol without (special | "_" | "\"" | "'") -- > ascSymbol -> ```!#$%&⋆+./<=>?@^|-~:``` -- > uniSymbol -> unicode symbol or punctuation -- > dashes -> "--" {"-"} -- > special -> ```(),;[]`{}``` -- > reservedop -> ".." | ":" | "::" | "=" | "\" | "|" | "<-" | "->" | "@" | "~" | "=>" pSymbol0 :: BlockPos -> Case care -> FParser (WithCaseOutput care Name) pSymbol0 bpos cs = do case bpos of AtLeft -> noFail $ assertAtBlockLeft Fatal "Expected symbol, but found indented expression" InBlock -> noFail $ assertInsideBlock Fatal "Expected symbol, but found end of indented expression" (c1, adjoin) <- case cs of Lowercase -> (,id) <$> satisfy (\c -> isSymbolChar c && c /= ':') Uppercase -> (,id) <$> satisfy (== ':') Don'tCare -> do c1 <- satisfy (\c -> isSymbolChar c) return (c1, if c1 == ':' then (Uppercase,) else (Lowercase,)) crest <- noFail $ famany (satisfy isSymbolChar) let name = c1 : crest faguard (name `notElem` ["..", ":", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"]) faguard (take 2 name /= "--") return (adjoin (Name name)) isSymbolChar :: Char -> Bool isSymbolChar c = (isAscSymbol || isUniSymbol) && not isSpecialExt where isSpecialExt = c `elem` "(),;[]`{}_\"'" isAscSymbol = c `elem` "!#$%&⋆+./<=>?@^|-~:" isUniSymbol = ord c > 127 && (isSymbol c || isPunctuation c) sepBy1 :: FParser a -> FParser sep -> FParser [a] sepBy1 p psep = do x1 <- p (psep >> (x1 :) <$> sepBy1 p psep) <|>> pure [x1] sepBy :: FParser a -> FParser sep -> IParser [a] sepBy p psep = sepBy1 p psep <|>> pure [] -- | Start a new layout block at the current position. The old layout block is -- restored after completion of this subparser. startLayoutBlock :: IParser a -> IParser a startLayoutBlock p = do ps0 <- get put (ps0 { psBlk = psCur ps0 }) res <- p modify (\ps -> ps { psBlk = psBlk ps0 }) return res data Fatality fatal where Error :: Fatality 'False Fatal :: Fatality 'True deriving instance Show (Fatality fatal) type family FatalCtx fatal a where FatalCtx 'False a = a ~ () FatalCtx 'True a = () raise_ :: Fatality fatal -> String -> IParser () raise_ Error = raise Error raise_ Fatal = raise Fatal raise :: FatalCtx fatal a => Fatality fatal -> String -> IParser a raise fat msg = gets psCur >>= \pos -> raiseAt pos fat msg -- | Raise an error with the given fatality and description. raiseAt :: FatalCtx fatal a => Pos -> Fatality fatal -> String -> IParser a raiseAt pos fat msg = do Context { ctxFile = fp , ctxStack = stk } <- ask let err = ErrMsg fp stk pos msg case fat of Error -> dictate (pure err) Fatal -> confess (pure err) describeLocation :: IParser String describeLocation = do fp <- asks ctxFile cur <- gets psCur return $ fp ++ ":" ++ show (posLine cur + 1) ++ ":" ++ show (posCol cur + 1) -- | Registers a scope description on the stack for error reporting. pushContext :: String -> Parser fail a -> Parser fail a pushContext descr = local (\c -> c { ctxStack = descr : ctxStack c }) -- | Registers a scope description on the stack for error reporting, suffixed -- with the current parsing location. pushLocatedContext :: String -> Parser fail a -> Parser fail a pushLocatedContext descr p = do loc <- noFail describeLocation pushContext (descr ++ " at " ++ loc) p data BlockPos = AtLeft | InBlock deriving (Show) -- | Consumes a token at the current position, asserting that we are -- in the position indicated by the 'BlockPos' argument. The token is defined -- by a pure stateful parser. If encountering a newline or EOF, the parser is -- run on this character ('Nothing' for EOF); if this produces a result, the -- result is returned; otherwise, the parser fails. The newline is not consumed. readToken :: BlockPos -> (s -> Maybe Char -> Maybe (Either r s)) -> s -> FParser (r, String) readToken bpos f s0 = do case bpos of AtLeft -> noFail $ assertAtBlockLeft Fatal "Expected token, but found indented expression" InBlock -> noFail $ assertInsideBlock Fatal "Expected token, but found end of indented expression" let loop :: (s -> Maybe Char -> Maybe (Either r s)) -> s -> FParser (r, String) loop f' st = do ps <- get case psRest ps of [] | Just (Left res) <- f' st Nothing -> return (res, "") | otherwise -> faempty '\n' : _ | Just (Left res) <- f' st (Just '\n') -> return (res, "") c : cs -> case f' st (Just c) of Nothing -> faempty Just (Left res) -> return (res, "") Just (Right st') -> do let Pos line col = psCur ps put (ps { psCur = Pos line (col + 1), psRest = cs }) fmap (c :) <$> loop f' st' loop f s0 -- | Consumes all whitespace and comments (including newlines), but only if -- this then leaves the parser inside the current block. If not, succeeds and -- consumes nothing. inlineWhite :: Parser fail () inlineWhite = do ps <- get noFail skipWhiteComment whenM (noFail $ not <$> isInsideBlock) $ put ps -- | Consumes all whitespace and comments (including newlines). Note: this may -- end outside the current block. skipWhiteComment :: IParser () skipWhiteComment = do inlineSpaces _ <- famany (blockComment >> noFail inlineSpaces) optional_ lineComment optional_ (consumeNewline >> noFail skipWhiteComment) where -- | Consumes some inline whitespace. Stops before newlines. inlineSpaces :: IParser () inlineSpaces = readWhileInline isSpace -- | Consumes an delimited comment including both end markers. Note: this may -- end outside the current block. blockComment :: FParser () blockComment = do string "{-" -- no need for pKeySym here let loop = do faasum [string "-}" ,eof >> noFail (raise Error "Unfinished {- -} comment at end of file") ,blockComment >> noFail loop ,consumeNewline >> noFail loop] (readWhileInline (`notElem` "{-")) -- "-}" also starts with '-' noFail loop -- | Consumes a line comment marker and the rest of the line, excluding -- newline. lineComment :: FParser () lineComment = do -- '--!' is an operator, so we need to parse a whole symbol here. pKeySym "--" noFail $ readWhileInline (const True) -- | Raises an error if we're not currently at the given column. assertAtBlockLeft :: Fatality fatal -> String -> IParser () assertAtBlockLeft fat msg = whenM (not <$> isAtBlockLeft) $ raise_ fat msg -- | Raises an error if psCol is not greater than psRefCol. assertInsideBlock :: Fatality fatal -> String -> IParser () assertInsideBlock fat msg = whenM (not <$> isInsideBlock) $ raise_ fat msg -- | Raises an error if we're not currently at EOF. assertEOF :: Fatality fatal -> IParser () assertEOF fat = gets psRest >>= \case [] -> return () _ -> raise_ fat "Unexpected stuff" -- | Returns whether the current position is _within_ the current block, for -- soft-wrapping content. This means that col > blkCol. isInsideBlock :: IParser Bool isInsideBlock = do PS { psCur = cur, psBlk = blk } <- get return $ posLine cur >= posLine blk && posCol cur > posCol blk -- | Returns whether the current position is at the left border of the block; -- this is for list content such as function definitions or let bindings. This -- means that col == blkCol. isAtBlockLeft :: IParser Bool isAtBlockLeft = do PS { psCur = cur, psBlk = blk } <- get return $ posLine cur >= posLine blk && posCol cur == posCol blk -- | Consumes characters while the predicate holds or until (and excluding) -- a newline, whichever comes first. readWhileInline :: (Char -> Bool) -> IParser () readWhileInline p = do (taken, rest) <- span (\c -> p c && c /= '\n') <$> gets psRest modify (\ps -> ps { psCur = let Pos line col = psCur ps in Pos line (col + length taken) , psRest = rest }) -- | Consumes exactly one newline at the current position. consumeNewline :: FParser () consumeNewline = gets psRest >>= \case '\n' : rest -> modify (\ps -> ps { psCur = Pos (posLine (psCur ps) + 1) 0 , psRest = rest }) _ -> faempty -- | Consumes exactly one character, unequal to newline, at the current position. satisfy :: (Char -> Bool) -> FParser Char satisfy p = do -- traceM "entering satisfy" r <- gets psRest -- traceM "got rest" r `seq` return () -- traceM "seqd rest" -- traceM ("rest is " ++ show r) case r of c : rest | c /= '\n', p c -> do modify (\ps -> let Pos line col = psCur ps in ps { psCur = Pos line (col + 1) , psRest = rest }) return c _ -> faempty -- | Consumes exactly this character at the current position. Must not be a -- newline. char :: Char -> FParser () char c = string [c] -- | Consumes exactly this string at the current position. The string must not -- contain a newline. string :: String -> FParser () string s | any (== '\n') s = error "Newline in 'string' argument" string s = do ps <- get let Pos line col = psCur ps if take (length s) (psRest ps) == s then put (ps { psCur = Pos line (col + length s) , psRest = drop (length s) (psRest ps) }) else faempty lookAhead :: FParser a -> FParser a lookAhead p = do ps <- get success <- (Just <$> p) <|>> pure Nothing put ps -- restore state, as if nothing happened case success of Nothing -> faempty Just x -> return x notFollowedBy :: FParser () -> FParser () notFollowedBy p = do ps <- get success <- (False <$ p) <|>> pure True put ps -- restore state, as if nothing happened when (not success) faempty -- | Only succeeds at EOF. eof :: FParser () eof = gets psRest >>= \case [] -> return () _ -> faempty whenM :: (Monad m, Monoid a) => m Bool -> m a -> m a whenM mb mx = mb >>= \b -> if b then mx else return mempty optional_ :: FAlternative f => f 'Fallible a -> f 'Infallible () optional_ a = (() <$ a) <|>> pure ()