diff options
author | Tom Smeding <tom@tomsmeding.com> | 2024-02-18 21:55:46 +0100 |
---|---|---|
committer | Tom Smeding <tom@tomsmeding.com> | 2024-02-18 21:55:46 +0100 |
commit | d744fa7ae5e638c1ca16f400a49633a705208ce4 (patch) | |
tree | f5fe6a6aa6ed8f7ff00a6c4024dc7aa6777a892c /src/Parser.hs | |
parent | 78ffb5ed5fbda230675310b37f798c500a13ef11 (diff) |
WIP big parser refactor with better typing
Diffstat (limited to 'src/Parser.hs')
-rw-r--r-- | src/Parser.hs | 639 |
1 files changed, 363 insertions, 276 deletions
diff --git a/src/Parser.hs b/src/Parser.hs index 517ec64..9b3bfdb 100644 --- a/src/Parser.hs +++ b/src/Parser.hs @@ -8,6 +8,10 @@ {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE TypeOperators #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE InstanceSigs #-} -- I don't want a warning for 'head' and 'tail' in this file. But I also don't -- want GHCs before 9.8 to complain that they don't know the x-partial warning. {-# OPTIONS_GHC -Wno-unrecognised-warning-flags -Wno-x-partial #-} @@ -18,7 +22,7 @@ module Parser ( These(..), ) where -import Control.Applicative +-- import Control.Applicative import Control.Monad import Control.Monad.Chronicle import Control.Monad.Reader @@ -31,6 +35,8 @@ import Data.These import Debug.Trace import AST +import Control.FAlternative +import Pretty data Pos = Pos @@ -55,25 +61,32 @@ data Context = Context } deriving (Show) -newtype Parser a = Parser +type family BacktrackPath fail r where + BacktrackPath 'Fallible r = r + BacktrackPath 'Infallible r = () + +newtype Parser fail a = Parser { runParser :: forall r. Context -> PS -> (PS -> [ErrMsg] -> a -> r) -- ^ OK: some diagnostics, but parsing succeeded -> ([ErrMsg] -> r) -- ^ Fatal: error that prevented parsing from proceeding - -> r -- ^ Backtrack: alternative was exhausted without success + -> BacktrackPath fail r -- ^ Backtrack: alternative was exhausted without success -> r } -instance Functor Parser where +type IParser = Parser 'Infallible +type FParser = Parser 'Fallible + +instance Functor (Parser fail) where fmap f (Parser g) = Parser (\ctx ps kok kfat kbt -> g ctx ps (\ps' errs x -> kok ps' errs (f x)) kfat kbt) -instance Applicative Parser where +instance Applicative (Parser fail) where pure x = Parser (\_ ps kok _ _ -> kok ps [] x) (<*>) = ap -instance Monad Parser where +instance Monad (Parser fail) where Parser g >>= f = Parser $ \ctx ps kok kfat kbt -> g ctx ps (\ps1 errs x -> @@ -85,21 +98,29 @@ instance Monad Parser where (\errs -> kfat errs) kbt -instance Alternative Parser where - empty = Parser (\_ _ _ _ kbt -> kbt) - Parser f <|> Parser g = Parser $ \ctx ps kok kfat kbt -> +instance FAlternative Parser where + faempty = Parser (\_ _ _ _ kbt -> kbt) + Parser f <|>> Parser g = Parser $ \ctx ps kok kfat kbt -> f ctx ps kok kfat (g ctx ps kok kfat kbt) -instance MonadState PS Parser where + noFail (Parser f) = Parser $ \ctx ps kok kfat _ -> f ctx ps kok kfat () + + toFallible :: forall fail a. KnownFallible fail => Parser fail a -> Parser 'Fallible a + toFallible (Parser f) = Parser $ \ctx ps kok kfat kbt -> + f ctx ps kok kfat (case knownFallible @fail of + SFallible -> kbt + SInfallible -> ()) + +instance MonadState PS (Parser fail) where state f = Parser $ \_ ps kok _ _ -> let (x, ps') = f ps in kok ps' [] x -instance MonadReader Context Parser where +instance MonadReader Context (Parser fail) where reader f = Parser $ \ctx ps kok _ _ -> kok ps [] (f ctx) local f (Parser g) = Parser (\ctx -> g (f ctx)) -instance MonadChronicle [ErrMsg] Parser where +instance KnownFallible fail => MonadChronicle [ErrMsg] (Parser fail) where dictate errs = Parser $ \_ ps kok _ _ -> kok ps errs () confess errs = Parser $ \_ _ _ kfat _ -> kfat errs memento (Parser f) = Parser $ \ctx ps kok _ kbt -> @@ -107,7 +128,7 @@ instance MonadChronicle [ErrMsg] Parser where (\ps' errs x -> kok ps' errs (Right x)) (\errs -> kok ps [] (Left errs)) kbt - absolve def (Parser f) = Parser $ \ctx ps kok _ _ -> + absolve def (toFallible -> Parser f) = Parser $ \ctx ps kok _ _ -> f ctx ps kok (\_ -> kok ps [] def) @@ -149,44 +170,65 @@ parse fp source = [] -> That res _ -> These errs res) (\errs -> This errs) - (This [ErrMsg fp [] (Pos 0 0) "Parse error, no grammar alternatives match your source"]) + () -- (This [ErrMsg fp [] (Pos 0 0) "Parse error, no grammar alternatives match your source"]) -pProgram :: Parser (Program ()) +pProgram :: IParser (Program ()) pProgram = do - defs <- many pTopDef + defs <- pTopDefs let (datadefs, fundefs) = partitionEithers defs skipWhiteComment assertEOF Error return (Program datadefs fundefs) -pTopDef :: Parser (Either DataDef (FunDef ())) +pTopDefs :: IParser [Either DataDef (FunDef ())] +pTopDefs = do + faoptional pTopDef >>= \case + Nothing -> do + skipWhiteComment + faoptional eof >>= \case + Nothing -> do + raise Error "Unparseable content" + readWhileInline (const True) + pTopDefs -- will skip the possible newline + Just () -> return [] + Just defs -> do + defs2 <- pTopDefs + return (defs ++ defs2) + +pTopDef :: FParser [Either DataDef (FunDef ())] pTopDef = do - skipWhiteComment - isAtBlockLeft >>= \case - True -> Left <$> pDataDef0 <|> Right <$> pFunDef0 + noFail skipWhiteComment + noFail isAtBlockLeft >>= \case + True -> map Left <$> pDataDef0 <|>> map Right <$> pFunDef0 False -> do - raise Error "Skipping unparseable content" - readWhileInline (const True) + noFail $ raise Error "Skipping unparseable content" + noFail $ readWhileInline (const True) pTopDef -pDataDef0 :: Parser DataDef +pDataDef0 :: FParser [DataDef] pDataDef0 = do pKeyword "data" - inlineWhite - name <- pIdentifier0 InBlock Uppercase - params <- many (inlineWhite >> pIdentifier0 InBlock Lowercase) - cons <- pDatacons "=" - return (DataDef name params cons) + noFail $ do + inlineWhite + faoptional (pIdentifier0 InBlock Uppercase WCAssume) >>= \case + Nothing -> do + raise Error "Expected data declaration after 'data'" + return [] + Just name -> do + params <- famany (inlineWhite >> pIdentifier0 InBlock Lowercase WCBacktrack) + cons <- pDatacons "=" + return [DataDef name params cons] where - pDatacons :: String -> Parser [(Name, [Type])] + pDatacons :: String -> IParser [(Name, [Type])] pDatacons leader = do inlineWhite - pKeySym leader - inlineWhite - name <- pIdentifier0 InBlock Uppercase - fields <- many pTypeAtom - rest <- pDatacons "|" <|> return [] - return ((name, fields) : rest) + facatch (return []) $ do + pKeySym leader + inlineWhite + name <- pIdentifier0 InBlock Uppercase WCAssume + fields <- noFail $ famany pTypeAtom + rest <- noFail $ pDatacons "|" + return ((name, fields) : rest) data FunEqContext = FirstLine @@ -194,108 +236,133 @@ data FunEqContext | Continue Name deriving (Show) -pFunDef0 :: Parser (FunDef ()) -pFunDef0 = do - mtypesig <- optional pStandaloneTypesig0 - let mname = fst <$> mtypesig - mtype = snd <$> mtypesig - clause@(FunEq name _ _) <- pFunEq (maybe FirstLine TypeSig mname) - clauses <- many (pFunEq (Continue name)) - return (FunDef name mtype (clause :| clauses)) - --- | Given the name of the type signature, if any. -pFunEq :: FunEqContext -> Parser (FunEq ()) +pFunDef0 :: FParser [FunDef ()] +pFunDef0 = + faasum' + [do (name, typ) <- pStandaloneTypesig0 + noFail $ do + faoptional (pFunEq (TypeSig name)) >>= \case + Nothing -> do + raise Error $ "Expected function equation for " ++ pretty name ++ + " after type signature" + return [] + Just [] -> return [FunDef name (Just typ) (FunEq name [] (Plain (ETup () [])) :| [])] + Just (clause1 : clauses1) -> do + clauses <- concat <$> famany (pFunEq (Continue name)) + return [FunDef name (Just typ) (clause1 :| clauses1 ++ clauses)] + ,do clause1@(FunEq name _ _) : clauses1 <- pFunEq FirstLine + noFail $ do + clauses <- concat <$> famany (pFunEq (Continue name)) + return [FunDef name Nothing (clause1 :| clauses1 ++ clauses)]] + +-- | Given the name from the type signature or a previous clause, if any. +pFunEq :: FunEqContext -> FParser [FunEq ()] pFunEq fectx = do - skipWhiteComment - pushLocatedContext "funeq" $ do - isAtBlockLeft >>= guard - -- assertAtBlockLeft Fatal "Expected function clause, found indented stuff" - - name <- pIdentifier0 AtLeft Lowercase - case fectx of - FirstLine -> return () - TypeSig checkName -> - when (name /= checkName) $ - raise Fatal "Name of function clause does not correspond with type signature" - Continue checkName -> - guard (name == checkName) - - pats <- many (pPattern 11) - rhs <- pRHS "=" - return (FunEq name pats rhs) + noFail skipWhiteComment + faguardM isAtBlockLeft + + pushLocatedContext "function equation" $ do + name <- pIdentifier0 AtLeft Lowercase WCAssume + + -- We want to do various checks with what came before, and there are + -- multiple branches where we decide to continue parsing this equation. To + -- avoid code duplication or an early exit monad, we use a boolean here. + success <- case fectx of + FirstLine -> return True + TypeSig checkName + | name == checkName -> return True + | otherwise -> noFail $ do + raise Error $ "Name of function clause does not correspond with type signature: " ++ + pretty checkName + return False + Continue checkName -> do + faguard (name == checkName) -- this can still backtrack out of pFunEq + return True + + noFail $ if success + then do + pats <- famany (pPattern 11) + rhs <- pRHS "=" + return [FunEq name pats rhs] + else return [] -- | Pass "=" for function definitions and "->" for case clauses. -pRHS :: String -> Parser (RHS ()) +pRHS :: String -> IParser (RHS ()) pRHS sepsym = do -- TODO: parse guards inlineWhite - pKeySym sepsym <|> raise Error ("Expected " ++ show sepsym) - expr <- pExpr <|> (raise Error "Expected expression" >> return (ETup () [])) + pKeySym sepsym <|>> raise Error ("Expected " ++ show sepsym) + expr <- pExpr <|>> (raise Error "Expected expression" >> return (ETup () [])) return (Plain expr) -pPattern :: Int -> Parser (Pattern ()) +pPattern :: Int -> FParser (Pattern ()) pPattern d = inlineWhite >> pPattern0 d -pPattern0 :: Int -> Parser (Pattern ()) +pPattern0 :: Int -> FParser (Pattern ()) pPattern0 d = do p0 <- pPatExprAtom0 (max 10 d) climbRight pPattern (pInfixOp Uppercase) (POp ()) d p0 Nothing -pPatExprAtom0 :: Int -> Parser (Pattern ()) +pExpr :: FParser (Expr ()) +pExpr = do + inlineWhite + -- basics: lit, list, var, con, tup + -- expression atom: application of basics + -- expression parser: op + -- around: let, case, if + pushLocatedContext "expression" $ do + faasum' [pELet0 + ,pECase0 + ,pEIf0 + ,pExprOpExpr0 0] + +pPatExprAtom0 :: Int -> FParser (Pattern ()) pPatExprAtom0 d = - asum [pPatWildcard0 - ,pPatVarOrAs0 - ,pPatCon0 - ,pPatList0 - ,pPatParens0] + faasum' [pPatWildcard0 + ,pPatVarOrAs0 + ,pPatCon0 + ,pPatList0 + ,pPatParens0] where pPatWildcard0 = pKeySym "_" >> return (PWildcard ()) pPatVarOrAs0 = do - var <- pIdentifier0 InBlock Lowercase - asum [do inlineWhite - pKeySym "@" - p <- pPattern 11 - return (PAs () var p) - ,return (PVar () var)] + var <- pIdentifier0 InBlock Lowercase WCBacktrack + facatch (return (PVar () var)) $ do + inlineWhite + pKeySym "@" + noFail $ do + p <- pPattern 11 <|>> (raise Error "Expected pattern after '@'" >> return (PWildcard ())) + return (PAs () var p) pPatCon0 = do - con <- pIdentifier0 InBlock Uppercase - if d > 10 + con <- pIdentifier0 InBlock Uppercase WCBacktrack + noFail $ if d > 10 then return (PCon () con []) - else do args <- many (pPattern 11) + else do args <- famany (pPattern 11) return (PCon () con args) pPatList0 = do char '[' -- special syntax, no need for pKeySym - ps <- pPattern 0 `sepBy` (inlineWhite >> char ',') - inlineWhite - char ']' - return (PList () ps) + noFail $ do + ps <- pPattern 0 `sepBy` (inlineWhite >> char ',') + inlineWhite + char ']' <|>> raise Error "Expected ']'" + return (PList () ps) pPatParens0 = do char '(' inlineWhite - asum [do char ')' - return (PTup () []) - ,do p <- pPattern0 0 - inlineWhite - asum [do char ')' - return p - ,do char ',' - ps <- pPattern 0 `sepBy1` (inlineWhite >> char ',') - return (PTup () (p : ps))]] - -pExpr :: Parser (Expr ()) -pExpr = do - inlineWhite - -- basics: lit, list, var, con, tup - -- expression atom: application of basics - -- expression parser: op - -- around: let, case, if - pushLocatedContext "expression" $ do - asum [pELet0 - ,pECase0 - ,pEIf0 - ,pExprOpExpr0 0] - -pELet0 :: Parser (Expr ()) + faasum' + [do char ')' + return (PTup () []) + ,do p <- pPattern0 0 + inlineWhite + faasum' + [do char ')' + return p + ,do char ',' + ps <- pPattern 0 `sepBy1` (inlineWhite >> char ',') + return (PTup () (p : ps))]] + +{- +pELet0 :: IParser (Expr ()) pELet0 = do pKeyword "let" inlineWhite @@ -322,7 +389,7 @@ pELet0 = do ,do raise Error "Expected 'in' after 'let'" return (ELet () defs (ETup () []))] -pECase0 :: Parser (Expr ()) +pECase0 :: IParser (Expr ()) pECase0 = do pKeyword "case" e <- pExpr @@ -341,7 +408,7 @@ pECase0 = do clauses <- many pClause return (ECase () e clauses) -pEIf0 :: Parser (Expr ()) +pEIf0 :: IParser (Expr ()) pEIf0 = do pKeyword "if" e1 <- pExpr @@ -353,36 +420,38 @@ pEIf0 = do e3 <- pExpr return (EIf () e1 e2 e3) -pExprOpExpr :: Int -> Parser (Expr ()) +pExprOpExpr :: Int -> IParser (Expr ()) pExprOpExpr d = inlineWhite >> pExprOpExpr0 d -pExprOpExpr0 :: Int -> Parser (Expr ()) +pExprOpExpr0 :: Int -> IParser (Expr ()) pExprOpExpr0 d = do e0 <- pEApp0 climbRight pExprOpExpr (snd <$> pInfixOp Don'tCare) (EOp ()) d e0 Nothing +-} climbRight - :: (Int -> Parser e) -- ^ Parse an expression at the given precedence level - -> Parser ParsedOperator -- ^ Parse an operator + :: (Int -> FParser e) -- ^ Parse an expression at the given precedence level + -> FParser ParsedOperator -- ^ Parse an operator -> (e -> Operator -> e -> e) -- ^ Build an operator application experssion -> Int -- ^ Ambient precedence level: minimum precedence of top-level operator in result -> e -- ^ lhs: Initial non-operator expression already parsed -> Maybe ParsedOperator -- ^ Top-level operator in lhs (initialise with Nothing) - -> Parser e + -> FParser e climbRight pExpr' pOper makeOp d lhs mlhsop = - asum [do paop@(PaOp op d2 a2) <- pOper - guard (d2 >= d) -- respect global minimum precedence - case mlhsop of -- check operator compatibility - Just (PaOp _ d1 a1) -> - guard (d1 > d2 || (d1 == d2 && a1 == a2 && a1 /= AssocNone)) - Nothing -> - return () - let oprhsd = case a2 of AssocRight -> d2 ; _ -> d2 + 1 - rhs <- pExpr' oprhsd - climbRight pExpr' pOper makeOp d (makeOp lhs op rhs) (Just paop) - ,return lhs] - -pEApp0 :: Parser (Expr ()) + facatch (return lhs) $ do + paop@(PaOp op d2 a2) <- pOper + faguard (d2 >= d) -- respect global minimum precedence + case mlhsop of -- check operator compatibility + Just (PaOp _ d1 a1) -> + faguard (d1 > d2 || (d1 == d2 && a1 == a2 && a1 /= AssocNone)) + Nothing -> + return () + let oprhsd = case a2 of AssocRight -> d2 ; _ -> d2 + 1 + rhs <- pExpr' oprhsd + climbRight pExpr' pOper makeOp d (makeOp lhs op rhs) (Just paop) + +{- +pEApp0 :: IParser (Expr ()) pEApp0 = do e1 <- pEAtom0 es <- many (inlineWhite >> pEAtom0) @@ -390,10 +459,10 @@ pEApp0 = do [] -> return e1 _ -> return (EApp () e1 es) -pEAtom0 :: Parser (Expr ()) +pEAtom0 :: IParser (Expr ()) pEAtom0 = (ELit () <$> pLiteral0) <|> pEList0 <|> pEVar0 <|> pECon0 <|> pEParens0 -pLiteral0 :: Parser Literal +pLiteral0 :: IParser Literal pLiteral0 = asum [do as <- some (satisfy isDigit) let a = read as :: Integer @@ -417,7 +486,7 @@ pLiteral0 = asum char '"' return (LString s)] -pStringChar :: Parser Char +pStringChar :: IParser Char pStringChar = asum [do char '\\' char 'x' @@ -443,7 +512,7 @@ pStringChar = asum return '?' ,do satisfy (\c -> c `notElem` "\n\r\\\'")] -pEList0 :: Parser (Expr ()) +pEList0 :: IParser (Expr ()) pEList0 = do char '[' -- special syntax, no need for pKeySym es <- sepBy pExpr (inlineWhite >> char ',') @@ -451,19 +520,20 @@ pEList0 = do char ']' return (EList () es) -pEVar0 :: Parser (Expr ()) +pEVar0 :: IParser (Expr ()) pEVar0 = EVar () <$> pIdentifier0 InBlock Lowercase -pECon0 :: Parser (Expr ()) +pECon0 :: IParser (Expr ()) pECon0 = ECon () <$> pIdentifier0 InBlock Uppercase -pEParens0 :: Parser (Expr ()) +pEParens0 :: IParser (Expr ()) pEParens0 = do char '(' e <- pExpr inlineWhite char ')' return e +-} data Associativity = AssocLeft | AssocRight | AssocNone deriving (Show, Eq) @@ -471,99 +541,99 @@ data Associativity = AssocLeft | AssocRight | AssocNone data ParsedOperator = PaOp Operator Int Associativity deriving (Show) -pInfixOp :: Case care -> Parser (WithCaseOutput care ParsedOperator) +pInfixOp :: Case care -> FParser (WithCaseOutput care ParsedOperator) pInfixOp cs = do inlineWhite case cs of Lowercase -> pLowerInfixOp0 Uppercase -> pUpperInfixOp0 - Don'tCare -> asum [(Lowercase,) <$> pLowerInfixOp0 - ,(Uppercase,) <$> pUpperInfixOp0] + Don'tCare -> faasum' [(Lowercase,) <$> pLowerInfixOp0 + ,(Uppercase,) <$> pUpperInfixOp0] -pLowerInfixOp0 :: Parser ParsedOperator +pLowerInfixOp0 :: FParser ParsedOperator pLowerInfixOp0 = - asum [PaOp OEqu 4 AssocNone <$ pKeySym "==" - ,PaOp OAdd 6 AssocLeft <$ pKeySym "+" - ,PaOp OSub 6 AssocLeft <$ pKeySym "-" - ,PaOp OMul 7 AssocLeft <$ pKeySym "*" - ,PaOp ODiv 7 AssocLeft <$ pKeySym "/" - ,PaOp OMod 7 AssocLeft <$ pKeySym "%" - ,PaOp OPow 8 AssocRight <$ pKeySym "^" - ] - -pUpperInfixOp0 :: Parser ParsedOperator + faasum' [PaOp OEqu 4 AssocNone <$ pKeySym "==" + ,PaOp OAdd 6 AssocLeft <$ pKeySym "+" + ,PaOp OSub 6 AssocLeft <$ pKeySym "-" + ,PaOp OMul 7 AssocLeft <$ pKeySym "*" + ,PaOp ODiv 7 AssocLeft <$ pKeySym "/" + ,PaOp OMod 7 AssocLeft <$ pKeySym "%" + ,PaOp OPow 8 AssocRight <$ pKeySym "^" + ] + +pUpperInfixOp0 :: FParser ParsedOperator pUpperInfixOp0 = - asum [PaOp OCons 5 AssocRight <$ pKeySym ":"] + faasum' [PaOp OCons 5 AssocRight <$ pKeySym ":"] -pStandaloneTypesig0 :: Parser (Name, Type) +pStandaloneTypesig0 :: FParser (Name, Type) pStandaloneTypesig0 = do - isAtBlockLeft >>= guard - -- assertAtBlockLeft Fatal "Expected definition, found indented stuff" - name@(Name namestr) <- pIdentifier0 AtLeft Lowercase + name <- pIdentifier0 AtLeft Lowercase WCBacktrack inlineWhite pKeySym "::" - pushContext ("type signature for '" ++ namestr ++ "'") $ do - ty <- pType - return (name, ty) + noFail $ pushContext ("type signature for " ++ pretty name) $ do + ty <- pType <|>> (raise Error "Expected type" >> return (TTup [])) + return (name, ty) -pType :: Parser Type +pType :: FParser Type pType = do ty1 <- pTypeApp - asum [do inlineWhite - pKeySym "->" - ty2 <- pType - return (TFun ty1 ty2) - ,return ty1] - -pTypeApp :: Parser Type -pTypeApp = many pTypeAtom >>= \case - [] -> raise Fatal "Expected type" - [t] -> return t - t:ts -> return (TApp t ts) - -pTypeAtom :: Parser Type -pTypeAtom = pTypeParens <|> pTypeList <|> pTypeName + facatch (return ty1) $ do + inlineWhite + pKeySym "->" + noFail $ do + ty2 <- pType <|>> (raise Error "Expected type" >> return (TTup [])) + return (TFun ty1 ty2) + +pTypeApp :: FParser Type +pTypeApp = fasome pTypeAtom >>= \case + t :| [] -> return t + t :| ts -> return (TApp t ts) + +pTypeAtom :: FParser Type +pTypeAtom = faasum' [pTypeParens, pTypeList, pTypeName] where pTypeParens = do inlineWhite char '(' - asum [do inlineWhite - char ')' - return (TTup []) - ,do ty1 <- pType - ty2s <- many $ do - inlineWhite - char ',' - pType - inlineWhite - char ')' - case ty2s of - [] -> return ty1 - _ -> return (TTup (ty1 : ty2s))] + faasum' + [do inlineWhite + char ')' + return (TTup []) + ,do ty1 <- pType + noFail $ do + ty2s <- famany $ do + inlineWhite + char ',' + noFail $ pType <|>> (raise Error "Expected type" >> return (TTup [])) + inlineWhite + char ')' <|>> raise Error "Expected closing ')'" + case ty2s of + [] -> return ty1 + _ -> return (TTup (ty1 : ty2s))] pTypeList = do inlineWhite char '[' ty <- pType - char ']' + noFail $ char ']' <|>> raise Error "Expecte closing ']'" return (TList ty) pTypeName = do inlineWhite - (cs, name) <- pIdentifier0 InBlock Don'tCare + (cs, name) <- pIdentifier0 InBlock Don'tCare () case cs of Uppercase -> return (TCon name) Lowercase -> return (TVar name) -- | Parse the given name-like keyword, ensuring that it is the entire word. -pKeyword :: String -> Parser () +pKeyword :: String -> FParser () pKeyword s = do string s -- traceM $ "pKeyword: parsed " ++ show s notFollowedBy (() <$ satisfy isNameContChar) -- | Parse the given symbol-like keyword, ensuring that it is the entire symbol. -pKeySym :: String -> Parser () +pKeySym :: String -> FParser () pKeySym s = do string s notFollowedBy (() <$ satisfy isSymbolChar) @@ -578,14 +648,25 @@ type family WithCaseOutput care a where WithCaseOutput 'True a = a WithCaseOutput 'False a = (Case 'True, a) +type family If c a b where + If 'True a b = a + If 'False a b = b + +data WrongCaseBacktrack + = WCBacktrack -- ^ If a word was found but it had the wrong case, fail and backtrack. + | WCAssume -- ^ Be certain that this case is expected here, and assume incorrect + -- case is a typo. + deriving (Show) + -- | Consumes an identifier (word or parenthesised operator) at the current -- position. The `var` production in Haskell2010. -- var -> varid | "(" varsym ")" -pIdentifier0 :: BlockPos -> Case care -> Parser (WithCaseOutput care Name) -pIdentifier0 bpos cs = pAlphaName0 bpos cs <|> pParens0 (pSymbol0 bpos cs) +pIdentifier0 :: BlockPos -> Case care -> If care WrongCaseBacktrack () -> FParser (WithCaseOutput care Name) +pIdentifier0 bpos cs wrongcase = + pAlphaName0 bpos cs wrongcase <|>> pParens0 (pSymbol0 bpos cs) where -- | Parser between parens, with the opening paren at the current position. - pParens0 :: Parser a -> Parser a + pParens0 :: FParser a -> FParser a pParens0 p = do char '(' inlineWhite @@ -598,8 +679,8 @@ pIdentifier0 bpos cs = pAlphaName0 bpos cs <|> pParens0 (pSymbol0 bpos cs) -- `varid` production in Haskell2010 for 'Lowercase', `conid' for 'Uppercase'. -- -- > varid -> (small {small | large | digit | "'"}) without reservedid -pAlphaName0 :: BlockPos -> Case care -> Parser (WithCaseOutput care Name) -pAlphaName0 bpos cs = do +pAlphaName0 :: BlockPos -> Case care -> If care WrongCaseBacktrack () -> FParser (WithCaseOutput care Name) +pAlphaName0 bpos cs wrongcase = do startPos <- gets psCur (_, s) <- readToken bpos @@ -609,20 +690,24 @@ pAlphaName0 bpos cs = do (False, Just c) | isNameContChar c -> Just (Right False) (False, _ ) -> Just (Left ())) True - guard (s `notElem` ["case", "class", "data", "default", "deriving", "do", "else" - ,"foreign", "if", "import", "in", "infix", "infixl" - ,"infixr", "instance", "let", "module", "newtype", "of" - ,"then", "type", "where", "_"]) + faguard (s `notElem` ["case", "class", "data", "default", "deriving", "do", "else" + ,"foreign", "if", "import", "in", "infix", "infixl" + ,"infixr", "instance", "let", "module", "newtype", "of" + ,"then", "type", "where", "_"]) (name, adjoin) <- case cs of Uppercase - | isLower (head s) -> do - raiseAt startPos Error "Unexpected uppercase word at this position, assuming typo" - return (toUpper (head s) : tail s, id) + | isLower (head s) -> case wrongcase of + WCBacktrack -> faempty + WCAssume -> noFail $ do + raiseAt startPos Error "Unexpected uppercase word at this position, assuming typo" + return (toUpper (head s) : tail s, id) | otherwise -> return (s, id) Lowercase - | isUpper (head s) -> do - raiseAt startPos Error "Unexpected lowercase word at this position, assuming typo" - return (toLower (head s) : tail s, id) + | isUpper (head s) -> case wrongcase of + WCBacktrack -> faempty + WCAssume -> noFail $ do + raiseAt startPos Error "Unexpected lowercase word at this position, assuming typo" + return (toLower (head s) : tail s, id) | otherwise -> return (s, id) Don'tCare | isLower (head s) -> return (s, (Lowercase,)) @@ -646,20 +731,20 @@ isNameContChar c = isNameHeadChar c || isDigit c || c == '\'' -- > dashes -> "--" {"-"} -- > special -> ```(),;[]`{}``` -- > reservedop -> ".." | ":" | "::" | "=" | "\" | "|" | "<-" | "->" | "@" | "~" | "=>" -pSymbol0 :: BlockPos -> Case care -> Parser (WithCaseOutput care Name) +pSymbol0 :: BlockPos -> Case care -> FParser (WithCaseOutput care Name) pSymbol0 bpos cs = do case bpos of - AtLeft -> assertAtBlockLeft Fatal "Expected symbol, but found indented expression" - InBlock -> assertInsideBlock Fatal "Expected symbol, but found end of indented expression" + AtLeft -> noFail $ assertAtBlockLeft Fatal "Expected symbol, but found indented expression" + InBlock -> noFail $ assertInsideBlock Fatal "Expected symbol, but found end of indented expression" (c1, adjoin) <- case cs of Lowercase -> (,id) <$> satisfy (\c -> isSymbolChar c && c /= ':') Uppercase -> (,id) <$> satisfy (== ':') Don'tCare -> do c1 <- satisfy (\c -> isSymbolChar c) return (c1, if c1 == ':' then (Uppercase,) else (Lowercase,)) - crest <- many (satisfy isSymbolChar) + crest <- noFail $ famany (satisfy isSymbolChar) let name = c1 : crest - guard (name `notElem` ["..", ":", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"]) - guard (take 2 name /= "--") + faguard (name `notElem` ["..", ":", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"]) + faguard (take 2 name /= "--") return (adjoin (Name name)) isSymbolChar :: Char -> Bool @@ -670,17 +755,17 @@ isSymbolChar c = (isAscSymbol || isUniSymbol) && not isSpecialExt isUniSymbol = ord c > 127 && (isSymbol c || isPunctuation c) -sepBy1 :: Parser a -> Parser sep -> Parser [a] +sepBy1 :: FParser a -> FParser sep -> FParser [a] sepBy1 p psep = do x1 <- p - (psep >> (x1 :) <$> sepBy1 p psep) <|> return [x1] + (psep >> (x1 :) <$> sepBy1 p psep) <|>> pure [x1] -sepBy :: Parser a -> Parser sep -> Parser [a] -sepBy p psep = sepBy1 p psep <|> return [] +sepBy :: FParser a -> FParser sep -> IParser [a] +sepBy p psep = sepBy1 p psep <|>> pure [] -- | Start a new layout block at the current position. The old layout block is -- restored after completion of this subparser. -startLayoutBlock :: Parser a -> Parser a +startLayoutBlock :: IParser a -> IParser a startLayoutBlock p = do ps0 <- get put (ps0 { psBlk = psCur ps0 }) @@ -697,15 +782,15 @@ type family FatalCtx fatal a where FatalCtx 'False a = a ~ () FatalCtx 'True a = () -raise' :: Fatality fatal -> String -> Parser () -raise' Error = raise Error -raise' Fatal = raise Fatal +raise_ :: Fatality fatal -> String -> IParser () +raise_ Error = raise Error +raise_ Fatal = raise Fatal -raise :: FatalCtx fatal a => Fatality fatal -> String -> Parser a +raise :: FatalCtx fatal a => Fatality fatal -> String -> IParser a raise fat msg = gets psCur >>= \pos -> raiseAt pos fat msg -- | Raise an error with the given fatality and description. -raiseAt :: FatalCtx fatal a => Pos -> Fatality fatal -> String -> Parser a +raiseAt :: FatalCtx fatal a => Pos -> Fatality fatal -> String -> IParser a raiseAt pos fat msg = do Context { ctxFile = fp , ctxStack = stk } <- ask let err = ErrMsg fp stk pos msg @@ -713,21 +798,21 @@ raiseAt pos fat msg = do Error -> dictate (pure err) Fatal -> confess (pure err) -describeLocation :: Parser String +describeLocation :: IParser String describeLocation = do fp <- asks ctxFile cur <- gets psCur return $ fp ++ ":" ++ show (posLine cur + 1) ++ ":" ++ show (posCol cur + 1) -- | Registers a scope description on the stack for error reporting. -pushContext :: String -> Parser a -> Parser a +pushContext :: String -> Parser fail a -> Parser fail a pushContext descr = local (\c -> c { ctxStack = descr : ctxStack c }) -- | Registers a scope description on the stack for error reporting, suffixed -- with the current parsing location. -pushLocatedContext :: String -> Parser a -> Parser a +pushLocatedContext :: String -> Parser fail a -> Parser fail a pushLocatedContext descr p = do - loc <- describeLocation + loc <- noFail describeLocation pushContext (descr ++ " at " ++ loc) p data BlockPos = AtLeft | InBlock @@ -738,20 +823,20 @@ data BlockPos = AtLeft | InBlock -- by a pure stateful parser. If encountering a newline or EOF, the parser is -- run on this character ('Nothing' for EOF); if this produces a result, the -- result is returned; otherwise, the parser fails. The newline is not consumed. -readToken :: BlockPos -> (s -> Maybe Char -> Maybe (Either r s)) -> s -> Parser (r, String) +readToken :: BlockPos -> (s -> Maybe Char -> Maybe (Either r s)) -> s -> FParser (r, String) readToken bpos f s0 = do case bpos of - AtLeft -> assertAtBlockLeft Fatal "Expected token, but found indented expression" - InBlock -> assertInsideBlock Fatal "Expected token, but found end of indented expression" - let loop :: (s -> Maybe Char -> Maybe (Either r s)) -> s -> Parser (r, String) + AtLeft -> noFail $ assertAtBlockLeft Fatal "Expected token, but found indented expression" + InBlock -> noFail $ assertInsideBlock Fatal "Expected token, but found end of indented expression" + let loop :: (s -> Maybe Char -> Maybe (Either r s)) -> s -> FParser (r, String) loop f' st = do ps <- get case psRest ps of [] | Just (Left res) <- f' st Nothing -> return (res, "") - | otherwise -> empty + | otherwise -> faempty '\n' : _ | Just (Left res) <- f' st (Just '\n') -> return (res, "") c : cs -> case f' st (Just c) of - Nothing -> empty + Nothing -> faempty Just (Left res) -> return (res, "") Just (Right st') -> do let Pos line col = psCur ps @@ -760,63 +845,65 @@ readToken bpos f s0 = do loop f s0 -- | Consumes all whitespace and comments (including newlines), but only if --- this then leaves the parser inside the current block. If not, this fails. -inlineWhite :: Parser () +-- this then leaves the parser inside the current block. If not, succeeds and +-- consumes nothing. +inlineWhite :: Parser fail () inlineWhite = do - skipWhiteComment - whenM (not <$> isInsideBlock) empty + ps <- get + noFail skipWhiteComment + whenM (noFail $ not <$> isInsideBlock) $ put ps -- | Consumes all whitespace and comments (including newlines). Note: this may -- end outside the current block. -skipWhiteComment :: Parser () +skipWhiteComment :: IParser () skipWhiteComment = do inlineSpaces - _ <- many (blockComment >> inlineSpaces) + _ <- famany (blockComment >> noFail inlineSpaces) optional_ lineComment - optional_ (consumeNewline >> skipWhiteComment) + optional_ (consumeNewline >> noFail skipWhiteComment) where -- | Consumes some inline whitespace. Stops before newlines. - inlineSpaces :: Parser () + inlineSpaces :: IParser () inlineSpaces = readWhileInline isSpace -- | Consumes an delimited comment including both end markers. Note: this may -- end outside the current block. -blockComment :: Parser () +blockComment :: FParser () blockComment = do string "{-" -- no need for pKeySym here let loop = do - readWhileInline (`notElem` "{-") -- "-}" also starts with '-' - asum [string "-}" - ,eof >> raise Error "Unfinished {- -} comment at end of file" - ,blockComment >> loop - ,consumeNewline >> loop] - loop + faasum [string "-}" + ,eof >> noFail (raise Error "Unfinished {- -} comment at end of file") + ,blockComment >> noFail loop + ,consumeNewline >> noFail loop] + (readWhileInline (`notElem` "{-")) -- "-}" also starts with '-' + noFail loop -- | Consumes a line comment marker and the rest of the line, excluding -- newline. -lineComment :: Parser () +lineComment :: FParser () lineComment = do -- '--!' is an operator, so we need to parse a whole symbol here. pKeySym "--" - readWhileInline (const True) + noFail $ readWhileInline (const True) -- | Raises an error if we're not currently at the given column. -assertAtBlockLeft :: Fatality fatal -> String -> Parser () -assertAtBlockLeft fat msg = whenM (not <$> isAtBlockLeft) $ raise' fat msg +assertAtBlockLeft :: Fatality fatal -> String -> IParser () +assertAtBlockLeft fat msg = whenM (not <$> isAtBlockLeft) $ raise_ fat msg -- | Raises an error if psCol is not greater than psRefCol. -assertInsideBlock :: Fatality fatal -> String -> Parser () -assertInsideBlock fat msg = whenM (not <$> isInsideBlock) $ raise' fat msg +assertInsideBlock :: Fatality fatal -> String -> IParser () +assertInsideBlock fat msg = whenM (not <$> isInsideBlock) $ raise_ fat msg -- | Raises an error if we're not currently at EOF. -assertEOF :: Fatality fatal -> Parser () +assertEOF :: Fatality fatal -> IParser () assertEOF fat = gets psRest >>= \case [] -> return () - _ -> raise' fat "Unexpected stuff" + _ -> raise_ fat "Unexpected stuff" -- | Returns whether the current position is _within_ the current block, for -- soft-wrapping content. This means that col > blkCol. -isInsideBlock :: Parser Bool +isInsideBlock :: IParser Bool isInsideBlock = do PS { psCur = cur, psBlk = blk } <- get return $ posLine cur >= posLine blk && posCol cur > posCol blk @@ -824,14 +911,14 @@ isInsideBlock = do -- | Returns whether the current position is at the left border of the block; -- this is for list content such as function definitions or let bindings. This -- means that col == blkCol. -isAtBlockLeft :: Parser Bool +isAtBlockLeft :: IParser Bool isAtBlockLeft = do PS { psCur = cur, psBlk = blk } <- get return $ posLine cur >= posLine blk && posCol cur == posCol blk -- | Consumes characters while the predicate holds or until (and excluding) -- a newline, whichever comes first. -readWhileInline :: (Char -> Bool) -> Parser () +readWhileInline :: (Char -> Bool) -> IParser () readWhileInline p = do (taken, rest) <- span (\c -> p c && c /= '\n') <$> gets psRest modify (\ps -> ps { psCur = let Pos line col = psCur ps @@ -839,14 +926,14 @@ readWhileInline p = do , psRest = rest }) -- | Consumes exactly one newline at the current position. -consumeNewline :: Parser () +consumeNewline :: FParser () consumeNewline = gets psRest >>= \case '\n' : rest -> modify (\ps -> ps { psCur = Pos (posLine (psCur ps) + 1) 0 , psRest = rest }) - _ -> empty + _ -> faempty -- | Consumes exactly one character, unequal to newline, at the current position. -satisfy :: (Char -> Bool) -> Parser Char +satisfy :: (Char -> Bool) -> FParser Char satisfy p = do -- traceM "entering satisfy" r <- gets psRest @@ -860,16 +947,16 @@ satisfy p = do in ps { psCur = Pos line (col + 1) , psRest = rest }) return c - _ -> empty + _ -> faempty -- | Consumes exactly this character at the current position. Must not be a -- newline. -char :: Char -> Parser () +char :: Char -> FParser () char c = string [c] -- | Consumes exactly this string at the current position. The string must not -- contain a newline. -string :: String -> Parser () +string :: String -> FParser () string s | any (== '\n') s = error "Newline in 'string' argument" string s = do ps <- get @@ -877,31 +964,31 @@ string s = do if take (length s) (psRest ps) == s then put (ps { psCur = Pos line (col + length s) , psRest = drop (length s) (psRest ps) }) - else empty + else faempty -lookAhead :: Parser a -> Parser a +lookAhead :: FParser a -> FParser a lookAhead p = do ps <- get - success <- absolve Nothing (Just <$> p) + success <- (Just <$> p) <|>> pure Nothing put ps -- restore state, as if nothing happened case success of - Nothing -> empty + Nothing -> faempty Just x -> return x -notFollowedBy :: Parser () -> Parser () +notFollowedBy :: FParser () -> FParser () notFollowedBy p = do ps <- get - success <- absolve True (False <$ p) + success <- (False <$ p) <|>> pure True put ps -- restore state, as if nothing happened - when (not success) empty + when (not success) faempty -- | Only succeeds at EOF. -eof :: Parser () +eof :: FParser () eof = gets psRest >>= \case [] -> return () - _ -> empty + _ -> faempty whenM :: (Monad m, Monoid a) => m Bool -> m a -> m a whenM mb mx = mb >>= \b -> if b then mx else return mempty -optional_ :: Alternative f => f a -> f () -optional_ a = (() <$ a) <|> pure () +optional_ :: FAlternative f => f 'Fallible a -> f 'Infallible () +optional_ a = (() <$ a) <|>> pure () |