diff options
author | Tom Smeding <tom@tomsmeding.com> | 2024-02-25 21:01:13 +0100 |
---|---|---|
committer | Tom Smeding <tom@tomsmeding.com> | 2024-02-25 21:01:13 +0100 |
commit | f72bf16e2edc8d654e661cd59f820409219e5f27 (patch) | |
tree | 2986fcd5421c474f50b76214eccea93cb74850e0 /src/Parser.hs | |
parent | b0c81ee7def783037b514af9fdeab06f7e3bdb13 (diff) |
Add HSVIS module prefix
Diffstat (limited to 'src/Parser.hs')
-rw-r--r-- | src/Parser.hs | 1016 |
1 files changed, 0 insertions, 1016 deletions
diff --git a/src/Parser.hs b/src/Parser.hs deleted file mode 100644 index 76cc10e..0000000 --- a/src/Parser.hs +++ /dev/null @@ -1,1016 +0,0 @@ -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE MultiParamTypeClasses #-} -{-# LANGUAGE TupleSections #-} -{-# LANGUAGE DataKinds #-} -{-# LANGUAGE GADTs #-} -{-# LANGUAGE StandaloneDeriving #-} -{-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE RankNTypes #-} -{-# LANGUAGE TypeOperators #-} -{-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TypeApplications #-} -{-# LANGUAGE ViewPatterns #-} -{-# LANGUAGE InstanceSigs #-} --- I don't want a warning for 'head' and 'tail' in this file. But I also don't --- want GHCs before 9.8 to complain that they don't know the x-partial warning. -{-# OPTIONS_GHC -Wno-unrecognised-warning-flags -Wno-x-partial #-} -module Parser ( - parse, - printErrMsg, - -- * Re-exports - These(..), -) where - --- import Control.Applicative -import Control.Monad -import Control.Monad.Chronicle -import Control.Monad.Reader -import Control.Monad.State.Lazy -import Data.Char -import Data.Either (partitionEithers) -import Data.Foldable -import Data.List (intercalate) -import Data.List.NonEmpty (NonEmpty(..)) -import Data.These - --- import Debug.Trace - -import AST -import Control.FAlternative -import Pretty - - -data Pos = Pos - { posLine :: Int -- ^ zero-based - , posCol :: Int -- ^ zero-based - } - deriving (Show) - --- Positions are zero-based in both dimensions. --- See 'isInsideBlock' and 'isAtBlockLeft' for the two relevant "inside the --- block" conditions. -data PS = PS - { psBlk :: Pos -- ^ Start of current layout block - , psCur :: Pos -- ^ Current parsing position - , psRest :: String -- ^ Rest of the input - } - deriving (Show) - -data Context = Context - { ctxFile :: FilePath - , ctxLines :: [String] -- ^ The file contents, split up in lines - , ctxStack :: [String] -- ^ Stack of syntax scopes, for error reporting - } - deriving (Show) - -type family BacktrackPath fail r where - BacktrackPath 'Fallible r = r - BacktrackPath 'Infallible r = () - -newtype Parser fail a = Parser - { runParser - :: forall r. - Context - -> PS - -> (PS -> [ErrMsg] -> a -> r) -- ^ OK: some diagnostics, but parsing succeeded - -> ([ErrMsg] -> r) -- ^ Fatal: error that prevented parsing from proceeding - -> BacktrackPath fail r -- ^ Backtrack: alternative was exhausted without success - -> r } - -type IParser = Parser 'Infallible -type FParser = Parser 'Fallible - -instance Functor (Parser fail) where - fmap f (Parser g) = Parser (\ctx ps kok kfat kbt -> - g ctx ps (\ps' errs x -> kok ps' errs (f x)) kfat kbt) - -instance Applicative (Parser fail) where - pure x = Parser (\_ ps kok _ _ -> kok ps [] x) - (<*>) = ap - -instance Monad (Parser fail) where - Parser g >>= f = Parser $ \ctx ps kok kfat kbt -> - g ctx ps - (\ps1 errs x -> - x `seq` - runParser (f x) ctx ps1 - (\ps2 errs' y -> kok ps2 (errs <> errs') y) - (\errs' -> kfat (errs <> errs')) - kbt) - (\errs -> kfat errs) - kbt - -instance FAlternative Parser where - faempty = Parser (\_ _ _ _ kbt -> kbt) - Parser f <|>> Parser g = Parser $ \ctx ps kok kfat kbt -> - f ctx ps kok kfat (g ctx ps kok kfat kbt) - - noFail (Parser f) = Parser $ \ctx ps kok kfat _ -> f ctx ps kok kfat () - - toFallible :: forall fail a. KnownFallible fail => Parser fail a -> Parser 'Fallible a - toFallible (Parser f) = Parser $ \ctx ps kok kfat kbt -> - f ctx ps kok kfat (case knownFallible @fail of - SFallible -> kbt - SInfallible -> ()) - -instance MonadState PS (Parser fail) where - state f = Parser $ \_ ps kok _ _ -> - let (x, ps') = f ps - in kok ps' [] x - -instance MonadReader Context (Parser fail) where - reader f = Parser $ \ctx ps kok _ _ -> kok ps [] (f ctx) - local f (Parser g) = Parser (\ctx -> g (f ctx)) - -instance KnownFallible fail => MonadChronicle [ErrMsg] (Parser fail) where - dictate errs = Parser $ \_ ps kok _ _ -> kok ps errs () - confess errs = Parser $ \_ _ _ kfat _ -> kfat errs - memento (Parser f) = Parser $ \ctx ps kok _ kbt -> - f ctx ps - (\ps' errs x -> kok ps' errs (Right x)) - (\errs -> kok ps [] (Left errs)) - kbt - absolve def (toFallible -> Parser f) = Parser $ \ctx ps kok _ _ -> - f ctx ps - kok - (\_ -> kok ps [] def) - (kok ps [] def) - condemn (Parser f) = Parser $ \ctx ps kok kfat kbt -> - f ctx ps - (\ps' errs x -> case errs of - [] -> kok ps' [] x - _ -> kfat errs) - kfat - kbt - retcon g (Parser f) = Parser $ \ctx ps kok kfat kbt -> - f ctx ps - (\ps' errs x -> kok ps' (g errs) x) - (\errs -> kfat (g errs)) - kbt - chronicle th = case th of - This errs -> Parser (\_ _ _ kfat _ -> kfat errs) - That res -> Parser (\_ ps kok _ _ -> kok ps [] res) - These errs res -> Parser (\_ ps kok _ _ -> kok ps errs res) - --- Positions are zero-based in both dimensions -data ErrMsg = ErrMsg - { errFile :: FilePath - , errStk :: [String] - , errPos :: Pos - , errMsg :: String - , errSourceLine :: String } - deriving (Show) - -printErrMsg :: ErrMsg -> String -printErrMsg (ErrMsg fp stk (Pos y x) s srcline) = - let linenum = show (y + 1) - in intercalate "\n" $ - map (\descr -> "In " ++ descr ++ ":") (reverse stk) - ++ [fp ++ ":" ++ show (y + 1) ++ ":" ++ show (x + 1) ++ ": " ++ s - ,map (\_ -> ' ') linenum ++ " |" - ,linenum ++ " | " ++ srcline - ,map (\_ -> ' ') linenum ++ " | " ++ replicate x ' ' ++ "^"] - -parse :: FilePath -> String -> These [ErrMsg] (Program ()) -parse fp source = - runParser pProgram (Context fp (lines source) []) (PS (Pos 0 0) (Pos 0 0) source) - (\_ errs res -> case errs of - [] -> That res - _ -> These errs res) - (\errs -> This errs) - () -- (This [ErrMsg fp [] (Pos 0 0) "Parse error, no grammar alternatives match your source"]) - -pProgram :: IParser (Program ()) -pProgram = do - defs <- pTopDefs - let (datadefs, fundefs) = partitionEithers defs - skipWhiteComment - assertEOF Error - return (Program datadefs fundefs) - -pTopDefs :: IParser [Either DataDef (FunDef ())] -pTopDefs = do - faoptional pTopDef >>= \case - Nothing -> do - skipWhiteComment - faoptional eof >>= \case - Nothing -> do - raise Error "Unparseable content" - readWhileInline (const True) - pTopDefs -- will skip the possible newline - Just () -> return [] - Just defs -> do - defs2 <- pTopDefs - return (defs ++ defs2) - -pTopDef :: FParser [Either DataDef (FunDef ())] -pTopDef = do - noFail skipWhiteComment - noFail isAtBlockLeft >>= \case - True -> map Left <$> pDataDef0 <|>> map Right <$> pFunDef0 - False -> do - raise Error "Skipping unparseable content" - noFail $ readWhileInline (const True) - pTopDef - -pDataDef0 :: FParser [DataDef] -pDataDef0 = do - pKeyword "data" - noFail $ do - inlineWhite - faoptional (pIdentifier0 InBlock Uppercase WCAssume) >>= \case - Nothing -> do - raise Error "Expected data declaration after 'data'" - return [] - Just name -> do - params <- famany (inlineWhite >> pIdentifier0 InBlock Lowercase WCBacktrack) - cons <- pDatacons "=" - return [DataDef name params cons] - where - pDatacons :: String -> IParser [(Name, [Type])] - pDatacons leader = do - inlineWhite - facatch (return []) $ do - pKeySym leader - inlineWhite - name <- pIdentifier0 InBlock Uppercase WCAssume - fields <- noFail $ famany pTypeAtom - rest <- noFail $ pDatacons "|" - return ((name, fields) : rest) - -data FunEqContext - = FirstLine - | TypeSig Name - | Continue Name - deriving (Show) - -pFunDef0 :: FParser [FunDef ()] -pFunDef0 = - faasum' - [do (name, typ) <- pStandaloneTypesig0 - noFail $ do - faoptional (pFunEq (TypeSig name)) >>= \case - Nothing -> do - raise Error $ "Expected function equation for " ++ pretty name ++ - " after type signature" - return [] - Just [] -> return [FunDef name (Just typ) (FunEq name [] (Plain (ETup () [])) :| [])] - Just (clause1 : clauses1) -> do - clauses <- concat <$> famany (pFunEq (Continue name)) - return [FunDef name (Just typ) (clause1 :| clauses1 ++ clauses)] - ,do pFunEq FirstLine >>= \case - clause1@(FunEq name _ _) : clauses1 -> noFail $ do - clauses <- concat <$> famany (pFunEq (Continue name)) - return [FunDef name Nothing (clause1 :| clauses1 ++ clauses)] - [] -> faempty] - --- | Given the name from the type signature or a previous clause, if any. -pFunEq :: FunEqContext -> FParser [FunEq ()] -pFunEq fectx = do - noFail skipWhiteComment - faguardM isAtBlockLeft - - pushLocatedContext "function equation" $ do - name <- pIdentifier0 AtLeft Lowercase WCAssume - - -- We want to do various checks with what came before, and there are - -- multiple branches where we decide to continue parsing this equation. To - -- avoid code duplication or an early exit monad, we use a boolean here. - success <- case fectx of - FirstLine -> return True - TypeSig checkName - | name == checkName -> return True - | otherwise -> noFail $ do - raise Error $ "Name of function clause does not correspond with type signature: " ++ - pretty checkName - return False - Continue checkName -> do - faguard (name == checkName) -- this can still backtrack out of pFunEq - return True - - noFail $ if success - then do - pats <- famany (pPattern 11) - rhs <- pRHS "=" - return [FunEq name pats rhs] - else return [] - --- | Pass "=" for function definitions and "->" for case clauses. -pRHS :: String -> IParser (RHS ()) -pRHS sepsym = do - -- TODO: parse guards - inlineWhite - pKeySym sepsym <|>> raise Error ("Expected " ++ show sepsym) - expr <- pExpr <|>> (raise Error "Expected expression" >> return (ETup () [])) - return (Plain expr) - -pPattern :: Int -> FParser (Pattern ()) -pPattern d = inlineWhite >> pPattern0 d - -pPattern0 :: Int -> FParser (Pattern ()) -pPattern0 d = do - p0 <- pPatExprAtom0 (max 10 d) - climbRight pPattern (pInfixOp Uppercase) (POp ()) d p0 Nothing - -pExpr :: FParser (Expr ()) -pExpr = do - inlineWhite - -- basics: lit, list, var, con, tup - -- expression atom: application of basics - -- expression parser: op - -- around: let, case, if - pushLocatedContext "expression" $ do - faasum' [pELet0 - ,pECase0 - ,pEIf0 - ,pExprOpExpr0 0] - -pPatExprAtom0 :: Int -> FParser (Pattern ()) -pPatExprAtom0 d = - faasum' [pPatWildcard0 - ,pPatVarOrAs0 - ,pPatCon0 - ,pPatList0 - ,pPatParens0] - where - pPatWildcard0 = pKeySym "_" >> return (PWildcard ()) - pPatVarOrAs0 = do - var <- pIdentifier0 InBlock Lowercase WCBacktrack - facatch (return (PVar () var)) $ do - inlineWhite - pKeySym "@" - noFail $ do - p <- pPattern 11 <|>> (raise Error "Expected pattern after '@'" >> return (PWildcard ())) - return (PAs () var p) - pPatCon0 = do - con <- pIdentifier0 InBlock Uppercase WCBacktrack - noFail $ if d > 10 - then return (PCon () con []) - else do args <- famany (pPattern 11) - return (PCon () con args) - pPatList0 = do - char '[' -- special syntax, no need for pKeySym - noFail $ do - ps <- pPattern 0 `sepBy` (inlineWhite >> char ',') - inlineWhite - char ']' <|>> raise Error "Expected ']'" - return (PList () ps) - pPatParens0 = do - char '(' - inlineWhite - faasum' - [do char ')' - return (PTup () []) - ,do p <- pPattern0 0 - inlineWhite - faasum' - [do char ')' - return p - ,do char ',' - ps <- pPattern 0 `sepBy1` (inlineWhite >> char ',') - return (PTup () (p : ps))]] - -pELet0 :: FParser (Expr ()) -pELet0 = do - pKeyword "let" - noFail $ do - inlineWhite - defss <- startLayoutBlock $ do - -- The first occurrence is also going to skip whitespace in front, - -- which is redundant -- but not harmful. - famany $ do - noFail skipWhiteComment - -- Note: now not necessarily in the indented block. Which is - -- precisely what we need here. If we see "in", let the 'many' - -- choice fail so that the defs loop ends. But let it fail outside - -- this asum so that it is the many that picks it up, not this - -- asum. - res <- faasum' [Nothing <$ pKeyword "in" -- note: will be dropped due to the empty backtrack - ,Just <$> pFunDef0] - case res of - Nothing -> faempty - Just defs -> return defs - - let defs = concat defss - inlineWhite - facatch (do raise Error "Expected 'in' after 'let'" - return (ELet () defs (ETup () []))) $ do - pKeyword "in" - noFail $ do - inlineWhite - body <- pExpr <|>> (raise Error "Expected expression" >> return (ETup () [])) - return (ELet () defs body) - -pECase0 :: FParser (Expr ()) -pECase0 = do - pKeyword "case" - noFail $ do - e <- pExpr <|>> (raise Error "Expected expression" >> return (ETup () [])) - inlineWhite - facatch (raise Error "Expected 'of' after 'case'" >> return (ECase () e [])) $ do - pKeyword "of" - noFail $ do - inlineWhite - startLayoutBlock $ do - -- The first clause is going to skip whitespace, but that's harmless - -- (though redundant). - let pClause = do - noFail $ skipWhiteComment - whenM (noFail $ not <$> isInsideBlock) (() <$ faempty) - pat <- pPattern0 0 - noFail $ do - rhs <- pRHS "->" - return (pat, rhs) - clauses <- famany pClause - return (ECase () e clauses) - -pEIf0 :: FParser (Expr ()) -pEIf0 = do - pKeyword "if" - noFail $ do - e1 <- pExpr <|>> (raise Error "Expected expression" >> return (ECon () (Name "True"))) - inlineWhite - facatch (raise Error "Expected 'then' after 'if'" >> return (EIf () e1 (ETup () []) (ETup () []))) $ do - pKeyword "then" - noFail $ do - e2 <- pExpr <|>> (raise Error "Expected expression" >> return (ETup () [])) - inlineWhite - facatch (raise Error "Expected else after 'if'" >> return (EIf () e1 (ETup () []) (ETup () []))) $ do - pKeyword "else" - noFail $ do - e3 <- pExpr <|>> (raise Error "Expected expression" >> return (ETup () [])) - return (EIf () e1 e2 e3) - -pExprOpExpr :: Int -> FParser (Expr ()) -pExprOpExpr d = inlineWhite >> pExprOpExpr0 d - -pExprOpExpr0 :: Int -> FParser (Expr ()) -pExprOpExpr0 d = do - e0 <- pEApp0 - climbRight pExprOpExpr (snd <$> pInfixOp Don'tCare) (EOp ()) d e0 Nothing - -climbRight - :: (Int -> FParser e) -- ^ Parse an expression at the given precedence level - -> FParser ParsedOperator -- ^ Parse an operator - -> (e -> Operator -> e -> e) -- ^ Build an operator application experssion - -> Int -- ^ Ambient precedence level: minimum precedence of top-level operator in result - -> e -- ^ lhs: Initial non-operator expression already parsed - -> Maybe ParsedOperator -- ^ Top-level operator in lhs (initialise with Nothing) - -> FParser e -climbRight pExpr' pOper makeOp d lhs mlhsop = - facatch (return lhs) $ do - paop@(PaOp op d2 a2) <- pOper - faguard (d2 >= d) -- respect global minimum precedence - case mlhsop of -- check operator compatibility - Just (PaOp _ d1 a1) -> - faguard (d1 > d2 || (d1 == d2 && a1 == a2 && a1 /= AssocNone)) - Nothing -> - return () - let oprhsd = case a2 of AssocRight -> d2 ; _ -> d2 + 1 - rhs <- pExpr' oprhsd - climbRight pExpr' pOper makeOp d (makeOp lhs op rhs) (Just paop) - -pEApp0 :: FParser (Expr ()) -pEApp0 = do - e1 <- pEAtom0 - es <- noFail $ famany (inlineWhite >> pEAtom0) - case es of - [] -> return e1 - _ -> return (EApp () e1 es) - -pEAtom0 :: FParser (Expr ()) -pEAtom0 = faasum' - [ELit () <$> pLiteral0 - ,pEList0 - ,pEVarOrCon0 - ,pEParens0] - -pLiteral0 :: FParser Literal -pLiteral0 = faasum' - [do as <- toList <$> fasome (satisfy isDigit) - let a = read as :: Integer - facatch (return (LInt a)) $ do - char '.' - bs <- toList <$> fasome (satisfy isDigit) - let b = read bs :: Integer - cs <- noFail $ faoptional $ do - char 'e' - cs <- toList <$> fasome (satisfy isDigit) - return cs - let c = maybe 0 read cs :: Integer - return (LFloat ((fromIntegral a + fromIntegral b / 10 ^ length bs) * 10 ^ c)) - ,do char '\'' - facatch (raise Error "Unclosed character literal" >> return (LChar '?')) $ do - cs <- noFail $ famany pStringChar - char '\'' - noFail $ do - c <- case cs of - [c] -> return c - _ -> raise Error "Character literal must contain one character" >> return '?' - return (LChar c) - ,do char '"' - noFail $ do - s <- famany pStringChar - char '"' <|>> raise Error "Unclosed string literal" - return (LString s)] - -pStringChar :: FParser Char -pStringChar = faasum' - [do char '\\' - char 'x' - let hexdig = do - c <- satisfy $ \c' -> - let c = toLower c' - in 'a' <= c && c <= 'f' || '0' <= c && c <= '9' - return $ if 'a' <= c then 10 + ord c - ord 'a' - else ord c - ord '0' - digs <- toList <$> fasome hexdig - return (chr (sum (zipWith (*) (reverse digs) (iterate (*16) 1)))) - ,do char '\\' - satisfy (const True) >>= \case - 'n' -> return '\n' - 'r' -> return '\r' - 't' -> return '\t' - 'a' -> return '\a' - 'b' -> return '\b' - '\'' -> return '\'' - '\"' -> return '\"' - '0' -> return '\0' - c -> do raise Error $ "Invalid escape sequence: \\" ++ [c] - return '?' - ,do satisfy (\c -> c `notElem` "\n\r\\\'")] - -pEList0 :: FParser (Expr ()) -pEList0 = do - char '[' -- special syntax, no need for pKeySym - noFail $ do - es <- sepBy pExpr (inlineWhite >> char ',') - inlineWhite - char ']' <|>> raise Error "Expected closing ']'" - return (EList () es) - -pEVarOrCon0 :: FParser (Expr ()) -pEVarOrCon0 = - pIdentifier0 InBlock Don'tCare () >>= \case - (Lowercase, name) -> return (EVar () name) - (Uppercase, name) -> return (ECon () name) - -pEParens0 :: FParser (Expr ()) -pEParens0 = do - char '(' - noFail $ do - e <- pExpr <|>> (raise Error "Expected expression" >> return (ETup () [])) - inlineWhite - char ')' <|>> raise Error "Expected closing ')'" - return e - -data Associativity = AssocLeft | AssocRight | AssocNone - deriving (Show, Eq) - -data ParsedOperator = PaOp Operator Int Associativity - deriving (Show) - -pInfixOp :: Case care -> FParser (WithCaseOutput care ParsedOperator) -pInfixOp cs = do - inlineWhite - case cs of - Lowercase -> pLowerInfixOp0 - Uppercase -> pUpperInfixOp0 - Don'tCare -> faasum' [(Lowercase,) <$> pLowerInfixOp0 - ,(Uppercase,) <$> pUpperInfixOp0] - -pLowerInfixOp0 :: FParser ParsedOperator -pLowerInfixOp0 = - faasum' [PaOp OEqu 4 AssocNone <$ pKeySym "==" - ,PaOp OAdd 6 AssocLeft <$ pKeySym "+" - ,PaOp OSub 6 AssocLeft <$ pKeySym "-" - ,PaOp OMul 7 AssocLeft <$ pKeySym "*" - ,PaOp ODiv 7 AssocLeft <$ pKeySym "/" - ,PaOp OMod 7 AssocLeft <$ pKeySym "%" - ,PaOp OPow 8 AssocRight <$ pKeySym "^" - ] - -pUpperInfixOp0 :: FParser ParsedOperator -pUpperInfixOp0 = - faasum' [PaOp OCons 5 AssocRight <$ pKeySym ":"] - -pStandaloneTypesig0 :: FParser (Name, Type) -pStandaloneTypesig0 = do - name <- pIdentifier0 AtLeft Lowercase WCBacktrack - inlineWhite - pKeySym "::" - noFail $ pushContext ("type signature for " ++ pretty name) $ do - ty <- pType <|>> (raise Error "Expected type" >> return (TTup [])) - return (name, ty) - -pType :: FParser Type -pType = do - ty1 <- pTypeApp - facatch (return ty1) $ do - inlineWhite - pKeySym "->" - noFail $ do - ty2 <- pType <|>> (raise Error "Expected type" >> return (TTup [])) - return (TFun ty1 ty2) - -pTypeApp :: FParser Type -pTypeApp = fasome pTypeAtom >>= \case - t :| [] -> return t - t :| ts -> return (TApp t ts) - -pTypeAtom :: FParser Type -pTypeAtom = faasum' [pTypeParens, pTypeList, pTypeName] - where - pTypeParens = do - inlineWhite - char '(' - faasum' - [do inlineWhite - char ')' - return (TTup []) - ,do ty1 <- pType - noFail $ do - ty2s <- famany $ do - inlineWhite - char ',' - noFail $ pType <|>> (raise Error "Expected type" >> return (TTup [])) - inlineWhite - char ')' <|>> raise Error "Expected closing ')'" - case ty2s of - [] -> return ty1 - _ -> return (TTup (ty1 : ty2s))] - - pTypeList = do - inlineWhite - char '[' - ty <- pType - noFail $ char ']' <|>> raise Error "Expecte closing ']'" - return (TList ty) - - pTypeName = do - inlineWhite - (cs, name) <- pIdentifier0 InBlock Don'tCare () - case cs of - Uppercase -> return (TCon name) - Lowercase -> return (TVar name) - --- | Parse the given name-like keyword, ensuring that it is the entire word. -pKeyword :: String -> FParser () -pKeyword s = do - string s - -- traceM $ "pKeyword: parsed " ++ show s - notFollowedBy (() <$ satisfy isNameContChar) - --- | Parse the given symbol-like keyword, ensuring that it is the entire symbol. -pKeySym :: String -> FParser () -pKeySym s = do - string s - notFollowedBy (() <$ satisfy isSymbolChar) - -data Case care where - Uppercase :: Case 'True - Lowercase :: Case 'True - Don'tCare :: Case 'False -deriving instance Show (Case care) - -type family WithCaseOutput care a where - WithCaseOutput 'True a = a - WithCaseOutput 'False a = (Case 'True, a) - -type family If c a b where - If 'True a b = a - If 'False a b = b - -data WrongCaseBacktrack - = WCBacktrack -- ^ If a word was found but it had the wrong case, fail and backtrack. - | WCAssume -- ^ Be certain that this case is expected here, and assume incorrect - -- case is a typo. - deriving (Show) - --- | Consumes an identifier (word or parenthesised operator) at the current --- position. The `var` production in Haskell2010. --- var -> varid | "(" varsym ")" -pIdentifier0 :: BlockPos -> Case care -> If care WrongCaseBacktrack () -> FParser (WithCaseOutput care Name) -pIdentifier0 bpos cs wrongcase = - pAlphaName0 bpos cs wrongcase <|>> pParens0 (pSymbol0 bpos cs) - where - -- | Parser between parens, with the opening paren at the current position. - pParens0 :: FParser a -> FParser a - pParens0 p = do - char '(' - inlineWhite - res <- p - inlineWhite - char ')' - return res - --- | Consumes a word-like name at the current position with the given case. The --- `varid` production in Haskell2010 for 'Lowercase', `conid' for 'Uppercase'. --- --- > varid -> (small {small | large | digit | "'"}) without reservedid -pAlphaName0 :: BlockPos -> Case care -> If care WrongCaseBacktrack () -> FParser (WithCaseOutput care Name) -pAlphaName0 bpos cs wrongcase = do - startPos <- gets psCur - (_, s) <- readToken - bpos - (\atfst mc -> case (atfst, mc) of - (True , Just c) | isNameHeadChar c -> Just (Right False) - (True , _ ) -> Nothing - (False, Just c) | isNameContChar c -> Just (Right False) - (False, _ ) -> Just (Left ())) - True - faguard (s `notElem` ["case", "class", "data", "default", "deriving", "do", "else" - ,"foreign", "if", "import", "in", "infix", "infixl" - ,"infixr", "instance", "let", "module", "newtype", "of" - ,"then", "type", "where", "_"]) - (name, adjoin) <- case cs of - Uppercase - | isLower (head s) -> case wrongcase of - WCBacktrack -> faempty - WCAssume -> noFail $ do - raiseAt startPos Error "Unexpected uppercase word at this position, assuming typo" - return (toUpper (head s) : tail s, id) - | otherwise -> return (s, id) - Lowercase - | isUpper (head s) -> case wrongcase of - WCBacktrack -> faempty - WCAssume -> noFail $ do - raiseAt startPos Error "Unexpected lowercase word at this position, assuming typo" - return (toLower (head s) : tail s, id) - | otherwise -> return (s, id) - Don'tCare - | isLower (head s) -> return (s, (Lowercase,)) - | otherwise -> return (s, (Uppercase,)) - return (adjoin (Name name)) - -isNameHeadChar :: Char -> Bool -isNameHeadChar c = isLetter c || c == '_' - -isNameContChar :: Char -> Bool -isNameContChar c = isNameHeadChar c || isDigit c || c == '\'' - --- | Consumes a symbol at the current position. The `varsym` production in --- Haskell2010 for 'Lowercase', `consym` otherwise, and either if 'Don'tCare'. --- --- > varsym -> ((symbol without ":") {symbol}) without (reservedop | dashes) --- > consym -> (":" {symbol}) without reservedop --- > symbol -> ascSymbol | uniSymbol without (special | "_" | "\"" | "'") --- > ascSymbol -> ```!#$%&⋆+./<=>?@^|-~:``` --- > uniSymbol -> unicode symbol or punctuation --- > dashes -> "--" {"-"} --- > special -> ```(),;[]`{}``` --- > reservedop -> ".." | ":" | "::" | "=" | "\" | "|" | "<-" | "->" | "@" | "~" | "=>" -pSymbol0 :: BlockPos -> Case care -> FParser (WithCaseOutput care Name) -pSymbol0 bpos cs = do - case bpos of - AtLeft -> whenM (noFail $ not <$> isAtBlockLeft) (() <$ faempty) - InBlock -> whenM (noFail $ not <$> isInsideBlock) faempty - (c1, adjoin) <- - case cs of Lowercase -> (,id) <$> satisfy (\c -> isSymbolChar c && c /= ':') - Uppercase -> (,id) <$> satisfy (== ':') - Don'tCare -> do c1 <- satisfy (\c -> isSymbolChar c) - return (c1, if c1 == ':' then (Uppercase,) else (Lowercase,)) - crest <- noFail $ famany (satisfy isSymbolChar) - let name = c1 : crest - faguard (name `notElem` ["..", ":", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"]) - faguard (take 2 name /= "--") - return (adjoin (Name name)) - -isSymbolChar :: Char -> Bool -isSymbolChar c = (isAscSymbol || isUniSymbol) && not isSpecialExt - where - isSpecialExt = c `elem` "(),;[]`{}_\"'" - isAscSymbol = c `elem` "!#$%&⋆+./<=>?@^|-~:" - isUniSymbol = ord c > 127 && (isSymbol c || isPunctuation c) - - -sepBy1 :: FParser a -> FParser sep -> FParser [a] -sepBy1 p psep = do - x1 <- p - (psep >> (x1 :) <$> sepBy1 p psep) <|>> pure [x1] - -sepBy :: FParser a -> FParser sep -> IParser [a] -sepBy p psep = sepBy1 p psep <|>> pure [] - --- | Start a new layout block at the current position. The old layout block is --- restored after completion of this subparser. -startLayoutBlock :: IParser a -> IParser a -startLayoutBlock p = do - ps0 <- get - put (ps0 { psBlk = psCur ps0 }) - res <- p - modify (\ps -> ps { psBlk = psBlk ps0 }) - return res - -data Fatality fatal where - Error :: Fatality 'False - Fatal :: Fatality 'True -deriving instance Show (Fatality fatal) - -type family FatalCtx fatal a where - FatalCtx 'False a = a ~ () - FatalCtx 'True a = () - -raise_ :: KnownFallible fail => Fatality fatal -> String -> Parser fail () -raise_ Error = raise Error -raise_ Fatal = raise Fatal - -raise :: (KnownFallible fail, FatalCtx fatal a) => Fatality fatal -> String -> Parser fail a -raise fat msg = gets psCur >>= \pos -> raiseAt pos fat msg - --- | Raise an error with the given fatality and description. -raiseAt :: (KnownFallible fail, FatalCtx fatal a) => Pos -> Fatality fatal -> String -> Parser fail a -raiseAt pos fat msg = do - Context { ctxFile = fp , ctxStack = stk, ctxLines = srcLines } <- ask - let err = ErrMsg fp stk pos msg (srcLines !! posLine pos) - case fat of - Error -> dictate (pure err) - Fatal -> confess (pure err) - -describeLocation :: IParser String -describeLocation = do - fp <- asks ctxFile - cur <- gets psCur - return $ fp ++ ":" ++ show (posLine cur + 1) ++ ":" ++ show (posCol cur + 1) - --- | Registers a scope description on the stack for error reporting. -pushContext :: String -> Parser fail a -> Parser fail a -pushContext descr = local (\c -> c { ctxStack = descr : ctxStack c }) - --- | Registers a scope description on the stack for error reporting, suffixed --- with the current parsing location. -pushLocatedContext :: String -> Parser fail a -> Parser fail a -pushLocatedContext descr p = do - loc <- noFail describeLocation - pushContext (descr ++ " at " ++ loc) p - -data BlockPos = AtLeft | InBlock - deriving (Show) - --- | Consumes a token at the current position, asserting that we are --- in the position indicated by the 'BlockPos' argument. The token is defined --- by a pure stateful parser. If encountering a newline or EOF, the parser is --- run on this character ('Nothing' for EOF); if this produces a result, the --- result is returned; otherwise, the parser fails. The newline is not consumed. -readToken :: BlockPos -> (s -> Maybe Char -> Maybe (Either r s)) -> s -> FParser (r, String) -readToken bpos f s0 = do - case bpos of - AtLeft -> whenM (noFail $ not <$> isAtBlockLeft) (() <$ faempty) - InBlock -> whenM (noFail $ not <$> isInsideBlock) faempty - let loop :: (s -> Maybe Char -> Maybe (Either r s)) -> s -> FParser (r, String) - loop f' st = do - ps <- get - case psRest ps of - [] | Just (Left res) <- f' st Nothing -> return (res, "") - | otherwise -> faempty - '\n' : _ | Just (Left res) <- f' st (Just '\n') -> return (res, "") - c : cs -> case f' st (Just c) of - Nothing -> faempty - Just (Left res) -> return (res, "") - Just (Right st') -> do - let Pos line col = psCur ps - put (ps { psCur = Pos line (col + 1), psRest = cs }) - fmap (c :) <$> loop f' st' - loop f s0 - --- | Consumes all whitespace and comments (including newlines), but only if --- this then leaves the parser inside the current block. If not, succeeds and --- consumes nothing. -inlineWhite :: Parser fail () -inlineWhite = do - ps <- get - noFail skipWhiteComment - whenM (noFail $ not <$> isInsideBlock) $ put ps - --- | Consumes all whitespace and comments (including newlines). Note: this may --- end outside the current block. -skipWhiteComment :: IParser () -skipWhiteComment = do - inlineSpaces - _ <- famany (blockComment >> noFail inlineSpaces) - optional_ lineComment - optional_ (consumeNewline >> noFail skipWhiteComment) - where - -- | Consumes some inline whitespace. Stops before newlines. - inlineSpaces :: IParser () - inlineSpaces = readWhileInline isSpace - --- | Consumes an delimited comment including both end markers. Note: this may --- end outside the current block. -blockComment :: FParser () -blockComment = do - string "{-" -- no need for pKeySym here - let loop = do - faasum [string "-}" - ,eof >> raise Error "Unfinished {- -} comment at end of file" - ,blockComment >> noFail loop - ,consumeNewline >> noFail loop] - (readWhileInline (`notElem` "{-")) -- "-}" also starts with '-' - noFail loop - --- | Consumes a line comment marker and the rest of the line, excluding --- newline. -lineComment :: FParser () -lineComment = do - -- '--!' is an operator, so we need to parse a whole symbol here. - pKeySym "--" - noFail $ readWhileInline (const True) - --- | Raises an error if we're not currently at EOF. -assertEOF :: Fatality fatal -> IParser () -assertEOF fat = gets psRest >>= \case - [] -> return () - _ -> raise_ fat "Unexpected stuff" - --- | Returns whether the current position is _within_ the current block, for --- soft-wrapping content. This means that col > blkCol. -isInsideBlock :: IParser Bool -isInsideBlock = do - PS { psCur = cur, psBlk = blk } <- get - return $ posLine cur >= posLine blk && posCol cur > posCol blk - --- | Returns whether the current position is at the left border of the block; --- this is for list content such as function definitions or let bindings. This --- means that col == blkCol. -isAtBlockLeft :: IParser Bool -isAtBlockLeft = do - PS { psCur = cur, psBlk = blk } <- get - return $ posLine cur >= posLine blk && posCol cur == posCol blk - --- | Consumes characters while the predicate holds or until (and excluding) --- a newline, whichever comes first. -readWhileInline :: (Char -> Bool) -> IParser () -readWhileInline p = do - (taken, rest) <- span (\c -> p c && c /= '\n') <$> gets psRest - modify (\ps -> ps { psCur = let Pos line col = psCur ps - in Pos line (col + length taken) - , psRest = rest }) - --- | Consumes exactly one newline at the current position. -consumeNewline :: FParser () -consumeNewline = gets psRest >>= \case - '\n' : rest -> modify (\ps -> ps { psCur = Pos (posLine (psCur ps) + 1) 0 - , psRest = rest }) - _ -> faempty - --- | Consumes exactly one character, unequal to newline, at the current position. -satisfy :: (Char -> Bool) -> FParser Char -satisfy p = do - -- traceM "entering satisfy" - r <- gets psRest - -- traceM "got rest" - r `seq` return () - -- traceM "seqd rest" - -- traceM ("rest is " ++ show r) - case r of - c : rest | c /= '\n', p c -> do - modify (\ps -> let Pos line col = psCur ps - in ps { psCur = Pos line (col + 1) - , psRest = rest }) - return c - _ -> faempty - --- | Consumes exactly this character at the current position. Must not be a --- newline. -char :: Char -> FParser () -char c = string [c] - --- | Consumes exactly this string at the current position. The string must not --- contain a newline. -string :: String -> FParser () -string s | any (== '\n') s = error "Newline in 'string' argument" -string s = do - ps <- get - let Pos line col = psCur ps - if take (length s) (psRest ps) == s - then put (ps { psCur = Pos line (col + length s) - , psRest = drop (length s) (psRest ps) }) - else faempty - --- lookAhead :: FParser a -> FParser a --- lookAhead p = do --- ps <- get --- success <- (Just <$> p) <|>> pure Nothing --- put ps -- restore state, as if nothing happened --- case success of --- Nothing -> faempty --- Just x -> return x - -notFollowedBy :: FParser () -> FParser () -notFollowedBy p = do - ps <- get - success <- (False <$ p) <|>> pure True - put ps -- restore state, as if nothing happened - when (not success) faempty - --- | Only succeeds at EOF. -eof :: FParser () -eof = gets psRest >>= \case [] -> return () - _ -> faempty - -whenM :: (Monad m, Monoid a) => m Bool -> m a -> m a -whenM mb mx = mb >>= \b -> if b then mx else return mempty - -optional_ :: FAlternative f => f 'Fallible a -> f 'Infallible () -optional_ a = (() <$ a) <|>> pure () |