diff options
Diffstat (limited to 'src/HSVIS/Parser.hs')
-rw-r--r-- | src/HSVIS/Parser.hs | 1018 |
1 files changed, 1018 insertions, 0 deletions
diff --git a/src/HSVIS/Parser.hs b/src/HSVIS/Parser.hs new file mode 100644 index 0000000..23ce28e --- /dev/null +++ b/src/HSVIS/Parser.hs @@ -0,0 +1,1018 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE StandaloneDeriving #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE TypeOperators #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE InstanceSigs #-} +-- I don't want a warning for 'head' and 'tail' in this file. But I also don't +-- want GHCs before 9.8 to complain that they don't know the x-partial warning. +{-# OPTIONS_GHC -Wno-unrecognised-warning-flags -Wno-x-partial #-} +module HSVIS.Parser ( + parse, + Pos(..), + ErrMsg(..), + printErrMsg, + -- * Re-exports + These(..), +) where + +-- import Control.Applicative +import Control.Monad +import Control.Monad.Chronicle +import Control.Monad.Reader +import Control.Monad.State.Lazy +import Data.Char +import Data.Either (partitionEithers) +import Data.Foldable +import Data.List (intercalate) +import Data.List.NonEmpty (NonEmpty(..)) +import Data.These + +-- import Debug.Trace + +import Control.FAlternative +import HSVIS.AST +import HSVIS.Pretty + + +data Pos = Pos + { posLine :: Int -- ^ zero-based + , posCol :: Int -- ^ zero-based + } + deriving (Show) + +-- Positions are zero-based in both dimensions. +-- See 'isInsideBlock' and 'isAtBlockLeft' for the two relevant "inside the +-- block" conditions. +data PS = PS + { psBlk :: Pos -- ^ Start of current layout block + , psCur :: Pos -- ^ Current parsing position + , psRest :: String -- ^ Rest of the input + } + deriving (Show) + +data Context = Context + { ctxFile :: FilePath + , ctxLines :: [String] -- ^ The file contents, split up in lines + , ctxStack :: [String] -- ^ Stack of syntax scopes, for error reporting + } + deriving (Show) + +type family BacktrackPath fail r where + BacktrackPath 'Fallible r = r + BacktrackPath 'Infallible r = () + +newtype Parser fail a = Parser + { runParser + :: forall r. + Context + -> PS + -> (PS -> [ErrMsg] -> a -> r) -- ^ OK: some diagnostics, but parsing succeeded + -> ([ErrMsg] -> r) -- ^ Fatal: error that prevented parsing from proceeding + -> BacktrackPath fail r -- ^ Backtrack: alternative was exhausted without success + -> r } + +type IParser = Parser 'Infallible +type FParser = Parser 'Fallible + +instance Functor (Parser fail) where + fmap f (Parser g) = Parser (\ctx ps kok kfat kbt -> + g ctx ps (\ps' errs x -> kok ps' errs (f x)) kfat kbt) + +instance Applicative (Parser fail) where + pure x = Parser (\_ ps kok _ _ -> kok ps [] x) + (<*>) = ap + +instance Monad (Parser fail) where + Parser g >>= f = Parser $ \ctx ps kok kfat kbt -> + g ctx ps + (\ps1 errs x -> + x `seq` + runParser (f x) ctx ps1 + (\ps2 errs' y -> kok ps2 (errs <> errs') y) + (\errs' -> kfat (errs <> errs')) + kbt) + (\errs -> kfat errs) + kbt + +instance FAlternative Parser where + faempty = Parser (\_ _ _ _ kbt -> kbt) + Parser f <|>> Parser g = Parser $ \ctx ps kok kfat kbt -> + f ctx ps kok kfat (g ctx ps kok kfat kbt) + + noFail (Parser f) = Parser $ \ctx ps kok kfat _ -> f ctx ps kok kfat () + + toFallible :: forall fail a. KnownFallible fail => Parser fail a -> Parser 'Fallible a + toFallible (Parser f) = Parser $ \ctx ps kok kfat kbt -> + f ctx ps kok kfat (case knownFallible @fail of + SFallible -> kbt + SInfallible -> ()) + +instance MonadState PS (Parser fail) where + state f = Parser $ \_ ps kok _ _ -> + let (x, ps') = f ps + in kok ps' [] x + +instance MonadReader Context (Parser fail) where + reader f = Parser $ \ctx ps kok _ _ -> kok ps [] (f ctx) + local f (Parser g) = Parser (\ctx -> g (f ctx)) + +instance KnownFallible fail => MonadChronicle [ErrMsg] (Parser fail) where + dictate errs = Parser $ \_ ps kok _ _ -> kok ps errs () + confess errs = Parser $ \_ _ _ kfat _ -> kfat errs + memento (Parser f) = Parser $ \ctx ps kok _ kbt -> + f ctx ps + (\ps' errs x -> kok ps' errs (Right x)) + (\errs -> kok ps [] (Left errs)) + kbt + absolve def (toFallible -> Parser f) = Parser $ \ctx ps kok _ _ -> + f ctx ps + kok + (\_ -> kok ps [] def) + (kok ps [] def) + condemn (Parser f) = Parser $ \ctx ps kok kfat kbt -> + f ctx ps + (\ps' errs x -> case errs of + [] -> kok ps' [] x + _ -> kfat errs) + kfat + kbt + retcon g (Parser f) = Parser $ \ctx ps kok kfat kbt -> + f ctx ps + (\ps' errs x -> kok ps' (g errs) x) + (\errs -> kfat (g errs)) + kbt + chronicle th = case th of + This errs -> Parser (\_ _ _ kfat _ -> kfat errs) + That res -> Parser (\_ ps kok _ _ -> kok ps [] res) + These errs res -> Parser (\_ ps kok _ _ -> kok ps errs res) + +-- Positions are zero-based in both dimensions +data ErrMsg = ErrMsg + { errFile :: FilePath + , errStk :: [String] + , errPos :: Pos + , errMsg :: String + , errSourceLine :: String } + deriving (Show) + +printErrMsg :: ErrMsg -> String +printErrMsg (ErrMsg fp stk (Pos y x) s srcline) = + let linenum = show (y + 1) + in intercalate "\n" $ + map (\descr -> "In " ++ descr ++ ":") (reverse stk) + ++ [fp ++ ":" ++ show (y + 1) ++ ":" ++ show (x + 1) ++ ": " ++ s + ,map (\_ -> ' ') linenum ++ " |" + ,linenum ++ " | " ++ srcline + ,map (\_ -> ' ') linenum ++ " | " ++ replicate x ' ' ++ "^"] + +parse :: FilePath -> String -> These [ErrMsg] (Program ()) +parse fp source = + runParser pProgram (Context fp (lines source) []) (PS (Pos 0 0) (Pos 0 0) source) + (\_ errs res -> case errs of + [] -> That res + _ -> These errs res) + (\errs -> This errs) + () -- (This [ErrMsg fp [] (Pos 0 0) "Parse error, no grammar alternatives match your source"]) + +pProgram :: IParser (Program ()) +pProgram = do + defs <- pTopDefs + let (datadefs, fundefs) = partitionEithers defs + skipWhiteComment + assertEOF Error + return (Program datadefs fundefs) + +pTopDefs :: IParser [Either DataDef (FunDef ())] +pTopDefs = do + faoptional pTopDef >>= \case + Nothing -> do + skipWhiteComment + faoptional eof >>= \case + Nothing -> do + raise Error "Unparseable content" + readWhileInline (const True) + pTopDefs -- will skip the possible newline + Just () -> return [] + Just defs -> do + defs2 <- pTopDefs + return (defs ++ defs2) + +pTopDef :: FParser [Either DataDef (FunDef ())] +pTopDef = do + noFail skipWhiteComment + noFail isAtBlockLeft >>= \case + True -> map Left <$> pDataDef0 <|>> map Right <$> pFunDef0 + False -> do + raise Error "Skipping unparseable content" + noFail $ readWhileInline (const True) + pTopDef + +pDataDef0 :: FParser [DataDef] +pDataDef0 = do + pKeyword "data" + noFail $ do + inlineWhite + faoptional (pIdentifier0 InBlock Uppercase WCAssume) >>= \case + Nothing -> do + raise Error "Expected data declaration after 'data'" + return [] + Just name -> do + params <- famany (inlineWhite >> pIdentifier0 InBlock Lowercase WCBacktrack) + cons <- pDatacons "=" + return [DataDef name params cons] + where + pDatacons :: String -> IParser [(Name, [Type])] + pDatacons leader = do + inlineWhite + facatch (return []) $ do + pKeySym leader + inlineWhite + name <- pIdentifier0 InBlock Uppercase WCAssume + fields <- noFail $ famany pTypeAtom + rest <- noFail $ pDatacons "|" + return ((name, fields) : rest) + +data FunEqContext + = FirstLine + | TypeSig Name + | Continue Name + deriving (Show) + +pFunDef0 :: FParser [FunDef ()] +pFunDef0 = + faasum' + [do (name, typ) <- pStandaloneTypesig0 + noFail $ do + faoptional (pFunEq (TypeSig name)) >>= \case + Nothing -> do + raise Error $ "Expected function equation for " ++ pretty name ++ + " after type signature" + return [] + Just [] -> return [FunDef name (Just typ) (FunEq name [] (Plain (ETup () [])) :| [])] + Just (clause1 : clauses1) -> do + clauses <- concat <$> famany (pFunEq (Continue name)) + return [FunDef name (Just typ) (clause1 :| clauses1 ++ clauses)] + ,do pFunEq FirstLine >>= \case + clause1@(FunEq name _ _) : clauses1 -> noFail $ do + clauses <- concat <$> famany (pFunEq (Continue name)) + return [FunDef name Nothing (clause1 :| clauses1 ++ clauses)] + [] -> faempty] + +-- | Given the name from the type signature or a previous clause, if any. +pFunEq :: FunEqContext -> FParser [FunEq ()] +pFunEq fectx = do + noFail skipWhiteComment + faguardM isAtBlockLeft + + pushLocatedContext "function equation" $ do + name <- pIdentifier0 AtLeft Lowercase WCAssume + + -- We want to do various checks with what came before, and there are + -- multiple branches where we decide to continue parsing this equation. To + -- avoid code duplication or an early exit monad, we use a boolean here. + success <- case fectx of + FirstLine -> return True + TypeSig checkName + | name == checkName -> return True + | otherwise -> noFail $ do + raise Error $ "Name of function clause does not correspond with type signature: " ++ + pretty checkName + return False + Continue checkName -> do + faguard (name == checkName) -- this can still backtrack out of pFunEq + return True + + noFail $ if success + then do + pats <- famany (pPattern 11) + rhs <- pRHS "=" + return [FunEq name pats rhs] + else return [] + +-- | Pass "=" for function definitions and "->" for case clauses. +pRHS :: String -> IParser (RHS ()) +pRHS sepsym = do + -- TODO: parse guards + inlineWhite + pKeySym sepsym <|>> raise Error ("Expected " ++ show sepsym) + expr <- pExpr <|>> (raise Error "Expected expression" >> return (ETup () [])) + return (Plain expr) + +pPattern :: Int -> FParser (Pattern ()) +pPattern d = inlineWhite >> pPattern0 d + +pPattern0 :: Int -> FParser (Pattern ()) +pPattern0 d = do + p0 <- pPatExprAtom0 (max 10 d) + climbRight pPattern (pInfixOp Uppercase) (POp ()) d p0 Nothing + +pExpr :: FParser (Expr ()) +pExpr = do + inlineWhite + -- basics: lit, list, var, con, tup + -- expression atom: application of basics + -- expression parser: op + -- around: let, case, if + pushLocatedContext "expression" $ do + faasum' [pELet0 + ,pECase0 + ,pEIf0 + ,pExprOpExpr0 0] + +pPatExprAtom0 :: Int -> FParser (Pattern ()) +pPatExprAtom0 d = + faasum' [pPatWildcard0 + ,pPatVarOrAs0 + ,pPatCon0 + ,pPatList0 + ,pPatParens0] + where + pPatWildcard0 = pKeySym "_" >> return (PWildcard ()) + pPatVarOrAs0 = do + var <- pIdentifier0 InBlock Lowercase WCBacktrack + facatch (return (PVar () var)) $ do + inlineWhite + pKeySym "@" + noFail $ do + p <- pPattern 11 <|>> (raise Error "Expected pattern after '@'" >> return (PWildcard ())) + return (PAs () var p) + pPatCon0 = do + con <- pIdentifier0 InBlock Uppercase WCBacktrack + noFail $ if d > 10 + then return (PCon () con []) + else do args <- famany (pPattern 11) + return (PCon () con args) + pPatList0 = do + char '[' -- special syntax, no need for pKeySym + noFail $ do + ps <- pPattern 0 `sepBy` (inlineWhite >> char ',') + inlineWhite + char ']' <|>> raise Error "Expected ']'" + return (PList () ps) + pPatParens0 = do + char '(' + inlineWhite + faasum' + [do char ')' + return (PTup () []) + ,do p <- pPattern0 0 + inlineWhite + faasum' + [do char ')' + return p + ,do char ',' + ps <- pPattern 0 `sepBy1` (inlineWhite >> char ',') + return (PTup () (p : ps))]] + +pELet0 :: FParser (Expr ()) +pELet0 = do + pKeyword "let" + noFail $ do + inlineWhite + defss <- startLayoutBlock $ do + -- The first occurrence is also going to skip whitespace in front, + -- which is redundant -- but not harmful. + famany $ do + noFail skipWhiteComment + -- Note: now not necessarily in the indented block. Which is + -- precisely what we need here. If we see "in", let the 'many' + -- choice fail so that the defs loop ends. But let it fail outside + -- this asum so that it is the many that picks it up, not this + -- asum. + res <- faasum' [Nothing <$ pKeyword "in" -- note: will be dropped due to the empty backtrack + ,Just <$> pFunDef0] + case res of + Nothing -> faempty + Just defs -> return defs + + let defs = concat defss + inlineWhite + facatch (do raise Error "Expected 'in' after 'let'" + return (ELet () defs (ETup () []))) $ do + pKeyword "in" + noFail $ do + inlineWhite + body <- pExpr <|>> (raise Error "Expected expression" >> return (ETup () [])) + return (ELet () defs body) + +pECase0 :: FParser (Expr ()) +pECase0 = do + pKeyword "case" + noFail $ do + e <- pExpr <|>> (raise Error "Expected expression" >> return (ETup () [])) + inlineWhite + facatch (raise Error "Expected 'of' after 'case'" >> return (ECase () e [])) $ do + pKeyword "of" + noFail $ do + inlineWhite + startLayoutBlock $ do + -- The first clause is going to skip whitespace, but that's harmless + -- (though redundant). + let pClause = do + noFail $ skipWhiteComment + whenM (noFail $ not <$> isInsideBlock) (() <$ faempty) + pat <- pPattern0 0 + noFail $ do + rhs <- pRHS "->" + return (pat, rhs) + clauses <- famany pClause + return (ECase () e clauses) + +pEIf0 :: FParser (Expr ()) +pEIf0 = do + pKeyword "if" + noFail $ do + e1 <- pExpr <|>> (raise Error "Expected expression" >> return (ECon () (Name "True"))) + inlineWhite + facatch (raise Error "Expected 'then' after 'if'" >> return (EIf () e1 (ETup () []) (ETup () []))) $ do + pKeyword "then" + noFail $ do + e2 <- pExpr <|>> (raise Error "Expected expression" >> return (ETup () [])) + inlineWhite + facatch (raise Error "Expected else after 'if'" >> return (EIf () e1 (ETup () []) (ETup () []))) $ do + pKeyword "else" + noFail $ do + e3 <- pExpr <|>> (raise Error "Expected expression" >> return (ETup () [])) + return (EIf () e1 e2 e3) + +pExprOpExpr :: Int -> FParser (Expr ()) +pExprOpExpr d = inlineWhite >> pExprOpExpr0 d + +pExprOpExpr0 :: Int -> FParser (Expr ()) +pExprOpExpr0 d = do + e0 <- pEApp0 + climbRight pExprOpExpr (snd <$> pInfixOp Don'tCare) (EOp ()) d e0 Nothing + +climbRight + :: (Int -> FParser e) -- ^ Parse an expression at the given precedence level + -> FParser ParsedOperator -- ^ Parse an operator + -> (e -> Operator -> e -> e) -- ^ Build an operator application experssion + -> Int -- ^ Ambient precedence level: minimum precedence of top-level operator in result + -> e -- ^ lhs: Initial non-operator expression already parsed + -> Maybe ParsedOperator -- ^ Top-level operator in lhs (initialise with Nothing) + -> FParser e +climbRight pExpr' pOper makeOp d lhs mlhsop = + facatch (return lhs) $ do + paop@(PaOp op d2 a2) <- pOper + faguard (d2 >= d) -- respect global minimum precedence + case mlhsop of -- check operator compatibility + Just (PaOp _ d1 a1) -> + faguard (d1 > d2 || (d1 == d2 && a1 == a2 && a1 /= AssocNone)) + Nothing -> + return () + let oprhsd = case a2 of AssocRight -> d2 ; _ -> d2 + 1 + rhs <- pExpr' oprhsd + climbRight pExpr' pOper makeOp d (makeOp lhs op rhs) (Just paop) + +pEApp0 :: FParser (Expr ()) +pEApp0 = do + e1 <- pEAtom0 + es <- noFail $ famany (inlineWhite >> pEAtom0) + case es of + [] -> return e1 + _ -> return (EApp () e1 es) + +pEAtom0 :: FParser (Expr ()) +pEAtom0 = faasum' + [ELit () <$> pLiteral0 + ,pEList0 + ,pEVarOrCon0 + ,pEParens0] + +pLiteral0 :: FParser Literal +pLiteral0 = faasum' + [do as <- toList <$> fasome (satisfy isDigit) + let a = read as :: Integer + facatch (return (LInt a)) $ do + char '.' + bs <- toList <$> fasome (satisfy isDigit) + let b = read bs :: Integer + cs <- noFail $ faoptional $ do + char 'e' + cs <- toList <$> fasome (satisfy isDigit) + return cs + let c = maybe 0 read cs :: Integer + return (LFloat ((fromIntegral a + fromIntegral b / 10 ^ length bs) * 10 ^ c)) + ,do char '\'' + facatch (raise Error "Unclosed character literal" >> return (LChar '?')) $ do + cs <- noFail $ famany pStringChar + char '\'' + noFail $ do + c <- case cs of + [c] -> return c + _ -> raise Error "Character literal must contain one character" >> return '?' + return (LChar c) + ,do char '"' + noFail $ do + s <- famany pStringChar + char '"' <|>> raise Error "Unclosed string literal" + return (LString s)] + +pStringChar :: FParser Char +pStringChar = faasum' + [do char '\\' + char 'x' + let hexdig = do + c <- satisfy $ \c' -> + let c = toLower c' + in 'a' <= c && c <= 'f' || '0' <= c && c <= '9' + return $ if 'a' <= c then 10 + ord c - ord 'a' + else ord c - ord '0' + digs <- toList <$> fasome hexdig + return (chr (sum (zipWith (*) (reverse digs) (iterate (*16) 1)))) + ,do char '\\' + satisfy (const True) >>= \case + 'n' -> return '\n' + 'r' -> return '\r' + 't' -> return '\t' + 'a' -> return '\a' + 'b' -> return '\b' + '\'' -> return '\'' + '\"' -> return '\"' + '0' -> return '\0' + c -> do raise Error $ "Invalid escape sequence: \\" ++ [c] + return '?' + ,do satisfy (\c -> c `notElem` "\n\r\\\'")] + +pEList0 :: FParser (Expr ()) +pEList0 = do + char '[' -- special syntax, no need for pKeySym + noFail $ do + es <- sepBy pExpr (inlineWhite >> char ',') + inlineWhite + char ']' <|>> raise Error "Expected closing ']'" + return (EList () es) + +pEVarOrCon0 :: FParser (Expr ()) +pEVarOrCon0 = + pIdentifier0 InBlock Don'tCare () >>= \case + (Lowercase, name) -> return (EVar () name) + (Uppercase, name) -> return (ECon () name) + +pEParens0 :: FParser (Expr ()) +pEParens0 = do + char '(' + noFail $ do + e <- pExpr <|>> (raise Error "Expected expression" >> return (ETup () [])) + inlineWhite + char ')' <|>> raise Error "Expected closing ')'" + return e + +data Associativity = AssocLeft | AssocRight | AssocNone + deriving (Show, Eq) + +data ParsedOperator = PaOp Operator Int Associativity + deriving (Show) + +pInfixOp :: Case care -> FParser (WithCaseOutput care ParsedOperator) +pInfixOp cs = do + inlineWhite + case cs of + Lowercase -> pLowerInfixOp0 + Uppercase -> pUpperInfixOp0 + Don'tCare -> faasum' [(Lowercase,) <$> pLowerInfixOp0 + ,(Uppercase,) <$> pUpperInfixOp0] + +pLowerInfixOp0 :: FParser ParsedOperator +pLowerInfixOp0 = + faasum' [PaOp OEqu 4 AssocNone <$ pKeySym "==" + ,PaOp OAdd 6 AssocLeft <$ pKeySym "+" + ,PaOp OSub 6 AssocLeft <$ pKeySym "-" + ,PaOp OMul 7 AssocLeft <$ pKeySym "*" + ,PaOp ODiv 7 AssocLeft <$ pKeySym "/" + ,PaOp OMod 7 AssocLeft <$ pKeySym "%" + ,PaOp OPow 8 AssocRight <$ pKeySym "^" + ] + +pUpperInfixOp0 :: FParser ParsedOperator +pUpperInfixOp0 = + faasum' [PaOp OCons 5 AssocRight <$ pKeySym ":"] + +pStandaloneTypesig0 :: FParser (Name, Type) +pStandaloneTypesig0 = do + name <- pIdentifier0 AtLeft Lowercase WCBacktrack + inlineWhite + pKeySym "::" + noFail $ pushContext ("type signature for " ++ pretty name) $ do + ty <- pType <|>> (raise Error "Expected type" >> return (TTup [])) + return (name, ty) + +pType :: FParser Type +pType = do + ty1 <- pTypeApp + facatch (return ty1) $ do + inlineWhite + pKeySym "->" + noFail $ do + ty2 <- pType <|>> (raise Error "Expected type" >> return (TTup [])) + return (TFun ty1 ty2) + +pTypeApp :: FParser Type +pTypeApp = fasome pTypeAtom >>= \case + t :| [] -> return t + t :| ts -> return (TApp t ts) + +pTypeAtom :: FParser Type +pTypeAtom = faasum' [pTypeParens, pTypeList, pTypeName] + where + pTypeParens = do + inlineWhite + char '(' + faasum' + [do inlineWhite + char ')' + return (TTup []) + ,do ty1 <- pType + noFail $ do + ty2s <- famany $ do + inlineWhite + char ',' + noFail $ pType <|>> (raise Error "Expected type" >> return (TTup [])) + inlineWhite + char ')' <|>> raise Error "Expected closing ')'" + case ty2s of + [] -> return ty1 + _ -> return (TTup (ty1 : ty2s))] + + pTypeList = do + inlineWhite + char '[' + ty <- pType + noFail $ char ']' <|>> raise Error "Expecte closing ']'" + return (TList ty) + + pTypeName = do + inlineWhite + (cs, name) <- pIdentifier0 InBlock Don'tCare () + case cs of + Uppercase -> return (TCon name) + Lowercase -> return (TVar name) + +-- | Parse the given name-like keyword, ensuring that it is the entire word. +pKeyword :: String -> FParser () +pKeyword s = do + string s + -- traceM $ "pKeyword: parsed " ++ show s + notFollowedBy (() <$ satisfy isNameContChar) + +-- | Parse the given symbol-like keyword, ensuring that it is the entire symbol. +pKeySym :: String -> FParser () +pKeySym s = do + string s + notFollowedBy (() <$ satisfy isSymbolChar) + +data Case care where + Uppercase :: Case 'True + Lowercase :: Case 'True + Don'tCare :: Case 'False +deriving instance Show (Case care) + +type family WithCaseOutput care a where + WithCaseOutput 'True a = a + WithCaseOutput 'False a = (Case 'True, a) + +type family If c a b where + If 'True a b = a + If 'False a b = b + +data WrongCaseBacktrack + = WCBacktrack -- ^ If a word was found but it had the wrong case, fail and backtrack. + | WCAssume -- ^ Be certain that this case is expected here, and assume incorrect + -- case is a typo. + deriving (Show) + +-- | Consumes an identifier (word or parenthesised operator) at the current +-- position. The `var` production in Haskell2010. +-- var -> varid | "(" varsym ")" +pIdentifier0 :: BlockPos -> Case care -> If care WrongCaseBacktrack () -> FParser (WithCaseOutput care Name) +pIdentifier0 bpos cs wrongcase = + pAlphaName0 bpos cs wrongcase <|>> pParens0 (pSymbol0 bpos cs) + where + -- | Parser between parens, with the opening paren at the current position. + pParens0 :: FParser a -> FParser a + pParens0 p = do + char '(' + inlineWhite + res <- p + inlineWhite + char ')' + return res + +-- | Consumes a word-like name at the current position with the given case. The +-- `varid` production in Haskell2010 for 'Lowercase', `conid' for 'Uppercase'. +-- +-- > varid -> (small {small | large | digit | "'"}) without reservedid +pAlphaName0 :: BlockPos -> Case care -> If care WrongCaseBacktrack () -> FParser (WithCaseOutput care Name) +pAlphaName0 bpos cs wrongcase = do + startPos <- gets psCur + (_, s) <- readToken + bpos + (\atfst mc -> case (atfst, mc) of + (True , Just c) | isNameHeadChar c -> Just (Right False) + (True , _ ) -> Nothing + (False, Just c) | isNameContChar c -> Just (Right False) + (False, _ ) -> Just (Left ())) + True + faguard (s `notElem` ["case", "class", "data", "default", "deriving", "do", "else" + ,"foreign", "if", "import", "in", "infix", "infixl" + ,"infixr", "instance", "let", "module", "newtype", "of" + ,"then", "type", "where", "_"]) + (name, adjoin) <- case cs of + Uppercase + | isLower (head s) -> case wrongcase of + WCBacktrack -> faempty + WCAssume -> noFail $ do + raiseAt startPos Error "Unexpected uppercase word at this position, assuming typo" + return (toUpper (head s) : tail s, id) + | otherwise -> return (s, id) + Lowercase + | isUpper (head s) -> case wrongcase of + WCBacktrack -> faempty + WCAssume -> noFail $ do + raiseAt startPos Error "Unexpected lowercase word at this position, assuming typo" + return (toLower (head s) : tail s, id) + | otherwise -> return (s, id) + Don'tCare + | isLower (head s) -> return (s, (Lowercase,)) + | otherwise -> return (s, (Uppercase,)) + return (adjoin (Name name)) + +isNameHeadChar :: Char -> Bool +isNameHeadChar c = isLetter c || c == '_' + +isNameContChar :: Char -> Bool +isNameContChar c = isNameHeadChar c || isDigit c || c == '\'' + +-- | Consumes a symbol at the current position. The `varsym` production in +-- Haskell2010 for 'Lowercase', `consym` otherwise, and either if 'Don'tCare'. +-- +-- > varsym -> ((symbol without ":") {symbol}) without (reservedop | dashes) +-- > consym -> (":" {symbol}) without reservedop +-- > symbol -> ascSymbol | uniSymbol without (special | "_" | "\"" | "'") +-- > ascSymbol -> ```!#$%&⋆+./<=>?@^|-~:``` +-- > uniSymbol -> unicode symbol or punctuation +-- > dashes -> "--" {"-"} +-- > special -> ```(),;[]`{}``` +-- > reservedop -> ".." | ":" | "::" | "=" | "\" | "|" | "<-" | "->" | "@" | "~" | "=>" +pSymbol0 :: BlockPos -> Case care -> FParser (WithCaseOutput care Name) +pSymbol0 bpos cs = do + case bpos of + AtLeft -> whenM (noFail $ not <$> isAtBlockLeft) (() <$ faempty) + InBlock -> whenM (noFail $ not <$> isInsideBlock) faempty + (c1, adjoin) <- + case cs of Lowercase -> (,id) <$> satisfy (\c -> isSymbolChar c && c /= ':') + Uppercase -> (,id) <$> satisfy (== ':') + Don'tCare -> do c1 <- satisfy (\c -> isSymbolChar c) + return (c1, if c1 == ':' then (Uppercase,) else (Lowercase,)) + crest <- noFail $ famany (satisfy isSymbolChar) + let name = c1 : crest + faguard (name `notElem` ["..", ":", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"]) + faguard (take 2 name /= "--") + return (adjoin (Name name)) + +isSymbolChar :: Char -> Bool +isSymbolChar c = (isAscSymbol || isUniSymbol) && not isSpecialExt + where + isSpecialExt = c `elem` "(),;[]`{}_\"'" + isAscSymbol = c `elem` "!#$%&⋆+./<=>?@^|-~:" + isUniSymbol = ord c > 127 && (isSymbol c || isPunctuation c) + + +sepBy1 :: FParser a -> FParser sep -> FParser [a] +sepBy1 p psep = do + x1 <- p + (psep >> (x1 :) <$> sepBy1 p psep) <|>> pure [x1] + +sepBy :: FParser a -> FParser sep -> IParser [a] +sepBy p psep = sepBy1 p psep <|>> pure [] + +-- | Start a new layout block at the current position. The old layout block is +-- restored after completion of this subparser. +startLayoutBlock :: IParser a -> IParser a +startLayoutBlock p = do + ps0 <- get + put (ps0 { psBlk = psCur ps0 }) + res <- p + modify (\ps -> ps { psBlk = psBlk ps0 }) + return res + +data Fatality fatal where + Error :: Fatality 'False + Fatal :: Fatality 'True +deriving instance Show (Fatality fatal) + +type family FatalCtx fatal a where + FatalCtx 'False a = a ~ () + FatalCtx 'True a = () + +raise_ :: KnownFallible fail => Fatality fatal -> String -> Parser fail () +raise_ Error = raise Error +raise_ Fatal = raise Fatal + +raise :: (KnownFallible fail, FatalCtx fatal a) => Fatality fatal -> String -> Parser fail a +raise fat msg = gets psCur >>= \pos -> raiseAt pos fat msg + +-- | Raise an error with the given fatality and description. +raiseAt :: (KnownFallible fail, FatalCtx fatal a) => Pos -> Fatality fatal -> String -> Parser fail a +raiseAt pos fat msg = do + Context { ctxFile = fp , ctxStack = stk, ctxLines = srcLines } <- ask + let err = ErrMsg fp stk pos msg (srcLines !! posLine pos) + case fat of + Error -> dictate (pure err) + Fatal -> confess (pure err) + +describeLocation :: IParser String +describeLocation = do + fp <- asks ctxFile + cur <- gets psCur + return $ fp ++ ":" ++ show (posLine cur + 1) ++ ":" ++ show (posCol cur + 1) + +-- | Registers a scope description on the stack for error reporting. +pushContext :: String -> Parser fail a -> Parser fail a +pushContext descr = local (\c -> c { ctxStack = descr : ctxStack c }) + +-- | Registers a scope description on the stack for error reporting, suffixed +-- with the current parsing location. +pushLocatedContext :: String -> Parser fail a -> Parser fail a +pushLocatedContext descr p = do + loc <- noFail describeLocation + pushContext (descr ++ " at " ++ loc) p + +data BlockPos = AtLeft | InBlock + deriving (Show) + +-- | Consumes a token at the current position, asserting that we are +-- in the position indicated by the 'BlockPos' argument. The token is defined +-- by a pure stateful parser. If encountering a newline or EOF, the parser is +-- run on this character ('Nothing' for EOF); if this produces a result, the +-- result is returned; otherwise, the parser fails. The newline is not consumed. +readToken :: BlockPos -> (s -> Maybe Char -> Maybe (Either r s)) -> s -> FParser (r, String) +readToken bpos f s0 = do + case bpos of + AtLeft -> whenM (noFail $ not <$> isAtBlockLeft) (() <$ faempty) + InBlock -> whenM (noFail $ not <$> isInsideBlock) faempty + let loop :: (s -> Maybe Char -> Maybe (Either r s)) -> s -> FParser (r, String) + loop f' st = do + ps <- get + case psRest ps of + [] | Just (Left res) <- f' st Nothing -> return (res, "") + | otherwise -> faempty + '\n' : _ | Just (Left res) <- f' st (Just '\n') -> return (res, "") + c : cs -> case f' st (Just c) of + Nothing -> faempty + Just (Left res) -> return (res, "") + Just (Right st') -> do + let Pos line col = psCur ps + put (ps { psCur = Pos line (col + 1), psRest = cs }) + fmap (c :) <$> loop f' st' + loop f s0 + +-- | Consumes all whitespace and comments (including newlines), but only if +-- this then leaves the parser inside the current block. If not, succeeds and +-- consumes nothing. +inlineWhite :: Parser fail () +inlineWhite = do + ps <- get + noFail skipWhiteComment + whenM (noFail $ not <$> isInsideBlock) $ put ps + +-- | Consumes all whitespace and comments (including newlines). Note: this may +-- end outside the current block. +skipWhiteComment :: IParser () +skipWhiteComment = do + inlineSpaces + _ <- famany (blockComment >> noFail inlineSpaces) + optional_ lineComment + optional_ (consumeNewline >> noFail skipWhiteComment) + where + -- | Consumes some inline whitespace. Stops before newlines. + inlineSpaces :: IParser () + inlineSpaces = readWhileInline isSpace + +-- | Consumes an delimited comment including both end markers. Note: this may +-- end outside the current block. +blockComment :: FParser () +blockComment = do + string "{-" -- no need for pKeySym here + let loop = do + faasum [string "-}" + ,eof >> raise Error "Unfinished {- -} comment at end of file" + ,blockComment >> noFail loop + ,consumeNewline >> noFail loop] + (readWhileInline (`notElem` "{-")) -- "-}" also starts with '-' + noFail loop + +-- | Consumes a line comment marker and the rest of the line, excluding +-- newline. +lineComment :: FParser () +lineComment = do + -- '--!' is an operator, so we need to parse a whole symbol here. + pKeySym "--" + noFail $ readWhileInline (const True) + +-- | Raises an error if we're not currently at EOF. +assertEOF :: Fatality fatal -> IParser () +assertEOF fat = gets psRest >>= \case + [] -> return () + _ -> raise_ fat "Unexpected stuff" + +-- | Returns whether the current position is _within_ the current block, for +-- soft-wrapping content. This means that col > blkCol. +isInsideBlock :: IParser Bool +isInsideBlock = do + PS { psCur = cur, psBlk = blk } <- get + return $ posLine cur >= posLine blk && posCol cur > posCol blk + +-- | Returns whether the current position is at the left border of the block; +-- this is for list content such as function definitions or let bindings. This +-- means that col == blkCol. +isAtBlockLeft :: IParser Bool +isAtBlockLeft = do + PS { psCur = cur, psBlk = blk } <- get + return $ posLine cur >= posLine blk && posCol cur == posCol blk + +-- | Consumes characters while the predicate holds or until (and excluding) +-- a newline, whichever comes first. +readWhileInline :: (Char -> Bool) -> IParser () +readWhileInline p = do + (taken, rest) <- span (\c -> p c && c /= '\n') <$> gets psRest + modify (\ps -> ps { psCur = let Pos line col = psCur ps + in Pos line (col + length taken) + , psRest = rest }) + +-- | Consumes exactly one newline at the current position. +consumeNewline :: FParser () +consumeNewline = gets psRest >>= \case + '\n' : rest -> modify (\ps -> ps { psCur = Pos (posLine (psCur ps) + 1) 0 + , psRest = rest }) + _ -> faempty + +-- | Consumes exactly one character, unequal to newline, at the current position. +satisfy :: (Char -> Bool) -> FParser Char +satisfy p = do + -- traceM "entering satisfy" + r <- gets psRest + -- traceM "got rest" + r `seq` return () + -- traceM "seqd rest" + -- traceM ("rest is " ++ show r) + case r of + c : rest | c /= '\n', p c -> do + modify (\ps -> let Pos line col = psCur ps + in ps { psCur = Pos line (col + 1) + , psRest = rest }) + return c + _ -> faempty + +-- | Consumes exactly this character at the current position. Must not be a +-- newline. +char :: Char -> FParser () +char c = string [c] + +-- | Consumes exactly this string at the current position. The string must not +-- contain a newline. +string :: String -> FParser () +string s | any (== '\n') s = error "Newline in 'string' argument" +string s = do + ps <- get + let Pos line col = psCur ps + if take (length s) (psRest ps) == s + then put (ps { psCur = Pos line (col + length s) + , psRest = drop (length s) (psRest ps) }) + else faempty + +-- lookAhead :: FParser a -> FParser a +-- lookAhead p = do +-- ps <- get +-- success <- (Just <$> p) <|>> pure Nothing +-- put ps -- restore state, as if nothing happened +-- case success of +-- Nothing -> faempty +-- Just x -> return x + +notFollowedBy :: FParser () -> FParser () +notFollowedBy p = do + ps <- get + success <- (False <$ p) <|>> pure True + put ps -- restore state, as if nothing happened + when (not success) faempty + +-- | Only succeeds at EOF. +eof :: FParser () +eof = gets psRest >>= \case [] -> return () + _ -> faempty + +whenM :: (Monad m, Monoid a) => m Bool -> m a -> m a +whenM mb mx = mb >>= \b -> if b then mx else return mempty + +optional_ :: FAlternative f => f 'Fallible a -> f 'Infallible () +optional_ a = (() <$ a) <|>> pure () |