aboutsummaryrefslogtreecommitdiff
path: root/src/HSVIS/Parser.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/HSVIS/Parser.hs')
-rw-r--r--src/HSVIS/Parser.hs1018
1 files changed, 1018 insertions, 0 deletions
diff --git a/src/HSVIS/Parser.hs b/src/HSVIS/Parser.hs
new file mode 100644
index 0000000..23ce28e
--- /dev/null
+++ b/src/HSVIS/Parser.hs
@@ -0,0 +1,1018 @@
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE StandaloneDeriving #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE RankNTypes #-}
+{-# LANGUAGE TypeOperators #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE InstanceSigs #-}
+-- I don't want a warning for 'head' and 'tail' in this file. But I also don't
+-- want GHCs before 9.8 to complain that they don't know the x-partial warning.
+{-# OPTIONS_GHC -Wno-unrecognised-warning-flags -Wno-x-partial #-}
+module HSVIS.Parser (
+ parse,
+ Pos(..),
+ ErrMsg(..),
+ printErrMsg,
+ -- * Re-exports
+ These(..),
+) where
+
+-- import Control.Applicative
+import Control.Monad
+import Control.Monad.Chronicle
+import Control.Monad.Reader
+import Control.Monad.State.Lazy
+import Data.Char
+import Data.Either (partitionEithers)
+import Data.Foldable
+import Data.List (intercalate)
+import Data.List.NonEmpty (NonEmpty(..))
+import Data.These
+
+-- import Debug.Trace
+
+import Control.FAlternative
+import HSVIS.AST
+import HSVIS.Pretty
+
+
+data Pos = Pos
+ { posLine :: Int -- ^ zero-based
+ , posCol :: Int -- ^ zero-based
+ }
+ deriving (Show)
+
+-- Positions are zero-based in both dimensions.
+-- See 'isInsideBlock' and 'isAtBlockLeft' for the two relevant "inside the
+-- block" conditions.
+data PS = PS
+ { psBlk :: Pos -- ^ Start of current layout block
+ , psCur :: Pos -- ^ Current parsing position
+ , psRest :: String -- ^ Rest of the input
+ }
+ deriving (Show)
+
+data Context = Context
+ { ctxFile :: FilePath
+ , ctxLines :: [String] -- ^ The file contents, split up in lines
+ , ctxStack :: [String] -- ^ Stack of syntax scopes, for error reporting
+ }
+ deriving (Show)
+
+type family BacktrackPath fail r where
+ BacktrackPath 'Fallible r = r
+ BacktrackPath 'Infallible r = ()
+
+newtype Parser fail a = Parser
+ { runParser
+ :: forall r.
+ Context
+ -> PS
+ -> (PS -> [ErrMsg] -> a -> r) -- ^ OK: some diagnostics, but parsing succeeded
+ -> ([ErrMsg] -> r) -- ^ Fatal: error that prevented parsing from proceeding
+ -> BacktrackPath fail r -- ^ Backtrack: alternative was exhausted without success
+ -> r }
+
+type IParser = Parser 'Infallible
+type FParser = Parser 'Fallible
+
+instance Functor (Parser fail) where
+ fmap f (Parser g) = Parser (\ctx ps kok kfat kbt ->
+ g ctx ps (\ps' errs x -> kok ps' errs (f x)) kfat kbt)
+
+instance Applicative (Parser fail) where
+ pure x = Parser (\_ ps kok _ _ -> kok ps [] x)
+ (<*>) = ap
+
+instance Monad (Parser fail) where
+ Parser g >>= f = Parser $ \ctx ps kok kfat kbt ->
+ g ctx ps
+ (\ps1 errs x ->
+ x `seq`
+ runParser (f x) ctx ps1
+ (\ps2 errs' y -> kok ps2 (errs <> errs') y)
+ (\errs' -> kfat (errs <> errs'))
+ kbt)
+ (\errs -> kfat errs)
+ kbt
+
+instance FAlternative Parser where
+ faempty = Parser (\_ _ _ _ kbt -> kbt)
+ Parser f <|>> Parser g = Parser $ \ctx ps kok kfat kbt ->
+ f ctx ps kok kfat (g ctx ps kok kfat kbt)
+
+ noFail (Parser f) = Parser $ \ctx ps kok kfat _ -> f ctx ps kok kfat ()
+
+ toFallible :: forall fail a. KnownFallible fail => Parser fail a -> Parser 'Fallible a
+ toFallible (Parser f) = Parser $ \ctx ps kok kfat kbt ->
+ f ctx ps kok kfat (case knownFallible @fail of
+ SFallible -> kbt
+ SInfallible -> ())
+
+instance MonadState PS (Parser fail) where
+ state f = Parser $ \_ ps kok _ _ ->
+ let (x, ps') = f ps
+ in kok ps' [] x
+
+instance MonadReader Context (Parser fail) where
+ reader f = Parser $ \ctx ps kok _ _ -> kok ps [] (f ctx)
+ local f (Parser g) = Parser (\ctx -> g (f ctx))
+
+instance KnownFallible fail => MonadChronicle [ErrMsg] (Parser fail) where
+ dictate errs = Parser $ \_ ps kok _ _ -> kok ps errs ()
+ confess errs = Parser $ \_ _ _ kfat _ -> kfat errs
+ memento (Parser f) = Parser $ \ctx ps kok _ kbt ->
+ f ctx ps
+ (\ps' errs x -> kok ps' errs (Right x))
+ (\errs -> kok ps [] (Left errs))
+ kbt
+ absolve def (toFallible -> Parser f) = Parser $ \ctx ps kok _ _ ->
+ f ctx ps
+ kok
+ (\_ -> kok ps [] def)
+ (kok ps [] def)
+ condemn (Parser f) = Parser $ \ctx ps kok kfat kbt ->
+ f ctx ps
+ (\ps' errs x -> case errs of
+ [] -> kok ps' [] x
+ _ -> kfat errs)
+ kfat
+ kbt
+ retcon g (Parser f) = Parser $ \ctx ps kok kfat kbt ->
+ f ctx ps
+ (\ps' errs x -> kok ps' (g errs) x)
+ (\errs -> kfat (g errs))
+ kbt
+ chronicle th = case th of
+ This errs -> Parser (\_ _ _ kfat _ -> kfat errs)
+ That res -> Parser (\_ ps kok _ _ -> kok ps [] res)
+ These errs res -> Parser (\_ ps kok _ _ -> kok ps errs res)
+
+-- Positions are zero-based in both dimensions
+data ErrMsg = ErrMsg
+ { errFile :: FilePath
+ , errStk :: [String]
+ , errPos :: Pos
+ , errMsg :: String
+ , errSourceLine :: String }
+ deriving (Show)
+
+printErrMsg :: ErrMsg -> String
+printErrMsg (ErrMsg fp stk (Pos y x) s srcline) =
+ let linenum = show (y + 1)
+ in intercalate "\n" $
+ map (\descr -> "In " ++ descr ++ ":") (reverse stk)
+ ++ [fp ++ ":" ++ show (y + 1) ++ ":" ++ show (x + 1) ++ ": " ++ s
+ ,map (\_ -> ' ') linenum ++ " |"
+ ,linenum ++ " | " ++ srcline
+ ,map (\_ -> ' ') linenum ++ " | " ++ replicate x ' ' ++ "^"]
+
+parse :: FilePath -> String -> These [ErrMsg] (Program ())
+parse fp source =
+ runParser pProgram (Context fp (lines source) []) (PS (Pos 0 0) (Pos 0 0) source)
+ (\_ errs res -> case errs of
+ [] -> That res
+ _ -> These errs res)
+ (\errs -> This errs)
+ () -- (This [ErrMsg fp [] (Pos 0 0) "Parse error, no grammar alternatives match your source"])
+
+pProgram :: IParser (Program ())
+pProgram = do
+ defs <- pTopDefs
+ let (datadefs, fundefs) = partitionEithers defs
+ skipWhiteComment
+ assertEOF Error
+ return (Program datadefs fundefs)
+
+pTopDefs :: IParser [Either DataDef (FunDef ())]
+pTopDefs = do
+ faoptional pTopDef >>= \case
+ Nothing -> do
+ skipWhiteComment
+ faoptional eof >>= \case
+ Nothing -> do
+ raise Error "Unparseable content"
+ readWhileInline (const True)
+ pTopDefs -- will skip the possible newline
+ Just () -> return []
+ Just defs -> do
+ defs2 <- pTopDefs
+ return (defs ++ defs2)
+
+pTopDef :: FParser [Either DataDef (FunDef ())]
+pTopDef = do
+ noFail skipWhiteComment
+ noFail isAtBlockLeft >>= \case
+ True -> map Left <$> pDataDef0 <|>> map Right <$> pFunDef0
+ False -> do
+ raise Error "Skipping unparseable content"
+ noFail $ readWhileInline (const True)
+ pTopDef
+
+pDataDef0 :: FParser [DataDef]
+pDataDef0 = do
+ pKeyword "data"
+ noFail $ do
+ inlineWhite
+ faoptional (pIdentifier0 InBlock Uppercase WCAssume) >>= \case
+ Nothing -> do
+ raise Error "Expected data declaration after 'data'"
+ return []
+ Just name -> do
+ params <- famany (inlineWhite >> pIdentifier0 InBlock Lowercase WCBacktrack)
+ cons <- pDatacons "="
+ return [DataDef name params cons]
+ where
+ pDatacons :: String -> IParser [(Name, [Type])]
+ pDatacons leader = do
+ inlineWhite
+ facatch (return []) $ do
+ pKeySym leader
+ inlineWhite
+ name <- pIdentifier0 InBlock Uppercase WCAssume
+ fields <- noFail $ famany pTypeAtom
+ rest <- noFail $ pDatacons "|"
+ return ((name, fields) : rest)
+
+data FunEqContext
+ = FirstLine
+ | TypeSig Name
+ | Continue Name
+ deriving (Show)
+
+pFunDef0 :: FParser [FunDef ()]
+pFunDef0 =
+ faasum'
+ [do (name, typ) <- pStandaloneTypesig0
+ noFail $ do
+ faoptional (pFunEq (TypeSig name)) >>= \case
+ Nothing -> do
+ raise Error $ "Expected function equation for " ++ pretty name ++
+ " after type signature"
+ return []
+ Just [] -> return [FunDef name (Just typ) (FunEq name [] (Plain (ETup () [])) :| [])]
+ Just (clause1 : clauses1) -> do
+ clauses <- concat <$> famany (pFunEq (Continue name))
+ return [FunDef name (Just typ) (clause1 :| clauses1 ++ clauses)]
+ ,do pFunEq FirstLine >>= \case
+ clause1@(FunEq name _ _) : clauses1 -> noFail $ do
+ clauses <- concat <$> famany (pFunEq (Continue name))
+ return [FunDef name Nothing (clause1 :| clauses1 ++ clauses)]
+ [] -> faempty]
+
+-- | Given the name from the type signature or a previous clause, if any.
+pFunEq :: FunEqContext -> FParser [FunEq ()]
+pFunEq fectx = do
+ noFail skipWhiteComment
+ faguardM isAtBlockLeft
+
+ pushLocatedContext "function equation" $ do
+ name <- pIdentifier0 AtLeft Lowercase WCAssume
+
+ -- We want to do various checks with what came before, and there are
+ -- multiple branches where we decide to continue parsing this equation. To
+ -- avoid code duplication or an early exit monad, we use a boolean here.
+ success <- case fectx of
+ FirstLine -> return True
+ TypeSig checkName
+ | name == checkName -> return True
+ | otherwise -> noFail $ do
+ raise Error $ "Name of function clause does not correspond with type signature: " ++
+ pretty checkName
+ return False
+ Continue checkName -> do
+ faguard (name == checkName) -- this can still backtrack out of pFunEq
+ return True
+
+ noFail $ if success
+ then do
+ pats <- famany (pPattern 11)
+ rhs <- pRHS "="
+ return [FunEq name pats rhs]
+ else return []
+
+-- | Pass "=" for function definitions and "->" for case clauses.
+pRHS :: String -> IParser (RHS ())
+pRHS sepsym = do
+ -- TODO: parse guards
+ inlineWhite
+ pKeySym sepsym <|>> raise Error ("Expected " ++ show sepsym)
+ expr <- pExpr <|>> (raise Error "Expected expression" >> return (ETup () []))
+ return (Plain expr)
+
+pPattern :: Int -> FParser (Pattern ())
+pPattern d = inlineWhite >> pPattern0 d
+
+pPattern0 :: Int -> FParser (Pattern ())
+pPattern0 d = do
+ p0 <- pPatExprAtom0 (max 10 d)
+ climbRight pPattern (pInfixOp Uppercase) (POp ()) d p0 Nothing
+
+pExpr :: FParser (Expr ())
+pExpr = do
+ inlineWhite
+ -- basics: lit, list, var, con, tup
+ -- expression atom: application of basics
+ -- expression parser: op
+ -- around: let, case, if
+ pushLocatedContext "expression" $ do
+ faasum' [pELet0
+ ,pECase0
+ ,pEIf0
+ ,pExprOpExpr0 0]
+
+pPatExprAtom0 :: Int -> FParser (Pattern ())
+pPatExprAtom0 d =
+ faasum' [pPatWildcard0
+ ,pPatVarOrAs0
+ ,pPatCon0
+ ,pPatList0
+ ,pPatParens0]
+ where
+ pPatWildcard0 = pKeySym "_" >> return (PWildcard ())
+ pPatVarOrAs0 = do
+ var <- pIdentifier0 InBlock Lowercase WCBacktrack
+ facatch (return (PVar () var)) $ do
+ inlineWhite
+ pKeySym "@"
+ noFail $ do
+ p <- pPattern 11 <|>> (raise Error "Expected pattern after '@'" >> return (PWildcard ()))
+ return (PAs () var p)
+ pPatCon0 = do
+ con <- pIdentifier0 InBlock Uppercase WCBacktrack
+ noFail $ if d > 10
+ then return (PCon () con [])
+ else do args <- famany (pPattern 11)
+ return (PCon () con args)
+ pPatList0 = do
+ char '[' -- special syntax, no need for pKeySym
+ noFail $ do
+ ps <- pPattern 0 `sepBy` (inlineWhite >> char ',')
+ inlineWhite
+ char ']' <|>> raise Error "Expected ']'"
+ return (PList () ps)
+ pPatParens0 = do
+ char '('
+ inlineWhite
+ faasum'
+ [do char ')'
+ return (PTup () [])
+ ,do p <- pPattern0 0
+ inlineWhite
+ faasum'
+ [do char ')'
+ return p
+ ,do char ','
+ ps <- pPattern 0 `sepBy1` (inlineWhite >> char ',')
+ return (PTup () (p : ps))]]
+
+pELet0 :: FParser (Expr ())
+pELet0 = do
+ pKeyword "let"
+ noFail $ do
+ inlineWhite
+ defss <- startLayoutBlock $ do
+ -- The first occurrence is also going to skip whitespace in front,
+ -- which is redundant -- but not harmful.
+ famany $ do
+ noFail skipWhiteComment
+ -- Note: now not necessarily in the indented block. Which is
+ -- precisely what we need here. If we see "in", let the 'many'
+ -- choice fail so that the defs loop ends. But let it fail outside
+ -- this asum so that it is the many that picks it up, not this
+ -- asum.
+ res <- faasum' [Nothing <$ pKeyword "in" -- note: will be dropped due to the empty backtrack
+ ,Just <$> pFunDef0]
+ case res of
+ Nothing -> faempty
+ Just defs -> return defs
+
+ let defs = concat defss
+ inlineWhite
+ facatch (do raise Error "Expected 'in' after 'let'"
+ return (ELet () defs (ETup () []))) $ do
+ pKeyword "in"
+ noFail $ do
+ inlineWhite
+ body <- pExpr <|>> (raise Error "Expected expression" >> return (ETup () []))
+ return (ELet () defs body)
+
+pECase0 :: FParser (Expr ())
+pECase0 = do
+ pKeyword "case"
+ noFail $ do
+ e <- pExpr <|>> (raise Error "Expected expression" >> return (ETup () []))
+ inlineWhite
+ facatch (raise Error "Expected 'of' after 'case'" >> return (ECase () e [])) $ do
+ pKeyword "of"
+ noFail $ do
+ inlineWhite
+ startLayoutBlock $ do
+ -- The first clause is going to skip whitespace, but that's harmless
+ -- (though redundant).
+ let pClause = do
+ noFail $ skipWhiteComment
+ whenM (noFail $ not <$> isInsideBlock) (() <$ faempty)
+ pat <- pPattern0 0
+ noFail $ do
+ rhs <- pRHS "->"
+ return (pat, rhs)
+ clauses <- famany pClause
+ return (ECase () e clauses)
+
+pEIf0 :: FParser (Expr ())
+pEIf0 = do
+ pKeyword "if"
+ noFail $ do
+ e1 <- pExpr <|>> (raise Error "Expected expression" >> return (ECon () (Name "True")))
+ inlineWhite
+ facatch (raise Error "Expected 'then' after 'if'" >> return (EIf () e1 (ETup () []) (ETup () []))) $ do
+ pKeyword "then"
+ noFail $ do
+ e2 <- pExpr <|>> (raise Error "Expected expression" >> return (ETup () []))
+ inlineWhite
+ facatch (raise Error "Expected else after 'if'" >> return (EIf () e1 (ETup () []) (ETup () []))) $ do
+ pKeyword "else"
+ noFail $ do
+ e3 <- pExpr <|>> (raise Error "Expected expression" >> return (ETup () []))
+ return (EIf () e1 e2 e3)
+
+pExprOpExpr :: Int -> FParser (Expr ())
+pExprOpExpr d = inlineWhite >> pExprOpExpr0 d
+
+pExprOpExpr0 :: Int -> FParser (Expr ())
+pExprOpExpr0 d = do
+ e0 <- pEApp0
+ climbRight pExprOpExpr (snd <$> pInfixOp Don'tCare) (EOp ()) d e0 Nothing
+
+climbRight
+ :: (Int -> FParser e) -- ^ Parse an expression at the given precedence level
+ -> FParser ParsedOperator -- ^ Parse an operator
+ -> (e -> Operator -> e -> e) -- ^ Build an operator application experssion
+ -> Int -- ^ Ambient precedence level: minimum precedence of top-level operator in result
+ -> e -- ^ lhs: Initial non-operator expression already parsed
+ -> Maybe ParsedOperator -- ^ Top-level operator in lhs (initialise with Nothing)
+ -> FParser e
+climbRight pExpr' pOper makeOp d lhs mlhsop =
+ facatch (return lhs) $ do
+ paop@(PaOp op d2 a2) <- pOper
+ faguard (d2 >= d) -- respect global minimum precedence
+ case mlhsop of -- check operator compatibility
+ Just (PaOp _ d1 a1) ->
+ faguard (d1 > d2 || (d1 == d2 && a1 == a2 && a1 /= AssocNone))
+ Nothing ->
+ return ()
+ let oprhsd = case a2 of AssocRight -> d2 ; _ -> d2 + 1
+ rhs <- pExpr' oprhsd
+ climbRight pExpr' pOper makeOp d (makeOp lhs op rhs) (Just paop)
+
+pEApp0 :: FParser (Expr ())
+pEApp0 = do
+ e1 <- pEAtom0
+ es <- noFail $ famany (inlineWhite >> pEAtom0)
+ case es of
+ [] -> return e1
+ _ -> return (EApp () e1 es)
+
+pEAtom0 :: FParser (Expr ())
+pEAtom0 = faasum'
+ [ELit () <$> pLiteral0
+ ,pEList0
+ ,pEVarOrCon0
+ ,pEParens0]
+
+pLiteral0 :: FParser Literal
+pLiteral0 = faasum'
+ [do as <- toList <$> fasome (satisfy isDigit)
+ let a = read as :: Integer
+ facatch (return (LInt a)) $ do
+ char '.'
+ bs <- toList <$> fasome (satisfy isDigit)
+ let b = read bs :: Integer
+ cs <- noFail $ faoptional $ do
+ char 'e'
+ cs <- toList <$> fasome (satisfy isDigit)
+ return cs
+ let c = maybe 0 read cs :: Integer
+ return (LFloat ((fromIntegral a + fromIntegral b / 10 ^ length bs) * 10 ^ c))
+ ,do char '\''
+ facatch (raise Error "Unclosed character literal" >> return (LChar '?')) $ do
+ cs <- noFail $ famany pStringChar
+ char '\''
+ noFail $ do
+ c <- case cs of
+ [c] -> return c
+ _ -> raise Error "Character literal must contain one character" >> return '?'
+ return (LChar c)
+ ,do char '"'
+ noFail $ do
+ s <- famany pStringChar
+ char '"' <|>> raise Error "Unclosed string literal"
+ return (LString s)]
+
+pStringChar :: FParser Char
+pStringChar = faasum'
+ [do char '\\'
+ char 'x'
+ let hexdig = do
+ c <- satisfy $ \c' ->
+ let c = toLower c'
+ in 'a' <= c && c <= 'f' || '0' <= c && c <= '9'
+ return $ if 'a' <= c then 10 + ord c - ord 'a'
+ else ord c - ord '0'
+ digs <- toList <$> fasome hexdig
+ return (chr (sum (zipWith (*) (reverse digs) (iterate (*16) 1))))
+ ,do char '\\'
+ satisfy (const True) >>= \case
+ 'n' -> return '\n'
+ 'r' -> return '\r'
+ 't' -> return '\t'
+ 'a' -> return '\a'
+ 'b' -> return '\b'
+ '\'' -> return '\''
+ '\"' -> return '\"'
+ '0' -> return '\0'
+ c -> do raise Error $ "Invalid escape sequence: \\" ++ [c]
+ return '?'
+ ,do satisfy (\c -> c `notElem` "\n\r\\\'")]
+
+pEList0 :: FParser (Expr ())
+pEList0 = do
+ char '[' -- special syntax, no need for pKeySym
+ noFail $ do
+ es <- sepBy pExpr (inlineWhite >> char ',')
+ inlineWhite
+ char ']' <|>> raise Error "Expected closing ']'"
+ return (EList () es)
+
+pEVarOrCon0 :: FParser (Expr ())
+pEVarOrCon0 =
+ pIdentifier0 InBlock Don'tCare () >>= \case
+ (Lowercase, name) -> return (EVar () name)
+ (Uppercase, name) -> return (ECon () name)
+
+pEParens0 :: FParser (Expr ())
+pEParens0 = do
+ char '('
+ noFail $ do
+ e <- pExpr <|>> (raise Error "Expected expression" >> return (ETup () []))
+ inlineWhite
+ char ')' <|>> raise Error "Expected closing ')'"
+ return e
+
+data Associativity = AssocLeft | AssocRight | AssocNone
+ deriving (Show, Eq)
+
+data ParsedOperator = PaOp Operator Int Associativity
+ deriving (Show)
+
+pInfixOp :: Case care -> FParser (WithCaseOutput care ParsedOperator)
+pInfixOp cs = do
+ inlineWhite
+ case cs of
+ Lowercase -> pLowerInfixOp0
+ Uppercase -> pUpperInfixOp0
+ Don'tCare -> faasum' [(Lowercase,) <$> pLowerInfixOp0
+ ,(Uppercase,) <$> pUpperInfixOp0]
+
+pLowerInfixOp0 :: FParser ParsedOperator
+pLowerInfixOp0 =
+ faasum' [PaOp OEqu 4 AssocNone <$ pKeySym "=="
+ ,PaOp OAdd 6 AssocLeft <$ pKeySym "+"
+ ,PaOp OSub 6 AssocLeft <$ pKeySym "-"
+ ,PaOp OMul 7 AssocLeft <$ pKeySym "*"
+ ,PaOp ODiv 7 AssocLeft <$ pKeySym "/"
+ ,PaOp OMod 7 AssocLeft <$ pKeySym "%"
+ ,PaOp OPow 8 AssocRight <$ pKeySym "^"
+ ]
+
+pUpperInfixOp0 :: FParser ParsedOperator
+pUpperInfixOp0 =
+ faasum' [PaOp OCons 5 AssocRight <$ pKeySym ":"]
+
+pStandaloneTypesig0 :: FParser (Name, Type)
+pStandaloneTypesig0 = do
+ name <- pIdentifier0 AtLeft Lowercase WCBacktrack
+ inlineWhite
+ pKeySym "::"
+ noFail $ pushContext ("type signature for " ++ pretty name) $ do
+ ty <- pType <|>> (raise Error "Expected type" >> return (TTup []))
+ return (name, ty)
+
+pType :: FParser Type
+pType = do
+ ty1 <- pTypeApp
+ facatch (return ty1) $ do
+ inlineWhite
+ pKeySym "->"
+ noFail $ do
+ ty2 <- pType <|>> (raise Error "Expected type" >> return (TTup []))
+ return (TFun ty1 ty2)
+
+pTypeApp :: FParser Type
+pTypeApp = fasome pTypeAtom >>= \case
+ t :| [] -> return t
+ t :| ts -> return (TApp t ts)
+
+pTypeAtom :: FParser Type
+pTypeAtom = faasum' [pTypeParens, pTypeList, pTypeName]
+ where
+ pTypeParens = do
+ inlineWhite
+ char '('
+ faasum'
+ [do inlineWhite
+ char ')'
+ return (TTup [])
+ ,do ty1 <- pType
+ noFail $ do
+ ty2s <- famany $ do
+ inlineWhite
+ char ','
+ noFail $ pType <|>> (raise Error "Expected type" >> return (TTup []))
+ inlineWhite
+ char ')' <|>> raise Error "Expected closing ')'"
+ case ty2s of
+ [] -> return ty1
+ _ -> return (TTup (ty1 : ty2s))]
+
+ pTypeList = do
+ inlineWhite
+ char '['
+ ty <- pType
+ noFail $ char ']' <|>> raise Error "Expecte closing ']'"
+ return (TList ty)
+
+ pTypeName = do
+ inlineWhite
+ (cs, name) <- pIdentifier0 InBlock Don'tCare ()
+ case cs of
+ Uppercase -> return (TCon name)
+ Lowercase -> return (TVar name)
+
+-- | Parse the given name-like keyword, ensuring that it is the entire word.
+pKeyword :: String -> FParser ()
+pKeyword s = do
+ string s
+ -- traceM $ "pKeyword: parsed " ++ show s
+ notFollowedBy (() <$ satisfy isNameContChar)
+
+-- | Parse the given symbol-like keyword, ensuring that it is the entire symbol.
+pKeySym :: String -> FParser ()
+pKeySym s = do
+ string s
+ notFollowedBy (() <$ satisfy isSymbolChar)
+
+data Case care where
+ Uppercase :: Case 'True
+ Lowercase :: Case 'True
+ Don'tCare :: Case 'False
+deriving instance Show (Case care)
+
+type family WithCaseOutput care a where
+ WithCaseOutput 'True a = a
+ WithCaseOutput 'False a = (Case 'True, a)
+
+type family If c a b where
+ If 'True a b = a
+ If 'False a b = b
+
+data WrongCaseBacktrack
+ = WCBacktrack -- ^ If a word was found but it had the wrong case, fail and backtrack.
+ | WCAssume -- ^ Be certain that this case is expected here, and assume incorrect
+ -- case is a typo.
+ deriving (Show)
+
+-- | Consumes an identifier (word or parenthesised operator) at the current
+-- position. The `var` production in Haskell2010.
+-- var -> varid | "(" varsym ")"
+pIdentifier0 :: BlockPos -> Case care -> If care WrongCaseBacktrack () -> FParser (WithCaseOutput care Name)
+pIdentifier0 bpos cs wrongcase =
+ pAlphaName0 bpos cs wrongcase <|>> pParens0 (pSymbol0 bpos cs)
+ where
+ -- | Parser between parens, with the opening paren at the current position.
+ pParens0 :: FParser a -> FParser a
+ pParens0 p = do
+ char '('
+ inlineWhite
+ res <- p
+ inlineWhite
+ char ')'
+ return res
+
+-- | Consumes a word-like name at the current position with the given case. The
+-- `varid` production in Haskell2010 for 'Lowercase', `conid' for 'Uppercase'.
+--
+-- > varid -> (small {small | large | digit | "'"}) without reservedid
+pAlphaName0 :: BlockPos -> Case care -> If care WrongCaseBacktrack () -> FParser (WithCaseOutput care Name)
+pAlphaName0 bpos cs wrongcase = do
+ startPos <- gets psCur
+ (_, s) <- readToken
+ bpos
+ (\atfst mc -> case (atfst, mc) of
+ (True , Just c) | isNameHeadChar c -> Just (Right False)
+ (True , _ ) -> Nothing
+ (False, Just c) | isNameContChar c -> Just (Right False)
+ (False, _ ) -> Just (Left ()))
+ True
+ faguard (s `notElem` ["case", "class", "data", "default", "deriving", "do", "else"
+ ,"foreign", "if", "import", "in", "infix", "infixl"
+ ,"infixr", "instance", "let", "module", "newtype", "of"
+ ,"then", "type", "where", "_"])
+ (name, adjoin) <- case cs of
+ Uppercase
+ | isLower (head s) -> case wrongcase of
+ WCBacktrack -> faempty
+ WCAssume -> noFail $ do
+ raiseAt startPos Error "Unexpected uppercase word at this position, assuming typo"
+ return (toUpper (head s) : tail s, id)
+ | otherwise -> return (s, id)
+ Lowercase
+ | isUpper (head s) -> case wrongcase of
+ WCBacktrack -> faempty
+ WCAssume -> noFail $ do
+ raiseAt startPos Error "Unexpected lowercase word at this position, assuming typo"
+ return (toLower (head s) : tail s, id)
+ | otherwise -> return (s, id)
+ Don'tCare
+ | isLower (head s) -> return (s, (Lowercase,))
+ | otherwise -> return (s, (Uppercase,))
+ return (adjoin (Name name))
+
+isNameHeadChar :: Char -> Bool
+isNameHeadChar c = isLetter c || c == '_'
+
+isNameContChar :: Char -> Bool
+isNameContChar c = isNameHeadChar c || isDigit c || c == '\''
+
+-- | Consumes a symbol at the current position. The `varsym` production in
+-- Haskell2010 for 'Lowercase', `consym` otherwise, and either if 'Don'tCare'.
+--
+-- > varsym -> ((symbol without ":") {symbol}) without (reservedop | dashes)
+-- > consym -> (":" {symbol}) without reservedop
+-- > symbol -> ascSymbol | uniSymbol without (special | "_" | "\"" | "'")
+-- > ascSymbol -> ```!#$%&⋆+./<=>?@^|-~:```
+-- > uniSymbol -> unicode symbol or punctuation
+-- > dashes -> "--" {"-"}
+-- > special -> ```(),;[]`{}```
+-- > reservedop -> ".." | ":" | "::" | "=" | "\" | "|" | "<-" | "->" | "@" | "~" | "=>"
+pSymbol0 :: BlockPos -> Case care -> FParser (WithCaseOutput care Name)
+pSymbol0 bpos cs = do
+ case bpos of
+ AtLeft -> whenM (noFail $ not <$> isAtBlockLeft) (() <$ faempty)
+ InBlock -> whenM (noFail $ not <$> isInsideBlock) faempty
+ (c1, adjoin) <-
+ case cs of Lowercase -> (,id) <$> satisfy (\c -> isSymbolChar c && c /= ':')
+ Uppercase -> (,id) <$> satisfy (== ':')
+ Don'tCare -> do c1 <- satisfy (\c -> isSymbolChar c)
+ return (c1, if c1 == ':' then (Uppercase,) else (Lowercase,))
+ crest <- noFail $ famany (satisfy isSymbolChar)
+ let name = c1 : crest
+ faguard (name `notElem` ["..", ":", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"])
+ faguard (take 2 name /= "--")
+ return (adjoin (Name name))
+
+isSymbolChar :: Char -> Bool
+isSymbolChar c = (isAscSymbol || isUniSymbol) && not isSpecialExt
+ where
+ isSpecialExt = c `elem` "(),;[]`{}_\"'"
+ isAscSymbol = c `elem` "!#$%&⋆+./<=>?@^|-~:"
+ isUniSymbol = ord c > 127 && (isSymbol c || isPunctuation c)
+
+
+sepBy1 :: FParser a -> FParser sep -> FParser [a]
+sepBy1 p psep = do
+ x1 <- p
+ (psep >> (x1 :) <$> sepBy1 p psep) <|>> pure [x1]
+
+sepBy :: FParser a -> FParser sep -> IParser [a]
+sepBy p psep = sepBy1 p psep <|>> pure []
+
+-- | Start a new layout block at the current position. The old layout block is
+-- restored after completion of this subparser.
+startLayoutBlock :: IParser a -> IParser a
+startLayoutBlock p = do
+ ps0 <- get
+ put (ps0 { psBlk = psCur ps0 })
+ res <- p
+ modify (\ps -> ps { psBlk = psBlk ps0 })
+ return res
+
+data Fatality fatal where
+ Error :: Fatality 'False
+ Fatal :: Fatality 'True
+deriving instance Show (Fatality fatal)
+
+type family FatalCtx fatal a where
+ FatalCtx 'False a = a ~ ()
+ FatalCtx 'True a = ()
+
+raise_ :: KnownFallible fail => Fatality fatal -> String -> Parser fail ()
+raise_ Error = raise Error
+raise_ Fatal = raise Fatal
+
+raise :: (KnownFallible fail, FatalCtx fatal a) => Fatality fatal -> String -> Parser fail a
+raise fat msg = gets psCur >>= \pos -> raiseAt pos fat msg
+
+-- | Raise an error with the given fatality and description.
+raiseAt :: (KnownFallible fail, FatalCtx fatal a) => Pos -> Fatality fatal -> String -> Parser fail a
+raiseAt pos fat msg = do
+ Context { ctxFile = fp , ctxStack = stk, ctxLines = srcLines } <- ask
+ let err = ErrMsg fp stk pos msg (srcLines !! posLine pos)
+ case fat of
+ Error -> dictate (pure err)
+ Fatal -> confess (pure err)
+
+describeLocation :: IParser String
+describeLocation = do
+ fp <- asks ctxFile
+ cur <- gets psCur
+ return $ fp ++ ":" ++ show (posLine cur + 1) ++ ":" ++ show (posCol cur + 1)
+
+-- | Registers a scope description on the stack for error reporting.
+pushContext :: String -> Parser fail a -> Parser fail a
+pushContext descr = local (\c -> c { ctxStack = descr : ctxStack c })
+
+-- | Registers a scope description on the stack for error reporting, suffixed
+-- with the current parsing location.
+pushLocatedContext :: String -> Parser fail a -> Parser fail a
+pushLocatedContext descr p = do
+ loc <- noFail describeLocation
+ pushContext (descr ++ " at " ++ loc) p
+
+data BlockPos = AtLeft | InBlock
+ deriving (Show)
+
+-- | Consumes a token at the current position, asserting that we are
+-- in the position indicated by the 'BlockPos' argument. The token is defined
+-- by a pure stateful parser. If encountering a newline or EOF, the parser is
+-- run on this character ('Nothing' for EOF); if this produces a result, the
+-- result is returned; otherwise, the parser fails. The newline is not consumed.
+readToken :: BlockPos -> (s -> Maybe Char -> Maybe (Either r s)) -> s -> FParser (r, String)
+readToken bpos f s0 = do
+ case bpos of
+ AtLeft -> whenM (noFail $ not <$> isAtBlockLeft) (() <$ faempty)
+ InBlock -> whenM (noFail $ not <$> isInsideBlock) faempty
+ let loop :: (s -> Maybe Char -> Maybe (Either r s)) -> s -> FParser (r, String)
+ loop f' st = do
+ ps <- get
+ case psRest ps of
+ [] | Just (Left res) <- f' st Nothing -> return (res, "")
+ | otherwise -> faempty
+ '\n' : _ | Just (Left res) <- f' st (Just '\n') -> return (res, "")
+ c : cs -> case f' st (Just c) of
+ Nothing -> faempty
+ Just (Left res) -> return (res, "")
+ Just (Right st') -> do
+ let Pos line col = psCur ps
+ put (ps { psCur = Pos line (col + 1), psRest = cs })
+ fmap (c :) <$> loop f' st'
+ loop f s0
+
+-- | Consumes all whitespace and comments (including newlines), but only if
+-- this then leaves the parser inside the current block. If not, succeeds and
+-- consumes nothing.
+inlineWhite :: Parser fail ()
+inlineWhite = do
+ ps <- get
+ noFail skipWhiteComment
+ whenM (noFail $ not <$> isInsideBlock) $ put ps
+
+-- | Consumes all whitespace and comments (including newlines). Note: this may
+-- end outside the current block.
+skipWhiteComment :: IParser ()
+skipWhiteComment = do
+ inlineSpaces
+ _ <- famany (blockComment >> noFail inlineSpaces)
+ optional_ lineComment
+ optional_ (consumeNewline >> noFail skipWhiteComment)
+ where
+ -- | Consumes some inline whitespace. Stops before newlines.
+ inlineSpaces :: IParser ()
+ inlineSpaces = readWhileInline isSpace
+
+-- | Consumes an delimited comment including both end markers. Note: this may
+-- end outside the current block.
+blockComment :: FParser ()
+blockComment = do
+ string "{-" -- no need for pKeySym here
+ let loop = do
+ faasum [string "-}"
+ ,eof >> raise Error "Unfinished {- -} comment at end of file"
+ ,blockComment >> noFail loop
+ ,consumeNewline >> noFail loop]
+ (readWhileInline (`notElem` "{-")) -- "-}" also starts with '-'
+ noFail loop
+
+-- | Consumes a line comment marker and the rest of the line, excluding
+-- newline.
+lineComment :: FParser ()
+lineComment = do
+ -- '--!' is an operator, so we need to parse a whole symbol here.
+ pKeySym "--"
+ noFail $ readWhileInline (const True)
+
+-- | Raises an error if we're not currently at EOF.
+assertEOF :: Fatality fatal -> IParser ()
+assertEOF fat = gets psRest >>= \case
+ [] -> return ()
+ _ -> raise_ fat "Unexpected stuff"
+
+-- | Returns whether the current position is _within_ the current block, for
+-- soft-wrapping content. This means that col > blkCol.
+isInsideBlock :: IParser Bool
+isInsideBlock = do
+ PS { psCur = cur, psBlk = blk } <- get
+ return $ posLine cur >= posLine blk && posCol cur > posCol blk
+
+-- | Returns whether the current position is at the left border of the block;
+-- this is for list content such as function definitions or let bindings. This
+-- means that col == blkCol.
+isAtBlockLeft :: IParser Bool
+isAtBlockLeft = do
+ PS { psCur = cur, psBlk = blk } <- get
+ return $ posLine cur >= posLine blk && posCol cur == posCol blk
+
+-- | Consumes characters while the predicate holds or until (and excluding)
+-- a newline, whichever comes first.
+readWhileInline :: (Char -> Bool) -> IParser ()
+readWhileInline p = do
+ (taken, rest) <- span (\c -> p c && c /= '\n') <$> gets psRest
+ modify (\ps -> ps { psCur = let Pos line col = psCur ps
+ in Pos line (col + length taken)
+ , psRest = rest })
+
+-- | Consumes exactly one newline at the current position.
+consumeNewline :: FParser ()
+consumeNewline = gets psRest >>= \case
+ '\n' : rest -> modify (\ps -> ps { psCur = Pos (posLine (psCur ps) + 1) 0
+ , psRest = rest })
+ _ -> faempty
+
+-- | Consumes exactly one character, unequal to newline, at the current position.
+satisfy :: (Char -> Bool) -> FParser Char
+satisfy p = do
+ -- traceM "entering satisfy"
+ r <- gets psRest
+ -- traceM "got rest"
+ r `seq` return ()
+ -- traceM "seqd rest"
+ -- traceM ("rest is " ++ show r)
+ case r of
+ c : rest | c /= '\n', p c -> do
+ modify (\ps -> let Pos line col = psCur ps
+ in ps { psCur = Pos line (col + 1)
+ , psRest = rest })
+ return c
+ _ -> faempty
+
+-- | Consumes exactly this character at the current position. Must not be a
+-- newline.
+char :: Char -> FParser ()
+char c = string [c]
+
+-- | Consumes exactly this string at the current position. The string must not
+-- contain a newline.
+string :: String -> FParser ()
+string s | any (== '\n') s = error "Newline in 'string' argument"
+string s = do
+ ps <- get
+ let Pos line col = psCur ps
+ if take (length s) (psRest ps) == s
+ then put (ps { psCur = Pos line (col + length s)
+ , psRest = drop (length s) (psRest ps) })
+ else faempty
+
+-- lookAhead :: FParser a -> FParser a
+-- lookAhead p = do
+-- ps <- get
+-- success <- (Just <$> p) <|>> pure Nothing
+-- put ps -- restore state, as if nothing happened
+-- case success of
+-- Nothing -> faempty
+-- Just x -> return x
+
+notFollowedBy :: FParser () -> FParser ()
+notFollowedBy p = do
+ ps <- get
+ success <- (False <$ p) <|>> pure True
+ put ps -- restore state, as if nothing happened
+ when (not success) faempty
+
+-- | Only succeeds at EOF.
+eof :: FParser ()
+eof = gets psRest >>= \case [] -> return ()
+ _ -> faempty
+
+whenM :: (Monad m, Monoid a) => m Bool -> m a -> m a
+whenM mb mx = mb >>= \b -> if b then mx else return mempty
+
+optional_ :: FAlternative f => f 'Fallible a -> f 'Infallible ()
+optional_ a = (() <$ a) <|>> pure ()