From 720372e3deac26a064fb1c711db2ccf54e655fab Mon Sep 17 00:00:00 2001 From: Tom Smeding Date: Sat, 17 Feb 2024 12:04:53 +0100 Subject: Lots of parser work --- src/AST.hs | 1 + src/Main.hs | 5 +- src/Parser.hs | 207 +++++++++++++++++++++++++++++++++++----------------------- 3 files changed, 128 insertions(+), 85 deletions(-) (limited to 'src') diff --git a/src/AST.hs b/src/AST.hs index 47652b6..2048e87 100644 --- a/src/AST.hs +++ b/src/AST.hs @@ -58,4 +58,5 @@ data Literal = LInt Integer | LFloat Rational | LChar Char | LString String deriving (Show) data Operator = OAdd | OSub | OMul | ODiv | OMod | OEqu | OPow + | OCons deriving (Show) diff --git a/src/Main.hs b/src/Main.hs index c9de0cc..750f749 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -2,6 +2,7 @@ {-# LANGUAGE TupleSections #-} module Main where +import Data.List (intersperse) import System.Environment (getArgs) import System.Exit (die, exitFailure) @@ -17,10 +18,10 @@ main = do prog <- case parse fname source of This errs -> do - mapM_ (putStrLn . printErrMsg) errs + sequence_ $ intersperse (putStrLn "") (map (putStrLn . printErrMsg) errs) exitFailure These errs res -> do - mapM_ (putStrLn . printErrMsg) errs + sequence_ $ intersperse (putStrLn "") (map (putStrLn . printErrMsg) errs) return res That res -> return res diff --git a/src/Parser.hs b/src/Parser.hs index 0f0bd0c..bef0c39 100644 --- a/src/Parser.hs +++ b/src/Parser.hs @@ -23,14 +23,12 @@ import Control.Monad import Control.Monad.Chronicle import Control.Monad.Reader import Control.Monad.State.Lazy -import Data.Bifunctor (first) import Data.Char import Data.Either (partitionEithers) import Data.List.NonEmpty (NonEmpty(..)) import Data.These -import Data.Tuple (swap) --- import Debug.Trace +import Debug.Trace import AST @@ -53,62 +51,79 @@ data Context = Context } deriving (Show) --- ReaderT Context (ChronicleT [ErrMsg] (State PS) a) --- Context -> ChronicleT [ErrMsg] (State PS) a --- Context -> State PS (These [ErrMsg] a) --- Context -> PS -> Identity (These [ErrMsg] a, PS) --- Context -> PS -> (These [ErrMsg] a, PS) --- whereas I want: --- Context -> PS -> These [ErrMsg] (a, PS) --- which is not any transformer stack, but a new monad. -newtype Parser a = Parser { runParser :: Context -> PS -> These [ErrMsg] (PS, a) } +newtype Parser a = Parser + { runParser + :: forall r. + Context + -> PS + -> (PS -> [ErrMsg] -> a -> r) -- ^ OK: some diagnostics, but parsing succeeded + -> ([ErrMsg] -> r) -- ^ Fatal: error that prevented parsing from proceeding + -> r -- ^ Backtrack: alternative was exhausted without success + -> r } instance Functor Parser where - fmap f (Parser g) = Parser (\ctx ps -> fmap (fmap f) (g ctx ps)) + fmap f (Parser g) = Parser (\ctx ps kok kfat kbt -> + g ctx ps (\ps' errs x -> kok ps' errs (f x)) kfat kbt) instance Applicative Parser where - pure x = Parser (\_ ps -> That (ps, x)) + pure x = Parser (\_ ps kok _ _ -> kok ps [] x) (<*>) = ap instance Monad Parser where - Parser g >>= f = Parser $ \ctx ps -> - case g ctx ps of - This errs -> This errs - That (ps', x) -> runParser (f x) ctx ps' - These errs (ps', x) -> case runParser (f x) ctx ps' of - This errs' -> This (errs <> errs') - That res -> These errs res - These errs' res -> These (errs <> errs') res + Parser g >>= f = Parser $ \ctx ps kok kfat kbt -> + g ctx ps + (\ps1 errs x -> + x `seq` + runParser (f x) ctx ps1 + (\ps2 errs' y -> kok ps2 (errs <> errs') y) + (\errs' -> kfat (errs <> errs')) + kbt) + (\errs -> kfat errs) + kbt instance Alternative Parser where - empty = Parser (\_ _ -> This []) - Parser f <|> Parser g = Parser $ \ctx ps -> - case f ctx ps of - This _ -> g ctx ps - success -> success + empty = Parser (\_ _ _ _ kbt -> kbt) + Parser f <|> Parser g = Parser $ \ctx ps kok kfat kbt -> + f ctx ps kok kfat (g ctx ps kok kfat kbt) instance MonadState PS Parser where - state f = Parser $ \_ ps -> That (swap (f ps)) + state f = Parser $ \_ ps kok _ _ -> + let (x, ps') = f ps + in kok ps' [] x instance MonadReader Context Parser where - reader f = Parser $ \ctx ps -> That (ps, f ctx) - local f (Parser g) = Parser (g . f) + reader f = Parser $ \ctx ps kok _ _ -> kok ps [] (f ctx) + local f (Parser g) = Parser (\ctx -> g (f ctx)) instance MonadChronicle [ErrMsg] Parser where - dictate errs = Parser (\_ ps -> These errs (ps, ())) - confess errs = Parser (\_ _ -> This errs) - memento (Parser f) = Parser (\ctx ps -> case f ctx ps of - This errs -> That (ps, Left errs) - That res -> That (Right <$> res) - These errs res -> These errs (Right <$> res)) - absolve def (Parser f) = Parser (\ctx ps -> case f ctx ps of - This _ -> That (ps, def) - success -> success) - condemn (Parser f) = Parser (\ctx ps -> case f ctx ps of - These errs _ -> This errs - res -> res) - retcon g (Parser f) = Parser (\ctx ps -> first g (f ctx ps)) - chronicle th = Parser (\_ ps -> (ps,) <$> th) + dictate errs = Parser $ \_ ps kok _ _ -> kok ps errs () + confess errs = Parser $ \_ _ _ kfat _ -> kfat errs + memento (Parser f) = Parser $ \ctx ps kok _ kbt -> + f ctx ps + (\ps' errs x -> kok ps' errs (Right x)) + (\errs -> kok ps [] (Left errs)) + kbt + absolve def (Parser f) = Parser $ \ctx ps kok _ _ -> + f ctx ps + kok + (\_ -> kok ps [] def) + (kok ps [] def) + condemn (Parser f) = Parser $ \ctx ps kok kfat kbt -> + f ctx ps + (\ps' errs x -> case errs of + [] -> kok ps' [] x + _ -> kfat errs) + kfat + kbt + retcon g (Parser f) = Parser $ \ctx ps kok kfat kbt -> + f ctx ps + (\ps' errs x -> kok ps' (g errs) x) + (\errs -> kfat (g errs)) + kbt + chronicle th = case th of + This errs -> Parser (\_ _ _ kfat _ -> kfat errs) + That res -> Parser (\_ ps kok _ _ -> kok ps [] res) + These errs res -> Parser (\_ ps kok _ _ -> kok ps errs res) -- Positions are zero-based in both dimensions data ErrMsg = ErrMsg @@ -125,7 +140,13 @@ printErrMsg (ErrMsg fp stk y x s) = fp ++ ":" ++ show (y + 1) ++ ":" ++ show (x + 1) ++ ": " ++ s parse :: FilePath -> String -> These [ErrMsg] (Program ()) -parse fp source = fmap snd $ runParser pProgram (Context fp []) (PS 0 0 0 0 source) +parse fp source = + runParser pProgram (Context fp []) (PS 0 0 0 0 source) + (\_ errs res -> case errs of + [] -> That res + _ -> These errs res) + (\errs -> This errs) + (This [ErrMsg fp [] 0 0 "Parse error, no grammar alternatives match your source"]) pProgram :: Parser (Program ()) pProgram = do @@ -159,43 +180,48 @@ pDataDef0 = do rest <- pDatacons "|" <|> return [] return ((name, fields) : rest) +data FunEqContext + = FirstLine + | TypeSig Name + | Continue Name + deriving (Show) + pFunDef0 :: Parser (FunDef ()) pFunDef0 = do mtypesig <- optional pStandaloneTypesig0 let mname = fst <$> mtypesig mtype = snd <$> mtypesig - (clauses, name) <- someClauses mname - return (FunDef name mtype clauses) - where - someClauses :: Maybe Name -> Parser (NonEmpty (FunEq ()), Name) - someClauses Nothing = do - clause@(FunEq name _ _) <- pFunEq Nothing - (,name) . (clause :|) <$> many (pFunEq (Just name)) - someClauses (Just name) = (,name) <$> someNE (pFunEq (Just name)) + clause@(FunEq name _ _) <- pFunEq (maybe FirstLine TypeSig mname) + clauses <- many (pFunEq (Continue name)) + return (FunDef name mtype (clause :| clauses)) -- | Given the name of the type signature, if any. -pFunEq :: Maybe Name -> Parser (FunEq ()) -pFunEq mCheckName = do +pFunEq :: FunEqContext -> Parser (FunEq ()) +pFunEq fectx = do skipWhiteComment - assertAtBlockLeft Fatal "Expected function clause, found indented stuff" - - name <- pIdentifier0 AtLeft Lowercase - case mCheckName of - Just checkName | name /= checkName -> - raise Fatal "Name of function clause does not correspond with type signature" - _ -> return () - - pats <- many (pPattern 11) - rhs <- pRHS "=" - return (FunEq name pats rhs) + pushLocatedContext "funeq" $ do + assertAtBlockLeft Fatal "Expected function clause, found indented stuff" + + name <- pIdentifier0 AtLeft Lowercase + case fectx of + FirstLine -> return () + TypeSig checkName -> + when (name /= checkName) $ + raise Fatal "Name of function clause does not correspond with type signature" + Continue checkName -> + guard (name == checkName) + + pats <- many (pPattern 11) + rhs <- pRHS "=" + return (FunEq name pats rhs) -- | Pass "=" for function definitions and "->" for case clauses. pRHS :: String -> Parser (RHS ()) pRHS sepsym = do -- TODO: parse guards inlineWhite - pKeySym sepsym - Plain <$> pExpr + pKeySym sepsym <|> raise Error ("Expected " ++ show sepsym) + Plain <$> (pExpr <|> (raise Error "Expected expression" >> return (ETup () []))) pPattern :: Int -> Parser (Pattern ()) pPattern d = inlineWhite >> pPattern0 d @@ -248,10 +274,11 @@ pExpr = do -- expression atom: application of basics -- expression parser: op -- around: let, case, if - asum [pELet0 - ,pECase0 - ,pEIf0 - ,pExprOpExpr0 0] + pushLocatedContext "expression" $ do + asum [pELet0 + ,pECase0 + ,pEIf0 + ,pExprOpExpr0 0] pELet0 :: Parser (Expr ()) pELet0 = do @@ -422,6 +449,7 @@ pInfixOp :: Parser ParsedOperator pInfixOp = do inlineWhite asum [PaOp OEqu 4 AssocNone <$ pKeySym "==" + ,PaOp OCons 5 AssocRight <$ pKeySym ":" ,PaOp OAdd 6 AssocLeft <$ pKeySym "+" ,PaOp OSub 6 AssocLeft <$ pKeySym "-" ,PaOp OMul 7 AssocLeft <$ pKeySym "*" @@ -493,6 +521,7 @@ pTypeAtom = pTypeParens <|> pTypeList <|> pTypeName pKeyword :: String -> Parser () pKeyword s = do string s + traceM $ "pKeyword: parsed " ++ show s notFollowedBy (() <$ satisfy isNameContChar) -- | Parse the given symbol-like keyword, ensuring that it is the entire symbol. @@ -554,7 +583,7 @@ pAlphaName0 bpos cs = do | otherwise -> return (s, id) Don'tCare | isLower (head s) -> return (s, (Lowercase,)) - | otherwise -> return (s, (Lowercase,)) + | otherwise -> return (s, (Uppercase,)) guard (name `notElem` ["case", "class", "data", "default", "deriving", "do", "else" ,"foreign", "if", "import", "in", "infix", "infixl" ,"infixr", "instance", "let", "module", "newtype", "of" @@ -636,19 +665,32 @@ raise :: FatalCtx fatal a => Fatality fatal -> String -> Parser a raise fat msg = do Context { ctxFile = fp , ctxStack = stk } <- ask PS { psLine = line, psCol = col } <- get - let fun = case fat of - Error -> dictate . pure - Fatal -> confess . pure - fun (ErrMsg fp stk line col msg) + let err = ErrMsg fp stk line col msg + case fat of + Error -> dictate (pure err) + Fatal -> confess (pure err) raise' :: Fatality fatal -> String -> Parser () raise' Error = raise Error raise' Fatal = raise Fatal +describeLocation :: Parser String +describeLocation = do + fp <- asks ctxFile + ps <- get + return $ fp ++ ":" ++ show (psLine ps + 1) ++ ":" ++ show (psCol ps + 1) + -- | Registers a scope description on the stack for error reporting. pushContext :: String -> Parser a -> Parser a pushContext descr = local (\c -> c { ctxStack = descr : ctxStack c }) +-- | Registers a scope description on the stack for error reporting, suffixed +-- with the current parsing location. +pushLocatedContext :: String -> Parser a -> Parser a +pushLocatedContext descr p = do + loc <- describeLocation + pushContext (descr ++ " at " ++ loc) p + data BlockPos = AtLeft | InBlock deriving (Show) @@ -795,12 +837,14 @@ string s = do , psRest = drop (length s) (psRest ps) }) else empty -lookAhead :: Parser () -> Parser () +lookAhead :: Parser a -> Parser a lookAhead p = do ps <- get - success <- absolve False (True <$ p) + success <- absolve Nothing (Just <$> p) put ps -- restore state, as if nothing happened - when (not success) empty + case success of + Nothing -> empty + Just x -> return x notFollowedBy :: Parser () -> Parser () notFollowedBy p = do @@ -819,6 +863,3 @@ whenM mb mx = mb >>= \b -> if b then mx else return mempty optional_ :: Alternative f => f a -> f () optional_ a = (() <$ a) <|> pure () - -someNE :: Alternative f => f a -> f (NonEmpty a) -someNE a = (:|) <$> a <*> many a -- cgit v1.2.3-70-g09d2