From aea2a97d0aace7f7466e55bf8fed7e47497d2dc7 Mon Sep 17 00:00:00 2001 From: Tom Smeding Date: Sat, 15 Apr 2023 22:04:36 +0200 Subject: Some parser work --- Parser.hs | 184 ++++++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 119 insertions(+), 65 deletions(-) (limited to 'Parser.hs') diff --git a/Parser.hs b/Parser.hs index 4a37134..71a0f69 100644 --- a/Parser.hs +++ b/Parser.hs @@ -10,7 +10,6 @@ import Control.Monad.Reader import Control.Monad.State.Lazy import Data.Bifunctor (first) import Data.Char -import Data.Foldable (asum) import Data.These import Data.Tuple (swap) @@ -19,15 +18,22 @@ import Debug.Trace import AST --- Positions are zero-based in both dimensions +-- Positions are zero-based in both dimensions. +-- See 'isInsideBlock' and 'isAtBlockLeft' for the two relevant "inside the +-- block" conditions. data PS = PS - { psRefCol :: Int - , psLine :: Int - , psCol :: Int - , psRest :: String } + { psBlkLine :: Int -- ^ Start line of current layout block + , psBlkCol :: Int -- ^ Start column of current layout block + , psLine :: Int -- ^ Current line + , psCol :: Int -- ^ Current column + , psRest :: String -- ^ Rest of the input + } deriving (Show) -data Context = Context { ctxFile :: FilePath } +data Context = Context + { ctxFile :: FilePath + , ctxStack :: [String] -- ^ Stack of syntax scopes, for error reporting + } deriving (Show) -- ReaderT Context (ChronicleT [ErrMsg] (State PS) a) @@ -89,16 +95,19 @@ instance MonadChronicle [ErrMsg] Parser where -- Positions are zero-based in both dimensions data ErrMsg = ErrMsg { errFile :: FilePath + , errStk :: [String] , errLine :: Int , errCol :: Int , errMsg :: String } deriving (Show) printErrMsg :: ErrMsg -> String -printErrMsg (ErrMsg fp y x s) = fp ++ ":" ++ show (y + 1) ++ ":" ++ show (x + 1) ++ ": " ++ s +printErrMsg (ErrMsg fp stk y x s) = + unlines (map (\descr -> "In " ++ descr ++ ":") (reverse stk)) ++ + fp ++ ":" ++ show (y + 1) ++ ":" ++ show (x + 1) ++ ": " ++ s parse :: FilePath -> String -> These [ErrMsg] (Program ()) -parse fp source = fmap snd $ runParser pProgram (Context fp) (PS 0 0 0 source) +parse fp source = fmap snd $ runParser pProgram (Context fp []) (PS 0 0 0 0 source) pProgram :: Parser (Program ()) pProgram = do @@ -111,15 +120,39 @@ pFunDef :: Parser (FunDef ()) pFunDef = do skipWhiteComment mtypesig <- optional pStandaloneTypesig0 + let mname = fst <$> mtypesig + mtype = snd <$> mtypesig + (clauses, name) <- someClauses mname + return (FunDef name mtype clauses) + where + someClauses :: Maybe Name -> Parser ([FunEq ()], Name) + someClauses Nothing = do + clause@(FunEq name _ _) <- pFunEq Nothing + (,name) . (clause:) <$> many (pFunEq (Just name)) + someClauses (Just name) = (,name) <$> some (pFunEq (Just name)) + +-- | Given the name of the type signature, if any. +pFunEq :: Maybe Name -> Parser (FunEq ()) +pFunEq mCheckName = do + skipWhiteComment + assertAtBlockLeft Fatal "Expected function clause, found indented stuff" + + name <- pIdentifier0 Lowercase + case mCheckName of + Just checkName | name /= checkName -> + raise Fatal "Name of function clause does not correspond with type signature" + _ -> return () + + pats <- many pPattern _ pStandaloneTypesig0 :: Parser (Name, Type) pStandaloneTypesig0 = do - assertAtCol 0 Fatal "Expected top-level definition, found indented stuff" - withRefCol 0 $ do - name <- pIdentifier0 Lowercase - inlineWhite - string "::" + assertAtBlockLeft Fatal "Expected top-level definition, found indented stuff" + name@(Name namestr) <- pIdentifier0 Lowercase + inlineWhite + string "::" + pushContext ("type signature for '" ++ namestr ++ "'") $ do ty <- pType return (name, ty) @@ -236,23 +269,25 @@ pParens0 :: Parser a -> Parser a pParens0 p = do string "(" skipWhiteComment - assertWithinBlock Error "Unexpected dedent after opening parenthesis" + assertInsideBlock Error "Unexpected dedent after opening parenthesis" res <- p - assertWithinBlock Error "Unexpected dedent in parenthesised expression" + assertInsideBlock Error "Unexpected dedent in parenthesised expression" skipWhiteComment - assertWithinBlock Error "Unexpected dedent in parenthesised expression" + assertInsideBlock Error "Unexpected dedent in parenthesised expression" string ")" return res --- | Run a parser under a modified psRefCol. The current psRefCol is reinstated --- after completion of this parser. -withRefCol :: Int -> Parser a -> Parser a -withRefCol refcol p = do - old <- gets psRefCol - modify (\ps -> ps { psRefCol = refcol }) +-- | Start a new layout block at the current position. The old layout block is +-- restored after completion of this subparser. +startLayoutBlock :: Parser a -> Parser a +startLayoutBlock p = do + ps0 <- get + put (ps0 { psBlkLine = psLine ps0 + , psBlkCol = psCol ps0 }) res <- p - modify (\ps -> ps { psRefCol = old }) + modify (\ps -> ps { psBlkLine = psBlkLine ps0 + , psBlkCol = psBlkCol ps0 }) return res data Fatality = Error | Fatal @@ -261,39 +296,25 @@ data Fatality = Error | Fatal -- | Raise an error with the given fatality and description. raise :: Fatality -> String -> Parser () raise fat msg = do - fp <- asks ctxFile - ps <- get + Context { ctxFile = fp , ctxStack = stk } <- ask + PS { psLine = line, psCol = col } <- get let fun = case fat of Error -> dictate . pure Fatal -> confess . pure - fun (ErrMsg fp (psLine ps) (psCol ps) msg) - --- | Raises an error if we're not currently at the given column. -assertAtCol :: Int -> Fatality -> String -> Parser () -assertAtCol col fat msg = gets psCol >>= \col' -> - when (col' /= col) $ raise fat msg + fun (ErrMsg fp stk line col msg) --- | Raises an error if psCol is not greater than psRefCol. -assertWithinBlock :: Fatality -> String -> Parser () -assertWithinBlock fat msg = get >>= \ps -> - when (psCol ps <= psRefCol ps) $ raise fat msg +-- | Registers a scope description on the stack for error reporting. +pushContext :: String -> Parser a -> Parser a +pushContext descr = local (\c -> c { ctxStack = descr : ctxStack c }) --- | Raises an error if we're not currently at EOF. -assertEOF :: Fatality -> Parser () -assertEOF fat = gets psRest >>= \case - [] -> return () - _ -> raise fat "Unexpected stuff" - --- | Consumes an inline token at the current position, asserting that psCol > --- psRefCol at the start. The token is defined by a pure stateful parser. --- If encountering a newline or EOF, the parser is run on this character --- ('Nothing' for EOF); if this produces a result, the result is returned; --- otherwise, the parser fails. +-- | Consumes an inline token at the current position, asserting that we are +-- within the current block at the start. The token is defined by a pure +-- stateful parser. If encountering a newline or EOF, the parser is run on this +-- character ('Nothing' for EOF); if this produces a result, the result is +-- returned; otherwise, the parser fails. The newline is not consumed. readInline :: (s -> Maybe Char -> Maybe (Either r s)) -> s -> Parser (r, String) readInline f s0 = do - ps0 <- get - when (psCol ps0 <= psRefCol ps0) $ - raise Fatal "Expected stuff, but found end of indented expression" + assertInsideBlock Fatal "Expected stuff, but found end of indented expression" let loop :: (s -> Maybe Char -> Maybe (Either r s)) -> s -> Parser (r, String) loop f' st = do ps <- get @@ -310,37 +331,35 @@ readInline f s0 = do loop f s0 -- | Consumes all whitespace and comments (including newlines), but only if --- this then leaves psCol > psRefCol. If not, this fails. +-- this then leaves the parser inside the current block. If not, this fails. inlineWhite :: Parser () inlineWhite = do skipWhiteComment - ps <- get - TODO this check (and other similar checks) need to allow equality if the _line_ is also the reference starting line - when (psCol ps <= psRefCol ps) empty + whenM (not <$> isInsideBlock) empty -- | Consumes all whitespace and comments (including newlines). Note: this may --- leave psCol <= psRefCol. +-- end outside the current block. skipWhiteComment :: Parser () skipWhiteComment = do inlineSpaces - _ <- many (inlineComment >> inlineSpaces) - _ <- optional lineComment - (consumeNewline >> skipWhiteComment) <|> return () + _ <- many (blockComment >> inlineSpaces) + optional_ lineComment + optional_ (consumeNewline >> skipWhiteComment) --- | Consumes some inline whitespace. +-- | Consumes some inline whitespace. Stops before newlines. inlineSpaces :: Parser () inlineSpaces = readWhileInline isSpace --- | Consumes an inline comment including both end markers. Note: this may --- leave psCol < psRefCol. -inlineComment :: Parser () -inlineComment = do +-- | Consumes an delimited comment including both end markers. Note: this may +-- end outside the current block. +blockComment :: Parser () +blockComment = do string "{-" let loop = do - readWhileInline (`notElem` "{-") + readWhileInline (`notElem` "{-") -- "-}" also starts with '-' asum [string "-}" ,eof >> raise Error "Unfinished {- -} comment at end of file" - ,inlineComment >> loop + ,blockComment >> loop ,consumeNewline >> loop] loop @@ -349,6 +368,35 @@ inlineComment = do lineComment :: Parser () lineComment = string "--" >> readWhileInline (const True) +-- | Raises an error if we're not currently at the given column. +assertAtBlockLeft :: Fatality -> String -> Parser () +assertAtBlockLeft fat msg = whenM (not <$> isAtBlockLeft) $ raise fat msg + +-- | Raises an error if psCol is not greater than psRefCol. +assertInsideBlock :: Fatality -> String -> Parser () +assertInsideBlock fat msg = whenM (not <$> isInsideBlock) $ raise fat msg + +-- | Raises an error if we're not currently at EOF. +assertEOF :: Fatality -> Parser () +assertEOF fat = gets psRest >>= \case + [] -> return () + _ -> raise fat "Unexpected stuff" + +-- | Returns whether the current position is _within_ the current block, for +-- soft-wrapping content. This means that col > blkCol. +isInsideBlock :: Parser Bool +isInsideBlock = do + ps <- get + return $ psLine ps >= psBlkLine ps && psCol ps > psBlkCol ps + +-- | Returns whether the current position is at the left border of the block; +-- this is for list content such as function definitions or let bindings. This +-- means that col == blkCol. +isAtBlockLeft :: Parser Bool +isAtBlockLeft = do + ps <- get + return $ psLine ps >= psBlkLine ps && psCol ps == psBlkCol ps + -- | Consumes characters while the predicate holds or until (and excluding) -- a newline, whichever comes first. readWhileInline :: (Char -> Bool) -> Parser () @@ -396,3 +444,9 @@ string s = do eof :: Parser () eof = gets psRest >>= \case [] -> return () _ -> empty + +whenM :: (Monad m, Monoid a) => m Bool -> m a -> m a +whenM mb mx = mb >>= \b -> if b then mx else return mempty + +optional_ :: Alternative f => f a -> f () +optional_ a = (() <$ a) <|> pure () -- cgit v1.2.3-70-g09d2