From 7ebf27051c61f69d5c12a9350273df4ec20e3d86 Mon Sep 17 00:00:00 2001 From: Tom Smeding Date: Mon, 22 Nov 2021 22:47:56 +0100 Subject: Work on parser --- Parser.hs | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 99 insertions(+), 12 deletions(-) (limited to 'Parser.hs') diff --git a/Parser.hs b/Parser.hs index 420e2b8..aedd557 100644 --- a/Parser.hs +++ b/Parser.hs @@ -3,7 +3,7 @@ module Parser where import Control.Applicative -import Data.Char (isSpace) +import Data.Char (isLower, isUpper, isLetter, isDigit, isSpace, toUpper, toLower) import Control.Monad.Chronicle import Control.Monad.Reader import Control.Monad.State.Strict @@ -51,13 +51,71 @@ pProgram = do return prog pFunDef :: Parser (FunDef ()) -pFunDef = _ +pFunDef = do + skipWhiteComment + assertAtCol 0 Fatal "Expected top-level definition, found indented stuff" + withRefCol 0 $ do + _ + +data Case = Uppercase | Lowercase + deriving (Show) + +-- | Consumes an identifier (word or parenthesised operator) at the current +-- position. +pIdentifier0 :: Case -> Parser Name +pIdentifier0 cs = pAlphaName0 cs <|> pParens0 (pSymbol0 cs) + +-- | Consumes a word-like name at the current position with the given case. +pAlphaName0 :: Case -> Parser Name +pAlphaName0 cs = do + (_, s) <- readInline (\case True -> \case Just c | isInitNameChar c -> Just (Right False) + _ -> Nothing + False -> \case Just c | isNameContChar c -> Just (Right False) + _ -> Just (Left ())) + True + case cs of + Uppercase | isLower (head s) -> do + raise Error "Unexpected uppercase word at this position, assuming typo" + return (Name (toUpper (head s) : tail s)) + Lowercase | isUpper (head s) -> do + raise Error "Unexpected lowercase word at this position, assuming typo" + return (Name (toLower (head s) : tail s)) + _ -> return (Name s) + where + isInitNameChar, isNameContChar :: Char -> Bool + isInitNameChar c = isLetter c || c == '_' + isNameContChar c = isInitNameChar c || isDigit c || c == '\'' + +pSymbol0 :: Case -> Parser Name +pSymbol0 cs = do + _ + +-- | Parser between parens, with the opening paren at the current position. +-- Enforces that all components are within the current indented block. +pParens0 :: Parser a -> Parser a +pParens0 p = do + string "(" + skipWhiteComment + res <- p + skipWhiteComment + string ")" + return res +-- | Run a parser under a modified psRefCol. The current psRefCol is reinstated +-- after completion of this parser. +withRefCol :: Int -> Parser a -> Parser a +withRefCol refcol p = do + old <- gets psRefCol + modify (\ps -> ps { psRefCol = refcol }) + res <- p + modify (\ps -> ps { psRefCol = old }) + return res data Fatality = Error | Fatal deriving (Show) +-- | Raise an error with the given fatality and description. raise :: Fatality -> String -> Parser () raise fat msg = do fp <- asks ctxFile @@ -67,30 +125,49 @@ raise fat msg = do Fatal -> confess . pure fun (ErrMsg fp (psLine ps) (psCol ps) msg) +-- | Raises an error if we're not currently at the given column. +assertAtCol :: Int -> Fatality -> String -> Parser () +assertAtCol col fat msg = gets psCol >>= \col' -> + when (col' /= col) $ raise fat msg + +-- | Raises an error if psCol is not greater than psRefCol. +assertWithinBlock :: Fatality -> String -> Parser () +assertWithinBlock fat msg = get >>= \ps -> + when (psCol ps <= psRefCol ps) $ raise fat msg + +-- | Raises an error if we're not currently at EOF. assertEOF :: Fatality -> Parser () assertEOF fat = gets psRest >>= \case [] -> return () _ -> raise fat "Unexpected stuff" -data ReadResult a = Token a | Truncated a - deriving (Show, Functor) - -readInline :: (s -> Char -> Maybe s) -> s -> Parser (ReadResult String) +-- | Consumes an inline token at the current position, asserting that psCol > +-- psRefCol at the start. The token is defined by a pure stateful parser. +-- If encountering a newline or EOF, the parser is run on this character +-- ('Nothing' for EOF); if this produces a result, the result is returned; +-- otherwise, the parser fails. +readInline :: (s -> Maybe Char -> Maybe (Either r s)) -> s -> Parser (r, String) readInline f s0 = do ps0 <- get when (psCol ps0 <= psRefCol ps0) $ raise Fatal "Expected stuff, but found end of indented expression" - let loop :: (s -> Char -> Maybe s) -> s -> Parser (ReadResult String) + let loop :: (s -> Maybe Char -> Maybe (Either r s)) -> s -> Parser (r, String) loop f' st = do ps <- get case psRest ps of - c : cs | Just st' <- f' st c -> do - put (ps { psCol = psCol ps + 1, psRest = cs }) - fmap (c :) <$> loop f' st' - | otherwise -> return (Token "") - [] -> return (Truncated "") + [] | Just (Left res) <- f' st Nothing -> return (res, "") + | otherwise -> empty + '\n' : _ | Just (Left res) <- f' st (Just '\n') -> return (res, "") + c : cs -> case f' st (Just c) of + Nothing -> empty + Just (Left res) -> return (res, "") + Just (Right st') -> do + put (ps { psCol = psCol ps + 1, psRest = cs }) + fmap (c :) <$> loop f' st' loop f s0 +-- | Consumes all whitespace and comments (including newlines). Note: this may +-- leave psCol < psRefCol. skipWhiteComment :: Parser () skipWhiteComment = do inlineWhite @@ -98,9 +175,12 @@ skipWhiteComment = do _ <- optional lineComment (consumeNewline >> skipWhiteComment) <|> return () where + -- | Consumes some inline whitespace. inlineWhite :: Parser () inlineWhite = readWhileInline isSpace + -- | Consumes an inline comment including both end markers. Note: this may + -- leave psCol < psRefCol. inlineComment :: Parser () inlineComment = do string "{-" @@ -111,15 +191,20 @@ skipWhiteComment = do ,consumeNewline >> loop] loop + -- | Consumes a line comment marker and the rest of the line, excluding + -- newline. lineComment :: Parser () lineComment = string "--" >> readWhileInline (const True) + -- | Consumes characters while the predicate holds or until (and excluding) + -- a newline, whichever comes first. readWhileInline :: (Char -> Bool) -> Parser () readWhileInline p = do (taken, rest) <- span (\c -> p c && c /= '\n') <$> gets psRest modify (\ps -> ps { psCol = psCol ps + length taken , psRest = rest }) +-- | Consumes exactly one newline at the current position. consumeNewline :: Parser () consumeNewline = gets psRest >>= \case '\n' : rest -> modify (\ps -> ps { psLine = psLine ps + 1 @@ -127,6 +212,8 @@ consumeNewline = gets psRest >>= \case , psRest = rest }) _ -> empty +-- | Consumes exactly this string at the current position. The string must not +-- contain a newline. string :: String -> Parser () string s | any (== '\n') s = error "Newline in 'string' argument" string s = do -- cgit v1.2.3-70-g09d2