{-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE LambdaCase #-} module Parser where import Control.Applicative import Data.Char (isLower, isUpper, isLetter, isDigit, isSpace, toUpper, toLower) import Control.Monad.Chronicle import Control.Monad.Reader import Control.Monad.State.Strict import Data.Foldable (asum) import Data.These import AST -- Positions are zero-based in both dimensions data PS = PS { psRefCol :: Int , psLine :: Int , psCol :: Int , psRest :: String } deriving (Show) data Context = Context { ctxFile :: FilePath } deriving (Show) type Parser = ReaderT Context (ChronicleT [ErrMsg] (State PS)) -- Positions are zero-based in both dimensions data ErrMsg = ErrMsg { errFile :: FilePath , errLine :: Int , errCol :: Int , errMsg :: String } deriving (Show) printErrMsg :: ErrMsg -> String printErrMsg (ErrMsg fp y x s) = fp ++ ":" ++ show (y + 1) ++ ":" ++ show (x + 1) ++ ": " ++ s parse :: FilePath -> String -> These [ErrMsg] (Program ()) parse fp source = flip evalState (PS 0 0 0 source) . runChronicleT . flip runReaderT (Context fp) $ pProgram pProgram :: Parser (Program ()) pProgram = do prog <- Program <$> many pFunDef skipWhiteComment assertEOF Error return prog pFunDef :: Parser (FunDef ()) pFunDef = do skipWhiteComment assertAtCol 0 Fatal "Expected top-level definition, found indented stuff" withRefCol 0 $ do _ data Case = Uppercase | Lowercase deriving (Show) -- | Consumes an identifier (word or parenthesised operator) at the current -- position. pIdentifier0 :: Case -> Parser Name pIdentifier0 cs = pAlphaName0 cs <|> pParens0 (pSymbol0 cs) -- | Consumes a word-like name at the current position with the given case. pAlphaName0 :: Case -> Parser Name pAlphaName0 cs = do (_, s) <- readInline (\case True -> \case Just c | isInitNameChar c -> Just (Right False) _ -> Nothing False -> \case Just c | isNameContChar c -> Just (Right False) _ -> Just (Left ())) True case cs of Uppercase | isLower (head s) -> do raise Error "Unexpected uppercase word at this position, assuming typo" return (Name (toUpper (head s) : tail s)) Lowercase | isUpper (head s) -> do raise Error "Unexpected lowercase word at this position, assuming typo" return (Name (toLower (head s) : tail s)) _ -> return (Name s) where isInitNameChar, isNameContChar :: Char -> Bool isInitNameChar c = isLetter c || c == '_' isNameContChar c = isInitNameChar c || isDigit c || c == '\'' pSymbol0 :: Case -> Parser Name pSymbol0 cs = do _ -- | Parser between parens, with the opening paren at the current position. -- Enforces that all components are within the current indented block. pParens0 :: Parser a -> Parser a pParens0 p = do string "(" skipWhiteComment res <- p skipWhiteComment string ")" return res -- | Run a parser under a modified psRefCol. The current psRefCol is reinstated -- after completion of this parser. withRefCol :: Int -> Parser a -> Parser a withRefCol refcol p = do old <- gets psRefCol modify (\ps -> ps { psRefCol = refcol }) res <- p modify (\ps -> ps { psRefCol = old }) return res data Fatality = Error | Fatal deriving (Show) -- | Raise an error with the given fatality and description. raise :: Fatality -> String -> Parser () raise fat msg = do fp <- asks ctxFile ps <- get let fun = case fat of Error -> dictate . pure Fatal -> confess . pure fun (ErrMsg fp (psLine ps) (psCol ps) msg) -- | Raises an error if we're not currently at the given column. assertAtCol :: Int -> Fatality -> String -> Parser () assertAtCol col fat msg = gets psCol >>= \col' -> when (col' /= col) $ raise fat msg -- | Raises an error if psCol is not greater than psRefCol. assertWithinBlock :: Fatality -> String -> Parser () assertWithinBlock fat msg = get >>= \ps -> when (psCol ps <= psRefCol ps) $ raise fat msg -- | Raises an error if we're not currently at EOF. assertEOF :: Fatality -> Parser () assertEOF fat = gets psRest >>= \case [] -> return () _ -> raise fat "Unexpected stuff" -- | Consumes an inline token at the current position, asserting that psCol > -- psRefCol at the start. The token is defined by a pure stateful parser. -- If encountering a newline or EOF, the parser is run on this character -- ('Nothing' for EOF); if this produces a result, the result is returned; -- otherwise, the parser fails. readInline :: (s -> Maybe Char -> Maybe (Either r s)) -> s -> Parser (r, String) readInline f s0 = do ps0 <- get when (psCol ps0 <= psRefCol ps0) $ raise Fatal "Expected stuff, but found end of indented expression" let loop :: (s -> Maybe Char -> Maybe (Either r s)) -> s -> Parser (r, String) loop f' st = do ps <- get case psRest ps of [] | Just (Left res) <- f' st Nothing -> return (res, "") | otherwise -> empty '\n' : _ | Just (Left res) <- f' st (Just '\n') -> return (res, "") c : cs -> case f' st (Just c) of Nothing -> empty Just (Left res) -> return (res, "") Just (Right st') -> do put (ps { psCol = psCol ps + 1, psRest = cs }) fmap (c :) <$> loop f' st' loop f s0 -- | Consumes all whitespace and comments (including newlines). Note: this may -- leave psCol < psRefCol. skipWhiteComment :: Parser () skipWhiteComment = do inlineWhite _ <- many (inlineComment >> inlineWhite) _ <- optional lineComment (consumeNewline >> skipWhiteComment) <|> return () where -- | Consumes some inline whitespace. inlineWhite :: Parser () inlineWhite = readWhileInline isSpace -- | Consumes an inline comment including both end markers. Note: this may -- leave psCol < psRefCol. inlineComment :: Parser () inlineComment = do string "{-" let loop = do readWhileInline (`notElem` "{-") asum [string "-}" ,inlineComment >> loop ,consumeNewline >> loop] loop -- | Consumes a line comment marker and the rest of the line, excluding -- newline. lineComment :: Parser () lineComment = string "--" >> readWhileInline (const True) -- | Consumes characters while the predicate holds or until (and excluding) -- a newline, whichever comes first. readWhileInline :: (Char -> Bool) -> Parser () readWhileInline p = do (taken, rest) <- span (\c -> p c && c /= '\n') <$> gets psRest modify (\ps -> ps { psCol = psCol ps + length taken , psRest = rest }) -- | Consumes exactly one newline at the current position. consumeNewline :: Parser () consumeNewline = gets psRest >>= \case '\n' : rest -> modify (\ps -> ps { psLine = psLine ps + 1 , psCol = 0 , psRest = rest }) _ -> empty -- | Consumes exactly this string at the current position. The string must not -- contain a newline. string :: String -> Parser () string s | any (== '\n') s = error "Newline in 'string' argument" string s = do ps <- get if take (length s) (psRest ps) == s then put (ps { psCol = psCol ps + length s , psRest = drop (length s) (psRest ps) }) else empty