From 3faca807fe96f2cefa50023fe373d8bcf1430121 Mon Sep 17 00:00:00 2001 From: Tom Smeding Date: Sat, 17 Feb 2024 09:22:49 +0100 Subject: Move to src/, working HLS in examples/ --- src/Parser.hs | 824 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 824 insertions(+) create mode 100644 src/Parser.hs (limited to 'src/Parser.hs') diff --git a/src/Parser.hs b/src/Parser.hs new file mode 100644 index 0000000..0f0bd0c --- /dev/null +++ b/src/Parser.hs @@ -0,0 +1,824 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE StandaloneDeriving #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE TypeOperators #-} +-- I don't want a warning for 'head' and 'tail' in this file. But I also don't +-- want GHCs before 9.8 to complain that they don't know the x-partial warning. +{-# OPTIONS_GHC -Wno-unrecognised-warning-flags -Wno-x-partial #-} +module Parser ( + parse, + printErrMsg, + -- * Re-exports + These(..), +) where + +import Control.Applicative +import Control.Monad +import Control.Monad.Chronicle +import Control.Monad.Reader +import Control.Monad.State.Lazy +import Data.Bifunctor (first) +import Data.Char +import Data.Either (partitionEithers) +import Data.List.NonEmpty (NonEmpty(..)) +import Data.These +import Data.Tuple (swap) + +-- import Debug.Trace + +import AST + + +-- Positions are zero-based in both dimensions. +-- See 'isInsideBlock' and 'isAtBlockLeft' for the two relevant "inside the +-- block" conditions. +data PS = PS + { psBlkLine :: Int -- ^ Start line of current layout block + , psBlkCol :: Int -- ^ Start column of current layout block + , psLine :: Int -- ^ Current line + , psCol :: Int -- ^ Current column + , psRest :: String -- ^ Rest of the input + } + deriving (Show) + +data Context = Context + { ctxFile :: FilePath + , ctxStack :: [String] -- ^ Stack of syntax scopes, for error reporting + } + deriving (Show) + +-- ReaderT Context (ChronicleT [ErrMsg] (State PS) a) +-- Context -> ChronicleT [ErrMsg] (State PS) a +-- Context -> State PS (These [ErrMsg] a) +-- Context -> PS -> Identity (These [ErrMsg] a, PS) +-- Context -> PS -> (These [ErrMsg] a, PS) +-- whereas I want: +-- Context -> PS -> These [ErrMsg] (a, PS) +-- which is not any transformer stack, but a new monad. +newtype Parser a = Parser { runParser :: Context -> PS -> These [ErrMsg] (PS, a) } + +instance Functor Parser where + fmap f (Parser g) = Parser (\ctx ps -> fmap (fmap f) (g ctx ps)) + +instance Applicative Parser where + pure x = Parser (\_ ps -> That (ps, x)) + (<*>) = ap + +instance Monad Parser where + Parser g >>= f = Parser $ \ctx ps -> + case g ctx ps of + This errs -> This errs + That (ps', x) -> runParser (f x) ctx ps' + These errs (ps', x) -> case runParser (f x) ctx ps' of + This errs' -> This (errs <> errs') + That res -> These errs res + These errs' res -> These (errs <> errs') res + +instance Alternative Parser where + empty = Parser (\_ _ -> This []) + Parser f <|> Parser g = Parser $ \ctx ps -> + case f ctx ps of + This _ -> g ctx ps + success -> success + +instance MonadState PS Parser where + state f = Parser $ \_ ps -> That (swap (f ps)) + +instance MonadReader Context Parser where + reader f = Parser $ \ctx ps -> That (ps, f ctx) + local f (Parser g) = Parser (g . f) + +instance MonadChronicle [ErrMsg] Parser where + dictate errs = Parser (\_ ps -> These errs (ps, ())) + confess errs = Parser (\_ _ -> This errs) + memento (Parser f) = Parser (\ctx ps -> case f ctx ps of + This errs -> That (ps, Left errs) + That res -> That (Right <$> res) + These errs res -> These errs (Right <$> res)) + absolve def (Parser f) = Parser (\ctx ps -> case f ctx ps of + This _ -> That (ps, def) + success -> success) + condemn (Parser f) = Parser (\ctx ps -> case f ctx ps of + These errs _ -> This errs + res -> res) + retcon g (Parser f) = Parser (\ctx ps -> first g (f ctx ps)) + chronicle th = Parser (\_ ps -> (ps,) <$> th) + +-- Positions are zero-based in both dimensions +data ErrMsg = ErrMsg + { errFile :: FilePath + , errStk :: [String] + , errLine :: Int + , errCol :: Int + , errMsg :: String } + deriving (Show) + +printErrMsg :: ErrMsg -> String +printErrMsg (ErrMsg fp stk y x s) = + unlines (map (\descr -> "In " ++ descr ++ ":") (reverse stk)) ++ + fp ++ ":" ++ show (y + 1) ++ ":" ++ show (x + 1) ++ ": " ++ s + +parse :: FilePath -> String -> These [ErrMsg] (Program ()) +parse fp source = fmap snd $ runParser pProgram (Context fp []) (PS 0 0 0 0 source) + +pProgram :: Parser (Program ()) +pProgram = do + defs <- many pTopDef + let (datadefs, fundefs) = partitionEithers defs + skipWhiteComment + assertEOF Error + return (Program datadefs fundefs) + +pTopDef :: Parser (Either DataDef (FunDef ())) +pTopDef = do + skipWhiteComment + Left <$> pDataDef0 <|> Right <$> pFunDef0 + +pDataDef0 :: Parser DataDef +pDataDef0 = do + pKeyword "data" + inlineWhite + name <- pIdentifier0 InBlock Uppercase + params <- many (inlineWhite >> pIdentifier0 InBlock Lowercase) + cons <- pDatacons "=" + return (DataDef name params cons) + where + pDatacons :: String -> Parser [(Name, [Type])] + pDatacons leader = do + inlineWhite + pKeySym leader + inlineWhite + name <- pIdentifier0 InBlock Uppercase + fields <- many pTypeAtom + rest <- pDatacons "|" <|> return [] + return ((name, fields) : rest) + +pFunDef0 :: Parser (FunDef ()) +pFunDef0 = do + mtypesig <- optional pStandaloneTypesig0 + let mname = fst <$> mtypesig + mtype = snd <$> mtypesig + (clauses, name) <- someClauses mname + return (FunDef name mtype clauses) + where + someClauses :: Maybe Name -> Parser (NonEmpty (FunEq ()), Name) + someClauses Nothing = do + clause@(FunEq name _ _) <- pFunEq Nothing + (,name) . (clause :|) <$> many (pFunEq (Just name)) + someClauses (Just name) = (,name) <$> someNE (pFunEq (Just name)) + +-- | Given the name of the type signature, if any. +pFunEq :: Maybe Name -> Parser (FunEq ()) +pFunEq mCheckName = do + skipWhiteComment + assertAtBlockLeft Fatal "Expected function clause, found indented stuff" + + name <- pIdentifier0 AtLeft Lowercase + case mCheckName of + Just checkName | name /= checkName -> + raise Fatal "Name of function clause does not correspond with type signature" + _ -> return () + + pats <- many (pPattern 11) + rhs <- pRHS "=" + return (FunEq name pats rhs) + +-- | Pass "=" for function definitions and "->" for case clauses. +pRHS :: String -> Parser (RHS ()) +pRHS sepsym = do + -- TODO: parse guards + inlineWhite + pKeySym sepsym + Plain <$> pExpr + +pPattern :: Int -> Parser (Pattern ()) +pPattern d = inlineWhite >> pPattern0 d + +pPattern0 :: Int -> Parser (Pattern ()) +pPattern0 d = do + asum [pPatWildcard0 + ,pPatVarOrAs0 + ,pPatCon0 + ,pPatList0 + ,pPatParens0] + where + pPatWildcard0 = pKeySym "_" >> return (PWildcard ()) + pPatVarOrAs0 = do + var <- pIdentifier0 InBlock Lowercase + asum [do inlineWhite + pKeySym "@" + p <- pPattern 11 + return (PAs () var p) + ,return (PVar () var)] + pPatCon0 = do + con <- pIdentifier0 InBlock Uppercase + if d > 0 + then return (PCon () con []) + else do args <- many (pPattern 11) + return (PCon () con args) + pPatList0 = do + char '[' -- special syntax, no need for pKeySym + ps <- pPattern 0 `sepBy` (inlineWhite >> char ',') + inlineWhite + char ']' + return (PList () ps) + pPatParens0 = do + char '(' + inlineWhite + asum [do char ')' + return (PTup () []) + ,do p <- pPattern0 0 + inlineWhite + asum [do char ')' + return p + ,do char ',' + ps <- pPattern 0 `sepBy1` (inlineWhite >> char ',') + return (PTup () (p : ps))]] + +pExpr :: Parser (Expr ()) +pExpr = do + inlineWhite + -- basics: lit, list, var, con, tup + -- expression atom: application of basics + -- expression parser: op + -- around: let, case, if + asum [pELet0 + ,pECase0 + ,pEIf0 + ,pExprOpExpr0 0] + +pELet0 :: Parser (Expr ()) +pELet0 = do + pKeyword "let" + inlineWhite + startLayoutBlock $ do + -- The first occurrence is also going to skip whitespace in front, + -- which is redundant -- but not harmful. + defs <- many $ do + skipWhiteComment + -- Note: now not necessarily in the indented block. Which is + -- precisely what we need here. If we see "in", let the 'many' + -- choice fail so that the defs loop ends. But let it fail outside + -- this asum so that it is the many that picks it up, not this + -- asum. + res <- asum [Nothing <$ lookAhead (pKeyword "in") + ,Just <$> pFunDef0] + case res of + Nothing -> empty + Just def -> return def + inlineWhite + body <- pExpr + return (ELet () defs body) + +pECase0 :: Parser (Expr ()) +pECase0 = do + pKeyword "case" + e <- pExpr + inlineWhite + pKeyword "of" + inlineWhite + startLayoutBlock $ do + -- The first clause is going to skip whitespace, but that's harmless + -- (though redundant). + let pClause = do + skipWhiteComment + whenM (not <$> isInsideBlock) (() <$ empty) + pat <- pPattern0 0 + rhs <- pRHS "->" + return (pat, rhs) + clauses <- many pClause + return (ECase () e clauses) + +pEIf0 :: Parser (Expr ()) +pEIf0 = do + pKeyword "if" + e1 <- pExpr + inlineWhite + pKeyword "then" + e2 <- pExpr + inlineWhite + pKeyword "else" + e3 <- pExpr + return (EIf () e1 e2 e3) + +pExprOpExpr :: Int -> Parser (Expr ()) +pExprOpExpr d = inlineWhite >> pExprOpExpr0 d + +pExprOpExpr0 :: Int -> Parser (Expr ()) +pExprOpExpr0 d = do + e0 <- pEApp0 + climbRight e0 Nothing + where + climbRight :: Expr () -> Maybe ParsedOperator -> Parser (Expr ()) + climbRight lhs mlhsop = do + asum [do paop@(PaOp op d2 a2) <- pInfixOp + guard (d2 >= d) -- respect global minimum precedence + case mlhsop of -- check operator compatibility + Just (PaOp _ d1 a1) -> + guard (d1 > d2 || (d1 == d2 && a1 == a2 && a1 /= AssocNone)) + Nothing -> + return () + let oprhsd = case a2 of AssocRight -> d2 ; _ -> d2 + 1 + rhs <- pExprOpExpr oprhsd + climbRight (EOp () lhs op rhs) (Just paop) + ,return lhs] + +pEApp0 :: Parser (Expr ()) +pEApp0 = do + e1 <- pEAtom0 + es <- many (inlineWhite >> pEAtom0) + case es of + [] -> return e1 + _ -> return (EApp () e1 es) + +pEAtom0 :: Parser (Expr ()) +pEAtom0 = (ELit () <$> pLiteral0) <|> pEList0 <|> pEVar0 <|> pECon0 <|> pEParens0 + +pLiteral0 :: Parser Literal +pLiteral0 = asum + [do as <- some (satisfy isDigit) + let a = read as :: Integer + asum + [do char '.' + bs <- some (satisfy isDigit) + let b = read bs :: Integer + cs <- optional $ do + char 'e' + cs <- some (satisfy isDigit) + return cs + let c = maybe 0 read cs :: Integer + return (LFloat ((fromIntegral a + fromIntegral b / 10 ^ length bs) * 10 ^ c)) + ,return (LInt a)] + ,do char '\'' + c <- pStringChar + char '\'' + return (LChar c) + ,do char '"' + s <- many pStringChar + char '"' + return (LString s)] + +pStringChar :: Parser Char +pStringChar = asum + [do char '\\' + char 'x' + let hexdig = do + c <- satisfy $ \c' -> + let c = toLower c' + in 'a' <= c && c <= 'f' || '0' <= c && c <= '9' + return $ if 'a' <= c then 10 + ord c - ord 'a' + else ord c - ord '0' + digs <- some hexdig + return (chr (sum (zipWith (*) (reverse digs) (iterate (*16) 1)))) + ,do char '\\' + satisfy (const True) >>= \case + 'n' -> return '\n' + 'r' -> return '\r' + 't' -> return '\t' + 'a' -> return '\a' + 'b' -> return '\b' + '\'' -> return '\'' + '\"' -> return '\"' + '0' -> return '\0' + c -> do raise Error $ "Invalid escape sequence: \\" ++ [c] + return '?' + ,do satisfy (\c -> c `notElem` "\n\r\\\'")] + +pEList0 :: Parser (Expr ()) +pEList0 = do + char '[' -- special syntax, no need for pKeySym + es <- sepBy pExpr (inlineWhite >> char ',') + inlineWhite + char ']' + return (EList () es) + +pEVar0 :: Parser (Expr ()) +pEVar0 = EVar () <$> pIdentifier0 InBlock Lowercase + +pECon0 :: Parser (Expr ()) +pECon0 = ECon () <$> pIdentifier0 InBlock Uppercase + +pEParens0 :: Parser (Expr ()) +pEParens0 = do + char '(' + e <- pExpr + inlineWhite + char ')' + return e + +data Associativity = AssocLeft | AssocRight | AssocNone + deriving (Show, Eq) + +data ParsedOperator = PaOp Operator Int Associativity + deriving (Show) + +pInfixOp :: Parser ParsedOperator +pInfixOp = do + inlineWhite + asum [PaOp OEqu 4 AssocNone <$ pKeySym "==" + ,PaOp OAdd 6 AssocLeft <$ pKeySym "+" + ,PaOp OSub 6 AssocLeft <$ pKeySym "-" + ,PaOp OMul 7 AssocLeft <$ pKeySym "*" + ,PaOp ODiv 7 AssocLeft <$ pKeySym "/" + ,PaOp OMod 7 AssocLeft <$ pKeySym "%" + ,PaOp OPow 8 AssocRight <$ pKeySym "^" + ] + +pStandaloneTypesig0 :: Parser (Name, Type) +pStandaloneTypesig0 = do + assertAtBlockLeft Fatal "Expected top-level definition, found indented stuff" + name@(Name namestr) <- pIdentifier0 AtLeft Lowercase + inlineWhite + pKeySym "::" + pushContext ("type signature for '" ++ namestr ++ "'") $ do + ty <- pType + return (name, ty) + +pType :: Parser Type +pType = do + ty1 <- pTypeApp + asum [do inlineWhite + pKeySym "->" + ty2 <- pType + return (TFun ty1 ty2) + ,return ty1] + +pTypeApp :: Parser Type +pTypeApp = many pTypeAtom >>= \case + [] -> raise Fatal "Expected type" + [t] -> return t + t:ts -> return (TApp t ts) + +pTypeAtom :: Parser Type +pTypeAtom = pTypeParens <|> pTypeList <|> pTypeName + where + pTypeParens = do + inlineWhite + char '(' + asum [do inlineWhite + char ')' + return (TTup []) + ,do ty1 <- pType + ty2s <- many $ do + inlineWhite + char ',' + pType + inlineWhite + char ')' + case ty2s of + [] -> return ty1 + _ -> return (TTup (ty1 : ty2s))] + + pTypeList = do + inlineWhite + char '[' + ty <- pType + char ']' + return (TList ty) + + pTypeName = do + inlineWhite + (cs, name) <- pIdentifier0 InBlock Don'tCare + case cs of + Uppercase -> return (TCon name) + Lowercase -> return (TVar name) + +-- | Parse the given name-like keyword, ensuring that it is the entire word. +pKeyword :: String -> Parser () +pKeyword s = do + string s + notFollowedBy (() <$ satisfy isNameContChar) + +-- | Parse the given symbol-like keyword, ensuring that it is the entire symbol. +pKeySym :: String -> Parser () +pKeySym s = do + string s + notFollowedBy (() <$ satisfy isSymbolChar) + +data Case care where + Uppercase :: Case 'True + Lowercase :: Case 'True + Don'tCare :: Case 'False +deriving instance Show (Case care) + +type family WithCaseOutput care a where + WithCaseOutput 'True a = a + WithCaseOutput 'False a = (Case 'True, a) + +-- | Consumes an identifier (word or parenthesised operator) at the current +-- position. The `var` production in Haskell2010. +-- var -> varid | "(" varsym ")" +pIdentifier0 :: BlockPos -> Case care -> Parser (WithCaseOutput care Name) +pIdentifier0 bpos cs = pAlphaName0 bpos cs <|> pParens0 (pSymbol0 bpos cs) + where + -- | Parser between parens, with the opening paren at the current position. + pParens0 :: Parser a -> Parser a + pParens0 p = do + char '(' + inlineWhite + res <- p + inlineWhite + char ')' + return res + +-- | Consumes a word-like name at the current position with the given case. The +-- `varid` production in Haskell2010 for 'Lowercase', `conid' for 'Uppercase'. +-- +-- > varid -> (small {small | large | digit | "'"}) without reservedid +pAlphaName0 :: BlockPos -> Case care -> Parser (WithCaseOutput care Name) +pAlphaName0 bpos cs = do + (_, s) <- readToken + bpos + (\atfst mc -> case (atfst, mc) of + (True , Just c) | isNameHeadChar c -> Just (Right False) + (True , _ ) -> Nothing + (False, Just c) | isNameContChar c -> Just (Right False) + (False, _ ) -> Just (Left ())) + True + (name, adjoin) <- case cs of + Uppercase + | isLower (head s) -> do + raise Error "Unexpected uppercase word at this position, assuming typo" + return (toUpper (head s) : tail s, id) + | otherwise -> return (s, id) + Lowercase + | isUpper (head s) -> do + raise Error "Unexpected lowercase word at this position, assuming typo" + return (toLower (head s) : tail s, id) + | otherwise -> return (s, id) + Don'tCare + | isLower (head s) -> return (s, (Lowercase,)) + | otherwise -> return (s, (Lowercase,)) + guard (name `notElem` ["case", "class", "data", "default", "deriving", "do", "else" + ,"foreign", "if", "import", "in", "infix", "infixl" + ,"infixr", "instance", "let", "module", "newtype", "of" + ,"then", "type", "where", "_"]) + return (adjoin (Name name)) + +isNameHeadChar :: Char -> Bool +isNameHeadChar c = isLetter c || c == '_' + +isNameContChar :: Char -> Bool +isNameContChar c = isNameHeadChar c || isDigit c || c == '\'' + +-- | Consumes a symbol at the current position. The `varsym` production in +-- Haskell2010 for 'Lowercase', `consym` otherwise, and either if 'Don'tCare'. +-- +-- > varsym -> ((symbol without ":") {symbol}) without (reservedop | dashes) +-- > consym -> (":" {symbol}) without reservedop +-- > symbol -> ascSymbol | uniSymbol without (special | "_" | "\"" | "'") +-- > ascSymbol -> ```!#$%&⋆+./<=>?@^|-~:``` +-- > uniSymbol -> unicode symbol or punctuation +-- > dashes -> "--" {"-"} +-- > special -> ```(),;[]`{}``` +-- > reservedop -> ".." | ":" | "::" | "=" | "\" | "|" | "<-" | "->" | "@" | "~" | "=>" +pSymbol0 :: BlockPos -> Case care -> Parser (WithCaseOutput care Name) +pSymbol0 bpos cs = do + case bpos of + AtLeft -> assertAtBlockLeft Fatal "Expected symbol, but found indented expression" + InBlock -> assertInsideBlock Fatal "Expected symbol, but found end of indented expression" + (c1, adjoin) <- + case cs of Lowercase -> (,id) <$> satisfy (\c -> isSymbolChar c && c /= ':') + Uppercase -> (,id) <$> satisfy (== ':') + Don'tCare -> do c1 <- satisfy (\c -> isSymbolChar c) + return (c1, if c1 == ':' then (Uppercase,) else (Lowercase,)) + crest <- many (satisfy isSymbolChar) + let name = c1 : crest + guard (name `notElem` ["..", ":", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"]) + guard (take 2 name /= "--") + return (adjoin (Name name)) + +isSymbolChar :: Char -> Bool +isSymbolChar c = (isAscSymbol || isUniSymbol) && not isSpecialExt + where + isSpecialExt = c `elem` "(),;[]`{}_\"'" + isAscSymbol = c `elem` "!#$%&⋆+./<=>?@^|-~:" + isUniSymbol = ord c > 127 && (isSymbol c || isPunctuation c) + + +sepBy1 :: Parser a -> Parser sep -> Parser [a] +sepBy1 p psep = do + x1 <- p + (psep >> (x1 :) <$> sepBy1 p psep) <|> return [x1] + +sepBy :: Parser a -> Parser sep -> Parser [a] +sepBy p psep = sepBy1 p psep <|> return [] + +-- | Start a new layout block at the current position. The old layout block is +-- restored after completion of this subparser. +startLayoutBlock :: Parser a -> Parser a +startLayoutBlock p = do + ps0 <- get + put (ps0 { psBlkLine = psLine ps0 + , psBlkCol = psCol ps0 }) + res <- p + modify (\ps -> ps { psBlkLine = psBlkLine ps0 + , psBlkCol = psBlkCol ps0 }) + return res + +data Fatality fatal where + Error :: Fatality 'False + Fatal :: Fatality 'True +deriving instance Show (Fatality fatal) + +type family FatalCtx fatal a where + FatalCtx 'False a = a ~ () + FatalCtx 'True a = () + +-- | Raise an error with the given fatality and description. +raise :: FatalCtx fatal a => Fatality fatal -> String -> Parser a +raise fat msg = do + Context { ctxFile = fp , ctxStack = stk } <- ask + PS { psLine = line, psCol = col } <- get + let fun = case fat of + Error -> dictate . pure + Fatal -> confess . pure + fun (ErrMsg fp stk line col msg) + +raise' :: Fatality fatal -> String -> Parser () +raise' Error = raise Error +raise' Fatal = raise Fatal + +-- | Registers a scope description on the stack for error reporting. +pushContext :: String -> Parser a -> Parser a +pushContext descr = local (\c -> c { ctxStack = descr : ctxStack c }) + +data BlockPos = AtLeft | InBlock + deriving (Show) + +-- | Consumes a token at the current position, asserting that we are +-- in the position indicated by the 'BlockPos' argument. The token is defined +-- by a pure stateful parser. If encountering a newline or EOF, the parser is +-- run on this character ('Nothing' for EOF); if this produces a result, the +-- result is returned; otherwise, the parser fails. The newline is not consumed. +readToken :: BlockPos -> (s -> Maybe Char -> Maybe (Either r s)) -> s -> Parser (r, String) +readToken bpos f s0 = do + case bpos of + AtLeft -> assertAtBlockLeft Fatal "Expected token, but found indented expression" + InBlock -> assertInsideBlock Fatal "Expected token, but found end of indented expression" + let loop :: (s -> Maybe Char -> Maybe (Either r s)) -> s -> Parser (r, String) + loop f' st = do + ps <- get + case psRest ps of + [] | Just (Left res) <- f' st Nothing -> return (res, "") + | otherwise -> empty + '\n' : _ | Just (Left res) <- f' st (Just '\n') -> return (res, "") + c : cs -> case f' st (Just c) of + Nothing -> empty + Just (Left res) -> return (res, "") + Just (Right st') -> do + put (ps { psCol = psCol ps + 1, psRest = cs }) + fmap (c :) <$> loop f' st' + loop f s0 + +-- | Consumes all whitespace and comments (including newlines), but only if +-- this then leaves the parser inside the current block. If not, this fails. +inlineWhite :: Parser () +inlineWhite = do + skipWhiteComment + whenM (not <$> isInsideBlock) empty + +-- | Consumes all whitespace and comments (including newlines). Note: this may +-- end outside the current block. +skipWhiteComment :: Parser () +skipWhiteComment = do + inlineSpaces + _ <- many (blockComment >> inlineSpaces) + optional_ lineComment + optional_ (consumeNewline >> skipWhiteComment) + where + -- | Consumes some inline whitespace. Stops before newlines. + inlineSpaces :: Parser () + inlineSpaces = readWhileInline isSpace + +-- | Consumes an delimited comment including both end markers. Note: this may +-- end outside the current block. +blockComment :: Parser () +blockComment = do + string "{-" -- no need for pKeySym here + let loop = do + readWhileInline (`notElem` "{-") -- "-}" also starts with '-' + asum [string "-}" + ,eof >> raise Error "Unfinished {- -} comment at end of file" + ,blockComment >> loop + ,consumeNewline >> loop] + loop + +-- | Consumes a line comment marker and the rest of the line, excluding +-- newline. +lineComment :: Parser () +lineComment = do + -- '--!' is an operator, so we need to parse a whole symbol here. + pKeySym "--" + readWhileInline (const True) + +-- | Raises an error if we're not currently at the given column. +assertAtBlockLeft :: Fatality fatal -> String -> Parser () +assertAtBlockLeft fat msg = whenM (not <$> isAtBlockLeft) $ raise' fat msg + +-- | Raises an error if psCol is not greater than psRefCol. +assertInsideBlock :: Fatality fatal -> String -> Parser () +assertInsideBlock fat msg = whenM (not <$> isInsideBlock) $ raise' fat msg + +-- | Raises an error if we're not currently at EOF. +assertEOF :: Fatality fatal -> Parser () +assertEOF fat = gets psRest >>= \case + [] -> return () + _ -> raise' fat "Unexpected stuff" + +-- | Returns whether the current position is _within_ the current block, for +-- soft-wrapping content. This means that col > blkCol. +isInsideBlock :: Parser Bool +isInsideBlock = do + ps <- get + return $ psLine ps >= psBlkLine ps && psCol ps > psBlkCol ps + +-- | Returns whether the current position is at the left border of the block; +-- this is for list content such as function definitions or let bindings. This +-- means that col == blkCol. +isAtBlockLeft :: Parser Bool +isAtBlockLeft = do + ps <- get + return $ psLine ps >= psBlkLine ps && psCol ps == psBlkCol ps + +-- | Consumes characters while the predicate holds or until (and excluding) +-- a newline, whichever comes first. +readWhileInline :: (Char -> Bool) -> Parser () +readWhileInline p = do + (taken, rest) <- span (\c -> p c && c /= '\n') <$> gets psRest + modify (\ps -> ps { psCol = psCol ps + length taken + , psRest = rest }) + +-- | Consumes exactly one newline at the current position. +consumeNewline :: Parser () +consumeNewline = gets psRest >>= \case + '\n' : rest -> modify (\ps -> ps { psLine = psLine ps + 1 + , psCol = 0 + , psRest = rest }) + _ -> empty + +-- | Consumes exactly one character, unequal to newline, at the current position. +satisfy :: (Char -> Bool) -> Parser Char +satisfy p = do + -- traceM "entering satisfy" + r <- gets psRest + -- traceM "got rest" + r `seq` return () + -- traceM "seqd rest" + -- traceM ("rest is " ++ show r) + case r of + c : rest | c /= '\n', p c -> do + modify (\ps -> ps { psCol = psCol ps + 1 + , psRest = rest }) + return c + _ -> empty + +-- | Consumes exactly this character at the current position. Must not be a +-- newline. +char :: Char -> Parser () +char c = string [c] + +-- | Consumes exactly this string at the current position. The string must not +-- contain a newline. +string :: String -> Parser () +string s | any (== '\n') s = error "Newline in 'string' argument" +string s = do + ps <- get + if take (length s) (psRest ps) == s + then put (ps { psCol = psCol ps + length s + , psRest = drop (length s) (psRest ps) }) + else empty + +lookAhead :: Parser () -> Parser () +lookAhead p = do + ps <- get + success <- absolve False (True <$ p) + put ps -- restore state, as if nothing happened + when (not success) empty + +notFollowedBy :: Parser () -> Parser () +notFollowedBy p = do + ps <- get + success <- absolve True (False <$ p) + put ps -- restore state, as if nothing happened + when (not success) empty + +-- | Only succeeds at EOF. +eof :: Parser () +eof = gets psRest >>= \case [] -> return () + _ -> empty + +whenM :: (Monad m, Monoid a) => m Bool -> m a -> m a +whenM mb mx = mb >>= \b -> if b then mx else return mempty + +optional_ :: Alternative f => f a -> f () +optional_ a = (() <$ a) <|> pure () + +someNE :: Alternative f => f a -> f (NonEmpty a) +someNE a = (:|) <$> a <*> many a -- cgit v1.2.3-70-g09d2