From 78ffb5ed5fbda230675310b37f798c500a13ef11 Mon Sep 17 00:00:00 2001 From: Tom Smeding Date: Sun, 18 Feb 2024 17:14:11 +0100 Subject: Parser work --- src/Parser.hs | 182 ++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 112 insertions(+), 70 deletions(-) (limited to 'src/Parser.hs') diff --git a/src/Parser.hs b/src/Parser.hs index bef0c39..517ec64 100644 --- a/src/Parser.hs +++ b/src/Parser.hs @@ -33,14 +33,18 @@ import Debug.Trace import AST +data Pos = Pos + { posLine :: Int -- ^ zero-based + , posCol :: Int -- ^ zero-based + } + deriving (Show) + -- Positions are zero-based in both dimensions. -- See 'isInsideBlock' and 'isAtBlockLeft' for the two relevant "inside the -- block" conditions. data PS = PS - { psBlkLine :: Int -- ^ Start line of current layout block - , psBlkCol :: Int -- ^ Start column of current layout block - , psLine :: Int -- ^ Current line - , psCol :: Int -- ^ Current column + { psBlk :: Pos -- ^ Start of current layout block + , psCur :: Pos -- ^ Current parsing position , psRest :: String -- ^ Rest of the input } deriving (Show) @@ -129,24 +133,23 @@ instance MonadChronicle [ErrMsg] Parser where data ErrMsg = ErrMsg { errFile :: FilePath , errStk :: [String] - , errLine :: Int - , errCol :: Int + , errPos :: Pos , errMsg :: String } deriving (Show) printErrMsg :: ErrMsg -> String -printErrMsg (ErrMsg fp stk y x s) = +printErrMsg (ErrMsg fp stk (Pos y x) s) = unlines (map (\descr -> "In " ++ descr ++ ":") (reverse stk)) ++ fp ++ ":" ++ show (y + 1) ++ ":" ++ show (x + 1) ++ ": " ++ s parse :: FilePath -> String -> These [ErrMsg] (Program ()) parse fp source = - runParser pProgram (Context fp []) (PS 0 0 0 0 source) + runParser pProgram (Context fp []) (PS (Pos 0 0) (Pos 0 0) source) (\_ errs res -> case errs of [] -> That res _ -> These errs res) (\errs -> This errs) - (This [ErrMsg fp [] 0 0 "Parse error, no grammar alternatives match your source"]) + (This [ErrMsg fp [] (Pos 0 0) "Parse error, no grammar alternatives match your source"]) pProgram :: Parser (Program ()) pProgram = do @@ -159,7 +162,12 @@ pProgram = do pTopDef :: Parser (Either DataDef (FunDef ())) pTopDef = do skipWhiteComment - Left <$> pDataDef0 <|> Right <$> pFunDef0 + isAtBlockLeft >>= \case + True -> Left <$> pDataDef0 <|> Right <$> pFunDef0 + False -> do + raise Error "Skipping unparseable content" + readWhileInline (const True) + pTopDef pDataDef0 :: Parser DataDef pDataDef0 = do @@ -200,7 +208,8 @@ pFunEq :: FunEqContext -> Parser (FunEq ()) pFunEq fectx = do skipWhiteComment pushLocatedContext "funeq" $ do - assertAtBlockLeft Fatal "Expected function clause, found indented stuff" + isAtBlockLeft >>= guard + -- assertAtBlockLeft Fatal "Expected function clause, found indented stuff" name <- pIdentifier0 AtLeft Lowercase case fectx of @@ -221,13 +230,19 @@ pRHS sepsym = do -- TODO: parse guards inlineWhite pKeySym sepsym <|> raise Error ("Expected " ++ show sepsym) - Plain <$> (pExpr <|> (raise Error "Expected expression" >> return (ETup () []))) + expr <- pExpr <|> (raise Error "Expected expression" >> return (ETup () [])) + return (Plain expr) pPattern :: Int -> Parser (Pattern ()) pPattern d = inlineWhite >> pPattern0 d pPattern0 :: Int -> Parser (Pattern ()) pPattern0 d = do + p0 <- pPatExprAtom0 (max 10 d) + climbRight pPattern (pInfixOp Uppercase) (POp ()) d p0 Nothing + +pPatExprAtom0 :: Int -> Parser (Pattern ()) +pPatExprAtom0 d = asum [pPatWildcard0 ,pPatVarOrAs0 ,pPatCon0 @@ -244,7 +259,7 @@ pPattern0 d = do ,return (PVar () var)] pPatCon0 = do con <- pIdentifier0 InBlock Uppercase - if d > 0 + if d > 10 then return (PCon () con []) else do args <- many (pPattern 11) return (PCon () con args) @@ -284,24 +299,28 @@ pELet0 :: Parser (Expr ()) pELet0 = do pKeyword "let" inlineWhite - startLayoutBlock $ do + defs <- startLayoutBlock $ do -- The first occurrence is also going to skip whitespace in front, -- which is redundant -- but not harmful. - defs <- many $ do + many $ do skipWhiteComment -- Note: now not necessarily in the indented block. Which is -- precisely what we need here. If we see "in", let the 'many' -- choice fail so that the defs loop ends. But let it fail outside -- this asum so that it is the many that picks it up, not this -- asum. - res <- asum [Nothing <$ lookAhead (pKeyword "in") + res <- asum [Nothing <$ pKeyword "in" -- note: will be dropped due to the empty backtrack ,Just <$> pFunDef0] case res of Nothing -> empty Just def -> return def - inlineWhite - body <- pExpr - return (ELet () defs body) + inlineWhite + asum [do pKeyword "in" + inlineWhite + body <- pExpr <|> (raise Error "Expected expression" >> return (ETup () [])) + return (ELet () defs body) + ,do raise Error "Expected 'in' after 'let'" + return (ELet () defs (ETup () []))] pECase0 :: Parser (Expr ()) pECase0 = do @@ -340,21 +359,28 @@ pExprOpExpr d = inlineWhite >> pExprOpExpr0 d pExprOpExpr0 :: Int -> Parser (Expr ()) pExprOpExpr0 d = do e0 <- pEApp0 - climbRight e0 Nothing - where - climbRight :: Expr () -> Maybe ParsedOperator -> Parser (Expr ()) - climbRight lhs mlhsop = do - asum [do paop@(PaOp op d2 a2) <- pInfixOp - guard (d2 >= d) -- respect global minimum precedence - case mlhsop of -- check operator compatibility - Just (PaOp _ d1 a1) -> - guard (d1 > d2 || (d1 == d2 && a1 == a2 && a1 /= AssocNone)) - Nothing -> - return () - let oprhsd = case a2 of AssocRight -> d2 ; _ -> d2 + 1 - rhs <- pExprOpExpr oprhsd - climbRight (EOp () lhs op rhs) (Just paop) - ,return lhs] + climbRight pExprOpExpr (snd <$> pInfixOp Don'tCare) (EOp ()) d e0 Nothing + +climbRight + :: (Int -> Parser e) -- ^ Parse an expression at the given precedence level + -> Parser ParsedOperator -- ^ Parse an operator + -> (e -> Operator -> e -> e) -- ^ Build an operator application experssion + -> Int -- ^ Ambient precedence level: minimum precedence of top-level operator in result + -> e -- ^ lhs: Initial non-operator expression already parsed + -> Maybe ParsedOperator -- ^ Top-level operator in lhs (initialise with Nothing) + -> Parser e +climbRight pExpr' pOper makeOp d lhs mlhsop = + asum [do paop@(PaOp op d2 a2) <- pOper + guard (d2 >= d) -- respect global minimum precedence + case mlhsop of -- check operator compatibility + Just (PaOp _ d1 a1) -> + guard (d1 > d2 || (d1 == d2 && a1 == a2 && a1 /= AssocNone)) + Nothing -> + return () + let oprhsd = case a2 of AssocRight -> d2 ; _ -> d2 + 1 + rhs <- pExpr' oprhsd + climbRight pExpr' pOper makeOp d (makeOp lhs op rhs) (Just paop) + ,return lhs] pEApp0 :: Parser (Expr ()) pEApp0 = do @@ -445,11 +471,18 @@ data Associativity = AssocLeft | AssocRight | AssocNone data ParsedOperator = PaOp Operator Int Associativity deriving (Show) -pInfixOp :: Parser ParsedOperator -pInfixOp = do +pInfixOp :: Case care -> Parser (WithCaseOutput care ParsedOperator) +pInfixOp cs = do inlineWhite + case cs of + Lowercase -> pLowerInfixOp0 + Uppercase -> pUpperInfixOp0 + Don'tCare -> asum [(Lowercase,) <$> pLowerInfixOp0 + ,(Uppercase,) <$> pUpperInfixOp0] + +pLowerInfixOp0 :: Parser ParsedOperator +pLowerInfixOp0 = asum [PaOp OEqu 4 AssocNone <$ pKeySym "==" - ,PaOp OCons 5 AssocRight <$ pKeySym ":" ,PaOp OAdd 6 AssocLeft <$ pKeySym "+" ,PaOp OSub 6 AssocLeft <$ pKeySym "-" ,PaOp OMul 7 AssocLeft <$ pKeySym "*" @@ -458,9 +491,14 @@ pInfixOp = do ,PaOp OPow 8 AssocRight <$ pKeySym "^" ] +pUpperInfixOp0 :: Parser ParsedOperator +pUpperInfixOp0 = + asum [PaOp OCons 5 AssocRight <$ pKeySym ":"] + pStandaloneTypesig0 :: Parser (Name, Type) pStandaloneTypesig0 = do - assertAtBlockLeft Fatal "Expected top-level definition, found indented stuff" + isAtBlockLeft >>= guard + -- assertAtBlockLeft Fatal "Expected definition, found indented stuff" name@(Name namestr) <- pIdentifier0 AtLeft Lowercase inlineWhite pKeySym "::" @@ -521,7 +559,7 @@ pTypeAtom = pTypeParens <|> pTypeList <|> pTypeName pKeyword :: String -> Parser () pKeyword s = do string s - traceM $ "pKeyword: parsed " ++ show s + -- traceM $ "pKeyword: parsed " ++ show s notFollowedBy (() <$ satisfy isNameContChar) -- | Parse the given symbol-like keyword, ensuring that it is the entire symbol. @@ -562,6 +600,7 @@ pIdentifier0 bpos cs = pAlphaName0 bpos cs <|> pParens0 (pSymbol0 bpos cs) -- > varid -> (small {small | large | digit | "'"}) without reservedid pAlphaName0 :: BlockPos -> Case care -> Parser (WithCaseOutput care Name) pAlphaName0 bpos cs = do + startPos <- gets psCur (_, s) <- readToken bpos (\atfst mc -> case (atfst, mc) of @@ -570,24 +609,24 @@ pAlphaName0 bpos cs = do (False, Just c) | isNameContChar c -> Just (Right False) (False, _ ) -> Just (Left ())) True + guard (s `notElem` ["case", "class", "data", "default", "deriving", "do", "else" + ,"foreign", "if", "import", "in", "infix", "infixl" + ,"infixr", "instance", "let", "module", "newtype", "of" + ,"then", "type", "where", "_"]) (name, adjoin) <- case cs of Uppercase | isLower (head s) -> do - raise Error "Unexpected uppercase word at this position, assuming typo" + raiseAt startPos Error "Unexpected uppercase word at this position, assuming typo" return (toUpper (head s) : tail s, id) | otherwise -> return (s, id) Lowercase | isUpper (head s) -> do - raise Error "Unexpected lowercase word at this position, assuming typo" + raiseAt startPos Error "Unexpected lowercase word at this position, assuming typo" return (toLower (head s) : tail s, id) | otherwise -> return (s, id) Don'tCare | isLower (head s) -> return (s, (Lowercase,)) | otherwise -> return (s, (Uppercase,)) - guard (name `notElem` ["case", "class", "data", "default", "deriving", "do", "else" - ,"foreign", "if", "import", "in", "infix", "infixl" - ,"infixr", "instance", "let", "module", "newtype", "of" - ,"then", "type", "where", "_"]) return (adjoin (Name name)) isNameHeadChar :: Char -> Bool @@ -644,11 +683,9 @@ sepBy p psep = sepBy1 p psep <|> return [] startLayoutBlock :: Parser a -> Parser a startLayoutBlock p = do ps0 <- get - put (ps0 { psBlkLine = psLine ps0 - , psBlkCol = psCol ps0 }) + put (ps0 { psBlk = psCur ps0 }) res <- p - modify (\ps -> ps { psBlkLine = psBlkLine ps0 - , psBlkCol = psBlkCol ps0 }) + modify (\ps -> ps { psBlk = psBlk ps0 }) return res data Fatality fatal where @@ -660,25 +697,27 @@ type family FatalCtx fatal a where FatalCtx 'False a = a ~ () FatalCtx 'True a = () --- | Raise an error with the given fatality and description. +raise' :: Fatality fatal -> String -> Parser () +raise' Error = raise Error +raise' Fatal = raise Fatal + raise :: FatalCtx fatal a => Fatality fatal -> String -> Parser a -raise fat msg = do +raise fat msg = gets psCur >>= \pos -> raiseAt pos fat msg + +-- | Raise an error with the given fatality and description. +raiseAt :: FatalCtx fatal a => Pos -> Fatality fatal -> String -> Parser a +raiseAt pos fat msg = do Context { ctxFile = fp , ctxStack = stk } <- ask - PS { psLine = line, psCol = col } <- get - let err = ErrMsg fp stk line col msg + let err = ErrMsg fp stk pos msg case fat of Error -> dictate (pure err) Fatal -> confess (pure err) -raise' :: Fatality fatal -> String -> Parser () -raise' Error = raise Error -raise' Fatal = raise Fatal - describeLocation :: Parser String describeLocation = do fp <- asks ctxFile - ps <- get - return $ fp ++ ":" ++ show (psLine ps + 1) ++ ":" ++ show (psCol ps + 1) + cur <- gets psCur + return $ fp ++ ":" ++ show (posLine cur + 1) ++ ":" ++ show (posCol cur + 1) -- | Registers a scope description on the stack for error reporting. pushContext :: String -> Parser a -> Parser a @@ -715,7 +754,8 @@ readToken bpos f s0 = do Nothing -> empty Just (Left res) -> return (res, "") Just (Right st') -> do - put (ps { psCol = psCol ps + 1, psRest = cs }) + let Pos line col = psCur ps + put (ps { psCur = Pos line (col + 1), psRest = cs }) fmap (c :) <$> loop f' st' loop f s0 @@ -778,30 +818,30 @@ assertEOF fat = gets psRest >>= \case -- soft-wrapping content. This means that col > blkCol. isInsideBlock :: Parser Bool isInsideBlock = do - ps <- get - return $ psLine ps >= psBlkLine ps && psCol ps > psBlkCol ps + PS { psCur = cur, psBlk = blk } <- get + return $ posLine cur >= posLine blk && posCol cur > posCol blk -- | Returns whether the current position is at the left border of the block; -- this is for list content such as function definitions or let bindings. This -- means that col == blkCol. isAtBlockLeft :: Parser Bool isAtBlockLeft = do - ps <- get - return $ psLine ps >= psBlkLine ps && psCol ps == psBlkCol ps + PS { psCur = cur, psBlk = blk } <- get + return $ posLine cur >= posLine blk && posCol cur == posCol blk -- | Consumes characters while the predicate holds or until (and excluding) -- a newline, whichever comes first. readWhileInline :: (Char -> Bool) -> Parser () readWhileInline p = do (taken, rest) <- span (\c -> p c && c /= '\n') <$> gets psRest - modify (\ps -> ps { psCol = psCol ps + length taken + modify (\ps -> ps { psCur = let Pos line col = psCur ps + in Pos line (col + length taken) , psRest = rest }) -- | Consumes exactly one newline at the current position. consumeNewline :: Parser () consumeNewline = gets psRest >>= \case - '\n' : rest -> modify (\ps -> ps { psLine = psLine ps + 1 - , psCol = 0 + '\n' : rest -> modify (\ps -> ps { psCur = Pos (posLine (psCur ps) + 1) 0 , psRest = rest }) _ -> empty @@ -816,8 +856,9 @@ satisfy p = do -- traceM ("rest is " ++ show r) case r of c : rest | c /= '\n', p c -> do - modify (\ps -> ps { psCol = psCol ps + 1 - , psRest = rest }) + modify (\ps -> let Pos line col = psCur ps + in ps { psCur = Pos line (col + 1) + , psRest = rest }) return c _ -> empty @@ -832,8 +873,9 @@ string :: String -> Parser () string s | any (== '\n') s = error "Newline in 'string' argument" string s = do ps <- get + let Pos line col = psCur ps if take (length s) (psRest ps) == s - then put (ps { psCol = psCol ps + length s + then put (ps { psCur = Pos line (col + length s) , psRest = drop (length s) (psRest ps) }) else empty -- cgit v1.2.3-70-g09d2