aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Smeding <tom@tomsmeding.com>2024-02-18 17:14:11 +0100
committerTom Smeding <tom@tomsmeding.com>2024-02-18 17:15:50 +0100
commit78ffb5ed5fbda230675310b37f798c500a13ef11 (patch)
tree893dd8aa1a451e88958993d0b4b0f5ce2d5b124b
parent720372e3deac26a064fb1c711db2ccf54e655fab (diff)
Parser work
-rw-r--r--examples/test1.hs6
-rw-r--r--src/AST.hs1
-rw-r--r--src/Parser.hs182
3 files changed, 117 insertions, 72 deletions
diff --git a/examples/test1.hs b/examples/test1.hs
index 6acbeda..7bd2ee3 100644
--- a/examples/test1.hs
+++ b/examples/test1.hs
@@ -10,7 +10,9 @@ g :: Int -> ((Int -> Int) -> (Int -> Int)) -> Int
g y ding = ding f (y - 2) + 7
reverse :: [a] -> [a]
-reverse =
+reverse l =
let go [] acc = acc
go (x:xs) acc = go xs (x:acc)
- in \l -> go l []
+ in go l []
+
+kaas = 42
diff --git a/src/AST.hs b/src/AST.hs
index 2048e87..76b39b6 100644
--- a/src/AST.hs
+++ b/src/AST.hs
@@ -32,6 +32,7 @@ data Pattern t
| PVar t Name
| PAs t Name (Pattern t)
| PCon t Name [Pattern t]
+ | POp t (Pattern t) Operator (Pattern t)
| PList t [Pattern t]
| PTup t [Pattern t]
deriving (Show)
diff --git a/src/Parser.hs b/src/Parser.hs
index bef0c39..517ec64 100644
--- a/src/Parser.hs
+++ b/src/Parser.hs
@@ -33,14 +33,18 @@ import Debug.Trace
import AST
+data Pos = Pos
+ { posLine :: Int -- ^ zero-based
+ , posCol :: Int -- ^ zero-based
+ }
+ deriving (Show)
+
-- Positions are zero-based in both dimensions.
-- See 'isInsideBlock' and 'isAtBlockLeft' for the two relevant "inside the
-- block" conditions.
data PS = PS
- { psBlkLine :: Int -- ^ Start line of current layout block
- , psBlkCol :: Int -- ^ Start column of current layout block
- , psLine :: Int -- ^ Current line
- , psCol :: Int -- ^ Current column
+ { psBlk :: Pos -- ^ Start of current layout block
+ , psCur :: Pos -- ^ Current parsing position
, psRest :: String -- ^ Rest of the input
}
deriving (Show)
@@ -129,24 +133,23 @@ instance MonadChronicle [ErrMsg] Parser where
data ErrMsg = ErrMsg
{ errFile :: FilePath
, errStk :: [String]
- , errLine :: Int
- , errCol :: Int
+ , errPos :: Pos
, errMsg :: String }
deriving (Show)
printErrMsg :: ErrMsg -> String
-printErrMsg (ErrMsg fp stk y x s) =
+printErrMsg (ErrMsg fp stk (Pos y x) s) =
unlines (map (\descr -> "In " ++ descr ++ ":") (reverse stk)) ++
fp ++ ":" ++ show (y + 1) ++ ":" ++ show (x + 1) ++ ": " ++ s
parse :: FilePath -> String -> These [ErrMsg] (Program ())
parse fp source =
- runParser pProgram (Context fp []) (PS 0 0 0 0 source)
+ runParser pProgram (Context fp []) (PS (Pos 0 0) (Pos 0 0) source)
(\_ errs res -> case errs of
[] -> That res
_ -> These errs res)
(\errs -> This errs)
- (This [ErrMsg fp [] 0 0 "Parse error, no grammar alternatives match your source"])
+ (This [ErrMsg fp [] (Pos 0 0) "Parse error, no grammar alternatives match your source"])
pProgram :: Parser (Program ())
pProgram = do
@@ -159,7 +162,12 @@ pProgram = do
pTopDef :: Parser (Either DataDef (FunDef ()))
pTopDef = do
skipWhiteComment
- Left <$> pDataDef0 <|> Right <$> pFunDef0
+ isAtBlockLeft >>= \case
+ True -> Left <$> pDataDef0 <|> Right <$> pFunDef0
+ False -> do
+ raise Error "Skipping unparseable content"
+ readWhileInline (const True)
+ pTopDef
pDataDef0 :: Parser DataDef
pDataDef0 = do
@@ -200,7 +208,8 @@ pFunEq :: FunEqContext -> Parser (FunEq ())
pFunEq fectx = do
skipWhiteComment
pushLocatedContext "funeq" $ do
- assertAtBlockLeft Fatal "Expected function clause, found indented stuff"
+ isAtBlockLeft >>= guard
+ -- assertAtBlockLeft Fatal "Expected function clause, found indented stuff"
name <- pIdentifier0 AtLeft Lowercase
case fectx of
@@ -221,13 +230,19 @@ pRHS sepsym = do
-- TODO: parse guards
inlineWhite
pKeySym sepsym <|> raise Error ("Expected " ++ show sepsym)
- Plain <$> (pExpr <|> (raise Error "Expected expression" >> return (ETup () [])))
+ expr <- pExpr <|> (raise Error "Expected expression" >> return (ETup () []))
+ return (Plain expr)
pPattern :: Int -> Parser (Pattern ())
pPattern d = inlineWhite >> pPattern0 d
pPattern0 :: Int -> Parser (Pattern ())
pPattern0 d = do
+ p0 <- pPatExprAtom0 (max 10 d)
+ climbRight pPattern (pInfixOp Uppercase) (POp ()) d p0 Nothing
+
+pPatExprAtom0 :: Int -> Parser (Pattern ())
+pPatExprAtom0 d =
asum [pPatWildcard0
,pPatVarOrAs0
,pPatCon0
@@ -244,7 +259,7 @@ pPattern0 d = do
,return (PVar () var)]
pPatCon0 = do
con <- pIdentifier0 InBlock Uppercase
- if d > 0
+ if d > 10
then return (PCon () con [])
else do args <- many (pPattern 11)
return (PCon () con args)
@@ -284,24 +299,28 @@ pELet0 :: Parser (Expr ())
pELet0 = do
pKeyword "let"
inlineWhite
- startLayoutBlock $ do
+ defs <- startLayoutBlock $ do
-- The first occurrence is also going to skip whitespace in front,
-- which is redundant -- but not harmful.
- defs <- many $ do
+ many $ do
skipWhiteComment
-- Note: now not necessarily in the indented block. Which is
-- precisely what we need here. If we see "in", let the 'many'
-- choice fail so that the defs loop ends. But let it fail outside
-- this asum so that it is the many that picks it up, not this
-- asum.
- res <- asum [Nothing <$ lookAhead (pKeyword "in")
+ res <- asum [Nothing <$ pKeyword "in" -- note: will be dropped due to the empty backtrack
,Just <$> pFunDef0]
case res of
Nothing -> empty
Just def -> return def
- inlineWhite
- body <- pExpr
- return (ELet () defs body)
+ inlineWhite
+ asum [do pKeyword "in"
+ inlineWhite
+ body <- pExpr <|> (raise Error "Expected expression" >> return (ETup () []))
+ return (ELet () defs body)
+ ,do raise Error "Expected 'in' after 'let'"
+ return (ELet () defs (ETup () []))]
pECase0 :: Parser (Expr ())
pECase0 = do
@@ -340,21 +359,28 @@ pExprOpExpr d = inlineWhite >> pExprOpExpr0 d
pExprOpExpr0 :: Int -> Parser (Expr ())
pExprOpExpr0 d = do
e0 <- pEApp0
- climbRight e0 Nothing
- where
- climbRight :: Expr () -> Maybe ParsedOperator -> Parser (Expr ())
- climbRight lhs mlhsop = do
- asum [do paop@(PaOp op d2 a2) <- pInfixOp
- guard (d2 >= d) -- respect global minimum precedence
- case mlhsop of -- check operator compatibility
- Just (PaOp _ d1 a1) ->
- guard (d1 > d2 || (d1 == d2 && a1 == a2 && a1 /= AssocNone))
- Nothing ->
- return ()
- let oprhsd = case a2 of AssocRight -> d2 ; _ -> d2 + 1
- rhs <- pExprOpExpr oprhsd
- climbRight (EOp () lhs op rhs) (Just paop)
- ,return lhs]
+ climbRight pExprOpExpr (snd <$> pInfixOp Don'tCare) (EOp ()) d e0 Nothing
+
+climbRight
+ :: (Int -> Parser e) -- ^ Parse an expression at the given precedence level
+ -> Parser ParsedOperator -- ^ Parse an operator
+ -> (e -> Operator -> e -> e) -- ^ Build an operator application experssion
+ -> Int -- ^ Ambient precedence level: minimum precedence of top-level operator in result
+ -> e -- ^ lhs: Initial non-operator expression already parsed
+ -> Maybe ParsedOperator -- ^ Top-level operator in lhs (initialise with Nothing)
+ -> Parser e
+climbRight pExpr' pOper makeOp d lhs mlhsop =
+ asum [do paop@(PaOp op d2 a2) <- pOper
+ guard (d2 >= d) -- respect global minimum precedence
+ case mlhsop of -- check operator compatibility
+ Just (PaOp _ d1 a1) ->
+ guard (d1 > d2 || (d1 == d2 && a1 == a2 && a1 /= AssocNone))
+ Nothing ->
+ return ()
+ let oprhsd = case a2 of AssocRight -> d2 ; _ -> d2 + 1
+ rhs <- pExpr' oprhsd
+ climbRight pExpr' pOper makeOp d (makeOp lhs op rhs) (Just paop)
+ ,return lhs]
pEApp0 :: Parser (Expr ())
pEApp0 = do
@@ -445,11 +471,18 @@ data Associativity = AssocLeft | AssocRight | AssocNone
data ParsedOperator = PaOp Operator Int Associativity
deriving (Show)
-pInfixOp :: Parser ParsedOperator
-pInfixOp = do
+pInfixOp :: Case care -> Parser (WithCaseOutput care ParsedOperator)
+pInfixOp cs = do
inlineWhite
+ case cs of
+ Lowercase -> pLowerInfixOp0
+ Uppercase -> pUpperInfixOp0
+ Don'tCare -> asum [(Lowercase,) <$> pLowerInfixOp0
+ ,(Uppercase,) <$> pUpperInfixOp0]
+
+pLowerInfixOp0 :: Parser ParsedOperator
+pLowerInfixOp0 =
asum [PaOp OEqu 4 AssocNone <$ pKeySym "=="
- ,PaOp OCons 5 AssocRight <$ pKeySym ":"
,PaOp OAdd 6 AssocLeft <$ pKeySym "+"
,PaOp OSub 6 AssocLeft <$ pKeySym "-"
,PaOp OMul 7 AssocLeft <$ pKeySym "*"
@@ -458,9 +491,14 @@ pInfixOp = do
,PaOp OPow 8 AssocRight <$ pKeySym "^"
]
+pUpperInfixOp0 :: Parser ParsedOperator
+pUpperInfixOp0 =
+ asum [PaOp OCons 5 AssocRight <$ pKeySym ":"]
+
pStandaloneTypesig0 :: Parser (Name, Type)
pStandaloneTypesig0 = do
- assertAtBlockLeft Fatal "Expected top-level definition, found indented stuff"
+ isAtBlockLeft >>= guard
+ -- assertAtBlockLeft Fatal "Expected definition, found indented stuff"
name@(Name namestr) <- pIdentifier0 AtLeft Lowercase
inlineWhite
pKeySym "::"
@@ -521,7 +559,7 @@ pTypeAtom = pTypeParens <|> pTypeList <|> pTypeName
pKeyword :: String -> Parser ()
pKeyword s = do
string s
- traceM $ "pKeyword: parsed " ++ show s
+ -- traceM $ "pKeyword: parsed " ++ show s
notFollowedBy (() <$ satisfy isNameContChar)
-- | Parse the given symbol-like keyword, ensuring that it is the entire symbol.
@@ -562,6 +600,7 @@ pIdentifier0 bpos cs = pAlphaName0 bpos cs <|> pParens0 (pSymbol0 bpos cs)
-- > varid -> (small {small | large | digit | "'"}) without reservedid
pAlphaName0 :: BlockPos -> Case care -> Parser (WithCaseOutput care Name)
pAlphaName0 bpos cs = do
+ startPos <- gets psCur
(_, s) <- readToken
bpos
(\atfst mc -> case (atfst, mc) of
@@ -570,24 +609,24 @@ pAlphaName0 bpos cs = do
(False, Just c) | isNameContChar c -> Just (Right False)
(False, _ ) -> Just (Left ()))
True
+ guard (s `notElem` ["case", "class", "data", "default", "deriving", "do", "else"
+ ,"foreign", "if", "import", "in", "infix", "infixl"
+ ,"infixr", "instance", "let", "module", "newtype", "of"
+ ,"then", "type", "where", "_"])
(name, adjoin) <- case cs of
Uppercase
| isLower (head s) -> do
- raise Error "Unexpected uppercase word at this position, assuming typo"
+ raiseAt startPos Error "Unexpected uppercase word at this position, assuming typo"
return (toUpper (head s) : tail s, id)
| otherwise -> return (s, id)
Lowercase
| isUpper (head s) -> do
- raise Error "Unexpected lowercase word at this position, assuming typo"
+ raiseAt startPos Error "Unexpected lowercase word at this position, assuming typo"
return (toLower (head s) : tail s, id)
| otherwise -> return (s, id)
Don'tCare
| isLower (head s) -> return (s, (Lowercase,))
| otherwise -> return (s, (Uppercase,))
- guard (name `notElem` ["case", "class", "data", "default", "deriving", "do", "else"
- ,"foreign", "if", "import", "in", "infix", "infixl"
- ,"infixr", "instance", "let", "module", "newtype", "of"
- ,"then", "type", "where", "_"])
return (adjoin (Name name))
isNameHeadChar :: Char -> Bool
@@ -644,11 +683,9 @@ sepBy p psep = sepBy1 p psep <|> return []
startLayoutBlock :: Parser a -> Parser a
startLayoutBlock p = do
ps0 <- get
- put (ps0 { psBlkLine = psLine ps0
- , psBlkCol = psCol ps0 })
+ put (ps0 { psBlk = psCur ps0 })
res <- p
- modify (\ps -> ps { psBlkLine = psBlkLine ps0
- , psBlkCol = psBlkCol ps0 })
+ modify (\ps -> ps { psBlk = psBlk ps0 })
return res
data Fatality fatal where
@@ -660,25 +697,27 @@ type family FatalCtx fatal a where
FatalCtx 'False a = a ~ ()
FatalCtx 'True a = ()
--- | Raise an error with the given fatality and description.
+raise' :: Fatality fatal -> String -> Parser ()
+raise' Error = raise Error
+raise' Fatal = raise Fatal
+
raise :: FatalCtx fatal a => Fatality fatal -> String -> Parser a
-raise fat msg = do
+raise fat msg = gets psCur >>= \pos -> raiseAt pos fat msg
+
+-- | Raise an error with the given fatality and description.
+raiseAt :: FatalCtx fatal a => Pos -> Fatality fatal -> String -> Parser a
+raiseAt pos fat msg = do
Context { ctxFile = fp , ctxStack = stk } <- ask
- PS { psLine = line, psCol = col } <- get
- let err = ErrMsg fp stk line col msg
+ let err = ErrMsg fp stk pos msg
case fat of
Error -> dictate (pure err)
Fatal -> confess (pure err)
-raise' :: Fatality fatal -> String -> Parser ()
-raise' Error = raise Error
-raise' Fatal = raise Fatal
-
describeLocation :: Parser String
describeLocation = do
fp <- asks ctxFile
- ps <- get
- return $ fp ++ ":" ++ show (psLine ps + 1) ++ ":" ++ show (psCol ps + 1)
+ cur <- gets psCur
+ return $ fp ++ ":" ++ show (posLine cur + 1) ++ ":" ++ show (posCol cur + 1)
-- | Registers a scope description on the stack for error reporting.
pushContext :: String -> Parser a -> Parser a
@@ -715,7 +754,8 @@ readToken bpos f s0 = do
Nothing -> empty
Just (Left res) -> return (res, "")
Just (Right st') -> do
- put (ps { psCol = psCol ps + 1, psRest = cs })
+ let Pos line col = psCur ps
+ put (ps { psCur = Pos line (col + 1), psRest = cs })
fmap (c :) <$> loop f' st'
loop f s0
@@ -778,30 +818,30 @@ assertEOF fat = gets psRest >>= \case
-- soft-wrapping content. This means that col > blkCol.
isInsideBlock :: Parser Bool
isInsideBlock = do
- ps <- get
- return $ psLine ps >= psBlkLine ps && psCol ps > psBlkCol ps
+ PS { psCur = cur, psBlk = blk } <- get
+ return $ posLine cur >= posLine blk && posCol cur > posCol blk
-- | Returns whether the current position is at the left border of the block;
-- this is for list content such as function definitions or let bindings. This
-- means that col == blkCol.
isAtBlockLeft :: Parser Bool
isAtBlockLeft = do
- ps <- get
- return $ psLine ps >= psBlkLine ps && psCol ps == psBlkCol ps
+ PS { psCur = cur, psBlk = blk } <- get
+ return $ posLine cur >= posLine blk && posCol cur == posCol blk
-- | Consumes characters while the predicate holds or until (and excluding)
-- a newline, whichever comes first.
readWhileInline :: (Char -> Bool) -> Parser ()
readWhileInline p = do
(taken, rest) <- span (\c -> p c && c /= '\n') <$> gets psRest
- modify (\ps -> ps { psCol = psCol ps + length taken
+ modify (\ps -> ps { psCur = let Pos line col = psCur ps
+ in Pos line (col + length taken)
, psRest = rest })
-- | Consumes exactly one newline at the current position.
consumeNewline :: Parser ()
consumeNewline = gets psRest >>= \case
- '\n' : rest -> modify (\ps -> ps { psLine = psLine ps + 1
- , psCol = 0
+ '\n' : rest -> modify (\ps -> ps { psCur = Pos (posLine (psCur ps) + 1) 0
, psRest = rest })
_ -> empty
@@ -816,8 +856,9 @@ satisfy p = do
-- traceM ("rest is " ++ show r)
case r of
c : rest | c /= '\n', p c -> do
- modify (\ps -> ps { psCol = psCol ps + 1
- , psRest = rest })
+ modify (\ps -> let Pos line col = psCur ps
+ in ps { psCur = Pos line (col + 1)
+ , psRest = rest })
return c
_ -> empty
@@ -832,8 +873,9 @@ string :: String -> Parser ()
string s | any (== '\n') s = error "Newline in 'string' argument"
string s = do
ps <- get
+ let Pos line col = psCur ps
if take (length s) (psRest ps) == s
- then put (ps { psCol = psCol ps + length s
+ then put (ps { psCur = Pos line (col + length s)
, psRest = drop (length s) (psRest ps) })
else empty