aboutsummaryrefslogtreecommitdiff
path: root/Parser.hs
diff options
context:
space:
mode:
authorTom Smeding <tom@tomsmeding.com>2021-11-22 22:47:56 +0100
committerTom Smeding <tom@tomsmeding.com>2021-11-22 22:47:56 +0100
commit7ebf27051c61f69d5c12a9350273df4ec20e3d86 (patch)
treecb227700e620f1bfe9ec29eaeeb8b31e0763c69e /Parser.hs
parentd1cb805a26483cb64fea221cea4991c097f90fc0 (diff)
Work on parser
Diffstat (limited to 'Parser.hs')
-rw-r--r--Parser.hs111
1 files changed, 99 insertions, 12 deletions
diff --git a/Parser.hs b/Parser.hs
index 420e2b8..aedd557 100644
--- a/Parser.hs
+++ b/Parser.hs
@@ -3,7 +3,7 @@
module Parser where
import Control.Applicative
-import Data.Char (isSpace)
+import Data.Char (isLower, isUpper, isLetter, isDigit, isSpace, toUpper, toLower)
import Control.Monad.Chronicle
import Control.Monad.Reader
import Control.Monad.State.Strict
@@ -51,13 +51,71 @@ pProgram = do
return prog
pFunDef :: Parser (FunDef ())
-pFunDef = _
+pFunDef = do
+ skipWhiteComment
+ assertAtCol 0 Fatal "Expected top-level definition, found indented stuff"
+ withRefCol 0 $ do
+ _
+
+data Case = Uppercase | Lowercase
+ deriving (Show)
+
+-- | Consumes an identifier (word or parenthesised operator) at the current
+-- position.
+pIdentifier0 :: Case -> Parser Name
+pIdentifier0 cs = pAlphaName0 cs <|> pParens0 (pSymbol0 cs)
+
+-- | Consumes a word-like name at the current position with the given case.
+pAlphaName0 :: Case -> Parser Name
+pAlphaName0 cs = do
+ (_, s) <- readInline (\case True -> \case Just c | isInitNameChar c -> Just (Right False)
+ _ -> Nothing
+ False -> \case Just c | isNameContChar c -> Just (Right False)
+ _ -> Just (Left ()))
+ True
+ case cs of
+ Uppercase | isLower (head s) -> do
+ raise Error "Unexpected uppercase word at this position, assuming typo"
+ return (Name (toUpper (head s) : tail s))
+ Lowercase | isUpper (head s) -> do
+ raise Error "Unexpected lowercase word at this position, assuming typo"
+ return (Name (toLower (head s) : tail s))
+ _ -> return (Name s)
+ where
+ isInitNameChar, isNameContChar :: Char -> Bool
+ isInitNameChar c = isLetter c || c == '_'
+ isNameContChar c = isInitNameChar c || isDigit c || c == '\''
+
+pSymbol0 :: Case -> Parser Name
+pSymbol0 cs = do
+ _
+
+-- | Parser between parens, with the opening paren at the current position.
+-- Enforces that all components are within the current indented block.
+pParens0 :: Parser a -> Parser a
+pParens0 p = do
+ string "("
+ skipWhiteComment
+ res <- p
+ skipWhiteComment
+ string ")"
+ return res
+-- | Run a parser under a modified psRefCol. The current psRefCol is reinstated
+-- after completion of this parser.
+withRefCol :: Int -> Parser a -> Parser a
+withRefCol refcol p = do
+ old <- gets psRefCol
+ modify (\ps -> ps { psRefCol = refcol })
+ res <- p
+ modify (\ps -> ps { psRefCol = old })
+ return res
data Fatality = Error | Fatal
deriving (Show)
+-- | Raise an error with the given fatality and description.
raise :: Fatality -> String -> Parser ()
raise fat msg = do
fp <- asks ctxFile
@@ -67,30 +125,49 @@ raise fat msg = do
Fatal -> confess . pure
fun (ErrMsg fp (psLine ps) (psCol ps) msg)
+-- | Raises an error if we're not currently at the given column.
+assertAtCol :: Int -> Fatality -> String -> Parser ()
+assertAtCol col fat msg = gets psCol >>= \col' ->
+ when (col' /= col) $ raise fat msg
+
+-- | Raises an error if psCol is not greater than psRefCol.
+assertWithinBlock :: Fatality -> String -> Parser ()
+assertWithinBlock fat msg = get >>= \ps ->
+ when (psCol ps <= psRefCol ps) $ raise fat msg
+
+-- | Raises an error if we're not currently at EOF.
assertEOF :: Fatality -> Parser ()
assertEOF fat = gets psRest >>= \case
[] -> return ()
_ -> raise fat "Unexpected stuff"
-data ReadResult a = Token a | Truncated a
- deriving (Show, Functor)
-
-readInline :: (s -> Char -> Maybe s) -> s -> Parser (ReadResult String)
+-- | Consumes an inline token at the current position, asserting that psCol >
+-- psRefCol at the start. The token is defined by a pure stateful parser.
+-- If encountering a newline or EOF, the parser is run on this character
+-- ('Nothing' for EOF); if this produces a result, the result is returned;
+-- otherwise, the parser fails.
+readInline :: (s -> Maybe Char -> Maybe (Either r s)) -> s -> Parser (r, String)
readInline f s0 = do
ps0 <- get
when (psCol ps0 <= psRefCol ps0) $
raise Fatal "Expected stuff, but found end of indented expression"
- let loop :: (s -> Char -> Maybe s) -> s -> Parser (ReadResult String)
+ let loop :: (s -> Maybe Char -> Maybe (Either r s)) -> s -> Parser (r, String)
loop f' st = do
ps <- get
case psRest ps of
- c : cs | Just st' <- f' st c -> do
- put (ps { psCol = psCol ps + 1, psRest = cs })
- fmap (c :) <$> loop f' st'
- | otherwise -> return (Token "")
- [] -> return (Truncated "")
+ [] | Just (Left res) <- f' st Nothing -> return (res, "")
+ | otherwise -> empty
+ '\n' : _ | Just (Left res) <- f' st (Just '\n') -> return (res, "")
+ c : cs -> case f' st (Just c) of
+ Nothing -> empty
+ Just (Left res) -> return (res, "")
+ Just (Right st') -> do
+ put (ps { psCol = psCol ps + 1, psRest = cs })
+ fmap (c :) <$> loop f' st'
loop f s0
+-- | Consumes all whitespace and comments (including newlines). Note: this may
+-- leave psCol < psRefCol.
skipWhiteComment :: Parser ()
skipWhiteComment = do
inlineWhite
@@ -98,9 +175,12 @@ skipWhiteComment = do
_ <- optional lineComment
(consumeNewline >> skipWhiteComment) <|> return ()
where
+ -- | Consumes some inline whitespace.
inlineWhite :: Parser ()
inlineWhite = readWhileInline isSpace
+ -- | Consumes an inline comment including both end markers. Note: this may
+ -- leave psCol < psRefCol.
inlineComment :: Parser ()
inlineComment = do
string "{-"
@@ -111,15 +191,20 @@ skipWhiteComment = do
,consumeNewline >> loop]
loop
+ -- | Consumes a line comment marker and the rest of the line, excluding
+ -- newline.
lineComment :: Parser ()
lineComment = string "--" >> readWhileInline (const True)
+ -- | Consumes characters while the predicate holds or until (and excluding)
+ -- a newline, whichever comes first.
readWhileInline :: (Char -> Bool) -> Parser ()
readWhileInline p = do
(taken, rest) <- span (\c -> p c && c /= '\n') <$> gets psRest
modify (\ps -> ps { psCol = psCol ps + length taken
, psRest = rest })
+-- | Consumes exactly one newline at the current position.
consumeNewline :: Parser ()
consumeNewline = gets psRest >>= \case
'\n' : rest -> modify (\ps -> ps { psLine = psLine ps + 1
@@ -127,6 +212,8 @@ consumeNewline = gets psRest >>= \case
, psRest = rest })
_ -> empty
+-- | Consumes exactly this string at the current position. The string must not
+-- contain a newline.
string :: String -> Parser ()
string s | any (== '\n') s = error "Newline in 'string' argument"
string s = do