From 7ebf27051c61f69d5c12a9350273df4ec20e3d86 Mon Sep 17 00:00:00 2001
From: Tom Smeding <tom@tomsmeding.com>
Date: Mon, 22 Nov 2021 22:47:56 +0100
Subject: Work on parser

---
 Parser.hs | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 99 insertions(+), 12 deletions(-)

diff --git a/Parser.hs b/Parser.hs
index 420e2b8..aedd557 100644
--- a/Parser.hs
+++ b/Parser.hs
@@ -3,7 +3,7 @@
 module Parser where
 
 import Control.Applicative
-import Data.Char (isSpace)
+import Data.Char (isLower, isUpper, isLetter, isDigit, isSpace, toUpper, toLower)
 import Control.Monad.Chronicle
 import Control.Monad.Reader
 import Control.Monad.State.Strict
@@ -51,13 +51,71 @@ pProgram = do
     return prog
 
 pFunDef :: Parser (FunDef ())
-pFunDef = _
+pFunDef = do
+    skipWhiteComment
+    assertAtCol 0 Fatal "Expected top-level definition, found indented stuff"
+    withRefCol 0 $ do
+        _
+
+data Case = Uppercase | Lowercase
+  deriving (Show)
+
+-- | Consumes an identifier (word or parenthesised operator) at the current
+-- position.
+pIdentifier0 :: Case -> Parser Name
+pIdentifier0 cs = pAlphaName0 cs <|> pParens0 (pSymbol0 cs)
+
+-- | Consumes a word-like name at the current position with the given case.
+pAlphaName0 :: Case -> Parser Name
+pAlphaName0 cs = do
+    (_, s) <- readInline (\case True -> \case Just c | isInitNameChar c -> Just (Right False)
+                                              _ -> Nothing
+                                False -> \case Just c | isNameContChar c -> Just (Right False)
+                                               _ -> Just (Left ()))
+                         True
+    case cs of
+      Uppercase | isLower (head s) -> do
+          raise Error "Unexpected uppercase word at this position, assuming typo"
+          return (Name (toUpper (head s) : tail s))
+      Lowercase | isUpper (head s) -> do
+          raise Error "Unexpected lowercase word at this position, assuming typo"
+          return (Name (toLower (head s) : tail s))
+      _ -> return (Name s)
+  where
+    isInitNameChar, isNameContChar :: Char -> Bool
+    isInitNameChar c = isLetter c || c == '_'
+    isNameContChar c = isInitNameChar c || isDigit c || c == '\''
+
+pSymbol0 :: Case -> Parser Name
+pSymbol0 cs = do
+    _
+
+-- | Parser between parens, with the opening paren at the current position.
+-- Enforces that all components are within the current indented block.
+pParens0 :: Parser a -> Parser a
+pParens0 p = do
+    string "("
+    skipWhiteComment
+    res <- p
+    skipWhiteComment
+    string ")"
+    return res
 
 
+-- | Run a parser under a modified psRefCol. The current psRefCol is reinstated
+-- after completion of this parser.
+withRefCol :: Int -> Parser a -> Parser a
+withRefCol refcol p = do
+    old <- gets psRefCol
+    modify (\ps -> ps { psRefCol = refcol })
+    res <- p
+    modify (\ps -> ps { psRefCol = old })
+    return res
 
 data Fatality = Error | Fatal
   deriving (Show)
 
+-- | Raise an error with the given fatality and description.
 raise :: Fatality -> String -> Parser ()
 raise fat msg = do
     fp <- asks ctxFile
@@ -67,30 +125,49 @@ raise fat msg = do
                 Fatal -> confess . pure
     fun (ErrMsg fp (psLine ps) (psCol ps) msg)
 
+-- | Raises an error if we're not currently at the given column.
+assertAtCol :: Int -> Fatality -> String -> Parser ()
+assertAtCol col fat msg = gets psCol >>= \col' ->
+    when (col' /= col) $ raise fat msg
+
+-- | Raises an error if psCol is not greater than psRefCol.
+assertWithinBlock :: Fatality -> String -> Parser ()
+assertWithinBlock fat msg = get >>= \ps ->
+    when (psCol ps <= psRefCol ps) $ raise fat msg
+
+-- | Raises an error if we're not currently at EOF.
 assertEOF :: Fatality -> Parser ()
 assertEOF fat = gets psRest >>= \case
     [] -> return ()
     _ -> raise fat "Unexpected stuff"
 
-data ReadResult a = Token a | Truncated a
-  deriving (Show, Functor)
-
-readInline :: (s -> Char -> Maybe s) -> s -> Parser (ReadResult String)
+-- | Consumes an inline token at the current position, asserting that psCol >
+-- psRefCol at the start. The token is defined by a pure stateful parser.
+-- If encountering a newline or EOF, the parser is run on this character
+-- ('Nothing' for EOF); if this produces a result, the result is returned;
+-- otherwise, the parser fails.
+readInline :: (s -> Maybe Char -> Maybe (Either r s)) -> s -> Parser (r, String)
 readInline f s0 = do
     ps0 <- get
     when (psCol ps0 <= psRefCol ps0) $
         raise Fatal "Expected stuff, but found end of indented expression"
-    let loop :: (s -> Char -> Maybe s) -> s -> Parser (ReadResult String)
+    let loop :: (s -> Maybe Char -> Maybe (Either r s)) -> s -> Parser (r, String)
         loop f' st = do
             ps <- get
             case psRest ps of
-              c : cs | Just st' <- f' st c -> do
-                         put (ps { psCol = psCol ps + 1, psRest = cs })
-                         fmap (c :) <$> loop f' st'
-                     | otherwise -> return (Token "")
-              [] -> return (Truncated "")
+              []       | Just (Left res) <- f' st Nothing     -> return (res, "")
+                       | otherwise -> empty
+              '\n' : _ | Just (Left res) <- f' st (Just '\n') -> return (res, "")
+              c : cs -> case f' st (Just c) of
+                          Nothing -> empty
+                          Just (Left res) -> return (res, "")
+                          Just (Right st') -> do
+                              put (ps { psCol = psCol ps + 1, psRest = cs })
+                              fmap (c :) <$> loop f' st'
     loop f s0
 
+-- | Consumes all whitespace and comments (including newlines). Note: this may
+-- leave psCol < psRefCol.
 skipWhiteComment :: Parser ()
 skipWhiteComment = do
     inlineWhite
@@ -98,9 +175,12 @@ skipWhiteComment = do
     _ <- optional lineComment
     (consumeNewline >> skipWhiteComment) <|> return ()
   where
+    -- | Consumes some inline whitespace.
     inlineWhite :: Parser ()
     inlineWhite = readWhileInline isSpace
 
+    -- | Consumes an inline comment including both end markers. Note: this may
+    -- leave psCol < psRefCol.
     inlineComment :: Parser ()
     inlineComment = do
         string "{-"
@@ -111,15 +191,20 @@ skipWhiteComment = do
                      ,consumeNewline >> loop]
         loop
 
+    -- | Consumes a line comment marker and the rest of the line, excluding
+    -- newline.
     lineComment :: Parser ()
     lineComment = string "--" >> readWhileInline (const True)
 
+    -- | Consumes characters while the predicate holds or until (and excluding)
+    -- a newline, whichever comes first.
     readWhileInline :: (Char -> Bool) -> Parser ()
     readWhileInline p = do
         (taken, rest) <- span (\c -> p c && c /= '\n') <$> gets psRest
         modify (\ps -> ps { psCol = psCol ps + length taken
                           , psRest = rest })
 
+-- | Consumes exactly one newline at the current position.
 consumeNewline :: Parser ()
 consumeNewline = gets psRest >>= \case
     '\n' : rest -> modify (\ps -> ps { psLine = psLine ps + 1
@@ -127,6 +212,8 @@ consumeNewline = gets psRest >>= \case
                                      , psRest = rest })
     _ -> empty
 
+-- | Consumes exactly this string at the current position. The string must not
+-- contain a newline.
 string :: String -> Parser ()
 string s | any (== '\n') s = error "Newline in 'string' argument"
 string s = do
-- 
cgit v1.2.3-70-g09d2