From 3ef786673ff8298124cd3b5ef50c35dbb23f77e2 Mon Sep 17 00:00:00 2001 From: Tom Smeding Date: Tue, 13 Feb 2024 22:51:57 +0100 Subject: Parse basic ADTs, and fix bugs --- Parser.hs | 120 ++++++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 81 insertions(+), 39 deletions(-) (limited to 'Parser.hs') diff --git a/Parser.hs b/Parser.hs index b31ffa1..0f0bd0c 100644 --- a/Parser.hs +++ b/Parser.hs @@ -25,11 +25,12 @@ import Control.Monad.Reader import Control.Monad.State.Lazy import Data.Bifunctor (first) import Data.Char +import Data.Either (partitionEithers) import Data.List.NonEmpty (NonEmpty(..)) import Data.These import Data.Tuple (swap) -import Debug.Trace +-- import Debug.Trace import AST @@ -80,7 +81,7 @@ instance Monad Parser where These errs' res -> These (errs <> errs') res instance Alternative Parser where - empty = Parser (\_ _ -> This mempty) + empty = Parser (\_ _ -> This []) Parser f <|> Parser g = Parser $ \ctx ps -> case f ctx ps of This _ -> g ctx ps @@ -128,15 +129,35 @@ parse fp source = fmap snd $ runParser pProgram (Context fp []) (PS 0 0 0 0 sour pProgram :: Parser (Program ()) pProgram = do - prog <- Program <$> many pFunDef + defs <- many pTopDef + let (datadefs, fundefs) = partitionEithers defs skipWhiteComment assertEOF Error - return prog + return (Program datadefs fundefs) -pFunDef :: Parser (FunDef ()) -pFunDef = do +pTopDef :: Parser (Either DataDef (FunDef ())) +pTopDef = do skipWhiteComment - pFunDef0 + Left <$> pDataDef0 <|> Right <$> pFunDef0 + +pDataDef0 :: Parser DataDef +pDataDef0 = do + pKeyword "data" + inlineWhite + name <- pIdentifier0 InBlock Uppercase + params <- many (inlineWhite >> pIdentifier0 InBlock Lowercase) + cons <- pDatacons "=" + return (DataDef name params cons) + where + pDatacons :: String -> Parser [(Name, [Type])] + pDatacons leader = do + inlineWhite + pKeySym leader + inlineWhite + name <- pIdentifier0 InBlock Uppercase + fields <- many pTypeAtom + rest <- pDatacons "|" <|> return [] + return ((name, fields) : rest) pFunDef0 :: Parser (FunDef ()) pFunDef0 = do @@ -430,12 +451,12 @@ pType = do pTypeApp :: Parser Type pTypeApp = many pTypeAtom >>= \case - [] -> raise Error "Expected type" >> return (TTup []) + [] -> raise Fatal "Expected type" [t] -> return t t:ts -> return (TApp t ts) pTypeAtom :: Parser Type -pTypeAtom = pTypeParens <|> pTypeList <|> pTypeCon <|> pTypeVar +pTypeAtom = pTypeParens <|> pTypeList <|> pTypeName where pTypeParens = do inlineWhite @@ -461,8 +482,12 @@ pTypeAtom = pTypeParens <|> pTypeList <|> pTypeCon <|> pTypeVar char ']' return (TList ty) - pTypeCon = inlineWhite >> TCon <$> pIdentifier0 InBlock Uppercase - pTypeVar = inlineWhite >> TVar <$> pIdentifier0 InBlock Lowercase + pTypeName = do + inlineWhite + (cs, name) <- pIdentifier0 InBlock Don'tCare + case cs of + Uppercase -> return (TCon name) + Lowercase -> return (TVar name) -- | Parse the given name-like keyword, ensuring that it is the entire word. pKeyword :: String -> Parser () @@ -476,13 +501,20 @@ pKeySym s = do string s notFollowedBy (() <$ satisfy isSymbolChar) -data Case = Uppercase | Lowercase - deriving (Show) +data Case care where + Uppercase :: Case 'True + Lowercase :: Case 'True + Don'tCare :: Case 'False +deriving instance Show (Case care) + +type family WithCaseOutput care a where + WithCaseOutput 'True a = a + WithCaseOutput 'False a = (Case 'True, a) -- | Consumes an identifier (word or parenthesised operator) at the current -- position. The `var` production in Haskell2010. -- var -> varid | "(" varsym ")" -pIdentifier0 :: BlockPos -> Case -> Parser Name +pIdentifier0 :: BlockPos -> Case care -> Parser (WithCaseOutput care Name) pIdentifier0 bpos cs = pAlphaName0 bpos cs <|> pParens0 (pSymbol0 bpos cs) where -- | Parser between parens, with the opening paren at the current position. @@ -499,7 +531,7 @@ pIdentifier0 bpos cs = pAlphaName0 bpos cs <|> pParens0 (pSymbol0 bpos cs) -- `varid` production in Haskell2010 for 'Lowercase', `conid' for 'Uppercase'. -- -- > varid -> (small {small | large | digit | "'"}) without reservedid -pAlphaName0 :: BlockPos -> Case -> Parser Name +pAlphaName0 :: BlockPos -> Case care -> Parser (WithCaseOutput care Name) pAlphaName0 bpos cs = do (_, s) <- readToken bpos @@ -509,19 +541,25 @@ pAlphaName0 bpos cs = do (False, Just c) | isNameContChar c -> Just (Right False) (False, _ ) -> Just (Left ())) True - name <- case cs of - Uppercase | isLower (head s) -> do - raise Error "Unexpected uppercase word at this position, assuming typo" - return (toUpper (head s) : tail s) - Lowercase | isUpper (head s) -> do - raise Error "Unexpected lowercase word at this position, assuming typo" - return (toLower (head s) : tail s) - _ -> return s + (name, adjoin) <- case cs of + Uppercase + | isLower (head s) -> do + raise Error "Unexpected uppercase word at this position, assuming typo" + return (toUpper (head s) : tail s, id) + | otherwise -> return (s, id) + Lowercase + | isUpper (head s) -> do + raise Error "Unexpected lowercase word at this position, assuming typo" + return (toLower (head s) : tail s, id) + | otherwise -> return (s, id) + Don'tCare + | isLower (head s) -> return (s, (Lowercase,)) + | otherwise -> return (s, (Lowercase,)) guard (name `notElem` ["case", "class", "data", "default", "deriving", "do", "else" ,"foreign", "if", "import", "in", "infix", "infixl" ,"infixr", "instance", "let", "module", "newtype", "of" ,"then", "type", "where", "_"]) - return (Name name) + return (adjoin (Name name)) isNameHeadChar :: Char -> Bool isNameHeadChar c = isLetter c || c == '_' @@ -530,7 +568,7 @@ isNameContChar :: Char -> Bool isNameContChar c = isNameHeadChar c || isDigit c || c == '\'' -- | Consumes a symbol at the current position. The `varsym` production in --- Haskell2010 for 'Lowercase', `consym` otherwise. +-- Haskell2010 for 'Lowercase', `consym` otherwise, and either if 'Don'tCare'. -- -- > varsym -> ((symbol without ":") {symbol}) without (reservedop | dashes) -- > consym -> (":" {symbol}) without reservedop @@ -540,24 +578,28 @@ isNameContChar c = isNameHeadChar c || isDigit c || c == '\'' -- > dashes -> "--" {"-"} -- > special -> ```(),;[]`{}``` -- > reservedop -> ".." | ":" | "::" | "=" | "\" | "|" | "<-" | "->" | "@" | "~" | "=>" -pSymbol0 :: BlockPos -> Case -> Parser Name +pSymbol0 :: BlockPos -> Case care -> Parser (WithCaseOutput care Name) pSymbol0 bpos cs = do case bpos of AtLeft -> assertAtBlockLeft Fatal "Expected symbol, but found indented expression" InBlock -> assertInsideBlock Fatal "Expected symbol, but found end of indented expression" - name <- (:) <$> (case cs of Lowercase -> satisfy (\c -> isSymbolChar c && c /= ':') - Uppercase -> satisfy (== ':')) - <*> many (satisfy isSymbolChar) + (c1, adjoin) <- + case cs of Lowercase -> (,id) <$> satisfy (\c -> isSymbolChar c && c /= ':') + Uppercase -> (,id) <$> satisfy (== ':') + Don'tCare -> do c1 <- satisfy (\c -> isSymbolChar c) + return (c1, if c1 == ':' then (Uppercase,) else (Lowercase,)) + crest <- many (satisfy isSymbolChar) + let name = c1 : crest guard (name `notElem` ["..", ":", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"]) guard (take 2 name /= "--") - return (Name name) + return (adjoin (Name name)) isSymbolChar :: Char -> Bool isSymbolChar c = (isAscSymbol || isUniSymbol) && not isSpecialExt where isSpecialExt = c `elem` "(),;[]`{}_\"'" isAscSymbol = c `elem` "!#$%&⋆+./<=>?@^|-~:" - isUniSymbol = isSymbol c || isPunctuation c + isUniSymbol = ord c > 127 && (isSymbol c || isPunctuation c) sepBy1 :: Parser a -> Parser sep -> Parser [a] @@ -650,10 +692,10 @@ skipWhiteComment = do _ <- many (blockComment >> inlineSpaces) optional_ lineComment optional_ (consumeNewline >> skipWhiteComment) - --- | Consumes some inline whitespace. Stops before newlines. -inlineSpaces :: Parser () -inlineSpaces = readWhileInline isSpace + where + -- | Consumes some inline whitespace. Stops before newlines. + inlineSpaces :: Parser () + inlineSpaces = readWhileInline isSpace -- | Consumes an delimited comment including both end markers. Note: this may -- end outside the current block. @@ -724,12 +766,12 @@ consumeNewline = gets psRest >>= \case -- | Consumes exactly one character, unequal to newline, at the current position. satisfy :: (Char -> Bool) -> Parser Char satisfy p = do - traceM "entering satisfy" + -- traceM "entering satisfy" r <- gets psRest - traceM "got rest" + -- traceM "got rest" r `seq` return () - traceM "seqd rest" - traceM ("rest is " ++ show r) + -- traceM "seqd rest" + -- traceM ("rest is " ++ show r) case r of c : rest | c /= '\n', p c -> do modify (\ps -> ps { psCol = psCol ps + 1 -- cgit v1.2.3-70-g09d2