diff options
author | Tom Smeding <tom@tomsmeding.com> | 2021-11-25 22:41:55 +0100 |
---|---|---|
committer | Tom Smeding <tom@tomsmeding.com> | 2021-11-25 22:41:55 +0100 |
commit | d17dc556c46a43dd7c35c6bfcc7c47a23ef0caeb (patch) | |
tree | 69d69100e4189bdf174a6bffd9df8efc53e42a4c /Parser.hs | |
parent | 7ebf27051c61f69d5c12a9350273df4ec20e3d86 (diff) |
Some work
Diffstat (limited to 'Parser.hs')
-rw-r--r-- | Parser.hs | 192 |
1 files changed, 149 insertions, 43 deletions
@@ -3,7 +3,7 @@ module Parser where import Control.Applicative -import Data.Char (isLower, isUpper, isLetter, isDigit, isSpace, toUpper, toLower) +import Data.Char import Control.Monad.Chronicle import Control.Monad.Reader import Control.Monad.State.Strict @@ -37,11 +37,14 @@ printErrMsg :: ErrMsg -> String printErrMsg (ErrMsg fp y x s) = fp ++ ":" ++ show (y + 1) ++ ":" ++ show (x + 1) ++ ": " ++ s parse :: FilePath -> String -> These [ErrMsg] (Program ()) -parse fp source = +parse = runParser pProgram + +runParser :: Parser a -> FilePath -> String -> These [ErrMsg] a +runParser p fp source = flip evalState (PS 0 0 0 source) . runChronicleT . flip runReaderT (Context fp) - $ pProgram + $ p pProgram :: Parser (Program ()) pProgram = do @@ -53,19 +56,77 @@ pProgram = do pFunDef :: Parser (FunDef ()) pFunDef = do skipWhiteComment + mtypesig <- optional pStandaloneTypesig0 + _ + +pStandaloneTypesig0 :: Parser (Name, Type) +pStandaloneTypesig0 = do assertAtCol 0 Fatal "Expected top-level definition, found indented stuff" withRefCol 0 $ do - _ + name <- pIdentifier0 Lowercase + inlineWhite + string "::" + ty <- pType + return (name, ty) + +pType :: Parser Type +pType = do + ty1 <- pTypeApp + asum [do inlineWhite + string "->" + ty2 <- pType + return (TFun ty1 ty2) + ,return ty1] + +pTypeApp :: Parser Type +pTypeApp = many pTypeAtom >>= \case + [] -> raise Error "Expected type" >> return (TTup []) + [t] -> return t + t:ts -> return (TApp t ts) + +pTypeAtom :: Parser Type +pTypeAtom = pTypeParens <|> pTypeList <|> pTypeCon <|> pTypeVar + where + pTypeParens = do + inlineWhite + string "(" + asum [do inlineWhite + string ")" + return (TTup []) + ,do ty1 <- pType + ty2s <- many $ do + inlineWhite + string "," + pType + inlineWhite + string ")" + case ty2s of + [] -> return ty1 + _ -> return (TTup (ty1 : ty2s))] + + pTypeList = do + inlineWhite + string "[" + ty <- pType + string "]" + return (TList ty) + + pTypeCon = inlineWhite >> TCon <$> pIdentifier0 Uppercase + pTypeVar = inlineWhite >> TVar <$> pIdentifier0 Lowercase data Case = Uppercase | Lowercase deriving (Show) -- | Consumes an identifier (word or parenthesised operator) at the current --- position. +-- position. The `var` production in Haskell2010. +-- var -> varid | "(" varsym ")" pIdentifier0 :: Case -> Parser Name pIdentifier0 cs = pAlphaName0 cs <|> pParens0 (pSymbol0 cs) --- | Consumes a word-like name at the current position with the given case. +-- | Consumes a word-like name at the current position with the given case. The +-- `varid` production in Haskell2010 for 'Lowercase', `conid' for 'Uppercase'. +-- +-- > varid -> (small {small | large | digit | "'"}) without reservedid pAlphaName0 :: Case -> Parser Name pAlphaName0 cs = do (_, s) <- readInline (\case True -> \case Just c | isInitNameChar c -> Just (Right False) @@ -73,22 +134,47 @@ pAlphaName0 cs = do False -> \case Just c | isNameContChar c -> Just (Right False) _ -> Just (Left ())) True - case cs of + name <- case cs of Uppercase | isLower (head s) -> do raise Error "Unexpected uppercase word at this position, assuming typo" - return (Name (toUpper (head s) : tail s)) + return (toUpper (head s) : tail s) Lowercase | isUpper (head s) -> do raise Error "Unexpected lowercase word at this position, assuming typo" - return (Name (toLower (head s) : tail s)) - _ -> return (Name s) + return (toLower (head s) : tail s) + _ -> return s + guard (name `notElem` ["case", "class", "data", "default", "deriving", "do", "else" + ,"foreign", "if", "import", "in", "infix", "infixl" + ,"infixr", "instance", "let", "module", "newtype", "of" + ,"then", "type", "where", "_"]) + return (Name name) where isInitNameChar, isNameContChar :: Char -> Bool isInitNameChar c = isLetter c || c == '_' isNameContChar c = isInitNameChar c || isDigit c || c == '\'' +-- | Consumes a symbol at the current position. The `varsym` production in +-- Haskell2010 for 'Lowercase', `consym` otherwise. +-- +-- > varsym -> ((symbol without ":") {symbol}) without (reservedop | dashes) +-- > consym -> (":" {symbol}) without reservedop +-- > symbol -> ascSymbol | uniSymbol without (special | "_" | "\"" | "'") +-- > ascSymbol -> ```!#$%&⋆+./<=>?@^|-~:``` +-- > uniSymbol -> unicode symbol or punctuation +-- > dashes -> "--" {"-"} +-- > special -> ```(),;[]`{}``` +-- > reservedop -> ".." | ":" | "::" | "=" | "\" | "|" | "<-" | "->" | "@" | "~" | "=>" pSymbol0 :: Case -> Parser Name pSymbol0 cs = do - _ + let isSpecialExt c = c `elem` "(),;[]`{}_\"'" + isAscSymbol c = c `elem` "!#$%&⋆+./<=>?@^|-~:" + isUniSymbol c = isSymbol c || isPunctuation c + isSymbolChar c = (isAscSymbol c || isUniSymbol c) && not (isSpecialExt c) + name <- (:) <$> (case cs of Lowercase -> satisfy (\c -> isSymbolChar c && c /= ':') + Uppercase -> satisfy (== ':')) + <*> many (satisfy isSymbolChar) + guard (name `notElem` ["..", ":", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"]) + guard (take 2 name /= "--") + return (Name name) -- | Parser between parens, with the opening paren at the current position. -- Enforces that all components are within the current indented block. @@ -96,8 +182,11 @@ pParens0 :: Parser a -> Parser a pParens0 p = do string "(" skipWhiteComment + assertWithinBlock Error "Unexpected dedent after opening parenthesis" res <- p + assertWithinBlock Error "Unexpected dedent in parenthesised expression" skipWhiteComment + assertWithinBlock Error "Unexpected dedent in parenthesised expression" string ")" return res @@ -166,43 +255,51 @@ readInline f s0 = do fmap (c :) <$> loop f' st' loop f s0 +-- | Consumes all whitespace and comments (including newlines), but only if +-- this then leaves psCol > psRefCol. If not, this fails. +inlineWhite :: Parser () +inlineWhite = do + skipWhiteComment + ps <- get + when (psCol ps <= psRefCol ps) empty + -- | Consumes all whitespace and comments (including newlines). Note: this may --- leave psCol < psRefCol. +-- leave psCol <= psRefCol. skipWhiteComment :: Parser () skipWhiteComment = do - inlineWhite - _ <- many (inlineComment >> inlineWhite) + inlineSpaces + _ <- many (inlineComment >> inlineSpaces) _ <- optional lineComment (consumeNewline >> skipWhiteComment) <|> return () - where - -- | Consumes some inline whitespace. - inlineWhite :: Parser () - inlineWhite = readWhileInline isSpace - - -- | Consumes an inline comment including both end markers. Note: this may - -- leave psCol < psRefCol. - inlineComment :: Parser () - inlineComment = do - string "{-" - let loop = do - readWhileInline (`notElem` "{-") - asum [string "-}" - ,inlineComment >> loop - ,consumeNewline >> loop] - loop - - -- | Consumes a line comment marker and the rest of the line, excluding - -- newline. - lineComment :: Parser () - lineComment = string "--" >> readWhileInline (const True) - - -- | Consumes characters while the predicate holds or until (and excluding) - -- a newline, whichever comes first. - readWhileInline :: (Char -> Bool) -> Parser () - readWhileInline p = do - (taken, rest) <- span (\c -> p c && c /= '\n') <$> gets psRest - modify (\ps -> ps { psCol = psCol ps + length taken - , psRest = rest }) + +-- | Consumes some inline whitespace. +inlineSpaces :: Parser () +inlineSpaces = readWhileInline isSpace + +-- | Consumes an inline comment including both end markers. Note: this may +-- leave psCol < psRefCol. +inlineComment :: Parser () +inlineComment = do + string "{-" + let loop = do + readWhileInline (`notElem` "{-") + asum [string "-}" + ,inlineComment >> loop + ,consumeNewline >> loop] + loop + +-- | Consumes a line comment marker and the rest of the line, excluding +-- newline. +lineComment :: Parser () +lineComment = string "--" >> readWhileInline (const True) + +-- | Consumes characters while the predicate holds or until (and excluding) +-- a newline, whichever comes first. +readWhileInline :: (Char -> Bool) -> Parser () +readWhileInline p = do + (taken, rest) <- span (\c -> p c && c /= '\n') <$> gets psRest + modify (\ps -> ps { psCol = psCol ps + length taken + , psRest = rest }) -- | Consumes exactly one newline at the current position. consumeNewline :: Parser () @@ -212,6 +309,15 @@ consumeNewline = gets psRest >>= \case , psRest = rest }) _ -> empty +-- | Consumes exactly one character, unequal to newline, at the current position. +satisfy :: (Char -> Bool) -> Parser Char +satisfy p = gets psRest >>= \case + c : rest | c /= '\n', p c -> do + modify (\ps -> ps { psCol = psCol ps + 1 + , psRest = rest }) + return c + _ -> empty + -- | Consumes exactly this string at the current position. The string must not -- contain a newline. string :: String -> Parser () |