aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--AST.hs4
-rw-r--r--Parser.hs192
2 files changed, 152 insertions, 44 deletions
diff --git a/AST.hs b/AST.hs
index f8f3624..6b327c5 100644
--- a/AST.hs
+++ b/AST.hs
@@ -11,10 +11,12 @@ newtype Name = Name String
deriving (Show)
data Type
- = TApp Name [Type]
+ = TApp Type [Type]
| TTup [Type]
| TList Type
| TFun Type Type
+ | TCon Name
+ | TVar Name
deriving (Show)
data FunEq t = FunEq Name [Pattern t] (RHS t)
diff --git a/Parser.hs b/Parser.hs
index aedd557..f525ba6 100644
--- a/Parser.hs
+++ b/Parser.hs
@@ -3,7 +3,7 @@
module Parser where
import Control.Applicative
-import Data.Char (isLower, isUpper, isLetter, isDigit, isSpace, toUpper, toLower)
+import Data.Char
import Control.Monad.Chronicle
import Control.Monad.Reader
import Control.Monad.State.Strict
@@ -37,11 +37,14 @@ printErrMsg :: ErrMsg -> String
printErrMsg (ErrMsg fp y x s) = fp ++ ":" ++ show (y + 1) ++ ":" ++ show (x + 1) ++ ": " ++ s
parse :: FilePath -> String -> These [ErrMsg] (Program ())
-parse fp source =
+parse = runParser pProgram
+
+runParser :: Parser a -> FilePath -> String -> These [ErrMsg] a
+runParser p fp source =
flip evalState (PS 0 0 0 source)
. runChronicleT
. flip runReaderT (Context fp)
- $ pProgram
+ $ p
pProgram :: Parser (Program ())
pProgram = do
@@ -53,19 +56,77 @@ pProgram = do
pFunDef :: Parser (FunDef ())
pFunDef = do
skipWhiteComment
+ mtypesig <- optional pStandaloneTypesig0
+ _
+
+pStandaloneTypesig0 :: Parser (Name, Type)
+pStandaloneTypesig0 = do
assertAtCol 0 Fatal "Expected top-level definition, found indented stuff"
withRefCol 0 $ do
- _
+ name <- pIdentifier0 Lowercase
+ inlineWhite
+ string "::"
+ ty <- pType
+ return (name, ty)
+
+pType :: Parser Type
+pType = do
+ ty1 <- pTypeApp
+ asum [do inlineWhite
+ string "->"
+ ty2 <- pType
+ return (TFun ty1 ty2)
+ ,return ty1]
+
+pTypeApp :: Parser Type
+pTypeApp = many pTypeAtom >>= \case
+ [] -> raise Error "Expected type" >> return (TTup [])
+ [t] -> return t
+ t:ts -> return (TApp t ts)
+
+pTypeAtom :: Parser Type
+pTypeAtom = pTypeParens <|> pTypeList <|> pTypeCon <|> pTypeVar
+ where
+ pTypeParens = do
+ inlineWhite
+ string "("
+ asum [do inlineWhite
+ string ")"
+ return (TTup [])
+ ,do ty1 <- pType
+ ty2s <- many $ do
+ inlineWhite
+ string ","
+ pType
+ inlineWhite
+ string ")"
+ case ty2s of
+ [] -> return ty1
+ _ -> return (TTup (ty1 : ty2s))]
+
+ pTypeList = do
+ inlineWhite
+ string "["
+ ty <- pType
+ string "]"
+ return (TList ty)
+
+ pTypeCon = inlineWhite >> TCon <$> pIdentifier0 Uppercase
+ pTypeVar = inlineWhite >> TVar <$> pIdentifier0 Lowercase
data Case = Uppercase | Lowercase
deriving (Show)
-- | Consumes an identifier (word or parenthesised operator) at the current
--- position.
+-- position. The `var` production in Haskell2010.
+-- var -> varid | "(" varsym ")"
pIdentifier0 :: Case -> Parser Name
pIdentifier0 cs = pAlphaName0 cs <|> pParens0 (pSymbol0 cs)
--- | Consumes a word-like name at the current position with the given case.
+-- | Consumes a word-like name at the current position with the given case. The
+-- `varid` production in Haskell2010 for 'Lowercase', `conid' for 'Uppercase'.
+--
+-- > varid -> (small {small | large | digit | "'"}) without reservedid
pAlphaName0 :: Case -> Parser Name
pAlphaName0 cs = do
(_, s) <- readInline (\case True -> \case Just c | isInitNameChar c -> Just (Right False)
@@ -73,22 +134,47 @@ pAlphaName0 cs = do
False -> \case Just c | isNameContChar c -> Just (Right False)
_ -> Just (Left ()))
True
- case cs of
+ name <- case cs of
Uppercase | isLower (head s) -> do
raise Error "Unexpected uppercase word at this position, assuming typo"
- return (Name (toUpper (head s) : tail s))
+ return (toUpper (head s) : tail s)
Lowercase | isUpper (head s) -> do
raise Error "Unexpected lowercase word at this position, assuming typo"
- return (Name (toLower (head s) : tail s))
- _ -> return (Name s)
+ return (toLower (head s) : tail s)
+ _ -> return s
+ guard (name `notElem` ["case", "class", "data", "default", "deriving", "do", "else"
+ ,"foreign", "if", "import", "in", "infix", "infixl"
+ ,"infixr", "instance", "let", "module", "newtype", "of"
+ ,"then", "type", "where", "_"])
+ return (Name name)
where
isInitNameChar, isNameContChar :: Char -> Bool
isInitNameChar c = isLetter c || c == '_'
isNameContChar c = isInitNameChar c || isDigit c || c == '\''
+-- | Consumes a symbol at the current position. The `varsym` production in
+-- Haskell2010 for 'Lowercase', `consym` otherwise.
+--
+-- > varsym -> ((symbol without ":") {symbol}) without (reservedop | dashes)
+-- > consym -> (":" {symbol}) without reservedop
+-- > symbol -> ascSymbol | uniSymbol without (special | "_" | "\"" | "'")
+-- > ascSymbol -> ```!#$%&⋆+./<=>?@^|-~:```
+-- > uniSymbol -> unicode symbol or punctuation
+-- > dashes -> "--" {"-"}
+-- > special -> ```(),;[]`{}```
+-- > reservedop -> ".." | ":" | "::" | "=" | "\" | "|" | "<-" | "->" | "@" | "~" | "=>"
pSymbol0 :: Case -> Parser Name
pSymbol0 cs = do
- _
+ let isSpecialExt c = c `elem` "(),;[]`{}_\"'"
+ isAscSymbol c = c `elem` "!#$%&⋆+./<=>?@^|-~:"
+ isUniSymbol c = isSymbol c || isPunctuation c
+ isSymbolChar c = (isAscSymbol c || isUniSymbol c) && not (isSpecialExt c)
+ name <- (:) <$> (case cs of Lowercase -> satisfy (\c -> isSymbolChar c && c /= ':')
+ Uppercase -> satisfy (== ':'))
+ <*> many (satisfy isSymbolChar)
+ guard (name `notElem` ["..", ":", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"])
+ guard (take 2 name /= "--")
+ return (Name name)
-- | Parser between parens, with the opening paren at the current position.
-- Enforces that all components are within the current indented block.
@@ -96,8 +182,11 @@ pParens0 :: Parser a -> Parser a
pParens0 p = do
string "("
skipWhiteComment
+ assertWithinBlock Error "Unexpected dedent after opening parenthesis"
res <- p
+ assertWithinBlock Error "Unexpected dedent in parenthesised expression"
skipWhiteComment
+ assertWithinBlock Error "Unexpected dedent in parenthesised expression"
string ")"
return res
@@ -166,43 +255,51 @@ readInline f s0 = do
fmap (c :) <$> loop f' st'
loop f s0
+-- | Consumes all whitespace and comments (including newlines), but only if
+-- this then leaves psCol > psRefCol. If not, this fails.
+inlineWhite :: Parser ()
+inlineWhite = do
+ skipWhiteComment
+ ps <- get
+ when (psCol ps <= psRefCol ps) empty
+
-- | Consumes all whitespace and comments (including newlines). Note: this may
--- leave psCol < psRefCol.
+-- leave psCol <= psRefCol.
skipWhiteComment :: Parser ()
skipWhiteComment = do
- inlineWhite
- _ <- many (inlineComment >> inlineWhite)
+ inlineSpaces
+ _ <- many (inlineComment >> inlineSpaces)
_ <- optional lineComment
(consumeNewline >> skipWhiteComment) <|> return ()
- where
- -- | Consumes some inline whitespace.
- inlineWhite :: Parser ()
- inlineWhite = readWhileInline isSpace
-
- -- | Consumes an inline comment including both end markers. Note: this may
- -- leave psCol < psRefCol.
- inlineComment :: Parser ()
- inlineComment = do
- string "{-"
- let loop = do
- readWhileInline (`notElem` "{-")
- asum [string "-}"
- ,inlineComment >> loop
- ,consumeNewline >> loop]
- loop
-
- -- | Consumes a line comment marker and the rest of the line, excluding
- -- newline.
- lineComment :: Parser ()
- lineComment = string "--" >> readWhileInline (const True)
-
- -- | Consumes characters while the predicate holds or until (and excluding)
- -- a newline, whichever comes first.
- readWhileInline :: (Char -> Bool) -> Parser ()
- readWhileInline p = do
- (taken, rest) <- span (\c -> p c && c /= '\n') <$> gets psRest
- modify (\ps -> ps { psCol = psCol ps + length taken
- , psRest = rest })
+
+-- | Consumes some inline whitespace.
+inlineSpaces :: Parser ()
+inlineSpaces = readWhileInline isSpace
+
+-- | Consumes an inline comment including both end markers. Note: this may
+-- leave psCol < psRefCol.
+inlineComment :: Parser ()
+inlineComment = do
+ string "{-"
+ let loop = do
+ readWhileInline (`notElem` "{-")
+ asum [string "-}"
+ ,inlineComment >> loop
+ ,consumeNewline >> loop]
+ loop
+
+-- | Consumes a line comment marker and the rest of the line, excluding
+-- newline.
+lineComment :: Parser ()
+lineComment = string "--" >> readWhileInline (const True)
+
+-- | Consumes characters while the predicate holds or until (and excluding)
+-- a newline, whichever comes first.
+readWhileInline :: (Char -> Bool) -> Parser ()
+readWhileInline p = do
+ (taken, rest) <- span (\c -> p c && c /= '\n') <$> gets psRest
+ modify (\ps -> ps { psCol = psCol ps + length taken
+ , psRest = rest })
-- | Consumes exactly one newline at the current position.
consumeNewline :: Parser ()
@@ -212,6 +309,15 @@ consumeNewline = gets psRest >>= \case
, psRest = rest })
_ -> empty
+-- | Consumes exactly one character, unequal to newline, at the current position.
+satisfy :: (Char -> Bool) -> Parser Char
+satisfy p = gets psRest >>= \case
+ c : rest | c /= '\n', p c -> do
+ modify (\ps -> ps { psCol = psCol ps + 1
+ , psRest = rest })
+ return c
+ _ -> empty
+
-- | Consumes exactly this string at the current position. The string must not
-- contain a newline.
string :: String -> Parser ()