aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Smeding <tom@tomsmeding.com>2024-02-13 22:51:57 +0100
committerTom Smeding <tom@tomsmeding.com>2024-02-13 22:51:57 +0100
commit3ef786673ff8298124cd3b5ef50c35dbb23f77e2 (patch)
treee7c0cb4ac969dff2b8f2030f76df4bf0723294b1
parentf8ef28316dd8adeaf8d4d3e0f6e310e26ac19028 (diff)
Parse basic ADTs, and fix bugs
-rw-r--r--AST.hs5
-rw-r--r--Parser.hs120
-rw-r--r--examples/test1.hs8
3 files changed, 93 insertions, 40 deletions
diff --git a/AST.hs b/AST.hs
index 2e1bb13..47652b6 100644
--- a/AST.hs
+++ b/AST.hs
@@ -6,7 +6,10 @@ import Data.List.NonEmpty (NonEmpty)
newtype Name = Name String
deriving (Show, Eq)
-data Program t = Program [FunDef t]
+data Program t = Program [DataDef] [FunDef t]
+ deriving (Show)
+
+data DataDef = DataDef Name [Name] [(Name, [Type])]
deriving (Show)
data FunDef t = FunDef Name (Maybe Type) (NonEmpty (FunEq t))
diff --git a/Parser.hs b/Parser.hs
index b31ffa1..0f0bd0c 100644
--- a/Parser.hs
+++ b/Parser.hs
@@ -25,11 +25,12 @@ import Control.Monad.Reader
import Control.Monad.State.Lazy
import Data.Bifunctor (first)
import Data.Char
+import Data.Either (partitionEithers)
import Data.List.NonEmpty (NonEmpty(..))
import Data.These
import Data.Tuple (swap)
-import Debug.Trace
+-- import Debug.Trace
import AST
@@ -80,7 +81,7 @@ instance Monad Parser where
These errs' res -> These (errs <> errs') res
instance Alternative Parser where
- empty = Parser (\_ _ -> This mempty)
+ empty = Parser (\_ _ -> This [])
Parser f <|> Parser g = Parser $ \ctx ps ->
case f ctx ps of
This _ -> g ctx ps
@@ -128,15 +129,35 @@ parse fp source = fmap snd $ runParser pProgram (Context fp []) (PS 0 0 0 0 sour
pProgram :: Parser (Program ())
pProgram = do
- prog <- Program <$> many pFunDef
+ defs <- many pTopDef
+ let (datadefs, fundefs) = partitionEithers defs
skipWhiteComment
assertEOF Error
- return prog
+ return (Program datadefs fundefs)
-pFunDef :: Parser (FunDef ())
-pFunDef = do
+pTopDef :: Parser (Either DataDef (FunDef ()))
+pTopDef = do
skipWhiteComment
- pFunDef0
+ Left <$> pDataDef0 <|> Right <$> pFunDef0
+
+pDataDef0 :: Parser DataDef
+pDataDef0 = do
+ pKeyword "data"
+ inlineWhite
+ name <- pIdentifier0 InBlock Uppercase
+ params <- many (inlineWhite >> pIdentifier0 InBlock Lowercase)
+ cons <- pDatacons "="
+ return (DataDef name params cons)
+ where
+ pDatacons :: String -> Parser [(Name, [Type])]
+ pDatacons leader = do
+ inlineWhite
+ pKeySym leader
+ inlineWhite
+ name <- pIdentifier0 InBlock Uppercase
+ fields <- many pTypeAtom
+ rest <- pDatacons "|" <|> return []
+ return ((name, fields) : rest)
pFunDef0 :: Parser (FunDef ())
pFunDef0 = do
@@ -430,12 +451,12 @@ pType = do
pTypeApp :: Parser Type
pTypeApp = many pTypeAtom >>= \case
- [] -> raise Error "Expected type" >> return (TTup [])
+ [] -> raise Fatal "Expected type"
[t] -> return t
t:ts -> return (TApp t ts)
pTypeAtom :: Parser Type
-pTypeAtom = pTypeParens <|> pTypeList <|> pTypeCon <|> pTypeVar
+pTypeAtom = pTypeParens <|> pTypeList <|> pTypeName
where
pTypeParens = do
inlineWhite
@@ -461,8 +482,12 @@ pTypeAtom = pTypeParens <|> pTypeList <|> pTypeCon <|> pTypeVar
char ']'
return (TList ty)
- pTypeCon = inlineWhite >> TCon <$> pIdentifier0 InBlock Uppercase
- pTypeVar = inlineWhite >> TVar <$> pIdentifier0 InBlock Lowercase
+ pTypeName = do
+ inlineWhite
+ (cs, name) <- pIdentifier0 InBlock Don'tCare
+ case cs of
+ Uppercase -> return (TCon name)
+ Lowercase -> return (TVar name)
-- | Parse the given name-like keyword, ensuring that it is the entire word.
pKeyword :: String -> Parser ()
@@ -476,13 +501,20 @@ pKeySym s = do
string s
notFollowedBy (() <$ satisfy isSymbolChar)
-data Case = Uppercase | Lowercase
- deriving (Show)
+data Case care where
+ Uppercase :: Case 'True
+ Lowercase :: Case 'True
+ Don'tCare :: Case 'False
+deriving instance Show (Case care)
+
+type family WithCaseOutput care a where
+ WithCaseOutput 'True a = a
+ WithCaseOutput 'False a = (Case 'True, a)
-- | Consumes an identifier (word or parenthesised operator) at the current
-- position. The `var` production in Haskell2010.
-- var -> varid | "(" varsym ")"
-pIdentifier0 :: BlockPos -> Case -> Parser Name
+pIdentifier0 :: BlockPos -> Case care -> Parser (WithCaseOutput care Name)
pIdentifier0 bpos cs = pAlphaName0 bpos cs <|> pParens0 (pSymbol0 bpos cs)
where
-- | Parser between parens, with the opening paren at the current position.
@@ -499,7 +531,7 @@ pIdentifier0 bpos cs = pAlphaName0 bpos cs <|> pParens0 (pSymbol0 bpos cs)
-- `varid` production in Haskell2010 for 'Lowercase', `conid' for 'Uppercase'.
--
-- > varid -> (small {small | large | digit | "'"}) without reservedid
-pAlphaName0 :: BlockPos -> Case -> Parser Name
+pAlphaName0 :: BlockPos -> Case care -> Parser (WithCaseOutput care Name)
pAlphaName0 bpos cs = do
(_, s) <- readToken
bpos
@@ -509,19 +541,25 @@ pAlphaName0 bpos cs = do
(False, Just c) | isNameContChar c -> Just (Right False)
(False, _ ) -> Just (Left ()))
True
- name <- case cs of
- Uppercase | isLower (head s) -> do
- raise Error "Unexpected uppercase word at this position, assuming typo"
- return (toUpper (head s) : tail s)
- Lowercase | isUpper (head s) -> do
- raise Error "Unexpected lowercase word at this position, assuming typo"
- return (toLower (head s) : tail s)
- _ -> return s
+ (name, adjoin) <- case cs of
+ Uppercase
+ | isLower (head s) -> do
+ raise Error "Unexpected uppercase word at this position, assuming typo"
+ return (toUpper (head s) : tail s, id)
+ | otherwise -> return (s, id)
+ Lowercase
+ | isUpper (head s) -> do
+ raise Error "Unexpected lowercase word at this position, assuming typo"
+ return (toLower (head s) : tail s, id)
+ | otherwise -> return (s, id)
+ Don'tCare
+ | isLower (head s) -> return (s, (Lowercase,))
+ | otherwise -> return (s, (Lowercase,))
guard (name `notElem` ["case", "class", "data", "default", "deriving", "do", "else"
,"foreign", "if", "import", "in", "infix", "infixl"
,"infixr", "instance", "let", "module", "newtype", "of"
,"then", "type", "where", "_"])
- return (Name name)
+ return (adjoin (Name name))
isNameHeadChar :: Char -> Bool
isNameHeadChar c = isLetter c || c == '_'
@@ -530,7 +568,7 @@ isNameContChar :: Char -> Bool
isNameContChar c = isNameHeadChar c || isDigit c || c == '\''
-- | Consumes a symbol at the current position. The `varsym` production in
--- Haskell2010 for 'Lowercase', `consym` otherwise.
+-- Haskell2010 for 'Lowercase', `consym` otherwise, and either if 'Don'tCare'.
--
-- > varsym -> ((symbol without ":") {symbol}) without (reservedop | dashes)
-- > consym -> (":" {symbol}) without reservedop
@@ -540,24 +578,28 @@ isNameContChar c = isNameHeadChar c || isDigit c || c == '\''
-- > dashes -> "--" {"-"}
-- > special -> ```(),;[]`{}```
-- > reservedop -> ".." | ":" | "::" | "=" | "\" | "|" | "<-" | "->" | "@" | "~" | "=>"
-pSymbol0 :: BlockPos -> Case -> Parser Name
+pSymbol0 :: BlockPos -> Case care -> Parser (WithCaseOutput care Name)
pSymbol0 bpos cs = do
case bpos of
AtLeft -> assertAtBlockLeft Fatal "Expected symbol, but found indented expression"
InBlock -> assertInsideBlock Fatal "Expected symbol, but found end of indented expression"
- name <- (:) <$> (case cs of Lowercase -> satisfy (\c -> isSymbolChar c && c /= ':')
- Uppercase -> satisfy (== ':'))
- <*> many (satisfy isSymbolChar)
+ (c1, adjoin) <-
+ case cs of Lowercase -> (,id) <$> satisfy (\c -> isSymbolChar c && c /= ':')
+ Uppercase -> (,id) <$> satisfy (== ':')
+ Don'tCare -> do c1 <- satisfy (\c -> isSymbolChar c)
+ return (c1, if c1 == ':' then (Uppercase,) else (Lowercase,))
+ crest <- many (satisfy isSymbolChar)
+ let name = c1 : crest
guard (name `notElem` ["..", ":", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"])
guard (take 2 name /= "--")
- return (Name name)
+ return (adjoin (Name name))
isSymbolChar :: Char -> Bool
isSymbolChar c = (isAscSymbol || isUniSymbol) && not isSpecialExt
where
isSpecialExt = c `elem` "(),;[]`{}_\"'"
isAscSymbol = c `elem` "!#$%&⋆+./<=>?@^|-~:"
- isUniSymbol = isSymbol c || isPunctuation c
+ isUniSymbol = ord c > 127 && (isSymbol c || isPunctuation c)
sepBy1 :: Parser a -> Parser sep -> Parser [a]
@@ -650,10 +692,10 @@ skipWhiteComment = do
_ <- many (blockComment >> inlineSpaces)
optional_ lineComment
optional_ (consumeNewline >> skipWhiteComment)
-
--- | Consumes some inline whitespace. Stops before newlines.
-inlineSpaces :: Parser ()
-inlineSpaces = readWhileInline isSpace
+ where
+ -- | Consumes some inline whitespace. Stops before newlines.
+ inlineSpaces :: Parser ()
+ inlineSpaces = readWhileInline isSpace
-- | Consumes an delimited comment including both end markers. Note: this may
-- end outside the current block.
@@ -724,12 +766,12 @@ consumeNewline = gets psRest >>= \case
-- | Consumes exactly one character, unequal to newline, at the current position.
satisfy :: (Char -> Bool) -> Parser Char
satisfy p = do
- traceM "entering satisfy"
+ -- traceM "entering satisfy"
r <- gets psRest
- traceM "got rest"
+ -- traceM "got rest"
r `seq` return ()
- traceM "seqd rest"
- traceM ("rest is " ++ show r)
+ -- traceM "seqd rest"
+ -- traceM ("rest is " ++ show r)
case r of
c : rest | c /= '\n', p c -> do
modify (\ps -> ps { psCol = psCol ps + 1
diff --git a/examples/test1.hs b/examples/test1.hs
new file mode 100644
index 0000000..083a876
--- /dev/null
+++ b/examples/test1.hs
@@ -0,0 +1,8 @@
+data Tree a
+ = Node (Tree a) a (Tree a)
+ | Leaf
+
+foo = 1
+bar = 1 + 2
+f x = x * 3
+g y = f (y - 2) + 7