2 files changed, 149 insertions, 41 deletions
diff --git a/AST.hs b/AST.hs
index f8f3624..6b327c5 100644
--- a/AST.hs
+++ b/AST.hs
@@ -11,10 +11,12 @@ newtype Name = Name String
   deriving (Show)
 
 data Type
-    = TApp Name [Type]
+    = TApp Type [Type]
     | TTup [Type]
     | TList Type
     | TFun Type Type
+    | TCon Name
+    | TVar Name
   deriving (Show)
 
 data FunEq t = FunEq Name [Pattern t] (RHS t)
diff --git a/Parser.hs b/Parser.hs
index aedd557..f525ba6 100644
--- a/Parser.hs
+++ b/Parser.hs
@@ -3,7 +3,7 @@
 module Parser where
 
 import Control.Applicative
-import Data.Char (isLower, isUpper, isLetter, isDigit, isSpace, toUpper, toLower)
+import Data.Char
 import Control.Monad.Chronicle
 import Control.Monad.Reader
 import Control.Monad.State.Strict
@@ -37,11 +37,14 @@ printErrMsg :: ErrMsg -> String
 printErrMsg (ErrMsg fp y x s) = fp ++ ":" ++ show (y + 1) ++ ":" ++ show (x + 1) ++ ": " ++ s
 
 parse :: FilePath -> String -> These [ErrMsg] (Program ())
-parse fp source =
+parse = runParser pProgram
+
+runParser :: Parser a -> FilePath -> String -> These [ErrMsg] a
+runParser p fp source = 
     flip evalState (PS 0 0 0 source)
     . runChronicleT
     . flip runReaderT (Context fp)
-    $ pProgram
+    $ p
                             
 pProgram :: Parser (Program ())
 pProgram = do
@@ -53,19 +56,77 @@ pProgram = do
 pFunDef :: Parser (FunDef ())
 pFunDef = do
     skipWhiteComment
+    mtypesig <- optional pStandaloneTypesig0
+    _
+
+pStandaloneTypesig0 :: Parser (Name, Type)
+pStandaloneTypesig0 = do
     assertAtCol 0 Fatal "Expected top-level definition, found indented stuff"
     withRefCol 0 $ do
-        _
+        name <- pIdentifier0 Lowercase
+        inlineWhite
+        string "::"
+        ty <- pType
+        return (name, ty)
+
+pType :: Parser Type
+pType = do
+    ty1 <- pTypeApp
+    asum [do inlineWhite
+             string "->"
+             ty2 <- pType
+             return (TFun ty1 ty2)
+         ,return ty1]
+
+pTypeApp :: Parser Type
+pTypeApp = many pTypeAtom >>= \case
+    [] -> raise Error "Expected type" >> return (TTup [])
+    [t] -> return t
+    t:ts -> return (TApp t ts)
+
+pTypeAtom :: Parser Type
+pTypeAtom = pTypeParens <|> pTypeList <|> pTypeCon <|> pTypeVar
+  where
+    pTypeParens = do
+        inlineWhite
+        string "("
+        asum [do inlineWhite
+                 string ")"
+                 return (TTup [])
+             ,do ty1 <- pType
+                 ty2s <- many $ do
+                     inlineWhite
+                     string ","
+                     pType
+                 inlineWhite
+                 string ")"
+                 case ty2s of
+                   [] -> return ty1
+                   _ -> return (TTup (ty1 : ty2s))]
+
+    pTypeList = do
+        inlineWhite
+        string "["
+        ty <- pType
+        string "]"
+        return (TList ty)
+
+    pTypeCon = inlineWhite >> TCon <$> pIdentifier0 Uppercase
+    pTypeVar = inlineWhite >> TVar <$> pIdentifier0 Lowercase
 
 data Case = Uppercase | Lowercase
   deriving (Show)
 
 -- | Consumes an identifier (word or parenthesised operator) at the current
--- position.
+-- position. The `var` production in Haskell2010.
+-- var -> varid | "(" varsym ")"
 pIdentifier0 :: Case -> Parser Name
 pIdentifier0 cs = pAlphaName0 cs <|> pParens0 (pSymbol0 cs)
 
--- | Consumes a word-like name at the current position with the given case.
+-- | Consumes a word-like name at the current position with the given case. The
+-- `varid` production in Haskell2010 for 'Lowercase', `conid' for 'Uppercase'.
+--
+-- > varid -> (small {small | large | digit | "'"}) without reservedid
 pAlphaName0 :: Case -> Parser Name
 pAlphaName0 cs = do
     (_, s) <- readInline (\case True -> \case Just c | isInitNameChar c -> Just (Right False)
@@ -73,22 +134,47 @@ pAlphaName0 cs = do
                                 False -> \case Just c | isNameContChar c -> Just (Right False)
                                                _ -> Just (Left ()))
                          True
-    case cs of
+    name <- case cs of
       Uppercase | isLower (head s) -> do
           raise Error "Unexpected uppercase word at this position, assuming typo"
-          return (Name (toUpper (head s) : tail s))
+          return (toUpper (head s) : tail s)
       Lowercase | isUpper (head s) -> do
           raise Error "Unexpected lowercase word at this position, assuming typo"
-          return (Name (toLower (head s) : tail s))
-      _ -> return (Name s)
+          return (toLower (head s) : tail s)
+      _ -> return s
+    guard (name `notElem` ["case", "class", "data", "default", "deriving", "do", "else"
+                          ,"foreign", "if", "import", "in", "infix", "infixl"
+                          ,"infixr", "instance", "let", "module", "newtype", "of"
+                          ,"then", "type", "where", "_"])
+    return (Name name)
   where
     isInitNameChar, isNameContChar :: Char -> Bool
     isInitNameChar c = isLetter c || c == '_'
     isNameContChar c = isInitNameChar c || isDigit c || c == '\''
 
+-- | Consumes a symbol at the current position. The `varsym` production in
+-- Haskell2010 for 'Lowercase', `consym` otherwise.
+--
+-- > varsym -> ((symbol without ":") {symbol}) without (reservedop | dashes)
+-- > consym -> (":" {symbol}) without reservedop
+-- > symbol -> ascSymbol | uniSymbol without (special | "_" | "\"" | "'")
+-- > ascSymbol -> ```!#$%&⋆+./<=>?@^|-~:```
+-- > uniSymbol -> unicode symbol or punctuation
+-- > dashes -> "--" {"-"}
+-- > special -> ```(),;[]`{}```
+-- > reservedop -> ".." | ":" | "::" | "=" | "\" | "|" | "<-" | "->" | "@" | "~" | "=>"
 pSymbol0 :: Case -> Parser Name
 pSymbol0 cs = do
-    _
+    let isSpecialExt c = c `elem` "(),;[]`{}_\"'"
+        isAscSymbol c = c `elem` "!#$%&⋆+./<=>?@^|-~:"
+        isUniSymbol c = isSymbol c || isPunctuation c
+        isSymbolChar c = (isAscSymbol c || isUniSymbol c) && not (isSpecialExt c)
+    name <- (:) <$> (case cs of Lowercase -> satisfy (\c -> isSymbolChar c && c /= ':')
+                                Uppercase -> satisfy (== ':'))
+                <*> many (satisfy isSymbolChar)
+    guard (name `notElem` ["..", ":", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"])
+    guard (take 2 name /= "--")
+    return (Name name)
 
 -- | Parser between parens, with the opening paren at the current position.
 -- Enforces that all components are within the current indented block.
@@ -96,8 +182,11 @@ pParens0 :: Parser a -> Parser a
 pParens0 p = do
     string "("
     skipWhiteComment
+    assertWithinBlock Error "Unexpected dedent after opening parenthesis"
     res <- p
+    assertWithinBlock Error "Unexpected dedent in parenthesised expression"
     skipWhiteComment
+    assertWithinBlock Error "Unexpected dedent in parenthesised expression"
     string ")"
     return res
 
@@ -166,43 +255,51 @@ readInline f s0 = do
                               fmap (c :) <$> loop f' st'
     loop f s0
 
+-- | Consumes all whitespace and comments (including newlines), but only if
+-- this then leaves psCol > psRefCol. If not, this fails.
+inlineWhite :: Parser ()
+inlineWhite = do
+    skipWhiteComment
+    ps <- get
+    when (psCol ps <= psRefCol ps) empty
+
 -- | Consumes all whitespace and comments (including newlines). Note: this may
--- leave psCol < psRefCol.
+-- leave psCol <= psRefCol.
 skipWhiteComment :: Parser ()
 skipWhiteComment = do
-    inlineWhite
-    _ <- many (inlineComment >> inlineWhite)
+    inlineSpaces
+    _ <- many (inlineComment >> inlineSpaces)
     _ <- optional lineComment
     (consumeNewline >> skipWhiteComment) <|> return ()
-  where
-    -- | Consumes some inline whitespace.
-    inlineWhite :: Parser ()
-    inlineWhite = readWhileInline isSpace
 
-    -- | Consumes an inline comment including both end markers. Note: this may
-    -- leave psCol < psRefCol.
-    inlineComment :: Parser ()
-    inlineComment = do
-        string "{-"
-        let loop = do
-                readWhileInline (`notElem` "{-")
-                asum [string "-}"
-                     ,inlineComment >> loop
-                     ,consumeNewline >> loop]
-        loop
+-- | Consumes some inline whitespace.
+inlineSpaces :: Parser ()
+inlineSpaces = readWhileInline isSpace
+
+-- | Consumes an inline comment including both end markers. Note: this may
+-- leave psCol < psRefCol.
+inlineComment :: Parser ()
+inlineComment = do
+    string "{-"
+    let loop = do
+            readWhileInline (`notElem` "{-")
+            asum [string "-}"
+                 ,inlineComment >> loop
+                 ,consumeNewline >> loop]
+    loop
 
-    -- | Consumes a line comment marker and the rest of the line, excluding
-    -- newline.
-    lineComment :: Parser ()
-    lineComment = string "--" >> readWhileInline (const True)
+-- | Consumes a line comment marker and the rest of the line, excluding
+-- newline.
+lineComment :: Parser ()
+lineComment = string "--" >> readWhileInline (const True)
 
-    -- | Consumes characters while the predicate holds or until (and excluding)
-    -- a newline, whichever comes first.
-    readWhileInline :: (Char -> Bool) -> Parser ()
-    readWhileInline p = do
-        (taken, rest) <- span (\c -> p c && c /= '\n') <$> gets psRest
-        modify (\ps -> ps { psCol = psCol ps + length taken
-                          , psRest = rest })
+-- | Consumes characters while the predicate holds or until (and excluding)
+-- a newline, whichever comes first.
+readWhileInline :: (Char -> Bool) -> Parser ()
+readWhileInline p = do
+    (taken, rest) <- span (\c -> p c && c /= '\n') <$> gets psRest
+    modify (\ps -> ps { psCol = psCol ps + length taken
+                      , psRest = rest })
 
 -- | Consumes exactly one newline at the current position.
 consumeNewline :: Parser ()
@@ -212,6 +309,15 @@ consumeNewline = gets psRest >>= \case
                                      , psRest = rest })
     _ -> empty
 
+-- | Consumes exactly one character, unequal to newline, at the current position.
+satisfy :: (Char -> Bool) -> Parser Char
+satisfy p = gets psRest >>= \case
+    c : rest | c /= '\n', p c -> do
+        modify (\ps -> ps { psCol = psCol ps + 1
+                          , psRest = rest })
+        return c
+    _ -> empty
+
 -- | Consumes exactly this string at the current position. The string must not
 -- contain a newline.
 string :: String -> Parser ()