diff options
| -rw-r--r-- | AST.hs | 4 | ||||
| -rw-r--r-- | Parser.hs | 186 | 
2 files changed, 149 insertions, 41 deletions
@@ -11,10 +11,12 @@ newtype Name = Name String    deriving (Show)  data Type -    = TApp Name [Type] +    = TApp Type [Type]      | TTup [Type]      | TList Type      | TFun Type Type +    | TCon Name +    | TVar Name    deriving (Show)  data FunEq t = FunEq Name [Pattern t] (RHS t) @@ -3,7 +3,7 @@  module Parser where  import Control.Applicative -import Data.Char (isLower, isUpper, isLetter, isDigit, isSpace, toUpper, toLower) +import Data.Char  import Control.Monad.Chronicle  import Control.Monad.Reader  import Control.Monad.State.Strict @@ -37,11 +37,14 @@ printErrMsg :: ErrMsg -> String  printErrMsg (ErrMsg fp y x s) = fp ++ ":" ++ show (y + 1) ++ ":" ++ show (x + 1) ++ ": " ++ s  parse :: FilePath -> String -> These [ErrMsg] (Program ()) -parse fp source = +parse = runParser pProgram + +runParser :: Parser a -> FilePath -> String -> These [ErrMsg] a +runParser p fp source =       flip evalState (PS 0 0 0 source)      . runChronicleT      . flip runReaderT (Context fp) -    $ pProgram +    $ p  pProgram :: Parser (Program ())  pProgram = do @@ -53,19 +56,77 @@ pProgram = do  pFunDef :: Parser (FunDef ())  pFunDef = do      skipWhiteComment +    mtypesig <- optional pStandaloneTypesig0 +    _ + +pStandaloneTypesig0 :: Parser (Name, Type) +pStandaloneTypesig0 = do      assertAtCol 0 Fatal "Expected top-level definition, found indented stuff"      withRefCol 0 $ do -        _ +        name <- pIdentifier0 Lowercase +        inlineWhite +        string "::" +        ty <- pType +        return (name, ty) + +pType :: Parser Type +pType = do +    ty1 <- pTypeApp +    asum [do inlineWhite +             string "->" +             ty2 <- pType +             return (TFun ty1 ty2) +         ,return ty1] + +pTypeApp :: Parser Type +pTypeApp = many pTypeAtom >>= \case +    [] -> raise Error "Expected type" >> return (TTup []) +    [t] -> return t +    t:ts -> return (TApp t ts) + +pTypeAtom :: Parser Type +pTypeAtom = pTypeParens <|> pTypeList <|> pTypeCon <|> pTypeVar +  where +    pTypeParens = do +        inlineWhite +        string "(" +        asum [do inlineWhite +                 string ")" +                 return (TTup []) +             ,do ty1 <- pType +                 ty2s <- many $ do +                     inlineWhite +                     string "," +                     pType +                 inlineWhite +                 string ")" +                 case ty2s of +                   [] -> return ty1 +                   _ -> return (TTup (ty1 : ty2s))] + +    pTypeList = do +        inlineWhite +        string "[" +        ty <- pType +        string "]" +        return (TList ty) + +    pTypeCon = inlineWhite >> TCon <$> pIdentifier0 Uppercase +    pTypeVar = inlineWhite >> TVar <$> pIdentifier0 Lowercase  data Case = Uppercase | Lowercase    deriving (Show)  -- | Consumes an identifier (word or parenthesised operator) at the current --- position. +-- position. The `var` production in Haskell2010. +-- var -> varid | "(" varsym ")"  pIdentifier0 :: Case -> Parser Name  pIdentifier0 cs = pAlphaName0 cs <|> pParens0 (pSymbol0 cs) --- | Consumes a word-like name at the current position with the given case. +-- | Consumes a word-like name at the current position with the given case. The +-- `varid` production in Haskell2010 for 'Lowercase', `conid' for 'Uppercase'. +-- +-- > varid -> (small {small | large | digit | "'"}) without reservedid  pAlphaName0 :: Case -> Parser Name  pAlphaName0 cs = do      (_, s) <- readInline (\case True -> \case Just c | isInitNameChar c -> Just (Right False) @@ -73,22 +134,47 @@ pAlphaName0 cs = do                                  False -> \case Just c | isNameContChar c -> Just (Right False)                                                 _ -> Just (Left ()))                           True -    case cs of +    name <- case cs of        Uppercase | isLower (head s) -> do            raise Error "Unexpected uppercase word at this position, assuming typo" -          return (Name (toUpper (head s) : tail s)) +          return (toUpper (head s) : tail s)        Lowercase | isUpper (head s) -> do            raise Error "Unexpected lowercase word at this position, assuming typo" -          return (Name (toLower (head s) : tail s)) -      _ -> return (Name s) +          return (toLower (head s) : tail s) +      _ -> return s +    guard (name `notElem` ["case", "class", "data", "default", "deriving", "do", "else" +                          ,"foreign", "if", "import", "in", "infix", "infixl" +                          ,"infixr", "instance", "let", "module", "newtype", "of" +                          ,"then", "type", "where", "_"]) +    return (Name name)    where      isInitNameChar, isNameContChar :: Char -> Bool      isInitNameChar c = isLetter c || c == '_'      isNameContChar c = isInitNameChar c || isDigit c || c == '\'' +-- | Consumes a symbol at the current position. The `varsym` production in +-- Haskell2010 for 'Lowercase', `consym` otherwise. +-- +-- > varsym -> ((symbol without ":") {symbol}) without (reservedop | dashes) +-- > consym -> (":" {symbol}) without reservedop +-- > symbol -> ascSymbol | uniSymbol without (special | "_" | "\"" | "'") +-- > ascSymbol -> ```!#$%&⋆+./<=>?@^|-~:``` +-- > uniSymbol -> unicode symbol or punctuation +-- > dashes -> "--" {"-"} +-- > special -> ```(),;[]`{}``` +-- > reservedop -> ".." | ":" | "::" | "=" | "\" | "|" | "<-" | "->" | "@" | "~" | "=>"  pSymbol0 :: Case -> Parser Name  pSymbol0 cs = do -    _ +    let isSpecialExt c = c `elem` "(),;[]`{}_\"'" +        isAscSymbol c = c `elem` "!#$%&⋆+./<=>?@^|-~:" +        isUniSymbol c = isSymbol c || isPunctuation c +        isSymbolChar c = (isAscSymbol c || isUniSymbol c) && not (isSpecialExt c) +    name <- (:) <$> (case cs of Lowercase -> satisfy (\c -> isSymbolChar c && c /= ':') +                                Uppercase -> satisfy (== ':')) +                <*> many (satisfy isSymbolChar) +    guard (name `notElem` ["..", ":", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"]) +    guard (take 2 name /= "--") +    return (Name name)  -- | Parser between parens, with the opening paren at the current position.  -- Enforces that all components are within the current indented block. @@ -96,8 +182,11 @@ pParens0 :: Parser a -> Parser a  pParens0 p = do      string "("      skipWhiteComment +    assertWithinBlock Error "Unexpected dedent after opening parenthesis"      res <- p +    assertWithinBlock Error "Unexpected dedent in parenthesised expression"      skipWhiteComment +    assertWithinBlock Error "Unexpected dedent in parenthesised expression"      string ")"      return res @@ -166,43 +255,51 @@ readInline f s0 = do                                fmap (c :) <$> loop f' st'      loop f s0 +-- | Consumes all whitespace and comments (including newlines), but only if +-- this then leaves psCol > psRefCol. If not, this fails. +inlineWhite :: Parser () +inlineWhite = do +    skipWhiteComment +    ps <- get +    when (psCol ps <= psRefCol ps) empty +  -- | Consumes all whitespace and comments (including newlines). Note: this may --- leave psCol < psRefCol. +-- leave psCol <= psRefCol.  skipWhiteComment :: Parser ()  skipWhiteComment = do -    inlineWhite -    _ <- many (inlineComment >> inlineWhite) +    inlineSpaces +    _ <- many (inlineComment >> inlineSpaces)      _ <- optional lineComment      (consumeNewline >> skipWhiteComment) <|> return () -  where -    -- | Consumes some inline whitespace. -    inlineWhite :: Parser () -    inlineWhite = readWhileInline isSpace -    -- | Consumes an inline comment including both end markers. Note: this may -    -- leave psCol < psRefCol. -    inlineComment :: Parser () -    inlineComment = do -        string "{-" -        let loop = do -                readWhileInline (`notElem` "{-") -                asum [string "-}" -                     ,inlineComment >> loop -                     ,consumeNewline >> loop] -        loop +-- | Consumes some inline whitespace. +inlineSpaces :: Parser () +inlineSpaces = readWhileInline isSpace + +-- | Consumes an inline comment including both end markers. Note: this may +-- leave psCol < psRefCol. +inlineComment :: Parser () +inlineComment = do +    string "{-" +    let loop = do +            readWhileInline (`notElem` "{-") +            asum [string "-}" +                 ,inlineComment >> loop +                 ,consumeNewline >> loop] +    loop -    -- | Consumes a line comment marker and the rest of the line, excluding -    -- newline. -    lineComment :: Parser () -    lineComment = string "--" >> readWhileInline (const True) +-- | Consumes a line comment marker and the rest of the line, excluding +-- newline. +lineComment :: Parser () +lineComment = string "--" >> readWhileInline (const True) -    -- | Consumes characters while the predicate holds or until (and excluding) -    -- a newline, whichever comes first. -    readWhileInline :: (Char -> Bool) -> Parser () -    readWhileInline p = do -        (taken, rest) <- span (\c -> p c && c /= '\n') <$> gets psRest -        modify (\ps -> ps { psCol = psCol ps + length taken -                          , psRest = rest }) +-- | Consumes characters while the predicate holds or until (and excluding) +-- a newline, whichever comes first. +readWhileInline :: (Char -> Bool) -> Parser () +readWhileInline p = do +    (taken, rest) <- span (\c -> p c && c /= '\n') <$> gets psRest +    modify (\ps -> ps { psCol = psCol ps + length taken +                      , psRest = rest })  -- | Consumes exactly one newline at the current position.  consumeNewline :: Parser () @@ -212,6 +309,15 @@ consumeNewline = gets psRest >>= \case                                       , psRest = rest })      _ -> empty +-- | Consumes exactly one character, unequal to newline, at the current position. +satisfy :: (Char -> Bool) -> Parser Char +satisfy p = gets psRest >>= \case +    c : rest | c /= '\n', p c -> do +        modify (\ps -> ps { psCol = psCol ps + 1 +                          , psRest = rest }) +        return c +    _ -> empty +  -- | Consumes exactly this string at the current position. The string must not  -- contain a newline.  string :: String -> Parser ()  | 
