diff options
| -rw-r--r-- | AST.hs | 4 | ||||
| -rw-r--r-- | Main.hs | 24 | ||||
| -rw-r--r-- | Parser.hs | 377 | 
3 files changed, 313 insertions, 92 deletions
@@ -38,6 +38,8 @@ data RHS t  data Expr t      = ELit t Literal +    | EVar t Name +    | ECon t Name      | EList t [Expr t]      | ETup t [Expr t]      | EApp t (Expr t) [Expr t] @@ -47,7 +49,7 @@ data Expr t      | ELet t [FunDef t] (Expr t)    deriving (Show) -data Literal = LInt Int | LFloat Double | LChar Char | LString String +data Literal = LInt Integer | LFloat Rational | LChar Char | LString String    deriving (Show)  data Operator = OAdd | OSub | OMul | ODiv | OMod | OEqu | OPow @@ -1,5 +1,27 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE TupleSections #-}  module Main where +import System.Environment (getArgs) +import System.Exit (die, exitFailure) + +import Parser +  main :: IO () -main = putStrLn "hoi" +main = do +    (source, fname) <- getArgs >>= \case +        [] -> (,"<stdin>") <$> getContents +        [fname] -> (,fname) <$> readFile fname +        _ -> die "Usage: hs-visinter [filename.hs]" + +    prog <- case parse fname source of +      This errs -> do +          mapM_ (putStrLn . printErrMsg) errs +          exitFailure +      These errs res -> do +          mapM_ (putStrLn . printErrMsg) errs +          return res +      That res -> return res + +    print prog @@ -2,7 +2,17 @@  {-# LANGUAGE FlexibleInstances #-}  {-# LANGUAGE MultiParamTypeClasses #-}  {-# LANGUAGE TupleSections #-} -module Parser where +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE StandaloneDeriving #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE RankNTypes #-} +module Parser ( +    parse, +    printErrMsg, +    -- * Re-exports +    These(..), +) where  import Control.Applicative  import Control.Monad.Chronicle @@ -119,6 +129,10 @@ pProgram = do  pFunDef :: Parser (FunDef ())  pFunDef = do      skipWhiteComment +    pFunDef0 + +pFunDef0 :: Parser (FunDef ()) +pFunDef0 = do      mtypesig <- optional pStandaloneTypesig0      let mname = fst <$> mtypesig          mtype = snd <$> mtypesig @@ -137,21 +151,22 @@ pFunEq mCheckName = do      skipWhiteComment      assertAtBlockLeft Fatal "Expected function clause, found indented stuff" -    name <- pIdentifier0 Lowercase +    name <- pIdentifier0 AtLeft Lowercase      case mCheckName of        Just checkName | name /= checkName ->            raise Fatal "Name of function clause does not correspond with type signature"        _ -> return ()      pats <- many (pPattern 11) -    rhs <- pRHS +    rhs <- pRHS "="      return (FunEq name pats rhs) -pRHS :: Parser (RHS ()) -pRHS = do +-- | Pass "=" for function definitions and "->" for case clauses. +pRHS :: String -> Parser (RHS ()) +pRHS sepsym = do      -- TODO: parse guards      inlineWhite -    string "=" +    pKeySym sepsym      Plain <$> pExpr  pPattern :: Int -> Parser (Pattern ()) @@ -165,43 +180,43 @@ pPattern0 d = do           ,pPatList0           ,pPatParens0]    where -    pPatWildcard0 = string "_" >> return (PWildcard ()) +    pPatWildcard0 = pKeySym "_" >> return (PWildcard ())      pPatVarOrAs0 = do -        var <- pIdentifier0 Lowercase +        var <- pIdentifier0 InBlock Lowercase          asum [do inlineWhite -                 string "@" +                 pKeySym "@"                   p <- pPattern 11                   return (PAs () var p)               ,return (PVar () var)]      pPatCon0 = do -        con <- pIdentifier0 Uppercase +        con <- pIdentifier0 InBlock Uppercase          if d > 0            then return (PCon () con [])            else do args <- many (pPattern 11)                    return (PCon () con args)      pPatList0 = do -        string "[" -        ps <- pPattern 0 `sepBy` (inlineWhite >> string ",") +        char '['  -- special syntax, no need for pKeySym +        ps <- pPattern 0 `sepBy` (inlineWhite >> char ',')          inlineWhite -        string "]" +        char ']'          return (PList () ps)      pPatParens0 = do -        string "(" +        char '('          inlineWhite -        asum [do string ")" +        asum [do char ')'                   return (PTup () [])               ,do p <- pPattern0 0                   inlineWhite -                 asum [do string ")" +                 asum [do char ')'                            return p -                      ,do string "," -                          ps <- pPattern 0 `sepBy1` (inlineWhite >> string ",") +                      ,do char ',' +                          ps <- pPattern 0 `sepBy1` (inlineWhite >> char ',')                            return (PTup () (p : ps))]]  pExpr :: Parser (Expr ())  pExpr = do      inlineWhite -    -- basics: lit, list, tup +    -- basics: lit, list, var, con, tup      -- expression atom: application of basics      -- expression parser: op      -- around: let, case, if @@ -210,6 +225,60 @@ pExpr = do           ,pEIf0           ,pExprOpExpr0 0] +pELet0 :: Parser (Expr ()) +pELet0 = do +    pKeyword "let" +    inlineWhite +    startLayoutBlock $ do +        -- The first occurrence is also going to skip whitespace in front, +        -- which is redundant -- but not harmful. +        defs <- many $ do +            skipWhiteComment +            -- Note: now not necessarily in the indented block. Which is +            -- precisely what we need here. If we see "in", let the 'many' +            -- choice fail so that the defs loop ends. But let it fail outside +            -- this asum so that it is the many that picks it up, not this +            -- asum. +            res <- asum [Nothing <$ lookAhead (pKeyword "in") +                        ,Just <$> pFunDef0] +            case res of +              Nothing -> empty +              Just def -> return def +        inlineWhite +        body <- pExpr +        return (ELet () defs body) + +pECase0 :: Parser (Expr ()) +pECase0 = do +    pKeyword "case" +    e <- pExpr +    inlineWhite +    pKeyword "of" +    inlineWhite +    startLayoutBlock $ do +        -- The first clause is going to skip whitespace, but that's harmless +        -- (though redundant). +        let pClause = do +                skipWhiteComment +                whenM (not <$> isInsideBlock) (() <$ empty) +                pat <- pPattern0 0 +                rhs <- pRHS "->" +                return (pat, rhs) +        clauses <- many pClause +        return (ECase () e clauses) + +pEIf0 :: Parser (Expr ()) +pEIf0 = do +    pKeyword "if" +    e1 <- pExpr +    inlineWhite +    pKeyword "then" +    e2 <- pExpr +    inlineWhite +    pKeyword "else" +    e3 <- pExpr +    return (EIf () e1 e2 e3) +  pExprOpExpr :: Int -> Parser (Expr ())  pExprOpExpr d = inlineWhite >> pExprOpExpr0 d @@ -229,7 +298,8 @@ pExprOpExpr0 d = do                         return ()                   let oprhsd = case a2 of AssocRight -> d2 ; _ -> d2 + 1                   rhs <- pExprOpExpr oprhsd -                 climbRight (EOp () lhs op rhs) (Just paop)] +                 climbRight (EOp () lhs op rhs) (Just paop) +             ,return lhs]  pEApp0 :: Parser (Expr ())  pEApp0 = do @@ -240,7 +310,79 @@ pEApp0 = do        _ -> return (EApp () e1 es)  pEAtom0 :: Parser (Expr ()) -pEAtom0 = pELit <|> pEList <|> pEParens +pEAtom0 = (ELit () <$> pLiteral0) <|> pEList0 <|> pEVar0 <|> pECon0 <|> pEParens0 + +pLiteral0 :: Parser Literal +pLiteral0 = asum +    [do as <- some (satisfy isDigit) +        let a = read as :: Integer +        asum +            [do char '.' +                bs <- some (satisfy isDigit) +                let b = read bs :: Integer +                cs <- optional $ do +                        char 'e' +                        cs <- some (satisfy isDigit) +                        return cs +                let c = maybe 0 read cs :: Integer +                return (LFloat ((fromIntegral a + fromIntegral b / 10 ^ length bs) * 10 ^ c)) +            ,return (LInt a)] +    ,do char '\'' +        c <- pStringChar +        char '\'' +        return (LChar c) +    ,do char '"' +        s <- many pStringChar +        char '"' +        return (LString s)] + +pStringChar :: Parser Char +pStringChar = asum +    [do char '\\' +        char 'x' +        let hexdig = do +                c <- satisfy $ \c' -> +                        let c = toLower c' +                        in 'a' <= c && c <= 'f' || '0' <= c && c <= '9' +                return $ if 'a' <= c then 10 + ord c - ord 'a' +                                     else ord c - ord '0' +        digs <- some hexdig +        return (chr (sum (zipWith (*) (reverse digs) (iterate (*16) 1)))) +    ,do char '\\' +        satisfy (const True) >>= \case +            'n' -> return '\n' +            'r' -> return '\r' +            't' -> return '\t' +            'a' -> return '\a' +            'b' -> return '\b' +            '\'' -> return '\'' +            '\"' -> return '\"' +            '0' -> return '\0' +            c -> do raise Error $ "Invalid escape sequence: \\" ++ [c] +                    return '?' +    ,do satisfy (\c -> c `notElem` "\n\r\\\'")] + +pEList0 :: Parser (Expr ()) +pEList0 = do +    char '['  -- special syntax, no need for pKeySym +    es <- sepBy pExpr (inlineWhite >> char ',') +    inlineWhite +    char ']' +    return (EList () es) + +pEVar0 :: Parser (Expr ()) +pEVar0 = EVar () <$> pIdentifier0 InBlock Lowercase + +pECon0 :: Parser (Expr ()) +pECon0 = ECon () <$> pIdentifier0 InBlock Uppercase + +pEParens0 :: Parser (Expr ()) +pEParens0 = do +    char '(' +    e <- pExpr +    inlineWhite +    char ')' +    return e  data Associativity = AssocLeft | AssocRight | AssocNone    deriving (Show, Eq) @@ -251,21 +393,21 @@ data ParsedOperator = PaOp Operator Int Associativity  pInfixOp :: Parser ParsedOperator  pInfixOp = do      inlineWhite -    asum [PaOp OEqu 4 AssocNone  <$ string "==" -         ,PaOp OAdd 6 AssocLeft  <$ string "+" -         ,PaOp OSub 6 AssocLeft  <$ string "-" -         ,PaOp OMul 7 AssocLeft  <$ string "*" -         ,PaOp ODiv 7 AssocLeft  <$ string "/" -         ,PaOp OMod 7 AssocLeft  <$ string "%" -         ,PaOp OPow 8 AssocRight <$ string "^" +    asum [PaOp OEqu 4 AssocNone  <$ pKeySym "==" +         ,PaOp OAdd 6 AssocLeft  <$ pKeySym "+" +         ,PaOp OSub 6 AssocLeft  <$ pKeySym "-" +         ,PaOp OMul 7 AssocLeft  <$ pKeySym "*" +         ,PaOp ODiv 7 AssocLeft  <$ pKeySym "/" +         ,PaOp OMod 7 AssocLeft  <$ pKeySym "%" +         ,PaOp OPow 8 AssocRight <$ pKeySym "^"           ]  pStandaloneTypesig0 :: Parser (Name, Type)  pStandaloneTypesig0 = do      assertAtBlockLeft Fatal "Expected top-level definition, found indented stuff" -    name@(Name namestr) <- pIdentifier0 Lowercase +    name@(Name namestr) <- pIdentifier0 AtLeft Lowercase      inlineWhite -    string "::" +    pKeySym "::"      pushContext ("type signature for '" ++ namestr ++ "'") $ do          ty <- pType          return (name, ty) @@ -274,7 +416,7 @@ pType :: Parser Type  pType = do      ty1 <- pTypeApp      asum [do inlineWhite -             string "->" +             pKeySym "->"               ty2 <- pType               return (TFun ty1 ty2)           ,return ty1] @@ -290,30 +432,42 @@ pTypeAtom = pTypeParens <|> pTypeList <|> pTypeCon <|> pTypeVar    where      pTypeParens = do          inlineWhite -        string "(" +        char '('          asum [do inlineWhite -                 string ")" +                 char ')'                   return (TTup [])               ,do ty1 <- pType                   ty2s <- many $ do                       inlineWhite -                     string "," +                     char ','                       pType                   inlineWhite -                 string ")" +                 char ')'                   case ty2s of                     [] -> return ty1                     _ -> return (TTup (ty1 : ty2s))]      pTypeList = do          inlineWhite -        string "[" +        char '['          ty <- pType -        string "]" +        char ']'          return (TList ty) -    pTypeCon = inlineWhite >> TCon <$> pIdentifier0 Uppercase -    pTypeVar = inlineWhite >> TVar <$> pIdentifier0 Lowercase +    pTypeCon = inlineWhite >> TCon <$> pIdentifier0 InBlock Uppercase +    pTypeVar = inlineWhite >> TVar <$> pIdentifier0 InBlock Lowercase + +-- | Parse the given name-like keyword, ensuring that it is the entire word. +pKeyword :: String -> Parser () +pKeyword s = do +    string s +    notFollowedBy (() <$ satisfy isNameContChar) + +-- | Parse the given symbol-like keyword, ensuring that it is the entire symbol. +pKeySym :: String -> Parser () +pKeySym s = do +    string s +    notFollowedBy (() <$ satisfy isSymbolChar)  data Case = Uppercase | Lowercase    deriving (Show) @@ -321,20 +475,32 @@ data Case = Uppercase | Lowercase  -- | Consumes an identifier (word or parenthesised operator) at the current  -- position. The `var` production in Haskell2010.  -- var -> varid | "(" varsym ")" -pIdentifier0 :: Case -> Parser Name -pIdentifier0 cs = pAlphaName0 cs <|> pParens0 (pSymbol0 cs) +pIdentifier0 :: BlockPos -> Case -> Parser Name +pIdentifier0 bpos cs = pAlphaName0 bpos cs <|> pParens0 (pSymbol0 bpos cs) +  where +    -- | Parser between parens, with the opening paren at the current position. +    pParens0 :: Parser a -> Parser a +    pParens0 p = do +        char '(' +        inlineWhite +        res <- p +        inlineWhite +        char ')' +        return res  -- | Consumes a word-like name at the current position with the given case. The  -- `varid` production in Haskell2010 for 'Lowercase', `conid' for 'Uppercase'.  --  -- > varid -> (small {small | large | digit | "'"}) without reservedid -pAlphaName0 :: Case -> Parser Name -pAlphaName0 cs = do -    (_, s) <- readInline (\case True -> \case Just c | isInitNameChar c -> Just (Right False) -                                              _ -> Nothing -                                False -> \case Just c | isNameContChar c -> Just (Right False) -                                               _ -> Just (Left ())) -                         True +pAlphaName0 :: BlockPos -> Case -> Parser Name +pAlphaName0 bpos cs = do +    (_, s) <- readToken bpos +        (\atfst mc -> case (atfst, mc) of +            (True , Just c) | isNameHeadChar c -> Just (Right False) +            (True , _     )                    -> Nothing +            (False, Just c) | isNameContChar c -> Just (Right False) +            (False, _     )                    -> Just (Left ())) +        True      name <- case cs of        Uppercase | isLower (head s) -> do            raise Error "Unexpected uppercase word at this position, assuming typo" @@ -348,10 +514,12 @@ pAlphaName0 cs = do                            ,"infixr", "instance", "let", "module", "newtype", "of"                            ,"then", "type", "where", "_"])      return (Name name) -  where -    isInitNameChar, isNameContChar :: Char -> Bool -    isInitNameChar c = isLetter c || c == '_' -    isNameContChar c = isInitNameChar c || isDigit c || c == '\'' + +isNameHeadChar :: Char -> Bool +isNameHeadChar c = isLetter c || c == '_' + +isNameContChar :: Char -> Bool +isNameContChar c = isNameHeadChar c || isDigit c || c == '\''  -- | Consumes a symbol at the current position. The `varsym` production in  -- Haskell2010 for 'Lowercase', `consym` otherwise. @@ -364,12 +532,11 @@ pAlphaName0 cs = do  -- > dashes -> "--" {"-"}  -- > special -> ```(),;[]`{}```  -- > reservedop -> ".." | ":" | "::" | "=" | "\" | "|" | "<-" | "->" | "@" | "~" | "=>" -pSymbol0 :: Case -> Parser Name -pSymbol0 cs = do -    let isSpecialExt c = c `elem` "(),;[]`{}_\"'" -        isAscSymbol c = c `elem` "!#$%&⋆+./<=>?@^|-~:" -        isUniSymbol c = isSymbol c || isPunctuation c -        isSymbolChar c = (isAscSymbol c || isUniSymbol c) && not (isSpecialExt c) +pSymbol0 :: BlockPos -> Case -> Parser Name +pSymbol0 bpos cs = do +    case bpos of +      AtLeft -> assertAtBlockLeft Fatal "Expected symbol, but found indented expression" +      InBlock -> assertInsideBlock Fatal "Expected symbol, but found end of indented expression"      name <- (:) <$> (case cs of Lowercase -> satisfy (\c -> isSymbolChar c && c /= ':')                                  Uppercase -> satisfy (== ':'))                  <*> many (satisfy isSymbolChar) @@ -377,19 +544,12 @@ pSymbol0 cs = do      guard (take 2 name /= "--")      return (Name name) --- | Parser between parens, with the opening paren at the current position. --- Enforces that all components are within the current indented block. -pParens0 :: Parser a -> Parser a -pParens0 p = do -    string "(" -    skipWhiteComment -    assertInsideBlock Error "Unexpected dedent after opening parenthesis" -    res <- p -    assertInsideBlock Error "Unexpected dedent in parenthesised expression" -    skipWhiteComment -    assertInsideBlock Error "Unexpected dedent in parenthesised expression" -    string ")" -    return res +isSymbolChar :: Char -> Bool +isSymbolChar c = (isAscSymbol || isUniSymbol) && not isSpecialExt +  where +    isSpecialExt = c `elem` "(),;[]`{}_\"'" +    isAscSymbol = c `elem` "!#$%&⋆+./<=>?@^|-~:" +    isUniSymbol = isSymbol c || isPunctuation c  sepBy1 :: Parser a -> Parser sep -> Parser [a] @@ -412,11 +572,17 @@ startLayoutBlock p = do                        , psBlkCol = psBlkCol ps0 })      return res -data Fatality = Error | Fatal -  deriving (Show) +data Fatality fatal where +    Error :: Fatality 'False +    Fatal :: Fatality 'True +deriving instance Show (Fatality fatal) + +type family FatalCtx fatal a where +    FatalCtx 'False a = a ~ () +    FatalCtx 'True a = ()  -- | Raise an error with the given fatality and description. -raise :: Fatality -> String -> Parser () +raise :: FatalCtx fatal a => Fatality fatal -> String -> Parser a  raise fat msg = do      Context { ctxFile = fp , ctxStack = stk } <- ask      PS { psLine = line, psCol = col } <- get @@ -425,18 +591,27 @@ raise fat msg = do                  Fatal -> confess . pure      fun (ErrMsg fp stk line col msg) +raise' :: Fatality fatal -> String -> Parser () +raise' Error = raise Error +raise' Fatal = raise Fatal +  -- | Registers a scope description on the stack for error reporting.  pushContext :: String -> Parser a -> Parser a  pushContext descr = local (\c -> c { ctxStack = descr : ctxStack c }) --- | Consumes an inline token at the current position, asserting that we are --- within the current block at the start. The token is defined by a pure --- stateful parser. If encountering a newline or EOF, the parser is run on this --- character ('Nothing' for EOF); if this produces a result, the result is --- returned; otherwise, the parser fails. The newline is not consumed. -readInline :: (s -> Maybe Char -> Maybe (Either r s)) -> s -> Parser (r, String) -readInline f s0 = do -    assertInsideBlock Fatal "Expected stuff, but found end of indented expression" +data BlockPos = AtLeft | InBlock +  deriving (Show) + +-- | Consumes a token at the current position, asserting that we are +-- in the position indicated by the 'BlockPos' argument. The token is defined +-- by a pure stateful parser. If encountering a newline or EOF, the parser is +-- run on this character ('Nothing' for EOF); if this produces a result, the +-- result is returned; otherwise, the parser fails. The newline is not consumed. +readToken :: BlockPos -> (s -> Maybe Char -> Maybe (Either r s)) -> s -> Parser (r, String) +readToken bpos f s0 = do +    case bpos of +      AtLeft -> assertAtBlockLeft Fatal "Expected token, but found indented expression" +      InBlock -> assertInsideBlock Fatal "Expected token, but found end of indented expression"      let loop :: (s -> Maybe Char -> Maybe (Either r s)) -> s -> Parser (r, String)          loop f' st = do              ps <- get @@ -476,7 +651,7 @@ inlineSpaces = readWhileInline isSpace  -- end outside the current block.  blockComment :: Parser ()  blockComment = do -    string "{-" +    string "{-"  -- no need for pKeySym here      let loop = do              readWhileInline (`notElem` "{-")  -- "-}" also starts with '-'              asum [string "-}" @@ -488,21 +663,24 @@ blockComment = do  -- | Consumes a line comment marker and the rest of the line, excluding  -- newline.  lineComment :: Parser () -lineComment = string "--" >> readWhileInline (const True) +lineComment = do +    -- '--!' is an operator, so we need to parse a whole symbol here. +    pKeySym "--" +    readWhileInline (const True)  -- | Raises an error if we're not currently at the given column. -assertAtBlockLeft :: Fatality -> String -> Parser () -assertAtBlockLeft fat msg = whenM (not <$> isAtBlockLeft) $ raise fat msg +assertAtBlockLeft :: Fatality fatal -> String -> Parser () +assertAtBlockLeft fat msg = whenM (not <$> isAtBlockLeft) $ raise' fat msg  -- | Raises an error if psCol is not greater than psRefCol. -assertInsideBlock :: Fatality -> String -> Parser () -assertInsideBlock fat msg = whenM (not <$> isInsideBlock) $ raise fat msg +assertInsideBlock :: Fatality fatal -> String -> Parser () +assertInsideBlock fat msg = whenM (not <$> isInsideBlock) $ raise' fat msg  -- | Raises an error if we're not currently at EOF. -assertEOF :: Fatality -> Parser () +assertEOF :: Fatality fatal -> Parser ()  assertEOF fat = gets psRest >>= \case      [] -> return () -    _ -> raise fat "Unexpected stuff" +    _ -> raise' fat "Unexpected stuff"  -- | Returns whether the current position is _within_ the current block, for  -- soft-wrapping content. This means that col > blkCol. @@ -551,6 +729,11 @@ satisfy p = do            return c        _ -> empty +-- | Consumes exactly this character at the current position. Must not be a +-- newline. +char :: Char -> Parser () +char c = string [c] +  -- | Consumes exactly this string at the current position. The string must not  -- contain a newline.  string :: String -> Parser () @@ -562,6 +745,20 @@ string s = do                       , psRest = drop (length s) (psRest ps) })          else empty +lookAhead :: Parser () -> Parser () +lookAhead p = do +    ps <- get +    success <- absolve False (True <$ p) +    put ps  -- restore state, as if nothing happened +    when (not success) empty + +notFollowedBy :: Parser () -> Parser () +notFollowedBy p = do +    ps <- get +    success <- absolve True (False <$ p) +    put ps  -- restore state, as if nothing happened +    when (not success) empty +  -- | Only succeeds at EOF.  eof :: Parser ()  eof = gets psRest >>= \case [] -> return ()  | 
