From 3faca807fe96f2cefa50023fe373d8bcf1430121 Mon Sep 17 00:00:00 2001 From: Tom Smeding Date: Sat, 17 Feb 2024 09:22:49 +0100 Subject: Move to src/, working HLS in examples/ --- AST.hs | 61 ---- Main.hs | 27 -- Parser.hs | 824 ------------------------------------------------------ hie.yaml | 12 + hs-visinter.cabal | 2 +- src/AST.hs | 61 ++++ src/Main.hs | 27 ++ src/Parser.hs | 824 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 925 insertions(+), 913 deletions(-) delete mode 100644 AST.hs delete mode 100644 Main.hs delete mode 100644 Parser.hs create mode 100644 hie.yaml create mode 100644 src/AST.hs create mode 100644 src/Main.hs create mode 100644 src/Parser.hs diff --git a/AST.hs b/AST.hs deleted file mode 100644 index 47652b6..0000000 --- a/AST.hs +++ /dev/null @@ -1,61 +0,0 @@ -module AST where - -import Data.List.NonEmpty (NonEmpty) - - -newtype Name = Name String - deriving (Show, Eq) - -data Program t = Program [DataDef] [FunDef t] - deriving (Show) - -data DataDef = DataDef Name [Name] [(Name, [Type])] - deriving (Show) - -data FunDef t = FunDef Name (Maybe Type) (NonEmpty (FunEq t)) - deriving (Show) - -data FunEq t = FunEq Name [Pattern t] (RHS t) - deriving (Show) - -data Type - = TApp Type [Type] - | TTup [Type] - | TList Type - | TFun Type Type - | TCon Name - | TVar Name - deriving (Show) - -data Pattern t - = PWildcard t - | PVar t Name - | PAs t Name (Pattern t) - | PCon t Name [Pattern t] - | PList t [Pattern t] - | PTup t [Pattern t] - deriving (Show) - -data RHS t - = Guarded [(Expr t, Expr t)] -- currently not parsed - | Plain (Expr t) - deriving (Show) - -data Expr t - = ELit t Literal - | EVar t Name - | ECon t Name - | EList t [Expr t] - | ETup t [Expr t] - | EApp t (Expr t) [Expr t] - | EOp t (Expr t) Operator (Expr t) - | EIf t (Expr t) (Expr t) (Expr t) - | ECase t (Expr t) [(Pattern t, RHS t)] - | ELet t [FunDef t] (Expr t) - deriving (Show) - -data Literal = LInt Integer | LFloat Rational | LChar Char | LString String - deriving (Show) - -data Operator = OAdd | OSub | OMul | ODiv | OMod | OEqu | OPow - deriving (Show) diff --git a/Main.hs b/Main.hs deleted file mode 100644 index c9de0cc..0000000 --- a/Main.hs +++ /dev/null @@ -1,27 +0,0 @@ -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE TupleSections #-} -module Main where - -import System.Environment (getArgs) -import System.Exit (die, exitFailure) - -import Parser - - -main :: IO () -main = do - (source, fname) <- getArgs >>= \case - [] -> (,"") <$> getContents - [fname] -> (,fname) <$> readFile fname - _ -> die "Usage: hs-visinter [filename.hs]" - - prog <- case parse fname source of - This errs -> do - mapM_ (putStrLn . printErrMsg) errs - exitFailure - These errs res -> do - mapM_ (putStrLn . printErrMsg) errs - return res - That res -> return res - - print prog diff --git a/Parser.hs b/Parser.hs deleted file mode 100644 index 0f0bd0c..0000000 --- a/Parser.hs +++ /dev/null @@ -1,824 +0,0 @@ -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE MultiParamTypeClasses #-} -{-# LANGUAGE TupleSections #-} -{-# LANGUAGE DataKinds #-} -{-# LANGUAGE GADTs #-} -{-# LANGUAGE StandaloneDeriving #-} -{-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE RankNTypes #-} -{-# LANGUAGE TypeOperators #-} --- I don't want a warning for 'head' and 'tail' in this file. But I also don't --- want GHCs before 9.8 to complain that they don't know the x-partial warning. -{-# OPTIONS_GHC -Wno-unrecognised-warning-flags -Wno-x-partial #-} -module Parser ( - parse, - printErrMsg, - -- * Re-exports - These(..), -) where - -import Control.Applicative -import Control.Monad -import Control.Monad.Chronicle -import Control.Monad.Reader -import Control.Monad.State.Lazy -import Data.Bifunctor (first) -import Data.Char -import Data.Either (partitionEithers) -import Data.List.NonEmpty (NonEmpty(..)) -import Data.These -import Data.Tuple (swap) - --- import Debug.Trace - -import AST - - --- Positions are zero-based in both dimensions. --- See 'isInsideBlock' and 'isAtBlockLeft' for the two relevant "inside the --- block" conditions. -data PS = PS - { psBlkLine :: Int -- ^ Start line of current layout block - , psBlkCol :: Int -- ^ Start column of current layout block - , psLine :: Int -- ^ Current line - , psCol :: Int -- ^ Current column - , psRest :: String -- ^ Rest of the input - } - deriving (Show) - -data Context = Context - { ctxFile :: FilePath - , ctxStack :: [String] -- ^ Stack of syntax scopes, for error reporting - } - deriving (Show) - --- ReaderT Context (ChronicleT [ErrMsg] (State PS) a) --- Context -> ChronicleT [ErrMsg] (State PS) a --- Context -> State PS (These [ErrMsg] a) --- Context -> PS -> Identity (These [ErrMsg] a, PS) --- Context -> PS -> (These [ErrMsg] a, PS) --- whereas I want: --- Context -> PS -> These [ErrMsg] (a, PS) --- which is not any transformer stack, but a new monad. -newtype Parser a = Parser { runParser :: Context -> PS -> These [ErrMsg] (PS, a) } - -instance Functor Parser where - fmap f (Parser g) = Parser (\ctx ps -> fmap (fmap f) (g ctx ps)) - -instance Applicative Parser where - pure x = Parser (\_ ps -> That (ps, x)) - (<*>) = ap - -instance Monad Parser where - Parser g >>= f = Parser $ \ctx ps -> - case g ctx ps of - This errs -> This errs - That (ps', x) -> runParser (f x) ctx ps' - These errs (ps', x) -> case runParser (f x) ctx ps' of - This errs' -> This (errs <> errs') - That res -> These errs res - These errs' res -> These (errs <> errs') res - -instance Alternative Parser where - empty = Parser (\_ _ -> This []) - Parser f <|> Parser g = Parser $ \ctx ps -> - case f ctx ps of - This _ -> g ctx ps - success -> success - -instance MonadState PS Parser where - state f = Parser $ \_ ps -> That (swap (f ps)) - -instance MonadReader Context Parser where - reader f = Parser $ \ctx ps -> That (ps, f ctx) - local f (Parser g) = Parser (g . f) - -instance MonadChronicle [ErrMsg] Parser where - dictate errs = Parser (\_ ps -> These errs (ps, ())) - confess errs = Parser (\_ _ -> This errs) - memento (Parser f) = Parser (\ctx ps -> case f ctx ps of - This errs -> That (ps, Left errs) - That res -> That (Right <$> res) - These errs res -> These errs (Right <$> res)) - absolve def (Parser f) = Parser (\ctx ps -> case f ctx ps of - This _ -> That (ps, def) - success -> success) - condemn (Parser f) = Parser (\ctx ps -> case f ctx ps of - These errs _ -> This errs - res -> res) - retcon g (Parser f) = Parser (\ctx ps -> first g (f ctx ps)) - chronicle th = Parser (\_ ps -> (ps,) <$> th) - --- Positions are zero-based in both dimensions -data ErrMsg = ErrMsg - { errFile :: FilePath - , errStk :: [String] - , errLine :: Int - , errCol :: Int - , errMsg :: String } - deriving (Show) - -printErrMsg :: ErrMsg -> String -printErrMsg (ErrMsg fp stk y x s) = - unlines (map (\descr -> "In " ++ descr ++ ":") (reverse stk)) ++ - fp ++ ":" ++ show (y + 1) ++ ":" ++ show (x + 1) ++ ": " ++ s - -parse :: FilePath -> String -> These [ErrMsg] (Program ()) -parse fp source = fmap snd $ runParser pProgram (Context fp []) (PS 0 0 0 0 source) - -pProgram :: Parser (Program ()) -pProgram = do - defs <- many pTopDef - let (datadefs, fundefs) = partitionEithers defs - skipWhiteComment - assertEOF Error - return (Program datadefs fundefs) - -pTopDef :: Parser (Either DataDef (FunDef ())) -pTopDef = do - skipWhiteComment - Left <$> pDataDef0 <|> Right <$> pFunDef0 - -pDataDef0 :: Parser DataDef -pDataDef0 = do - pKeyword "data" - inlineWhite - name <- pIdentifier0 InBlock Uppercase - params <- many (inlineWhite >> pIdentifier0 InBlock Lowercase) - cons <- pDatacons "=" - return (DataDef name params cons) - where - pDatacons :: String -> Parser [(Name, [Type])] - pDatacons leader = do - inlineWhite - pKeySym leader - inlineWhite - name <- pIdentifier0 InBlock Uppercase - fields <- many pTypeAtom - rest <- pDatacons "|" <|> return [] - return ((name, fields) : rest) - -pFunDef0 :: Parser (FunDef ()) -pFunDef0 = do - mtypesig <- optional pStandaloneTypesig0 - let mname = fst <$> mtypesig - mtype = snd <$> mtypesig - (clauses, name) <- someClauses mname - return (FunDef name mtype clauses) - where - someClauses :: Maybe Name -> Parser (NonEmpty (FunEq ()), Name) - someClauses Nothing = do - clause@(FunEq name _ _) <- pFunEq Nothing - (,name) . (clause :|) <$> many (pFunEq (Just name)) - someClauses (Just name) = (,name) <$> someNE (pFunEq (Just name)) - --- | Given the name of the type signature, if any. -pFunEq :: Maybe Name -> Parser (FunEq ()) -pFunEq mCheckName = do - skipWhiteComment - assertAtBlockLeft Fatal "Expected function clause, found indented stuff" - - name <- pIdentifier0 AtLeft Lowercase - case mCheckName of - Just checkName | name /= checkName -> - raise Fatal "Name of function clause does not correspond with type signature" - _ -> return () - - pats <- many (pPattern 11) - rhs <- pRHS "=" - return (FunEq name pats rhs) - --- | Pass "=" for function definitions and "->" for case clauses. -pRHS :: String -> Parser (RHS ()) -pRHS sepsym = do - -- TODO: parse guards - inlineWhite - pKeySym sepsym - Plain <$> pExpr - -pPattern :: Int -> Parser (Pattern ()) -pPattern d = inlineWhite >> pPattern0 d - -pPattern0 :: Int -> Parser (Pattern ()) -pPattern0 d = do - asum [pPatWildcard0 - ,pPatVarOrAs0 - ,pPatCon0 - ,pPatList0 - ,pPatParens0] - where - pPatWildcard0 = pKeySym "_" >> return (PWildcard ()) - pPatVarOrAs0 = do - var <- pIdentifier0 InBlock Lowercase - asum [do inlineWhite - pKeySym "@" - p <- pPattern 11 - return (PAs () var p) - ,return (PVar () var)] - pPatCon0 = do - con <- pIdentifier0 InBlock Uppercase - if d > 0 - then return (PCon () con []) - else do args <- many (pPattern 11) - return (PCon () con args) - pPatList0 = do - char '[' -- special syntax, no need for pKeySym - ps <- pPattern 0 `sepBy` (inlineWhite >> char ',') - inlineWhite - char ']' - return (PList () ps) - pPatParens0 = do - char '(' - inlineWhite - asum [do char ')' - return (PTup () []) - ,do p <- pPattern0 0 - inlineWhite - asum [do char ')' - return p - ,do char ',' - ps <- pPattern 0 `sepBy1` (inlineWhite >> char ',') - return (PTup () (p : ps))]] - -pExpr :: Parser (Expr ()) -pExpr = do - inlineWhite - -- basics: lit, list, var, con, tup - -- expression atom: application of basics - -- expression parser: op - -- around: let, case, if - asum [pELet0 - ,pECase0 - ,pEIf0 - ,pExprOpExpr0 0] - -pELet0 :: Parser (Expr ()) -pELet0 = do - pKeyword "let" - inlineWhite - startLayoutBlock $ do - -- The first occurrence is also going to skip whitespace in front, - -- which is redundant -- but not harmful. - defs <- many $ do - skipWhiteComment - -- Note: now not necessarily in the indented block. Which is - -- precisely what we need here. If we see "in", let the 'many' - -- choice fail so that the defs loop ends. But let it fail outside - -- this asum so that it is the many that picks it up, not this - -- asum. - res <- asum [Nothing <$ lookAhead (pKeyword "in") - ,Just <$> pFunDef0] - case res of - Nothing -> empty - Just def -> return def - inlineWhite - body <- pExpr - return (ELet () defs body) - -pECase0 :: Parser (Expr ()) -pECase0 = do - pKeyword "case" - e <- pExpr - inlineWhite - pKeyword "of" - inlineWhite - startLayoutBlock $ do - -- The first clause is going to skip whitespace, but that's harmless - -- (though redundant). - let pClause = do - skipWhiteComment - whenM (not <$> isInsideBlock) (() <$ empty) - pat <- pPattern0 0 - rhs <- pRHS "->" - return (pat, rhs) - clauses <- many pClause - return (ECase () e clauses) - -pEIf0 :: Parser (Expr ()) -pEIf0 = do - pKeyword "if" - e1 <- pExpr - inlineWhite - pKeyword "then" - e2 <- pExpr - inlineWhite - pKeyword "else" - e3 <- pExpr - return (EIf () e1 e2 e3) - -pExprOpExpr :: Int -> Parser (Expr ()) -pExprOpExpr d = inlineWhite >> pExprOpExpr0 d - -pExprOpExpr0 :: Int -> Parser (Expr ()) -pExprOpExpr0 d = do - e0 <- pEApp0 - climbRight e0 Nothing - where - climbRight :: Expr () -> Maybe ParsedOperator -> Parser (Expr ()) - climbRight lhs mlhsop = do - asum [do paop@(PaOp op d2 a2) <- pInfixOp - guard (d2 >= d) -- respect global minimum precedence - case mlhsop of -- check operator compatibility - Just (PaOp _ d1 a1) -> - guard (d1 > d2 || (d1 == d2 && a1 == a2 && a1 /= AssocNone)) - Nothing -> - return () - let oprhsd = case a2 of AssocRight -> d2 ; _ -> d2 + 1 - rhs <- pExprOpExpr oprhsd - climbRight (EOp () lhs op rhs) (Just paop) - ,return lhs] - -pEApp0 :: Parser (Expr ()) -pEApp0 = do - e1 <- pEAtom0 - es <- many (inlineWhite >> pEAtom0) - case es of - [] -> return e1 - _ -> return (EApp () e1 es) - -pEAtom0 :: Parser (Expr ()) -pEAtom0 = (ELit () <$> pLiteral0) <|> pEList0 <|> pEVar0 <|> pECon0 <|> pEParens0 - -pLiteral0 :: Parser Literal -pLiteral0 = asum - [do as <- some (satisfy isDigit) - let a = read as :: Integer - asum - [do char '.' - bs <- some (satisfy isDigit) - let b = read bs :: Integer - cs <- optional $ do - char 'e' - cs <- some (satisfy isDigit) - return cs - let c = maybe 0 read cs :: Integer - return (LFloat ((fromIntegral a + fromIntegral b / 10 ^ length bs) * 10 ^ c)) - ,return (LInt a)] - ,do char '\'' - c <- pStringChar - char '\'' - return (LChar c) - ,do char '"' - s <- many pStringChar - char '"' - return (LString s)] - -pStringChar :: Parser Char -pStringChar = asum - [do char '\\' - char 'x' - let hexdig = do - c <- satisfy $ \c' -> - let c = toLower c' - in 'a' <= c && c <= 'f' || '0' <= c && c <= '9' - return $ if 'a' <= c then 10 + ord c - ord 'a' - else ord c - ord '0' - digs <- some hexdig - return (chr (sum (zipWith (*) (reverse digs) (iterate (*16) 1)))) - ,do char '\\' - satisfy (const True) >>= \case - 'n' -> return '\n' - 'r' -> return '\r' - 't' -> return '\t' - 'a' -> return '\a' - 'b' -> return '\b' - '\'' -> return '\'' - '\"' -> return '\"' - '0' -> return '\0' - c -> do raise Error $ "Invalid escape sequence: \\" ++ [c] - return '?' - ,do satisfy (\c -> c `notElem` "\n\r\\\'")] - -pEList0 :: Parser (Expr ()) -pEList0 = do - char '[' -- special syntax, no need for pKeySym - es <- sepBy pExpr (inlineWhite >> char ',') - inlineWhite - char ']' - return (EList () es) - -pEVar0 :: Parser (Expr ()) -pEVar0 = EVar () <$> pIdentifier0 InBlock Lowercase - -pECon0 :: Parser (Expr ()) -pECon0 = ECon () <$> pIdentifier0 InBlock Uppercase - -pEParens0 :: Parser (Expr ()) -pEParens0 = do - char '(' - e <- pExpr - inlineWhite - char ')' - return e - -data Associativity = AssocLeft | AssocRight | AssocNone - deriving (Show, Eq) - -data ParsedOperator = PaOp Operator Int Associativity - deriving (Show) - -pInfixOp :: Parser ParsedOperator -pInfixOp = do - inlineWhite - asum [PaOp OEqu 4 AssocNone <$ pKeySym "==" - ,PaOp OAdd 6 AssocLeft <$ pKeySym "+" - ,PaOp OSub 6 AssocLeft <$ pKeySym "-" - ,PaOp OMul 7 AssocLeft <$ pKeySym "*" - ,PaOp ODiv 7 AssocLeft <$ pKeySym "/" - ,PaOp OMod 7 AssocLeft <$ pKeySym "%" - ,PaOp OPow 8 AssocRight <$ pKeySym "^" - ] - -pStandaloneTypesig0 :: Parser (Name, Type) -pStandaloneTypesig0 = do - assertAtBlockLeft Fatal "Expected top-level definition, found indented stuff" - name@(Name namestr) <- pIdentifier0 AtLeft Lowercase - inlineWhite - pKeySym "::" - pushContext ("type signature for '" ++ namestr ++ "'") $ do - ty <- pType - return (name, ty) - -pType :: Parser Type -pType = do - ty1 <- pTypeApp - asum [do inlineWhite - pKeySym "->" - ty2 <- pType - return (TFun ty1 ty2) - ,return ty1] - -pTypeApp :: Parser Type -pTypeApp = many pTypeAtom >>= \case - [] -> raise Fatal "Expected type" - [t] -> return t - t:ts -> return (TApp t ts) - -pTypeAtom :: Parser Type -pTypeAtom = pTypeParens <|> pTypeList <|> pTypeName - where - pTypeParens = do - inlineWhite - char '(' - asum [do inlineWhite - char ')' - return (TTup []) - ,do ty1 <- pType - ty2s <- many $ do - inlineWhite - char ',' - pType - inlineWhite - char ')' - case ty2s of - [] -> return ty1 - _ -> return (TTup (ty1 : ty2s))] - - pTypeList = do - inlineWhite - char '[' - ty <- pType - char ']' - return (TList ty) - - pTypeName = do - inlineWhite - (cs, name) <- pIdentifier0 InBlock Don'tCare - case cs of - Uppercase -> return (TCon name) - Lowercase -> return (TVar name) - --- | Parse the given name-like keyword, ensuring that it is the entire word. -pKeyword :: String -> Parser () -pKeyword s = do - string s - notFollowedBy (() <$ satisfy isNameContChar) - --- | Parse the given symbol-like keyword, ensuring that it is the entire symbol. -pKeySym :: String -> Parser () -pKeySym s = do - string s - notFollowedBy (() <$ satisfy isSymbolChar) - -data Case care where - Uppercase :: Case 'True - Lowercase :: Case 'True - Don'tCare :: Case 'False -deriving instance Show (Case care) - -type family WithCaseOutput care a where - WithCaseOutput 'True a = a - WithCaseOutput 'False a = (Case 'True, a) - --- | Consumes an identifier (word or parenthesised operator) at the current --- position. The `var` production in Haskell2010. --- var -> varid | "(" varsym ")" -pIdentifier0 :: BlockPos -> Case care -> Parser (WithCaseOutput care Name) -pIdentifier0 bpos cs = pAlphaName0 bpos cs <|> pParens0 (pSymbol0 bpos cs) - where - -- | Parser between parens, with the opening paren at the current position. - pParens0 :: Parser a -> Parser a - pParens0 p = do - char '(' - inlineWhite - res <- p - inlineWhite - char ')' - return res - --- | Consumes a word-like name at the current position with the given case. The --- `varid` production in Haskell2010 for 'Lowercase', `conid' for 'Uppercase'. --- --- > varid -> (small {small | large | digit | "'"}) without reservedid -pAlphaName0 :: BlockPos -> Case care -> Parser (WithCaseOutput care Name) -pAlphaName0 bpos cs = do - (_, s) <- readToken - bpos - (\atfst mc -> case (atfst, mc) of - (True , Just c) | isNameHeadChar c -> Just (Right False) - (True , _ ) -> Nothing - (False, Just c) | isNameContChar c -> Just (Right False) - (False, _ ) -> Just (Left ())) - True - (name, adjoin) <- case cs of - Uppercase - | isLower (head s) -> do - raise Error "Unexpected uppercase word at this position, assuming typo" - return (toUpper (head s) : tail s, id) - | otherwise -> return (s, id) - Lowercase - | isUpper (head s) -> do - raise Error "Unexpected lowercase word at this position, assuming typo" - return (toLower (head s) : tail s, id) - | otherwise -> return (s, id) - Don'tCare - | isLower (head s) -> return (s, (Lowercase,)) - | otherwise -> return (s, (Lowercase,)) - guard (name `notElem` ["case", "class", "data", "default", "deriving", "do", "else" - ,"foreign", "if", "import", "in", "infix", "infixl" - ,"infixr", "instance", "let", "module", "newtype", "of" - ,"then", "type", "where", "_"]) - return (adjoin (Name name)) - -isNameHeadChar :: Char -> Bool -isNameHeadChar c = isLetter c || c == '_' - -isNameContChar :: Char -> Bool -isNameContChar c = isNameHeadChar c || isDigit c || c == '\'' - --- | Consumes a symbol at the current position. The `varsym` production in --- Haskell2010 for 'Lowercase', `consym` otherwise, and either if 'Don'tCare'. --- --- > varsym -> ((symbol without ":") {symbol}) without (reservedop | dashes) --- > consym -> (":" {symbol}) without reservedop --- > symbol -> ascSymbol | uniSymbol without (special | "_" | "\"" | "'") --- > ascSymbol -> ```!#$%&⋆+./<=>?@^|-~:``` --- > uniSymbol -> unicode symbol or punctuation --- > dashes -> "--" {"-"} --- > special -> ```(),;[]`{}``` --- > reservedop -> ".." | ":" | "::" | "=" | "\" | "|" | "<-" | "->" | "@" | "~" | "=>" -pSymbol0 :: BlockPos -> Case care -> Parser (WithCaseOutput care Name) -pSymbol0 bpos cs = do - case bpos of - AtLeft -> assertAtBlockLeft Fatal "Expected symbol, but found indented expression" - InBlock -> assertInsideBlock Fatal "Expected symbol, but found end of indented expression" - (c1, adjoin) <- - case cs of Lowercase -> (,id) <$> satisfy (\c -> isSymbolChar c && c /= ':') - Uppercase -> (,id) <$> satisfy (== ':') - Don'tCare -> do c1 <- satisfy (\c -> isSymbolChar c) - return (c1, if c1 == ':' then (Uppercase,) else (Lowercase,)) - crest <- many (satisfy isSymbolChar) - let name = c1 : crest - guard (name `notElem` ["..", ":", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"]) - guard (take 2 name /= "--") - return (adjoin (Name name)) - -isSymbolChar :: Char -> Bool -isSymbolChar c = (isAscSymbol || isUniSymbol) && not isSpecialExt - where - isSpecialExt = c `elem` "(),;[]`{}_\"'" - isAscSymbol = c `elem` "!#$%&⋆+./<=>?@^|-~:" - isUniSymbol = ord c > 127 && (isSymbol c || isPunctuation c) - - -sepBy1 :: Parser a -> Parser sep -> Parser [a] -sepBy1 p psep = do - x1 <- p - (psep >> (x1 :) <$> sepBy1 p psep) <|> return [x1] - -sepBy :: Parser a -> Parser sep -> Parser [a] -sepBy p psep = sepBy1 p psep <|> return [] - --- | Start a new layout block at the current position. The old layout block is --- restored after completion of this subparser. -startLayoutBlock :: Parser a -> Parser a -startLayoutBlock p = do - ps0 <- get - put (ps0 { psBlkLine = psLine ps0 - , psBlkCol = psCol ps0 }) - res <- p - modify (\ps -> ps { psBlkLine = psBlkLine ps0 - , psBlkCol = psBlkCol ps0 }) - return res - -data Fatality fatal where - Error :: Fatality 'False - Fatal :: Fatality 'True -deriving instance Show (Fatality fatal) - -type family FatalCtx fatal a where - FatalCtx 'False a = a ~ () - FatalCtx 'True a = () - --- | Raise an error with the given fatality and description. -raise :: FatalCtx fatal a => Fatality fatal -> String -> Parser a -raise fat msg = do - Context { ctxFile = fp , ctxStack = stk } <- ask - PS { psLine = line, psCol = col } <- get - let fun = case fat of - Error -> dictate . pure - Fatal -> confess . pure - fun (ErrMsg fp stk line col msg) - -raise' :: Fatality fatal -> String -> Parser () -raise' Error = raise Error -raise' Fatal = raise Fatal - --- | Registers a scope description on the stack for error reporting. -pushContext :: String -> Parser a -> Parser a -pushContext descr = local (\c -> c { ctxStack = descr : ctxStack c }) - -data BlockPos = AtLeft | InBlock - deriving (Show) - --- | Consumes a token at the current position, asserting that we are --- in the position indicated by the 'BlockPos' argument. The token is defined --- by a pure stateful parser. If encountering a newline or EOF, the parser is --- run on this character ('Nothing' for EOF); if this produces a result, the --- result is returned; otherwise, the parser fails. The newline is not consumed. -readToken :: BlockPos -> (s -> Maybe Char -> Maybe (Either r s)) -> s -> Parser (r, String) -readToken bpos f s0 = do - case bpos of - AtLeft -> assertAtBlockLeft Fatal "Expected token, but found indented expression" - InBlock -> assertInsideBlock Fatal "Expected token, but found end of indented expression" - let loop :: (s -> Maybe Char -> Maybe (Either r s)) -> s -> Parser (r, String) - loop f' st = do - ps <- get - case psRest ps of - [] | Just (Left res) <- f' st Nothing -> return (res, "") - | otherwise -> empty - '\n' : _ | Just (Left res) <- f' st (Just '\n') -> return (res, "") - c : cs -> case f' st (Just c) of - Nothing -> empty - Just (Left res) -> return (res, "") - Just (Right st') -> do - put (ps { psCol = psCol ps + 1, psRest = cs }) - fmap (c :) <$> loop f' st' - loop f s0 - --- | Consumes all whitespace and comments (including newlines), but only if --- this then leaves the parser inside the current block. If not, this fails. -inlineWhite :: Parser () -inlineWhite = do - skipWhiteComment - whenM (not <$> isInsideBlock) empty - --- | Consumes all whitespace and comments (including newlines). Note: this may --- end outside the current block. -skipWhiteComment :: Parser () -skipWhiteComment = do - inlineSpaces - _ <- many (blockComment >> inlineSpaces) - optional_ lineComment - optional_ (consumeNewline >> skipWhiteComment) - where - -- | Consumes some inline whitespace. Stops before newlines. - inlineSpaces :: Parser () - inlineSpaces = readWhileInline isSpace - --- | Consumes an delimited comment including both end markers. Note: this may --- end outside the current block. -blockComment :: Parser () -blockComment = do - string "{-" -- no need for pKeySym here - let loop = do - readWhileInline (`notElem` "{-") -- "-}" also starts with '-' - asum [string "-}" - ,eof >> raise Error "Unfinished {- -} comment at end of file" - ,blockComment >> loop - ,consumeNewline >> loop] - loop - --- | Consumes a line comment marker and the rest of the line, excluding --- newline. -lineComment :: Parser () -lineComment = do - -- '--!' is an operator, so we need to parse a whole symbol here. - pKeySym "--" - readWhileInline (const True) - --- | Raises an error if we're not currently at the given column. -assertAtBlockLeft :: Fatality fatal -> String -> Parser () -assertAtBlockLeft fat msg = whenM (not <$> isAtBlockLeft) $ raise' fat msg - --- | Raises an error if psCol is not greater than psRefCol. -assertInsideBlock :: Fatality fatal -> String -> Parser () -assertInsideBlock fat msg = whenM (not <$> isInsideBlock) $ raise' fat msg - --- | Raises an error if we're not currently at EOF. -assertEOF :: Fatality fatal -> Parser () -assertEOF fat = gets psRest >>= \case - [] -> return () - _ -> raise' fat "Unexpected stuff" - --- | Returns whether the current position is _within_ the current block, for --- soft-wrapping content. This means that col > blkCol. -isInsideBlock :: Parser Bool -isInsideBlock = do - ps <- get - return $ psLine ps >= psBlkLine ps && psCol ps > psBlkCol ps - --- | Returns whether the current position is at the left border of the block; --- this is for list content such as function definitions or let bindings. This --- means that col == blkCol. -isAtBlockLeft :: Parser Bool -isAtBlockLeft = do - ps <- get - return $ psLine ps >= psBlkLine ps && psCol ps == psBlkCol ps - --- | Consumes characters while the predicate holds or until (and excluding) --- a newline, whichever comes first. -readWhileInline :: (Char -> Bool) -> Parser () -readWhileInline p = do - (taken, rest) <- span (\c -> p c && c /= '\n') <$> gets psRest - modify (\ps -> ps { psCol = psCol ps + length taken - , psRest = rest }) - --- | Consumes exactly one newline at the current position. -consumeNewline :: Parser () -consumeNewline = gets psRest >>= \case - '\n' : rest -> modify (\ps -> ps { psLine = psLine ps + 1 - , psCol = 0 - , psRest = rest }) - _ -> empty - --- | Consumes exactly one character, unequal to newline, at the current position. -satisfy :: (Char -> Bool) -> Parser Char -satisfy p = do - -- traceM "entering satisfy" - r <- gets psRest - -- traceM "got rest" - r `seq` return () - -- traceM "seqd rest" - -- traceM ("rest is " ++ show r) - case r of - c : rest | c /= '\n', p c -> do - modify (\ps -> ps { psCol = psCol ps + 1 - , psRest = rest }) - return c - _ -> empty - --- | Consumes exactly this character at the current position. Must not be a --- newline. -char :: Char -> Parser () -char c = string [c] - --- | Consumes exactly this string at the current position. The string must not --- contain a newline. -string :: String -> Parser () -string s | any (== '\n') s = error "Newline in 'string' argument" -string s = do - ps <- get - if take (length s) (psRest ps) == s - then put (ps { psCol = psCol ps + length s - , psRest = drop (length s) (psRest ps) }) - else empty - -lookAhead :: Parser () -> Parser () -lookAhead p = do - ps <- get - success <- absolve False (True <$ p) - put ps -- restore state, as if nothing happened - when (not success) empty - -notFollowedBy :: Parser () -> Parser () -notFollowedBy p = do - ps <- get - success <- absolve True (False <$ p) - put ps -- restore state, as if nothing happened - when (not success) empty - --- | Only succeeds at EOF. -eof :: Parser () -eof = gets psRest >>= \case [] -> return () - _ -> empty - -whenM :: (Monad m, Monoid a) => m Bool -> m a -> m a -whenM mb mx = mb >>= \b -> if b then mx else return mempty - -optional_ :: Alternative f => f a -> f () -optional_ a = (() <$ a) <|> pure () - -someNE :: Alternative f => f a -> f (NonEmpty a) -someNE a = (:|) <$> a <*> many a diff --git a/hie.yaml b/hie.yaml new file mode 100644 index 0000000..9e856b5 --- /dev/null +++ b/hie.yaml @@ -0,0 +1,12 @@ +cradle: + multi: + - path: "./src" + config: + cradle: + cabal: + component: "exe:hs-visinter" + - path: "./examples" + config: + cradle: + direct: + arguments: [] diff --git a/hs-visinter.cabal b/hs-visinter.cabal index 11d19eb..1f9a39f 100644 --- a/hs-visinter.cabal +++ b/hs-visinter.cabal @@ -18,6 +18,6 @@ executable hs-visinter mtl, monad-chronicle ^>= 1.0.0.1, these - hs-source-dirs: . + hs-source-dirs: src default-language: Haskell2010 ghc-options: -Wall -threaded diff --git a/src/AST.hs b/src/AST.hs new file mode 100644 index 0000000..47652b6 --- /dev/null +++ b/src/AST.hs @@ -0,0 +1,61 @@ +module AST where + +import Data.List.NonEmpty (NonEmpty) + + +newtype Name = Name String + deriving (Show, Eq) + +data Program t = Program [DataDef] [FunDef t] + deriving (Show) + +data DataDef = DataDef Name [Name] [(Name, [Type])] + deriving (Show) + +data FunDef t = FunDef Name (Maybe Type) (NonEmpty (FunEq t)) + deriving (Show) + +data FunEq t = FunEq Name [Pattern t] (RHS t) + deriving (Show) + +data Type + = TApp Type [Type] + | TTup [Type] + | TList Type + | TFun Type Type + | TCon Name + | TVar Name + deriving (Show) + +data Pattern t + = PWildcard t + | PVar t Name + | PAs t Name (Pattern t) + | PCon t Name [Pattern t] + | PList t [Pattern t] + | PTup t [Pattern t] + deriving (Show) + +data RHS t + = Guarded [(Expr t, Expr t)] -- currently not parsed + | Plain (Expr t) + deriving (Show) + +data Expr t + = ELit t Literal + | EVar t Name + | ECon t Name + | EList t [Expr t] + | ETup t [Expr t] + | EApp t (Expr t) [Expr t] + | EOp t (Expr t) Operator (Expr t) + | EIf t (Expr t) (Expr t) (Expr t) + | ECase t (Expr t) [(Pattern t, RHS t)] + | ELet t [FunDef t] (Expr t) + deriving (Show) + +data Literal = LInt Integer | LFloat Rational | LChar Char | LString String + deriving (Show) + +data Operator = OAdd | OSub | OMul | ODiv | OMod | OEqu | OPow + deriving (Show) diff --git a/src/Main.hs b/src/Main.hs new file mode 100644 index 0000000..c9de0cc --- /dev/null +++ b/src/Main.hs @@ -0,0 +1,27 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE TupleSections #-} +module Main where + +import System.Environment (getArgs) +import System.Exit (die, exitFailure) + +import Parser + + +main :: IO () +main = do + (source, fname) <- getArgs >>= \case + [] -> (,"") <$> getContents + [fname] -> (,fname) <$> readFile fname + _ -> die "Usage: hs-visinter [filename.hs]" + + prog <- case parse fname source of + This errs -> do + mapM_ (putStrLn . printErrMsg) errs + exitFailure + These errs res -> do + mapM_ (putStrLn . printErrMsg) errs + return res + That res -> return res + + print prog diff --git a/src/Parser.hs b/src/Parser.hs new file mode 100644 index 0000000..0f0bd0c --- /dev/null +++ b/src/Parser.hs @@ -0,0 +1,824 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE StandaloneDeriving #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE TypeOperators #-} +-- I don't want a warning for 'head' and 'tail' in this file. But I also don't +-- want GHCs before 9.8 to complain that they don't know the x-partial warning. +{-# OPTIONS_GHC -Wno-unrecognised-warning-flags -Wno-x-partial #-} +module Parser ( + parse, + printErrMsg, + -- * Re-exports + These(..), +) where + +import Control.Applicative +import Control.Monad +import Control.Monad.Chronicle +import Control.Monad.Reader +import Control.Monad.State.Lazy +import Data.Bifunctor (first) +import Data.Char +import Data.Either (partitionEithers) +import Data.List.NonEmpty (NonEmpty(..)) +import Data.These +import Data.Tuple (swap) + +-- import Debug.Trace + +import AST + + +-- Positions are zero-based in both dimensions. +-- See 'isInsideBlock' and 'isAtBlockLeft' for the two relevant "inside the +-- block" conditions. +data PS = PS + { psBlkLine :: Int -- ^ Start line of current layout block + , psBlkCol :: Int -- ^ Start column of current layout block + , psLine :: Int -- ^ Current line + , psCol :: Int -- ^ Current column + , psRest :: String -- ^ Rest of the input + } + deriving (Show) + +data Context = Context + { ctxFile :: FilePath + , ctxStack :: [String] -- ^ Stack of syntax scopes, for error reporting + } + deriving (Show) + +-- ReaderT Context (ChronicleT [ErrMsg] (State PS) a) +-- Context -> ChronicleT [ErrMsg] (State PS) a +-- Context -> State PS (These [ErrMsg] a) +-- Context -> PS -> Identity (These [ErrMsg] a, PS) +-- Context -> PS -> (These [ErrMsg] a, PS) +-- whereas I want: +-- Context -> PS -> These [ErrMsg] (a, PS) +-- which is not any transformer stack, but a new monad. +newtype Parser a = Parser { runParser :: Context -> PS -> These [ErrMsg] (PS, a) } + +instance Functor Parser where + fmap f (Parser g) = Parser (\ctx ps -> fmap (fmap f) (g ctx ps)) + +instance Applicative Parser where + pure x = Parser (\_ ps -> That (ps, x)) + (<*>) = ap + +instance Monad Parser where + Parser g >>= f = Parser $ \ctx ps -> + case g ctx ps of + This errs -> This errs + That (ps', x) -> runParser (f x) ctx ps' + These errs (ps', x) -> case runParser (f x) ctx ps' of + This errs' -> This (errs <> errs') + That res -> These errs res + These errs' res -> These (errs <> errs') res + +instance Alternative Parser where + empty = Parser (\_ _ -> This []) + Parser f <|> Parser g = Parser $ \ctx ps -> + case f ctx ps of + This _ -> g ctx ps + success -> success + +instance MonadState PS Parser where + state f = Parser $ \_ ps -> That (swap (f ps)) + +instance MonadReader Context Parser where + reader f = Parser $ \ctx ps -> That (ps, f ctx) + local f (Parser g) = Parser (g . f) + +instance MonadChronicle [ErrMsg] Parser where + dictate errs = Parser (\_ ps -> These errs (ps, ())) + confess errs = Parser (\_ _ -> This errs) + memento (Parser f) = Parser (\ctx ps -> case f ctx ps of + This errs -> That (ps, Left errs) + That res -> That (Right <$> res) + These errs res -> These errs (Right <$> res)) + absolve def (Parser f) = Parser (\ctx ps -> case f ctx ps of + This _ -> That (ps, def) + success -> success) + condemn (Parser f) = Parser (\ctx ps -> case f ctx ps of + These errs _ -> This errs + res -> res) + retcon g (Parser f) = Parser (\ctx ps -> first g (f ctx ps)) + chronicle th = Parser (\_ ps -> (ps,) <$> th) + +-- Positions are zero-based in both dimensions +data ErrMsg = ErrMsg + { errFile :: FilePath + , errStk :: [String] + , errLine :: Int + , errCol :: Int + , errMsg :: String } + deriving (Show) + +printErrMsg :: ErrMsg -> String +printErrMsg (ErrMsg fp stk y x s) = + unlines (map (\descr -> "In " ++ descr ++ ":") (reverse stk)) ++ + fp ++ ":" ++ show (y + 1) ++ ":" ++ show (x + 1) ++ ": " ++ s + +parse :: FilePath -> String -> These [ErrMsg] (Program ()) +parse fp source = fmap snd $ runParser pProgram (Context fp []) (PS 0 0 0 0 source) + +pProgram :: Parser (Program ()) +pProgram = do + defs <- many pTopDef + let (datadefs, fundefs) = partitionEithers defs + skipWhiteComment + assertEOF Error + return (Program datadefs fundefs) + +pTopDef :: Parser (Either DataDef (FunDef ())) +pTopDef = do + skipWhiteComment + Left <$> pDataDef0 <|> Right <$> pFunDef0 + +pDataDef0 :: Parser DataDef +pDataDef0 = do + pKeyword "data" + inlineWhite + name <- pIdentifier0 InBlock Uppercase + params <- many (inlineWhite >> pIdentifier0 InBlock Lowercase) + cons <- pDatacons "=" + return (DataDef name params cons) + where + pDatacons :: String -> Parser [(Name, [Type])] + pDatacons leader = do + inlineWhite + pKeySym leader + inlineWhite + name <- pIdentifier0 InBlock Uppercase + fields <- many pTypeAtom + rest <- pDatacons "|" <|> return [] + return ((name, fields) : rest) + +pFunDef0 :: Parser (FunDef ()) +pFunDef0 = do + mtypesig <- optional pStandaloneTypesig0 + let mname = fst <$> mtypesig + mtype = snd <$> mtypesig + (clauses, name) <- someClauses mname + return (FunDef name mtype clauses) + where + someClauses :: Maybe Name -> Parser (NonEmpty (FunEq ()), Name) + someClauses Nothing = do + clause@(FunEq name _ _) <- pFunEq Nothing + (,name) . (clause :|) <$> many (pFunEq (Just name)) + someClauses (Just name) = (,name) <$> someNE (pFunEq (Just name)) + +-- | Given the name of the type signature, if any. +pFunEq :: Maybe Name -> Parser (FunEq ()) +pFunEq mCheckName = do + skipWhiteComment + assertAtBlockLeft Fatal "Expected function clause, found indented stuff" + + name <- pIdentifier0 AtLeft Lowercase + case mCheckName of + Just checkName | name /= checkName -> + raise Fatal "Name of function clause does not correspond with type signature" + _ -> return () + + pats <- many (pPattern 11) + rhs <- pRHS "=" + return (FunEq name pats rhs) + +-- | Pass "=" for function definitions and "->" for case clauses. +pRHS :: String -> Parser (RHS ()) +pRHS sepsym = do + -- TODO: parse guards + inlineWhite + pKeySym sepsym + Plain <$> pExpr + +pPattern :: Int -> Parser (Pattern ()) +pPattern d = inlineWhite >> pPattern0 d + +pPattern0 :: Int -> Parser (Pattern ()) +pPattern0 d = do + asum [pPatWildcard0 + ,pPatVarOrAs0 + ,pPatCon0 + ,pPatList0 + ,pPatParens0] + where + pPatWildcard0 = pKeySym "_" >> return (PWildcard ()) + pPatVarOrAs0 = do + var <- pIdentifier0 InBlock Lowercase + asum [do inlineWhite + pKeySym "@" + p <- pPattern 11 + return (PAs () var p) + ,return (PVar () var)] + pPatCon0 = do + con <- pIdentifier0 InBlock Uppercase + if d > 0 + then return (PCon () con []) + else do args <- many (pPattern 11) + return (PCon () con args) + pPatList0 = do + char '[' -- special syntax, no need for pKeySym + ps <- pPattern 0 `sepBy` (inlineWhite >> char ',') + inlineWhite + char ']' + return (PList () ps) + pPatParens0 = do + char '(' + inlineWhite + asum [do char ')' + return (PTup () []) + ,do p <- pPattern0 0 + inlineWhite + asum [do char ')' + return p + ,do char ',' + ps <- pPattern 0 `sepBy1` (inlineWhite >> char ',') + return (PTup () (p : ps))]] + +pExpr :: Parser (Expr ()) +pExpr = do + inlineWhite + -- basics: lit, list, var, con, tup + -- expression atom: application of basics + -- expression parser: op + -- around: let, case, if + asum [pELet0 + ,pECase0 + ,pEIf0 + ,pExprOpExpr0 0] + +pELet0 :: Parser (Expr ()) +pELet0 = do + pKeyword "let" + inlineWhite + startLayoutBlock $ do + -- The first occurrence is also going to skip whitespace in front, + -- which is redundant -- but not harmful. + defs <- many $ do + skipWhiteComment + -- Note: now not necessarily in the indented block. Which is + -- precisely what we need here. If we see "in", let the 'many' + -- choice fail so that the defs loop ends. But let it fail outside + -- this asum so that it is the many that picks it up, not this + -- asum. + res <- asum [Nothing <$ lookAhead (pKeyword "in") + ,Just <$> pFunDef0] + case res of + Nothing -> empty + Just def -> return def + inlineWhite + body <- pExpr + return (ELet () defs body) + +pECase0 :: Parser (Expr ()) +pECase0 = do + pKeyword "case" + e <- pExpr + inlineWhite + pKeyword "of" + inlineWhite + startLayoutBlock $ do + -- The first clause is going to skip whitespace, but that's harmless + -- (though redundant). + let pClause = do + skipWhiteComment + whenM (not <$> isInsideBlock) (() <$ empty) + pat <- pPattern0 0 + rhs <- pRHS "->" + return (pat, rhs) + clauses <- many pClause + return (ECase () e clauses) + +pEIf0 :: Parser (Expr ()) +pEIf0 = do + pKeyword "if" + e1 <- pExpr + inlineWhite + pKeyword "then" + e2 <- pExpr + inlineWhite + pKeyword "else" + e3 <- pExpr + return (EIf () e1 e2 e3) + +pExprOpExpr :: Int -> Parser (Expr ()) +pExprOpExpr d = inlineWhite >> pExprOpExpr0 d + +pExprOpExpr0 :: Int -> Parser (Expr ()) +pExprOpExpr0 d = do + e0 <- pEApp0 + climbRight e0 Nothing + where + climbRight :: Expr () -> Maybe ParsedOperator -> Parser (Expr ()) + climbRight lhs mlhsop = do + asum [do paop@(PaOp op d2 a2) <- pInfixOp + guard (d2 >= d) -- respect global minimum precedence + case mlhsop of -- check operator compatibility + Just (PaOp _ d1 a1) -> + guard (d1 > d2 || (d1 == d2 && a1 == a2 && a1 /= AssocNone)) + Nothing -> + return () + let oprhsd = case a2 of AssocRight -> d2 ; _ -> d2 + 1 + rhs <- pExprOpExpr oprhsd + climbRight (EOp () lhs op rhs) (Just paop) + ,return lhs] + +pEApp0 :: Parser (Expr ()) +pEApp0 = do + e1 <- pEAtom0 + es <- many (inlineWhite >> pEAtom0) + case es of + [] -> return e1 + _ -> return (EApp () e1 es) + +pEAtom0 :: Parser (Expr ()) +pEAtom0 = (ELit () <$> pLiteral0) <|> pEList0 <|> pEVar0 <|> pECon0 <|> pEParens0 + +pLiteral0 :: Parser Literal +pLiteral0 = asum + [do as <- some (satisfy isDigit) + let a = read as :: Integer + asum + [do char '.' + bs <- some (satisfy isDigit) + let b = read bs :: Integer + cs <- optional $ do + char 'e' + cs <- some (satisfy isDigit) + return cs + let c = maybe 0 read cs :: Integer + return (LFloat ((fromIntegral a + fromIntegral b / 10 ^ length bs) * 10 ^ c)) + ,return (LInt a)] + ,do char '\'' + c <- pStringChar + char '\'' + return (LChar c) + ,do char '"' + s <- many pStringChar + char '"' + return (LString s)] + +pStringChar :: Parser Char +pStringChar = asum + [do char '\\' + char 'x' + let hexdig = do + c <- satisfy $ \c' -> + let c = toLower c' + in 'a' <= c && c <= 'f' || '0' <= c && c <= '9' + return $ if 'a' <= c then 10 + ord c - ord 'a' + else ord c - ord '0' + digs <- some hexdig + return (chr (sum (zipWith (*) (reverse digs) (iterate (*16) 1)))) + ,do char '\\' + satisfy (const True) >>= \case + 'n' -> return '\n' + 'r' -> return '\r' + 't' -> return '\t' + 'a' -> return '\a' + 'b' -> return '\b' + '\'' -> return '\'' + '\"' -> return '\"' + '0' -> return '\0' + c -> do raise Error $ "Invalid escape sequence: \\" ++ [c] + return '?' + ,do satisfy (\c -> c `notElem` "\n\r\\\'")] + +pEList0 :: Parser (Expr ()) +pEList0 = do + char '[' -- special syntax, no need for pKeySym + es <- sepBy pExpr (inlineWhite >> char ',') + inlineWhite + char ']' + return (EList () es) + +pEVar0 :: Parser (Expr ()) +pEVar0 = EVar () <$> pIdentifier0 InBlock Lowercase + +pECon0 :: Parser (Expr ()) +pECon0 = ECon () <$> pIdentifier0 InBlock Uppercase + +pEParens0 :: Parser (Expr ()) +pEParens0 = do + char '(' + e <- pExpr + inlineWhite + char ')' + return e + +data Associativity = AssocLeft | AssocRight | AssocNone + deriving (Show, Eq) + +data ParsedOperator = PaOp Operator Int Associativity + deriving (Show) + +pInfixOp :: Parser ParsedOperator +pInfixOp = do + inlineWhite + asum [PaOp OEqu 4 AssocNone <$ pKeySym "==" + ,PaOp OAdd 6 AssocLeft <$ pKeySym "+" + ,PaOp OSub 6 AssocLeft <$ pKeySym "-" + ,PaOp OMul 7 AssocLeft <$ pKeySym "*" + ,PaOp ODiv 7 AssocLeft <$ pKeySym "/" + ,PaOp OMod 7 AssocLeft <$ pKeySym "%" + ,PaOp OPow 8 AssocRight <$ pKeySym "^" + ] + +pStandaloneTypesig0 :: Parser (Name, Type) +pStandaloneTypesig0 = do + assertAtBlockLeft Fatal "Expected top-level definition, found indented stuff" + name@(Name namestr) <- pIdentifier0 AtLeft Lowercase + inlineWhite + pKeySym "::" + pushContext ("type signature for '" ++ namestr ++ "'") $ do + ty <- pType + return (name, ty) + +pType :: Parser Type +pType = do + ty1 <- pTypeApp + asum [do inlineWhite + pKeySym "->" + ty2 <- pType + return (TFun ty1 ty2) + ,return ty1] + +pTypeApp :: Parser Type +pTypeApp = many pTypeAtom >>= \case + [] -> raise Fatal "Expected type" + [t] -> return t + t:ts -> return (TApp t ts) + +pTypeAtom :: Parser Type +pTypeAtom = pTypeParens <|> pTypeList <|> pTypeName + where + pTypeParens = do + inlineWhite + char '(' + asum [do inlineWhite + char ')' + return (TTup []) + ,do ty1 <- pType + ty2s <- many $ do + inlineWhite + char ',' + pType + inlineWhite + char ')' + case ty2s of + [] -> return ty1 + _ -> return (TTup (ty1 : ty2s))] + + pTypeList = do + inlineWhite + char '[' + ty <- pType + char ']' + return (TList ty) + + pTypeName = do + inlineWhite + (cs, name) <- pIdentifier0 InBlock Don'tCare + case cs of + Uppercase -> return (TCon name) + Lowercase -> return (TVar name) + +-- | Parse the given name-like keyword, ensuring that it is the entire word. +pKeyword :: String -> Parser () +pKeyword s = do + string s + notFollowedBy (() <$ satisfy isNameContChar) + +-- | Parse the given symbol-like keyword, ensuring that it is the entire symbol. +pKeySym :: String -> Parser () +pKeySym s = do + string s + notFollowedBy (() <$ satisfy isSymbolChar) + +data Case care where + Uppercase :: Case 'True + Lowercase :: Case 'True + Don'tCare :: Case 'False +deriving instance Show (Case care) + +type family WithCaseOutput care a where + WithCaseOutput 'True a = a + WithCaseOutput 'False a = (Case 'True, a) + +-- | Consumes an identifier (word or parenthesised operator) at the current +-- position. The `var` production in Haskell2010. +-- var -> varid | "(" varsym ")" +pIdentifier0 :: BlockPos -> Case care -> Parser (WithCaseOutput care Name) +pIdentifier0 bpos cs = pAlphaName0 bpos cs <|> pParens0 (pSymbol0 bpos cs) + where + -- | Parser between parens, with the opening paren at the current position. + pParens0 :: Parser a -> Parser a + pParens0 p = do + char '(' + inlineWhite + res <- p + inlineWhite + char ')' + return res + +-- | Consumes a word-like name at the current position with the given case. The +-- `varid` production in Haskell2010 for 'Lowercase', `conid' for 'Uppercase'. +-- +-- > varid -> (small {small | large | digit | "'"}) without reservedid +pAlphaName0 :: BlockPos -> Case care -> Parser (WithCaseOutput care Name) +pAlphaName0 bpos cs = do + (_, s) <- readToken + bpos + (\atfst mc -> case (atfst, mc) of + (True , Just c) | isNameHeadChar c -> Just (Right False) + (True , _ ) -> Nothing + (False, Just c) | isNameContChar c -> Just (Right False) + (False, _ ) -> Just (Left ())) + True + (name, adjoin) <- case cs of + Uppercase + | isLower (head s) -> do + raise Error "Unexpected uppercase word at this position, assuming typo" + return (toUpper (head s) : tail s, id) + | otherwise -> return (s, id) + Lowercase + | isUpper (head s) -> do + raise Error "Unexpected lowercase word at this position, assuming typo" + return (toLower (head s) : tail s, id) + | otherwise -> return (s, id) + Don'tCare + | isLower (head s) -> return (s, (Lowercase,)) + | otherwise -> return (s, (Lowercase,)) + guard (name `notElem` ["case", "class", "data", "default", "deriving", "do", "else" + ,"foreign", "if", "import", "in", "infix", "infixl" + ,"infixr", "instance", "let", "module", "newtype", "of" + ,"then", "type", "where", "_"]) + return (adjoin (Name name)) + +isNameHeadChar :: Char -> Bool +isNameHeadChar c = isLetter c || c == '_' + +isNameContChar :: Char -> Bool +isNameContChar c = isNameHeadChar c || isDigit c || c == '\'' + +-- | Consumes a symbol at the current position. The `varsym` production in +-- Haskell2010 for 'Lowercase', `consym` otherwise, and either if 'Don'tCare'. +-- +-- > varsym -> ((symbol without ":") {symbol}) without (reservedop | dashes) +-- > consym -> (":" {symbol}) without reservedop +-- > symbol -> ascSymbol | uniSymbol without (special | "_" | "\"" | "'") +-- > ascSymbol -> ```!#$%&⋆+./<=>?@^|-~:``` +-- > uniSymbol -> unicode symbol or punctuation +-- > dashes -> "--" {"-"} +-- > special -> ```(),;[]`{}``` +-- > reservedop -> ".." | ":" | "::" | "=" | "\" | "|" | "<-" | "->" | "@" | "~" | "=>" +pSymbol0 :: BlockPos -> Case care -> Parser (WithCaseOutput care Name) +pSymbol0 bpos cs = do + case bpos of + AtLeft -> assertAtBlockLeft Fatal "Expected symbol, but found indented expression" + InBlock -> assertInsideBlock Fatal "Expected symbol, but found end of indented expression" + (c1, adjoin) <- + case cs of Lowercase -> (,id) <$> satisfy (\c -> isSymbolChar c && c /= ':') + Uppercase -> (,id) <$> satisfy (== ':') + Don'tCare -> do c1 <- satisfy (\c -> isSymbolChar c) + return (c1, if c1 == ':' then (Uppercase,) else (Lowercase,)) + crest <- many (satisfy isSymbolChar) + let name = c1 : crest + guard (name `notElem` ["..", ":", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"]) + guard (take 2 name /= "--") + return (adjoin (Name name)) + +isSymbolChar :: Char -> Bool +isSymbolChar c = (isAscSymbol || isUniSymbol) && not isSpecialExt + where + isSpecialExt = c `elem` "(),;[]`{}_\"'" + isAscSymbol = c `elem` "!#$%&⋆+./<=>?@^|-~:" + isUniSymbol = ord c > 127 && (isSymbol c || isPunctuation c) + + +sepBy1 :: Parser a -> Parser sep -> Parser [a] +sepBy1 p psep = do + x1 <- p + (psep >> (x1 :) <$> sepBy1 p psep) <|> return [x1] + +sepBy :: Parser a -> Parser sep -> Parser [a] +sepBy p psep = sepBy1 p psep <|> return [] + +-- | Start a new layout block at the current position. The old layout block is +-- restored after completion of this subparser. +startLayoutBlock :: Parser a -> Parser a +startLayoutBlock p = do + ps0 <- get + put (ps0 { psBlkLine = psLine ps0 + , psBlkCol = psCol ps0 }) + res <- p + modify (\ps -> ps { psBlkLine = psBlkLine ps0 + , psBlkCol = psBlkCol ps0 }) + return res + +data Fatality fatal where + Error :: Fatality 'False + Fatal :: Fatality 'True +deriving instance Show (Fatality fatal) + +type family FatalCtx fatal a where + FatalCtx 'False a = a ~ () + FatalCtx 'True a = () + +-- | Raise an error with the given fatality and description. +raise :: FatalCtx fatal a => Fatality fatal -> String -> Parser a +raise fat msg = do + Context { ctxFile = fp , ctxStack = stk } <- ask + PS { psLine = line, psCol = col } <- get + let fun = case fat of + Error -> dictate . pure + Fatal -> confess . pure + fun (ErrMsg fp stk line col msg) + +raise' :: Fatality fatal -> String -> Parser () +raise' Error = raise Error +raise' Fatal = raise Fatal + +-- | Registers a scope description on the stack for error reporting. +pushContext :: String -> Parser a -> Parser a +pushContext descr = local (\c -> c { ctxStack = descr : ctxStack c }) + +data BlockPos = AtLeft | InBlock + deriving (Show) + +-- | Consumes a token at the current position, asserting that we are +-- in the position indicated by the 'BlockPos' argument. The token is defined +-- by a pure stateful parser. If encountering a newline or EOF, the parser is +-- run on this character ('Nothing' for EOF); if this produces a result, the +-- result is returned; otherwise, the parser fails. The newline is not consumed. +readToken :: BlockPos -> (s -> Maybe Char -> Maybe (Either r s)) -> s -> Parser (r, String) +readToken bpos f s0 = do + case bpos of + AtLeft -> assertAtBlockLeft Fatal "Expected token, but found indented expression" + InBlock -> assertInsideBlock Fatal "Expected token, but found end of indented expression" + let loop :: (s -> Maybe Char -> Maybe (Either r s)) -> s -> Parser (r, String) + loop f' st = do + ps <- get + case psRest ps of + [] | Just (Left res) <- f' st Nothing -> return (res, "") + | otherwise -> empty + '\n' : _ | Just (Left res) <- f' st (Just '\n') -> return (res, "") + c : cs -> case f' st (Just c) of + Nothing -> empty + Just (Left res) -> return (res, "") + Just (Right st') -> do + put (ps { psCol = psCol ps + 1, psRest = cs }) + fmap (c :) <$> loop f' st' + loop f s0 + +-- | Consumes all whitespace and comments (including newlines), but only if +-- this then leaves the parser inside the current block. If not, this fails. +inlineWhite :: Parser () +inlineWhite = do + skipWhiteComment + whenM (not <$> isInsideBlock) empty + +-- | Consumes all whitespace and comments (including newlines). Note: this may +-- end outside the current block. +skipWhiteComment :: Parser () +skipWhiteComment = do + inlineSpaces + _ <- many (blockComment >> inlineSpaces) + optional_ lineComment + optional_ (consumeNewline >> skipWhiteComment) + where + -- | Consumes some inline whitespace. Stops before newlines. + inlineSpaces :: Parser () + inlineSpaces = readWhileInline isSpace + +-- | Consumes an delimited comment including both end markers. Note: this may +-- end outside the current block. +blockComment :: Parser () +blockComment = do + string "{-" -- no need for pKeySym here + let loop = do + readWhileInline (`notElem` "{-") -- "-}" also starts with '-' + asum [string "-}" + ,eof >> raise Error "Unfinished {- -} comment at end of file" + ,blockComment >> loop + ,consumeNewline >> loop] + loop + +-- | Consumes a line comment marker and the rest of the line, excluding +-- newline. +lineComment :: Parser () +lineComment = do + -- '--!' is an operator, so we need to parse a whole symbol here. + pKeySym "--" + readWhileInline (const True) + +-- | Raises an error if we're not currently at the given column. +assertAtBlockLeft :: Fatality fatal -> String -> Parser () +assertAtBlockLeft fat msg = whenM (not <$> isAtBlockLeft) $ raise' fat msg + +-- | Raises an error if psCol is not greater than psRefCol. +assertInsideBlock :: Fatality fatal -> String -> Parser () +assertInsideBlock fat msg = whenM (not <$> isInsideBlock) $ raise' fat msg + +-- | Raises an error if we're not currently at EOF. +assertEOF :: Fatality fatal -> Parser () +assertEOF fat = gets psRest >>= \case + [] -> return () + _ -> raise' fat "Unexpected stuff" + +-- | Returns whether the current position is _within_ the current block, for +-- soft-wrapping content. This means that col > blkCol. +isInsideBlock :: Parser Bool +isInsideBlock = do + ps <- get + return $ psLine ps >= psBlkLine ps && psCol ps > psBlkCol ps + +-- | Returns whether the current position is at the left border of the block; +-- this is for list content such as function definitions or let bindings. This +-- means that col == blkCol. +isAtBlockLeft :: Parser Bool +isAtBlockLeft = do + ps <- get + return $ psLine ps >= psBlkLine ps && psCol ps == psBlkCol ps + +-- | Consumes characters while the predicate holds or until (and excluding) +-- a newline, whichever comes first. +readWhileInline :: (Char -> Bool) -> Parser () +readWhileInline p = do + (taken, rest) <- span (\c -> p c && c /= '\n') <$> gets psRest + modify (\ps -> ps { psCol = psCol ps + length taken + , psRest = rest }) + +-- | Consumes exactly one newline at the current position. +consumeNewline :: Parser () +consumeNewline = gets psRest >>= \case + '\n' : rest -> modify (\ps -> ps { psLine = psLine ps + 1 + , psCol = 0 + , psRest = rest }) + _ -> empty + +-- | Consumes exactly one character, unequal to newline, at the current position. +satisfy :: (Char -> Bool) -> Parser Char +satisfy p = do + -- traceM "entering satisfy" + r <- gets psRest + -- traceM "got rest" + r `seq` return () + -- traceM "seqd rest" + -- traceM ("rest is " ++ show r) + case r of + c : rest | c /= '\n', p c -> do + modify (\ps -> ps { psCol = psCol ps + 1 + , psRest = rest }) + return c + _ -> empty + +-- | Consumes exactly this character at the current position. Must not be a +-- newline. +char :: Char -> Parser () +char c = string [c] + +-- | Consumes exactly this string at the current position. The string must not +-- contain a newline. +string :: String -> Parser () +string s | any (== '\n') s = error "Newline in 'string' argument" +string s = do + ps <- get + if take (length s) (psRest ps) == s + then put (ps { psCol = psCol ps + length s + , psRest = drop (length s) (psRest ps) }) + else empty + +lookAhead :: Parser () -> Parser () +lookAhead p = do + ps <- get + success <- absolve False (True <$ p) + put ps -- restore state, as if nothing happened + when (not success) empty + +notFollowedBy :: Parser () -> Parser () +notFollowedBy p = do + ps <- get + success <- absolve True (False <$ p) + put ps -- restore state, as if nothing happened + when (not success) empty + +-- | Only succeeds at EOF. +eof :: Parser () +eof = gets psRest >>= \case [] -> return () + _ -> empty + +whenM :: (Monad m, Monoid a) => m Bool -> m a -> m a +whenM mb mx = mb >>= \b -> if b then mx else return mempty + +optional_ :: Alternative f => f a -> f () +optional_ a = (() <$ a) <|> pure () + +someNE :: Alternative f => f a -> f (NonEmpty a) +someNE a = (:|) <$> a <*> many a -- cgit v1.2.3-70-g09d2