{-# LANGUAGE LambdaCase #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE TupleSections #-} module Parser where import Control.Applicative import Control.Monad.Chronicle import Control.Monad.Reader import Control.Monad.State.Lazy import Data.Bifunctor (first) import Data.Char import Data.These import Data.Tuple (swap) import Debug.Trace import AST -- Positions are zero-based in both dimensions. -- See 'isInsideBlock' and 'isAtBlockLeft' for the two relevant "inside the -- block" conditions. data PS = PS { psBlkLine :: Int -- ^ Start line of current layout block , psBlkCol :: Int -- ^ Start column of current layout block , psLine :: Int -- ^ Current line , psCol :: Int -- ^ Current column , psRest :: String -- ^ Rest of the input } deriving (Show) data Context = Context { ctxFile :: FilePath , ctxStack :: [String] -- ^ Stack of syntax scopes, for error reporting } deriving (Show) -- ReaderT Context (ChronicleT [ErrMsg] (State PS) a) -- Context -> ChronicleT [ErrMsg] (State PS) a -- Context -> State PS (These [ErrMsg] a) -- Context -> PS -> Identity (These [ErrMsg] a, PS) -- Context -> PS -> (These [ErrMsg] a, PS) -- whereas I want: -- Context -> PS -> These [ErrMsg] (a, PS) -- which is not any transformer stack, but a new monad. newtype Parser a = Parser { runParser :: Context -> PS -> These [ErrMsg] (PS, a) } instance Functor Parser where fmap f (Parser g) = Parser (\ctx ps -> fmap (fmap f) (g ctx ps)) instance Applicative Parser where pure x = Parser (\_ ps -> That (ps, x)) (<*>) = ap instance Monad Parser where Parser g >>= f = Parser $ \ctx ps -> case g ctx ps of This errs -> This errs That (ps', x) -> runParser (f x) ctx ps' These errs (ps', x) -> case runParser (f x) ctx ps' of This errs' -> This (errs <> errs') That res -> These errs res These errs' res -> These (errs <> errs') res instance Alternative Parser where empty = Parser (\_ _ -> This mempty) Parser f <|> Parser g = Parser $ \ctx ps -> case f ctx ps of This _ -> g ctx ps success -> success instance MonadState PS Parser where state f = Parser $ \_ ps -> That (swap (f ps)) instance MonadReader Context Parser where reader f = Parser $ \ctx ps -> That (ps, f ctx) local f (Parser g) = Parser (g . f) instance MonadChronicle [ErrMsg] Parser where dictate errs = Parser (\_ ps -> These errs (ps, ())) confess errs = Parser (\_ _ -> This errs) memento (Parser f) = Parser (\ctx ps -> case f ctx ps of This errs -> That (ps, Left errs) That res -> That (Right <$> res) These errs res -> These errs (Right <$> res)) absolve def (Parser f) = Parser (\ctx ps -> case f ctx ps of This _ -> That (ps, def) success -> success) condemn (Parser f) = Parser (\ctx ps -> case f ctx ps of These errs _ -> This errs res -> res) retcon g (Parser f) = Parser (\ctx ps -> first g (f ctx ps)) chronicle th = Parser (\_ ps -> (ps,) <$> th) -- Positions are zero-based in both dimensions data ErrMsg = ErrMsg { errFile :: FilePath , errStk :: [String] , errLine :: Int , errCol :: Int , errMsg :: String } deriving (Show) printErrMsg :: ErrMsg -> String printErrMsg (ErrMsg fp stk y x s) = unlines (map (\descr -> "In " ++ descr ++ ":") (reverse stk)) ++ fp ++ ":" ++ show (y + 1) ++ ":" ++ show (x + 1) ++ ": " ++ s parse :: FilePath -> String -> These [ErrMsg] (Program ()) parse fp source = fmap snd $ runParser pProgram (Context fp []) (PS 0 0 0 0 source) pProgram :: Parser (Program ()) pProgram = do prog <- Program <$> many pFunDef skipWhiteComment assertEOF Error return prog pFunDef :: Parser (FunDef ()) pFunDef = do skipWhiteComment mtypesig <- optional pStandaloneTypesig0 let mname = fst <$> mtypesig mtype = snd <$> mtypesig (clauses, name) <- someClauses mname return (FunDef name mtype clauses) where someClauses :: Maybe Name -> Parser ([FunEq ()], Name) someClauses Nothing = do clause@(FunEq name _ _) <- pFunEq Nothing (,name) . (clause:) <$> many (pFunEq (Just name)) someClauses (Just name) = (,name) <$> some (pFunEq (Just name)) -- | Given the name of the type signature, if any. pFunEq :: Maybe Name -> Parser (FunEq ()) pFunEq mCheckName = do skipWhiteComment assertAtBlockLeft Fatal "Expected function clause, found indented stuff" name <- pIdentifier0 Lowercase case mCheckName of Just checkName | name /= checkName -> raise Fatal "Name of function clause does not correspond with type signature" _ -> return () pats <- many (pPattern 11) rhs <- pRHS return (FunEq name pats rhs) pRHS :: Parser (RHS ()) pRHS = do -- TODO: parse guards inlineWhite string "=" Plain <$> pExpr pPattern :: Int -> Parser (Pattern ()) pPattern d = inlineWhite >> pPattern0 d pPattern0 :: Int -> Parser (Pattern ()) pPattern0 d = do asum [pPatWildcard0 ,pPatVarOrAs0 ,pPatCon0 ,pPatList0 ,pPatParens0] where pPatWildcard0 = string "_" >> return (PWildcard ()) pPatVarOrAs0 = do var <- pIdentifier0 Lowercase asum [do inlineWhite string "@" p <- pPattern 11 return (PAs () var p) ,return (PVar () var)] pPatCon0 = do con <- pIdentifier0 Uppercase if d > 0 then return (PCon () con []) else do args <- many (pPattern 11) return (PCon () con args) pPatList0 = do string "[" ps <- pPattern 0 `sepBy` (inlineWhite >> string ",") inlineWhite string "]" return (PList () ps) pPatParens0 = do string "(" inlineWhite asum [do string ")" return (PTup () []) ,do p <- pPattern0 0 inlineWhite asum [do string ")" return p ,do string "," ps <- pPattern 0 `sepBy1` (inlineWhite >> string ",") return (PTup () (p : ps))]] pExpr :: Parser (Expr ()) pExpr = do inlineWhite -- basics: lit, list, tup -- expression atom: application of basics -- expression parser: op -- around: let, case, if asum [pELet0 ,pECase0 ,pEIf0 ,pExprOpExpr0 0] pExprOpExpr :: Int -> Parser (Expr ()) pExprOpExpr d = inlineWhite >> pExprOpExpr0 d pExprOpExpr0 :: Int -> Parser (Expr ()) pExprOpExpr0 d = do e0 <- pEApp0 climbRight e0 Nothing where climbRight :: Expr () -> Maybe ParsedOperator -> Parser (Expr ()) climbRight lhs mlhsop = do asum [do paop@(PaOp op d2 a2) <- pInfixOp guard (d2 >= d) -- respect global minimum precedence case mlhsop of -- check operator compatibility Just (PaOp _ d1 a1) -> guard (d1 > d2 || (d1 == d2 && a1 == a2 && a1 /= AssocNone)) Nothing -> return () let oprhsd = case a2 of AssocRight -> d2 ; _ -> d2 + 1 rhs <- pExprOpExpr oprhsd climbRight (EOp () lhs op rhs) (Just paop)] pEApp0 :: Parser (Expr ()) pEApp0 = do e1 <- pEAtom0 es <- many (inlineWhite >> pEAtom0) case es of [] -> return e1 _ -> return (EApp () e1 es) pEAtom0 :: Parser (Expr ()) pEAtom0 = pELit <|> pEList <|> pEParens data Associativity = AssocLeft | AssocRight | AssocNone deriving (Show, Eq) data ParsedOperator = PaOp Operator Int Associativity deriving (Show) pInfixOp :: Parser ParsedOperator pInfixOp = do inlineWhite asum [PaOp OEqu 4 AssocNone <$ string "==" ,PaOp OAdd 6 AssocLeft <$ string "+" ,PaOp OSub 6 AssocLeft <$ string "-" ,PaOp OMul 7 AssocLeft <$ string "*" ,PaOp ODiv 7 AssocLeft <$ string "/" ,PaOp OMod 7 AssocLeft <$ string "%" ,PaOp OPow 8 AssocRight <$ string "^" ] pStandaloneTypesig0 :: Parser (Name, Type) pStandaloneTypesig0 = do assertAtBlockLeft Fatal "Expected top-level definition, found indented stuff" name@(Name namestr) <- pIdentifier0 Lowercase inlineWhite string "::" pushContext ("type signature for '" ++ namestr ++ "'") $ do ty <- pType return (name, ty) pType :: Parser Type pType = do ty1 <- pTypeApp asum [do inlineWhite string "->" ty2 <- pType return (TFun ty1 ty2) ,return ty1] pTypeApp :: Parser Type pTypeApp = many pTypeAtom >>= \case [] -> raise Error "Expected type" >> return (TTup []) [t] -> return t t:ts -> return (TApp t ts) pTypeAtom :: Parser Type pTypeAtom = pTypeParens <|> pTypeList <|> pTypeCon <|> pTypeVar where pTypeParens = do inlineWhite string "(" asum [do inlineWhite string ")" return (TTup []) ,do ty1 <- pType ty2s <- many $ do inlineWhite string "," pType inlineWhite string ")" case ty2s of [] -> return ty1 _ -> return (TTup (ty1 : ty2s))] pTypeList = do inlineWhite string "[" ty <- pType string "]" return (TList ty) pTypeCon = inlineWhite >> TCon <$> pIdentifier0 Uppercase pTypeVar = inlineWhite >> TVar <$> pIdentifier0 Lowercase data Case = Uppercase | Lowercase deriving (Show) -- | Consumes an identifier (word or parenthesised operator) at the current -- position. The `var` production in Haskell2010. -- var -> varid | "(" varsym ")" pIdentifier0 :: Case -> Parser Name pIdentifier0 cs = pAlphaName0 cs <|> pParens0 (pSymbol0 cs) -- | Consumes a word-like name at the current position with the given case. The -- `varid` production in Haskell2010 for 'Lowercase', `conid' for 'Uppercase'. -- -- > varid -> (small {small | large | digit | "'"}) without reservedid pAlphaName0 :: Case -> Parser Name pAlphaName0 cs = do (_, s) <- readInline (\case True -> \case Just c | isInitNameChar c -> Just (Right False) _ -> Nothing False -> \case Just c | isNameContChar c -> Just (Right False) _ -> Just (Left ())) True name <- case cs of Uppercase | isLower (head s) -> do raise Error "Unexpected uppercase word at this position, assuming typo" return (toUpper (head s) : tail s) Lowercase | isUpper (head s) -> do raise Error "Unexpected lowercase word at this position, assuming typo" return (toLower (head s) : tail s) _ -> return s guard (name `notElem` ["case", "class", "data", "default", "deriving", "do", "else" ,"foreign", "if", "import", "in", "infix", "infixl" ,"infixr", "instance", "let", "module", "newtype", "of" ,"then", "type", "where", "_"]) return (Name name) where isInitNameChar, isNameContChar :: Char -> Bool isInitNameChar c = isLetter c || c == '_' isNameContChar c = isInitNameChar c || isDigit c || c == '\'' -- | Consumes a symbol at the current position. The `varsym` production in -- Haskell2010 for 'Lowercase', `consym` otherwise. -- -- > varsym -> ((symbol without ":") {symbol}) without (reservedop | dashes) -- > consym -> (":" {symbol}) without reservedop -- > symbol -> ascSymbol | uniSymbol without (special | "_" | "\"" | "'") -- > ascSymbol -> ```!#$%&⋆+./<=>?@^|-~:``` -- > uniSymbol -> unicode symbol or punctuation -- > dashes -> "--" {"-"} -- > special -> ```(),;[]`{}``` -- > reservedop -> ".." | ":" | "::" | "=" | "\" | "|" | "<-" | "->" | "@" | "~" | "=>" pSymbol0 :: Case -> Parser Name pSymbol0 cs = do let isSpecialExt c = c `elem` "(),;[]`{}_\"'" isAscSymbol c = c `elem` "!#$%&⋆+./<=>?@^|-~:" isUniSymbol c = isSymbol c || isPunctuation c isSymbolChar c = (isAscSymbol c || isUniSymbol c) && not (isSpecialExt c) name <- (:) <$> (case cs of Lowercase -> satisfy (\c -> isSymbolChar c && c /= ':') Uppercase -> satisfy (== ':')) <*> many (satisfy isSymbolChar) guard (name `notElem` ["..", ":", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"]) guard (take 2 name /= "--") return (Name name) -- | Parser between parens, with the opening paren at the current position. -- Enforces that all components are within the current indented block. pParens0 :: Parser a -> Parser a pParens0 p = do string "(" skipWhiteComment assertInsideBlock Error "Unexpected dedent after opening parenthesis" res <- p assertInsideBlock Error "Unexpected dedent in parenthesised expression" skipWhiteComment assertInsideBlock Error "Unexpected dedent in parenthesised expression" string ")" return res sepBy1 :: Parser a -> Parser sep -> Parser [a] sepBy1 p psep = do x1 <- p (psep >> (x1 :) <$> sepBy1 p psep) <|> return [x1] sepBy :: Parser a -> Parser sep -> Parser [a] sepBy p psep = sepBy1 p psep <|> return [] -- | Start a new layout block at the current position. The old layout block is -- restored after completion of this subparser. startLayoutBlock :: Parser a -> Parser a startLayoutBlock p = do ps0 <- get put (ps0 { psBlkLine = psLine ps0 , psBlkCol = psCol ps0 }) res <- p modify (\ps -> ps { psBlkLine = psBlkLine ps0 , psBlkCol = psBlkCol ps0 }) return res data Fatality = Error | Fatal deriving (Show) -- | Raise an error with the given fatality and description. raise :: Fatality -> String -> Parser () raise fat msg = do Context { ctxFile = fp , ctxStack = stk } <- ask PS { psLine = line, psCol = col } <- get let fun = case fat of Error -> dictate . pure Fatal -> confess . pure fun (ErrMsg fp stk line col msg) -- | Registers a scope description on the stack for error reporting. pushContext :: String -> Parser a -> Parser a pushContext descr = local (\c -> c { ctxStack = descr : ctxStack c }) -- | Consumes an inline token at the current position, asserting that we are -- within the current block at the start. The token is defined by a pure -- stateful parser. If encountering a newline or EOF, the parser is run on this -- character ('Nothing' for EOF); if this produces a result, the result is -- returned; otherwise, the parser fails. The newline is not consumed. readInline :: (s -> Maybe Char -> Maybe (Either r s)) -> s -> Parser (r, String) readInline f s0 = do assertInsideBlock Fatal "Expected stuff, but found end of indented expression" let loop :: (s -> Maybe Char -> Maybe (Either r s)) -> s -> Parser (r, String) loop f' st = do ps <- get case psRest ps of [] | Just (Left res) <- f' st Nothing -> return (res, "") | otherwise -> empty '\n' : _ | Just (Left res) <- f' st (Just '\n') -> return (res, "") c : cs -> case f' st (Just c) of Nothing -> empty Just (Left res) -> return (res, "") Just (Right st') -> do put (ps { psCol = psCol ps + 1, psRest = cs }) fmap (c :) <$> loop f' st' loop f s0 -- | Consumes all whitespace and comments (including newlines), but only if -- this then leaves the parser inside the current block. If not, this fails. inlineWhite :: Parser () inlineWhite = do skipWhiteComment whenM (not <$> isInsideBlock) empty -- | Consumes all whitespace and comments (including newlines). Note: this may -- end outside the current block. skipWhiteComment :: Parser () skipWhiteComment = do inlineSpaces _ <- many (blockComment >> inlineSpaces) optional_ lineComment optional_ (consumeNewline >> skipWhiteComment) -- | Consumes some inline whitespace. Stops before newlines. inlineSpaces :: Parser () inlineSpaces = readWhileInline isSpace -- | Consumes an delimited comment including both end markers. Note: this may -- end outside the current block. blockComment :: Parser () blockComment = do string "{-" let loop = do readWhileInline (`notElem` "{-") -- "-}" also starts with '-' asum [string "-}" ,eof >> raise Error "Unfinished {- -} comment at end of file" ,blockComment >> loop ,consumeNewline >> loop] loop -- | Consumes a line comment marker and the rest of the line, excluding -- newline. lineComment :: Parser () lineComment = string "--" >> readWhileInline (const True) -- | Raises an error if we're not currently at the given column. assertAtBlockLeft :: Fatality -> String -> Parser () assertAtBlockLeft fat msg = whenM (not <$> isAtBlockLeft) $ raise fat msg -- | Raises an error if psCol is not greater than psRefCol. assertInsideBlock :: Fatality -> String -> Parser () assertInsideBlock fat msg = whenM (not <$> isInsideBlock) $ raise fat msg -- | Raises an error if we're not currently at EOF. assertEOF :: Fatality -> Parser () assertEOF fat = gets psRest >>= \case [] -> return () _ -> raise fat "Unexpected stuff" -- | Returns whether the current position is _within_ the current block, for -- soft-wrapping content. This means that col > blkCol. isInsideBlock :: Parser Bool isInsideBlock = do ps <- get return $ psLine ps >= psBlkLine ps && psCol ps > psBlkCol ps -- | Returns whether the current position is at the left border of the block; -- this is for list content such as function definitions or let bindings. This -- means that col == blkCol. isAtBlockLeft :: Parser Bool isAtBlockLeft = do ps <- get return $ psLine ps >= psBlkLine ps && psCol ps == psBlkCol ps -- | Consumes characters while the predicate holds or until (and excluding) -- a newline, whichever comes first. readWhileInline :: (Char -> Bool) -> Parser () readWhileInline p = do (taken, rest) <- span (\c -> p c && c /= '\n') <$> gets psRest modify (\ps -> ps { psCol = psCol ps + length taken , psRest = rest }) -- | Consumes exactly one newline at the current position. consumeNewline :: Parser () consumeNewline = gets psRest >>= \case '\n' : rest -> modify (\ps -> ps { psLine = psLine ps + 1 , psCol = 0 , psRest = rest }) _ -> empty -- | Consumes exactly one character, unequal to newline, at the current position. satisfy :: (Char -> Bool) -> Parser Char satisfy p = do traceM "entering satisfy" r <- gets psRest traceM "got rest" r `seq` return () traceM "seqd rest" traceM ("rest is " ++ r) case r of c : rest | c /= '\n', p c -> do modify (\ps -> ps { psCol = psCol ps + 1 , psRest = rest }) return c _ -> empty -- | Consumes exactly this string at the current position. The string must not -- contain a newline. string :: String -> Parser () string s | any (== '\n') s = error "Newline in 'string' argument" string s = do ps <- get if take (length s) (psRest ps) == s then put (ps { psCol = psCol ps + length s , psRest = drop (length s) (psRest ps) }) else empty -- | Only succeeds at EOF. eof :: Parser () eof = gets psRest >>= \case [] -> return () _ -> empty whenM :: (Monad m, Monoid a) => m Bool -> m a -> m a whenM mb mx = mb >>= \b -> if b then mx else return mempty optional_ :: Alternative f => f a -> f () optional_ a = (() <$ a) <|> pure ()