module CC.Parser(runPass, parseProgram) where import Control.Monad import qualified Data.Set as Set import Text.Parsec hiding (SourcePos, getPosition, token) import qualified Text.Parsec import CC.AST.Source import CC.Context import CC.Pretty type Parser a = Parsec String () a runPass :: Context -> RawString -> Either (PrettyShow ParseError) Program runPass (Context path (Builtins _ prelude)) (RawString src) = do prog1 <- fmapLeft PrettyShow (parseProgram "" prelude) prog2 <- fmapLeft PrettyShow (parseProgram path src) return (prog1 <> prog2) where fmapLeft f (Left x) = Left (f x) fmapLeft _ (Right x) = Right x parseProgram :: FilePath -> String -> Either ParseError Program parseProgram fname src = parse pProgram fname src pProgram :: Parser Program pProgram = do prog <- Program <$> many pDecl emptyLines eof return prog pDecl :: Parser Decl pDecl = choice [ DeclType <$> pDeclType , DeclAlias <$> pDeclAlias , DeclFunc <$> pDeclFunc ] pDeclFunc :: Parser FuncDef pDeclFunc = do func <- try $ do emptyLines name <- pName0 LowerCase "declaration head name" return name mtyp <- optionMaybe $ do symbol "::" typ <- pType whitespace >> void newline emptyLines (func', _) <- pName0 LowerCase guard (fst func == func') return typ args <- many (pName LowerCase) symbol "=" expr <- pExpr return (FuncDef mtyp func args expr) pDeclType :: Parser TypeDef pDeclType = (\(n, a, t) -> TypeDef n a t) <$> pTypedefLike "type" pDeclAlias :: Parser AliasDef pDeclAlias = (\(n, a, t) -> AliasDef n a t) <$> pTypedefLike "alias" pTypedefLike :: String -> Parser ((Name, SourceRange), [(Name, SourceRange)], Type) pTypedefLike keyword = do try (emptyLines >> string keyword >> whitespace1) name <- pName0 UpperCase args <- many (pName LowerCase) symbol "=" ty <- pType return (name, args, ty) pType :: Parser Type pType = chainr1 pTypeTerm (symbol "->" >> return TFun) pTypeTerm :: Parser Type pTypeTerm = pTypeAtom <|> pTypeCall pTypeAtom :: Parser Type pTypeAtom = choice [ wordToken "Int" >> return TInt , TyVar . fst <$> pName LowerCase , pParenType , pUnionType ] pTypeCall :: Parser Type pTypeCall = do (constr, _) <- pName UpperCase args <- many pTypeAtom return (TNamed constr args) pParenType :: Parser Type pParenType = do token "(" tys <- pType `sepBy` token "," token ")" case tys of [ty] -> return ty _ -> return (TTup tys) pUnionType :: Parser Type pUnionType = do token "{" tys <- pType `sepBy` token "|" token "}" case tys of [] -> unexpected "empty union type" [ty] -> return ty _ -> let tyset = Set.fromList tys in if Set.size tyset == length tys then return (TUnion tyset) else unexpected "duplicate types in union" pExpr :: Parser Expr pExpr = label (pLam <|> pLet <|> pCall) "expression" where pCall = do atoms <- many1 pExprAtom annot <- optionMaybe (do symbol "::" p1 <- getPosition ty <- pType p2 <- getPosition return (ty, SourceRange p1 p2)) let call = foldl1 (\a b -> Call (mergeRange (range a) (range b)) a b) atoms case annot of Just (ty, sr) -> return (Annot (mergeRange (range call) sr) call ty) Nothing -> return call pLam = do p1 <- try $ do whitespace p <- getPosition void (char '\\') return p names <- many1 (pName LowerCase) symbol "->" body <- pExpr p2 <- getPosition return (Lam (SourceRange p1 p2) names body) pLet = do p1 <- try $ do whitespace p <- getPosition void (string "let") return p afterKeyword p1 where afterKeyword p1 = do whitespace1 lhs <- pName0 LowerCase symbol "=" rhs <- pExpr let fullRange rest = mergeRange (SourceRange p1 p1) (range rest) choice [ do p1' <- try $ do whitespace p1' <- getPosition void (string "let") return p1' rest <- afterKeyword p1' return (Let (fullRange rest) lhs rhs rest) , do symbol "in" body <- pExpr return (Let (fullRange body) lhs rhs body) ] pExprAtom :: Parser Expr pExprAtom = choice [ uncurry (flip Int) <$> pInt , uncurry (flip Var) <$> pName LowerCase , uncurry (flip Constr) <$> pName UpperCase , pParenExpr ] pParenExpr :: Parser Expr pParenExpr = do p1 <- getPosition token "(" exprs <- pExpr `sepBy` token "," token ")" p2 <- getPosition case exprs of [expr] -> return expr _ -> return (Tup (SourceRange p1 p2) exprs) pInt :: Parser (Int, SourceRange) pInt = try (whitespace >> pInt0) where pInt0 = do p1 <- getPosition num <- read <$> many1 digit p2 <- getPosition return (num, SourceRange p1 p2) data Case = LowerCase | UpperCase pName0 :: Case -> Parser (Name, SourceRange) pName0 vcase = do p1 <- getPosition s <- try $ do c <- pWordFirstChar vcase cs <- many pWordMidChar let s = c : cs guard (s `notElem` ["let", "in"]) return s p2 <- getPosition notFollowedBy pWordMidChar return (s, SourceRange p1 p2) pWordFirstChar :: Case -> Parser Char pWordFirstChar LowerCase = lower <|> oneOf wordSymbols pWordFirstChar UpperCase = upper <|> oneOf wordSymbols pWordMidChar :: Parser Char pWordMidChar = alphaNum <|> oneOf wordSymbols wordSymbols :: [Char] wordSymbols = "_$#!" pName :: Case -> Parser (Name, SourceRange) pName vcase = try (whitespace >> pName0 vcase) symbol :: String -> Parser () symbol s = token s >> (eof <|> void space <|> void (oneOf "(){}[]")) wordToken :: String -> Parser () wordToken s = token s >> notFollowedBy pWordMidChar token :: String -> Parser () token s = try (whitespace >> void (string s)) emptyLines :: Parser () emptyLines = (try (whitespace >> newline) >> emptyLines) <|> try (whitespace >> eof) <|> return () whitespace, whitespace1 :: Parser () whitespace = void (many (void (char ' ') <|> void (try (string "\n ")))) whitespace1 = void (many1 (void (char ' ') <|> void (try (string "\n ")))) getPosition :: Parser SourcePos getPosition = do pos <- Text.Parsec.getPosition return (SourcePos (sourceLine pos - 1) (sourceColumn pos - 1))