module Parser ( parseSourceFile, ) where import Control.Monad import Control.Monad.Reader import Data.Char import Data.Either import Data.List (intersperse) import qualified Data.Text.Lazy as T import Data.Text.Lazy (Text) import qualified Data.Text.Lazy.Builder as B import Data.Text.Lazy.Builder (Builder) import Text.Parsec import Pattern import SourceFile type Parser = ParsecT String () (Reader Pattern) type Parser' = Parsec String () parseSourceFile :: FilePath -> String -> Pattern -> Either ParseError SourceFile parseSourceFile fname source pat = runReader (runParserT pSourceFile () fname source) pat pSourceFile :: Parser SourceFile pSourceFile = do SourceFile . concat <$> many (pDataDef <|> pInstanceDef <|> pFunDef <|> skipLine) where skipLine :: Parser [Chunk] skipLine = fmap (pure . CSkip . build) $ (\s -> B.fromString (s ++ "\n")) <$> manyTill anyChar newline -- Parses a function definition on the current line. -- Assumes the cursor is at the start of a line, and ends at the start of a line again. -- Fails if no function definition can be found on this line. pFunDef :: Parser [Chunk] pFunDef = try $ do prefix <- build <$> combine [horSpaces ,option mempty (text "=" +++ horSpaces) ,option mempty (text "let" +++ horSpaces1) ,option mempty (text "where" +++ horSpaces1)] let indent = T.length prefix s1 <- combine [pIdentifier ,horSpaces ,text "::"] let limiter = lookAhead $ try $ do _ <- newline _ <- atmost (fromIntegral indent) (char ' ') _ <- satisfy (not . isSpace) return () typeText <- manyTill anyChar limiter s2 <- B.singleton <$> newline pat <- ask return (CSkip (prefix `T.append` build s1) : parseTypeText pat typeText ++ [CSkip (build s2)]) pDataDef :: Parser [Chunk] pDataDef = try $ do s1 <- text "data" let limiter = lookAhead $ try $ choice [eof ,do _ <- newline _ <- lookAhead (satisfy (not . isSpace)) notFollowedBy (string "--")] typeText <- manyTill anyChar limiter s2 <- (eof >> return mempty) <|> (B.singleton <$> newline) pat <- ask return (CSkip (build s1) : parseTypeText pat typeText ++ [CSkip (build s2)]) pInstanceDef :: Parser [Chunk] pInstanceDef = try $ do s1 <- text "instance" let limiter = lookAhead $ try $ choice [eof ,lookAhead $ try $ do _ <- satisfy prewordc _ <- string "where" eof <|> (satisfy prewordc >> return ())] typeText <- manyTill anyChar limiter pat <- ask return (CSkip (build s1) : parseTypeText pat typeText) parseTypeText :: Pattern -> String -> [Chunk] parseTypeText (Pattern patname _) inputText = fromRight [CSkip (T.pack inputText)] $ parse pTypeText "" inputText where pTypeText :: Parser' [Chunk] pTypeText = do s1 <- manyTill anyChar $ try (choice [eof ,lookAhead $ try $ do _ <- satisfy prewordc _ <- string (T.unpack patname) return ()]) choice [eof >> return [CSkip (T.pack s1)] ,do s2 <- B.singleton <$> satisfy prewordc sName <- build <$> text (T.unpack patname) -- Try to parse some arguments; if that fails wholesale, just ignore this occurrence choice [do args <- try (many pArgument) rest <- pTypeText return (CSkip (build (B.fromString s1 <> s2)) :COcc (Occurrence sName args) :rest) ,do rest <- pTypeText return (CSkip (build (B.fromString s1 <> s2)) :CSkip sName :rest)]] -- Returns preceding spaces and the argument itself. Requires non-zero spacing before. pArgument :: Parser' (Text, Text) pArgument = try $ do s1 <- build <$> horSpaces1 s2 <- build <$> pExprAtom return (s1, s2) pExprAtom :: Parser' Builder pExprAtom = pName <|> pParens where pExprApp :: Parser' Builder pExprApp = pExprAtom +++ (mconcat <$> many (try (horSpaces1 +++ pExprAtom))) pParens :: Parser' Builder pParens = try $ combine [text "(" ,mconcat . intersperse (B.singleton ',') <$> (horSpaces +++ pExprApp) `sepBy` char ',' ,horSpaces ,text ")"] prewordc :: Char -> Bool prewordc c = not (isAlpha c) && not (isDigit c) && c /= '_' pIdentifier :: Parser Builder pIdentifier = pOperator <|> pName where pOperator :: Parser Builder pOperator = try $ combine [text "(" ,do s <- many1 (satisfy (\c -> isSymbol c && c `notElem` "()")) guard (not (all (== '-') s)) return (B.fromString s) ,text ")"] pName :: Monad m => ParsecT String u m Builder pName = try $ do c1 <- satisfy (\c -> isAlpha c || c == '_') cs <- many $ satisfy (\c -> isAlpha c || isDigit c || c `elem` "_'") guard (not (reservedName (c1 : cs))) return (B.fromString (c1 : cs)) where reservedName :: String -> Bool reservedName s = s `elem` ["case", "class", "data", "default", "deriving", "do", "else" ,"foreign", "if", "import", "in", "infix", "infixl", "infixr" ,"instance", "let", "module", "newtype", "of", "then", "type" ,"where", "_"] horSpaces :: Monad m => ParsecT String u m Builder horSpaces = mconcat . map B.singleton <$> many (satisfy (== ' ')) horSpaces1 :: Monad m => ParsecT String u m Builder horSpaces1 = mconcat . map B.singleton <$> many1 (satisfy (== ' ')) text :: Monad m => String -> ParsecT String u m Builder text s = B.fromString <$> string s atmost :: Int -> ParsecT s u m a -> ParsecT s u m [a] atmost 0 _ = return [] atmost n p = ((:) <$> p <*> atmost (n-1) p) <|> return [] (+++) :: ParsecT s u m Builder -> ParsecT s u m Builder -> ParsecT s u m Builder p +++ q = (<>) <$> p <*> q combine :: (Monoid m, Applicative f) => [f m] -> f m combine = fmap mconcat . sequenceA build :: Builder -> Text build = B.toLazyText