diff options
author | Tom Smeding <tom@tomsmeding.com> | 2021-05-21 09:16:00 +0200 |
---|---|---|
committer | Tom Smeding <tom@tomsmeding.com> | 2021-05-21 09:16:00 +0200 |
commit | b03f51f3a363f861f9d5de30ec6a337fec316383 (patch) | |
tree | f07e009d6c2b45c188466ab7a52362fe98dcae39 /Parser.hs |
Initial
Diffstat (limited to 'Parser.hs')
-rw-r--r-- | Parser.hs | 182 |
1 files changed, 182 insertions, 0 deletions
diff --git a/Parser.hs b/Parser.hs new file mode 100644 index 0000000..afc5b15 --- /dev/null +++ b/Parser.hs @@ -0,0 +1,182 @@ +module Parser ( + parseSourceFile, +) where + +import Control.Monad +import Control.Monad.Reader +import Data.Char +import Data.Either +import Data.List (intersperse) +import qualified Data.Text.Lazy as T +import Data.Text.Lazy (Text) +import qualified Data.Text.Lazy.Builder as B +import Data.Text.Lazy.Builder (Builder) +import Text.Parsec + +import Pattern +import SourceFile + + +type Parser = ParsecT String () (Reader Pattern) +type Parser' = Parsec String () + +parseSourceFile :: FilePath -> String -> Pattern -> Either ParseError SourceFile +parseSourceFile fname source pat = runReader (runParserT pSourceFile () fname source) pat + +pSourceFile :: Parser SourceFile +pSourceFile = do + SourceFile . concat <$> many (pDataDef <|> pInstanceDef <|> pFunDef <|> skipLine) + where + skipLine :: Parser [Chunk] + skipLine = + fmap (pure . CSkip . build) $ + (\s -> B.fromString (s ++ "\n")) <$> manyTill anyChar newline + +-- Parses a function definition on the current line. +-- Assumes the cursor is at the start of a line, and ends at the start of a line again. +-- Fails if no function definition can be found on this line. +pFunDef :: Parser [Chunk] +pFunDef = try $ do + prefix <- build <$> combine + [horSpaces + ,option mempty (text "=" +++ horSpaces) + ,option mempty (text "let" +++ horSpaces1) + ,option mempty (text "where" +++ horSpaces1)] + let indent = T.length prefix + s1 <- combine + [pIdentifier + ,horSpaces + ,text "::"] + let limiter = lookAhead $ try $ do + _ <- newline + _ <- atmost (fromIntegral indent) (char ' ') + _ <- satisfy (not . isSpace) + return () + typeText <- manyTill anyChar limiter + s2 <- B.singleton <$> newline + pat <- ask + return (CSkip (prefix `T.append` build s1) : parseTypeText pat typeText ++ [CSkip (build s2)]) + +pDataDef :: Parser [Chunk] +pDataDef = try $ do + s1 <- text "data" + let limiter = lookAhead $ try $ + choice [eof + ,do _ <- newline + _ <- lookAhead (satisfy (not . isSpace)) + notFollowedBy (string "--")] + typeText <- manyTill anyChar limiter + s2 <- (eof >> return mempty) <|> (B.singleton <$> newline) + pat <- ask + return (CSkip (build s1) : parseTypeText pat typeText ++ [CSkip (build s2)]) + +pInstanceDef :: Parser [Chunk] +pInstanceDef = try $ do + s1 <- text "instance" + let limiter = lookAhead $ try $ + choice [eof + ,lookAhead $ try $ do + _ <- satisfy prewordc + _ <- string "where" + eof <|> (satisfy prewordc >> return ())] + typeText <- manyTill anyChar limiter + pat <- ask + return (CSkip (build s1) : parseTypeText pat typeText) + +parseTypeText :: Pattern -> String -> [Chunk] +parseTypeText (Pattern patname _) inputText = + fromRight [CSkip (T.pack inputText)] $ + parse pTypeText "" inputText + where + pTypeText :: Parser' [Chunk] + pTypeText = do + s1 <- manyTill anyChar $ + try (choice [eof + ,lookAhead $ try $ do + _ <- satisfy prewordc + _ <- string (T.unpack patname) + return ()]) + choice [eof >> return [CSkip (T.pack s1)] + ,do s2 <- B.singleton <$> satisfy prewordc + sName <- build <$> text (T.unpack patname) + -- Try to parse some arguments; if that fails wholesale, just ignore this occurrence + choice [do args <- try (many pArgument) + rest <- pTypeText + return (CSkip (build (B.fromString s1 <> s2)) + :COcc (Occurrence sName args) + :rest) + ,do rest <- pTypeText + return (CSkip (build (B.fromString s1 <> s2)) + :CSkip sName + :rest)]] + + -- Returns preceding spaces and the argument itself. Requires non-zero spacing before. + pArgument :: Parser' (Text, Text) + pArgument = try $ do + s1 <- build <$> horSpaces1 + s2 <- build <$> pExprAtom + return (s1, s2) + + pExprAtom :: Parser' Builder + pExprAtom = pName <|> pParens + where + pExprApp :: Parser' Builder + pExprApp = pExprAtom +++ (mconcat <$> many (try (horSpaces1 +++ pExprAtom))) + + pParens :: Parser' Builder + pParens = try $ + combine [text "(" + ,mconcat . intersperse (B.singleton ',') <$> + (horSpaces +++ pExprApp) `sepBy` char ',' + ,horSpaces + ,text ")"] + +prewordc :: Char -> Bool +prewordc c = not (isAlpha c) && not (isDigit c) && c /= '_' + +pIdentifier :: Parser Builder +pIdentifier = pOperator <|> pName + where + pOperator :: Parser Builder + pOperator = try $ combine + [text "(" + ,do s <- many1 (satisfy (\c -> isSymbol c && c `notElem` "()")) + guard (not (all (== '-') s)) + return (B.fromString s) + ,text ")"] + +pName :: Monad m => ParsecT String u m Builder +pName = try $ do + c1 <- satisfy (\c -> isAlpha c || c == '_') + cs <- many $ satisfy (\c -> isAlpha c || isDigit c || c `elem` "_'") + guard (not (reservedName (c1 : cs))) + return (B.fromString (c1 : cs)) + where + reservedName :: String -> Bool + reservedName s = s `elem` + ["case", "class", "data", "default", "deriving", "do", "else" + ,"foreign", "if", "import", "in", "infix", "infixl", "infixr" + ,"instance", "let", "module", "newtype", "of", "then", "type" + ,"where", "_"] + +horSpaces :: Monad m => ParsecT String u m Builder +horSpaces = mconcat . map B.singleton <$> many (satisfy (== ' ')) + +horSpaces1 :: Monad m => ParsecT String u m Builder +horSpaces1 = mconcat . map B.singleton <$> many1 (satisfy (== ' ')) + +text :: Monad m => String -> ParsecT String u m Builder +text s = B.fromString <$> string s + +atmost :: Int -> ParsecT s u m a -> ParsecT s u m [a] +atmost 0 _ = return [] +atmost n p = ((:) <$> p <*> atmost (n-1) p) <|> return [] + +(+++) :: ParsecT s u m Builder -> ParsecT s u m Builder -> ParsecT s u m Builder +p +++ q = (<>) <$> p <*> q + +combine :: (Monoid m, Applicative f) => [f m] -> f m +combine = fmap mconcat . sequenceA + +build :: Builder -> Text +build = B.toLazyText |