summaryrefslogtreecommitdiff
path: root/Parser.hs
diff options
context:
space:
mode:
authorTom Smeding <tom@tomsmeding.com>2021-05-21 09:16:00 +0200
committerTom Smeding <tom@tomsmeding.com>2021-05-21 09:16:00 +0200
commitb03f51f3a363f861f9d5de30ec6a337fec316383 (patch)
treef07e009d6c2b45c188466ab7a52362fe98dcae39 /Parser.hs
Initial
Diffstat (limited to 'Parser.hs')
-rw-r--r--Parser.hs182
1 files changed, 182 insertions, 0 deletions
diff --git a/Parser.hs b/Parser.hs
new file mode 100644
index 0000000..afc5b15
--- /dev/null
+++ b/Parser.hs
@@ -0,0 +1,182 @@
+module Parser (
+ parseSourceFile,
+) where
+
+import Control.Monad
+import Control.Monad.Reader
+import Data.Char
+import Data.Either
+import Data.List (intersperse)
+import qualified Data.Text.Lazy as T
+import Data.Text.Lazy (Text)
+import qualified Data.Text.Lazy.Builder as B
+import Data.Text.Lazy.Builder (Builder)
+import Text.Parsec
+
+import Pattern
+import SourceFile
+
+
+type Parser = ParsecT String () (Reader Pattern)
+type Parser' = Parsec String ()
+
+parseSourceFile :: FilePath -> String -> Pattern -> Either ParseError SourceFile
+parseSourceFile fname source pat = runReader (runParserT pSourceFile () fname source) pat
+
+pSourceFile :: Parser SourceFile
+pSourceFile = do
+ SourceFile . concat <$> many (pDataDef <|> pInstanceDef <|> pFunDef <|> skipLine)
+ where
+ skipLine :: Parser [Chunk]
+ skipLine =
+ fmap (pure . CSkip . build) $
+ (\s -> B.fromString (s ++ "\n")) <$> manyTill anyChar newline
+
+-- Parses a function definition on the current line.
+-- Assumes the cursor is at the start of a line, and ends at the start of a line again.
+-- Fails if no function definition can be found on this line.
+pFunDef :: Parser [Chunk]
+pFunDef = try $ do
+ prefix <- build <$> combine
+ [horSpaces
+ ,option mempty (text "=" +++ horSpaces)
+ ,option mempty (text "let" +++ horSpaces1)
+ ,option mempty (text "where" +++ horSpaces1)]
+ let indent = T.length prefix
+ s1 <- combine
+ [pIdentifier
+ ,horSpaces
+ ,text "::"]
+ let limiter = lookAhead $ try $ do
+ _ <- newline
+ _ <- atmost (fromIntegral indent) (char ' ')
+ _ <- satisfy (not . isSpace)
+ return ()
+ typeText <- manyTill anyChar limiter
+ s2 <- B.singleton <$> newline
+ pat <- ask
+ return (CSkip (prefix `T.append` build s1) : parseTypeText pat typeText ++ [CSkip (build s2)])
+
+pDataDef :: Parser [Chunk]
+pDataDef = try $ do
+ s1 <- text "data"
+ let limiter = lookAhead $ try $
+ choice [eof
+ ,do _ <- newline
+ _ <- lookAhead (satisfy (not . isSpace))
+ notFollowedBy (string "--")]
+ typeText <- manyTill anyChar limiter
+ s2 <- (eof >> return mempty) <|> (B.singleton <$> newline)
+ pat <- ask
+ return (CSkip (build s1) : parseTypeText pat typeText ++ [CSkip (build s2)])
+
+pInstanceDef :: Parser [Chunk]
+pInstanceDef = try $ do
+ s1 <- text "instance"
+ let limiter = lookAhead $ try $
+ choice [eof
+ ,lookAhead $ try $ do
+ _ <- satisfy prewordc
+ _ <- string "where"
+ eof <|> (satisfy prewordc >> return ())]
+ typeText <- manyTill anyChar limiter
+ pat <- ask
+ return (CSkip (build s1) : parseTypeText pat typeText)
+
+parseTypeText :: Pattern -> String -> [Chunk]
+parseTypeText (Pattern patname _) inputText =
+ fromRight [CSkip (T.pack inputText)] $
+ parse pTypeText "" inputText
+ where
+ pTypeText :: Parser' [Chunk]
+ pTypeText = do
+ s1 <- manyTill anyChar $
+ try (choice [eof
+ ,lookAhead $ try $ do
+ _ <- satisfy prewordc
+ _ <- string (T.unpack patname)
+ return ()])
+ choice [eof >> return [CSkip (T.pack s1)]
+ ,do s2 <- B.singleton <$> satisfy prewordc
+ sName <- build <$> text (T.unpack patname)
+ -- Try to parse some arguments; if that fails wholesale, just ignore this occurrence
+ choice [do args <- try (many pArgument)
+ rest <- pTypeText
+ return (CSkip (build (B.fromString s1 <> s2))
+ :COcc (Occurrence sName args)
+ :rest)
+ ,do rest <- pTypeText
+ return (CSkip (build (B.fromString s1 <> s2))
+ :CSkip sName
+ :rest)]]
+
+ -- Returns preceding spaces and the argument itself. Requires non-zero spacing before.
+ pArgument :: Parser' (Text, Text)
+ pArgument = try $ do
+ s1 <- build <$> horSpaces1
+ s2 <- build <$> pExprAtom
+ return (s1, s2)
+
+ pExprAtom :: Parser' Builder
+ pExprAtom = pName <|> pParens
+ where
+ pExprApp :: Parser' Builder
+ pExprApp = pExprAtom +++ (mconcat <$> many (try (horSpaces1 +++ pExprAtom)))
+
+ pParens :: Parser' Builder
+ pParens = try $
+ combine [text "("
+ ,mconcat . intersperse (B.singleton ',') <$>
+ (horSpaces +++ pExprApp) `sepBy` char ','
+ ,horSpaces
+ ,text ")"]
+
+prewordc :: Char -> Bool
+prewordc c = not (isAlpha c) && not (isDigit c) && c /= '_'
+
+pIdentifier :: Parser Builder
+pIdentifier = pOperator <|> pName
+ where
+ pOperator :: Parser Builder
+ pOperator = try $ combine
+ [text "("
+ ,do s <- many1 (satisfy (\c -> isSymbol c && c `notElem` "()"))
+ guard (not (all (== '-') s))
+ return (B.fromString s)
+ ,text ")"]
+
+pName :: Monad m => ParsecT String u m Builder
+pName = try $ do
+ c1 <- satisfy (\c -> isAlpha c || c == '_')
+ cs <- many $ satisfy (\c -> isAlpha c || isDigit c || c `elem` "_'")
+ guard (not (reservedName (c1 : cs)))
+ return (B.fromString (c1 : cs))
+ where
+ reservedName :: String -> Bool
+ reservedName s = s `elem`
+ ["case", "class", "data", "default", "deriving", "do", "else"
+ ,"foreign", "if", "import", "in", "infix", "infixl", "infixr"
+ ,"instance", "let", "module", "newtype", "of", "then", "type"
+ ,"where", "_"]
+
+horSpaces :: Monad m => ParsecT String u m Builder
+horSpaces = mconcat . map B.singleton <$> many (satisfy (== ' '))
+
+horSpaces1 :: Monad m => ParsecT String u m Builder
+horSpaces1 = mconcat . map B.singleton <$> many1 (satisfy (== ' '))
+
+text :: Monad m => String -> ParsecT String u m Builder
+text s = B.fromString <$> string s
+
+atmost :: Int -> ParsecT s u m a -> ParsecT s u m [a]
+atmost 0 _ = return []
+atmost n p = ((:) <$> p <*> atmost (n-1) p) <|> return []
+
+(+++) :: ParsecT s u m Builder -> ParsecT s u m Builder -> ParsecT s u m Builder
+p +++ q = (<>) <$> p <*> q
+
+combine :: (Monoid m, Applicative f) => [f m] -> f m
+combine = fmap mconcat . sequenceA
+
+build :: Builder -> Text
+build = B.toLazyText