1 files changed, 182 insertions, 0 deletions
diff --git a/Parser.hs b/Parser.hs
new file mode 100644
index 0000000..afc5b15
--- /dev/null
+++ b/Parser.hs
@@ -0,0 +1,182 @@
+module Parser (
+    parseSourceFile,
+) where
+
+import Control.Monad
+import Control.Monad.Reader
+import Data.Char
+import Data.Either
+import Data.List (intersperse)
+import qualified Data.Text.Lazy as T
+import Data.Text.Lazy (Text)
+import qualified Data.Text.Lazy.Builder as B
+import Data.Text.Lazy.Builder (Builder)
+import Text.Parsec
+
+import Pattern
+import SourceFile
+
+
+type Parser = ParsecT String () (Reader Pattern)
+type Parser' = Parsec String ()
+
+parseSourceFile :: FilePath -> String -> Pattern -> Either ParseError SourceFile
+parseSourceFile fname source pat = runReader (runParserT pSourceFile () fname source) pat
+
+pSourceFile :: Parser SourceFile
+pSourceFile = do
+    SourceFile . concat <$> many (pDataDef <|> pInstanceDef <|> pFunDef <|> skipLine)
+  where
+    skipLine :: Parser [Chunk]
+    skipLine =
+        fmap (pure . CSkip . build) $
+            (\s -> B.fromString (s ++ "\n")) <$> manyTill anyChar newline
+
+-- Parses a function definition on the current line.
+-- Assumes the cursor is at the start of a line, and ends at the start of a line again.
+-- Fails if no function definition can be found on this line.
+pFunDef :: Parser [Chunk]
+pFunDef = try $ do
+    prefix <- build <$> combine
+            [horSpaces
+            ,option mempty (text "=" +++ horSpaces)
+            ,option mempty (text "let" +++ horSpaces1)
+            ,option mempty (text "where" +++ horSpaces1)]
+    let indent = T.length prefix
+    s1 <- combine
+            [pIdentifier
+            ,horSpaces
+            ,text "::"]
+    let limiter = lookAhead $ try $ do
+            _ <- newline
+            _ <- atmost (fromIntegral indent) (char ' ')
+            _ <- satisfy (not . isSpace)
+            return ()
+    typeText <- manyTill anyChar limiter
+    s2 <- B.singleton <$> newline
+    pat <- ask
+    return (CSkip (prefix `T.append` build s1) : parseTypeText pat typeText ++ [CSkip (build s2)])
+
+pDataDef :: Parser [Chunk]
+pDataDef = try $ do
+    s1 <- text "data"
+    let limiter = lookAhead $ try $
+            choice [eof
+                   ,do _ <- newline
+                       _ <- lookAhead (satisfy (not . isSpace))
+                       notFollowedBy (string "--")]
+    typeText <- manyTill anyChar limiter
+    s2 <- (eof >> return mempty) <|> (B.singleton <$> newline)
+    pat <- ask
+    return (CSkip (build s1) : parseTypeText pat typeText ++ [CSkip (build s2)])
+
+pInstanceDef :: Parser [Chunk]
+pInstanceDef = try $ do
+    s1 <- text "instance"
+    let limiter = lookAhead $ try $
+            choice [eof
+                   ,lookAhead $ try $ do
+                       _ <- satisfy prewordc
+                       _ <- string "where"
+                       eof <|> (satisfy prewordc >> return ())]
+    typeText <- manyTill anyChar limiter
+    pat <- ask
+    return (CSkip (build s1) : parseTypeText pat typeText)
+
+parseTypeText :: Pattern -> String -> [Chunk]
+parseTypeText (Pattern patname _) inputText =
+    fromRight [CSkip (T.pack inputText)] $
+        parse pTypeText "" inputText
+  where
+    pTypeText :: Parser' [Chunk]
+    pTypeText = do
+        s1 <- manyTill anyChar $
+                try (choice [eof
+                            ,lookAhead $ try $ do
+                                _ <- satisfy prewordc
+                                _ <- string (T.unpack patname)
+                                return ()])
+        choice [eof >> return [CSkip (T.pack s1)]
+               ,do s2 <- B.singleton <$> satisfy prewordc
+                   sName <- build <$> text (T.unpack patname)
+                   -- Try to parse some arguments; if that fails wholesale, just ignore this occurrence
+                   choice [do args <- try (many pArgument)
+                              rest <- pTypeText
+                              return (CSkip (build (B.fromString s1 <> s2))
+                                     :COcc (Occurrence sName args)
+                                     :rest)
+                          ,do rest <- pTypeText
+                              return (CSkip (build (B.fromString s1 <> s2))
+                                     :CSkip sName
+                                     :rest)]]
+
+    -- Returns preceding spaces and the argument itself. Requires non-zero spacing before.
+    pArgument :: Parser' (Text, Text)
+    pArgument = try $ do
+        s1 <- build <$> horSpaces1
+        s2 <- build <$> pExprAtom
+        return (s1, s2)
+
+    pExprAtom :: Parser' Builder
+    pExprAtom = pName <|> pParens
+      where
+        pExprApp :: Parser' Builder
+        pExprApp = pExprAtom +++ (mconcat <$> many (try (horSpaces1 +++ pExprAtom)))
+
+        pParens :: Parser' Builder
+        pParens = try $
+            combine [text "("
+                    ,mconcat . intersperse (B.singleton ',') <$>
+                        (horSpaces +++ pExprApp) `sepBy` char ','
+                    ,horSpaces
+                    ,text ")"]
+
+prewordc :: Char -> Bool
+prewordc c = not (isAlpha c) && not (isDigit c) && c /= '_'
+
+pIdentifier :: Parser Builder
+pIdentifier = pOperator <|> pName
+  where
+    pOperator :: Parser Builder
+    pOperator = try $ combine
+        [text "("
+        ,do s <- many1 (satisfy (\c -> isSymbol c && c `notElem` "()"))
+            guard (not (all (== '-') s))
+            return (B.fromString s)
+        ,text ")"]
+
+pName :: Monad m => ParsecT String u m Builder
+pName = try $ do
+    c1 <- satisfy (\c -> isAlpha c || c == '_')
+    cs <- many $ satisfy (\c -> isAlpha c || isDigit c || c `elem` "_'")
+    guard (not (reservedName (c1 : cs)))
+    return (B.fromString (c1 : cs))
+  where
+    reservedName :: String -> Bool
+    reservedName s = s `elem`
+        ["case", "class", "data", "default", "deriving", "do", "else"
+        ,"foreign", "if", "import", "in", "infix", "infixl", "infixr"
+        ,"instance", "let", "module", "newtype", "of", "then", "type"
+        ,"where", "_"]
+
+horSpaces :: Monad m => ParsecT String u m Builder
+horSpaces = mconcat . map B.singleton <$> many (satisfy (== ' '))
+
+horSpaces1 :: Monad m => ParsecT String u m Builder
+horSpaces1 = mconcat . map B.singleton <$> many1 (satisfy (== ' '))
+
+text :: Monad m => String -> ParsecT String u m Builder
+text s = B.fromString <$> string s
+
+atmost :: Int -> ParsecT s u m a -> ParsecT s u m [a]
+atmost 0 _ = return []
+atmost n p = ((:) <$> p <*> atmost (n-1) p) <|> return []
+
+(+++) :: ParsecT s u m Builder -> ParsecT s u m Builder -> ParsecT s u m Builder
+p +++ q = (<>) <$> p <*> q
+
+combine :: (Monoid m, Applicative f) => [f m] -> f m
+combine = fmap mconcat . sequenceA
+
+build :: Builder -> Text
+build = B.toLazyText