summaryrefslogtreecommitdiff
path: root/Parser.hs
blob: 017544a5c389218d59f3ea91d897a4c4a3d94954 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
module Parser (
    parseSourceFile,
) where

import Control.Monad
import Control.Monad.Reader
import Data.Char
import Data.Either
import Data.List (intersperse)
import qualified Data.Text.Lazy as T
import Data.Text.Lazy (Text)
import qualified Data.Text.Lazy.Builder as B
import Data.Text.Lazy.Builder (Builder)
import Text.Parsec

import Pattern
import SourceFile


type Parser = ParsecT String () (Reader Pattern)
type Parser' = Parsec String ()

parseSourceFile :: FilePath -> String -> Pattern -> Either ParseError SourceFile
parseSourceFile fname source pat = runReader (runParserT pSourceFile () fname source) pat

pSourceFile :: Parser SourceFile
pSourceFile = do
    result <- concat <$> many (pDataDef <|> pInstanceDef <|> pFunDef <|> skipLine)
    after <- skipLineEOF
    return (SourceFile (result ++ after))

skipLine :: Parser [Chunk]
skipLine = try $
    fmap (pure . CSkip . build) $
         (\s -> B.fromString (s ++ "\n")) <$> manyTill anyChar newline

skipLineEOF :: Parser [Chunk]
skipLineEOF = pure . CSkip . T.pack <$> manyTill (notFollowedBy newline >> anyChar) eof

-- Parses a function definition on the current line.
-- Assumes the cursor is at the start of a line, and ends at the start of a line again.
-- Fails if no function definition can be found on this line.
pFunDef :: Parser [Chunk]
pFunDef = flip label "function definition" $ try $ do
    prefix <- build <$> combine
            [horSpaces
            ,option mempty (text "=" +++ horSpaces)
            ,option mempty (text "let" +++ horSpaces1)
            ,option mempty (text "where" +++ horSpaces1)]
    let indent = T.length prefix
    s1 <- combine
            [pIdentifier
            ,horSpaces
            ,text "::"]
    let limiter = lookAhead $ try $ do
            _ <- newline
            _ <- atmost (fromIntegral indent) (char ' ')
            _ <- satisfy (not . isSpace)
            return ()
    typeText <- manyTill anyChar limiter
    s2 <- B.singleton <$> newline
    pat <- ask
    return (CSkip (prefix `T.append` build s1) : parseTypeText pat typeText ++ [CSkip (build s2)])

pDataDef :: Parser [Chunk]
pDataDef = flip label "data/type definition" $ try $ do
    s1 <- text "data" <|> text "type" <|> text "newtype"
    let limiter = lookAhead $ try $
            choice [eof
                   ,do _ <- newline
                       _ <- lookAhead (satisfy (not . isSpace))
                       notFollowedBy (string "--")]
    typeText <- manyTill anyChar limiter
    s2 <- (eof >> return mempty) <|> (B.singleton <$> newline)
    pat <- ask
    return (CSkip (build s1) : parseTypeText pat typeText ++ [CSkip (build s2)])

pInstanceDef :: Parser [Chunk]
pInstanceDef = flip label "instance definition" $ try $ do
    s1 <- text "instance"
    let limiter = lookAhead $ try $
            choice [eof
                   ,lookAhead $ try $ do
                       _ <- satisfy prewordc
                       _ <- string "where"
                       eof <|> (satisfy prewordc >> return ())]
    typeText <- manyTill anyChar limiter
    rest <- skipLine
    pat <- ask
    return (CSkip (build s1) : parseTypeText pat typeText ++ rest)

parseTypeText :: Pattern -> String -> [Chunk]
parseTypeText (Pattern patname _) inputText =
    fromRight [CSkip (T.pack inputText)] $
        parse pTypeText "" inputText
  where
    pTypeText :: Parser' [Chunk]
    pTypeText = do
        s1 <- manyTill anyChar $
                try (choice [eof
                            ,lookAhead $ try $ do
                                _ <- satisfy prewordc
                                _ <- string (T.unpack patname)
                                return ()])
        choice [eof >> return [CSkip (T.pack s1)]
               ,do s2 <- B.singleton <$> satisfy prewordc
                   sName <- build <$> text (T.unpack patname)
                   -- Try to parse some arguments; if that fails wholesale, just ignore this occurrence
                   choice [do args <- try (many pArgument)
                              rest <- pTypeText
                              return (CSkip (build (B.fromString s1 <> s2))
                                     :COcc (Occurrence sName args)
                                     :rest)
                          ,do rest <- pTypeText
                              return (CSkip (build (B.fromString s1 <> s2))
                                     :CSkip sName
                                     :rest)]]

    -- Returns preceding spaces and the argument itself. Requires non-zero spacing before.
    pArgument :: Parser' (Text, Text)
    pArgument = try $ do
        s1 <- build <$> horSpaces1
        s2 <- build <$> pExprAtom
        return (s1, s2)

    pExprAtom :: Parser' Builder
    pExprAtom = pName <|> pParens
      where
        pExprApp :: Parser' Builder
        pExprApp = pExprAtom +++ (mconcat <$> many (try (horSpaces1 +++ pExprAtom)))

        pParens :: Parser' Builder
        pParens = try $
            combine [text "("
                    ,mconcat . intersperse (B.singleton ',') <$>
                        (horSpaces +++ pExprApp) `sepBy` char ','
                    ,horSpaces
                    ,text ")"]

prewordc :: Char -> Bool
prewordc c = not (isAlpha c) && not (isDigit c) && c /= '_'

pIdentifier :: Parser Builder
pIdentifier = pOperator <|> pName
  where
    pOperator :: Parser Builder
    pOperator = try $ combine
        [text "("
        ,do s <- many1 (satisfy (\c -> isSymbol c && c `notElem` "()"))
            guard (not (all (== '-') s))
            return (B.fromString s)
        ,text ")"]

pName :: Monad m => ParsecT String u m Builder
pName = try $ do
    c1 <- satisfy (\c -> isAlpha c || c == '_')
    cs <- many $ satisfy (\c -> isAlpha c || isDigit c || c `elem` "_'")
    guard (not (reservedName (c1 : cs)))
    return (B.fromString (c1 : cs))
  where
    reservedName :: String -> Bool
    reservedName s = s `elem`
        ["case", "class", "data", "default", "deriving", "do", "else"
        ,"foreign", "if", "import", "in", "infix", "infixl", "infixr"
        ,"instance", "let", "module", "newtype", "of", "then", "type"
        ,"where", "_"]

horSpaces :: Monad m => ParsecT String u m Builder
horSpaces = mconcat . map B.singleton <$> many (satisfy (== ' '))

horSpaces1 :: Monad m => ParsecT String u m Builder
horSpaces1 = mconcat . map B.singleton <$> many1 (satisfy (== ' '))

text :: Monad m => String -> ParsecT String u m Builder
text s = B.fromString <$> string s

atmost :: Int -> ParsecT s u m a -> ParsecT s u m [a]
atmost 0 _ = return []
atmost n p = ((:) <$> p <*> atmost (n-1) p) <|> return []

(+++) :: ParsecT s u m Builder -> ParsecT s u m Builder -> ParsecT s u m Builder
p +++ q = (<>) <$> p <*> q

combine :: (Monoid m, Applicative f) => [f m] -> f m
combine = fmap mconcat . sequenceA

build :: Builder -> Text
build = B.toLazyText