{-# LANGUAGE TupleSections #-} module ProgramParser(parseProgram) where import Control.Monad import Data.Char import Data.Maybe import Text.Parsec import qualified Text.Parsec.Expr as E import AST import Defs type Parser = Parsec String () parseProgram :: String -> Either String Program parseProgram s = case parse pProgram "" s of Left err -> Left $ show err Right p -> Right p pProgram :: Parser Program pProgram = between pWhiteComment eof go where go :: Parser Program go = (pDTypedef >>= \d -> addTypedef d <$> go) <|> (pDFunc >>= \d -> addFunc d <$> go) <|> (pDVar >>= \d -> addVar d <$> go) <|> return (Program [] [] []) addTypedef d (Program a b c) = Program (d:a) b c addVar d (Program a b c) = Program a (d:b) c addFunc d (Program a b c) = Program a b (d:c) pDTypedef :: Parser DTypedef pDTypedef = do symbol "type" n <- pName symbol ":=" t <- pType symbol ";" return $ DTypedef n t pDFunc :: Parser DFunc pDFunc = do symbol "func" (rt,n) <- (try $ pType >>= \t -> (Just t,) <$> pName) <|> ((Nothing,) <$> pName) symbol "(" args <- sepBy pTypeAndName (symbol ",") symbol ")" body <- pBlock return $ DFunc rt n args body pDVar :: Parser DVar pDVar = do (t,n) <- try pTypeAndName symbol ":=" e <- pExpression symbol ";" return $ DVar t n e pTypeAndName :: Parser (Type, Name) pTypeAndName = (,) <$> pType <*> pName pType :: Parser Type pType = (flip label "type") $ pStruct <|> do t <- pBasicType (do symbol "[" msz <- optionMaybe pInteger symbol "]" return $ TArr t msz) <|> return t pStruct :: Parser Type pStruct = do symbol "struct" symbol "{" ms <- many $ pTypeAndName <* symbol ";" symbol "}" return $ TStruct ms pBasicType :: Parser Type pBasicType = (symbol "int" >> return TInt) <|> (symbol "char" >> return TChar) <|> (TName <$> pName) pBlock :: Parser Block pBlock = do symbol "{" body <- many pStatement symbol "}" return $ Block body pStatement :: Parser Statement pStatement = pSIf <|> pSWhile <|> pSReturn <|> pSBreak <|> pSDebugger <|> pSDecl <|> pSAs <|> pSExpr pSDecl :: Parser Statement pSDecl = do (t, n) <- try $ do t <- pType n <- pName symbol ":=" return (t, n) e <- pExpression symbol ";" return $ SDecl t n e pSAs :: Parser Statement pSAs = do n <- try $ pAsExpression <* symbol "=" e <- pExpression symbol ";" return $ SAs n e pSIf :: Parser Statement pSIf = do symbol "if" symbol "(" cond <- pExpression symbol ")" bl1 <- pBlock bl2 <- try (symbol "else" >> pBlock) <|> return (Block []) return $ SIf cond bl1 bl2 pSWhile :: Parser Statement pSWhile = do symbol "while" symbol "(" cond <- pExpression symbol ")" bl <- pBlock return $ SWhile cond bl pSReturn :: Parser Statement pSReturn = do symbol "return" m <- optionMaybe pExpression symbol ";" return $ SReturn m pSBreak :: Parser Statement pSBreak = do symbol "break" m <- optionMaybe pIntegerInt symbol ";" return $ SBreak (fromMaybe 0 m) pSDebugger :: Parser Statement pSDebugger = symbol "debugger" >> symbol ";" >> return SDebugger pSExpr :: Parser Statement pSExpr = do e <- pExpression symbol ";" return $ SExpr e pExpression :: Parser Expression pExpression = E.buildExpressionParser optable litparser "expression" where optable = [[E.Infix (symbol "**" >> return (mkEBin BOPow)) E.AssocRight], [E.Infix (symbol "*" >> return (mkEBin BOMul)) E.AssocLeft, E.Infix (symbol "/" >> return (mkEBin BODiv)) E.AssocLeft, E.Infix (symbol "%" >> return (mkEBin BOMod)) E.AssocLeft], [E.Infix (symbol "^" >> return (mkEBin BOBitXor)) E.AssocLeft], [E.Infix (symbol "&" >> return (mkEBin BOBitAnd)) E.AssocLeft], [E.Infix (symbol "|" >> return (mkEBin BOBitOr)) E.AssocLeft], [E.Infix (symbol "+" >> return (mkEBin BOAdd)) E.AssocLeft, E.Infix (symbol "-" >> return (mkEBin BOSub)) E.AssocLeft], [E.Infix (symbol ">=" >> return (mkEBin BOGeq)) E.AssocNone, E.Infix (symbol "<=" >> return (mkEBin BOLeq)) E.AssocNone, E.Infix (symbol ">" >> return (mkEBin BOGt)) E.AssocNone, E.Infix (symbol "<" >> return (mkEBin BOLt)) E.AssocNone, E.Infix (symbol "==" >> return (mkEBin BOEq)) E.AssocNone, E.Infix (symbol "!=" >> return (mkEBin BONeq)) E.AssocNone], [E.Infix (symbol "&&" >> return (mkEBin BOAnd)) E.AssocLeft], [E.Infix (symbol "||" >> return (mkEBin BOOr)) E.AssocLeft]] mkEBin :: BinaryOp -> Expression -> Expression -> Expression mkEBin bo a b = EBin bo a b Nothing mkELit :: Literal -> Expression mkELit l = ELit l Nothing litparser :: Parser Expression litparser = do preops <- many pPrefixOp e <- pParenExpr <|> pENew <|> (mkELit <$> pLiteral) postops <- many pPostfixOp return $ foldl (flip ($)) e (postops ++ preops) pAsExpression :: Parser AsExpression pAsExpression = do n <- pName postops <- many pPostfixAsOp return $ foldl (flip ($)) (AEVar n Nothing) postops pPrefixOp :: Parser (Expression -> Expression) pPrefixOp = (symbol "!" >> return (\e -> EUn UONot e Nothing)) <|> (symbol "-" >> return (\e -> EUn UONeg e Nothing)) pPostfixOp :: Parser (Expression -> Expression) pPostfixOp = (do expr <- between (symbol "[") (symbol "]") pExpression return $ \e -> ESubscript e expr Nothing) <|> (do symbol "." n <- pName return $ \e -> EGet e n Nothing) pPostfixAsOp :: Parser (AsExpression -> AsExpression) pPostfixAsOp = (do expr <- between (symbol "[") (symbol "]") pExpression return $ \ae -> AESubscript ae expr Nothing) <|> (do symbol "." n <- pName return $ \ae -> AEGet ae n Nothing) pParenExpr :: Parser Expression pParenExpr = do symbol "(" e <- pExpression symbol ")" return e pENew :: Parser Expression pENew = do symbol "new" t <- pBasicType symbol "[" e <- pExpression symbol "]" return $ ENew t e pLiteral :: Parser Literal pLiteral = (LInt <$> pInteger) <|> (LChar <$> pCharLit) <|> (LStr <$> pString) <|> pLStruct <|> pLCall <|> (LVar <$> pName) pCharLit :: Parser Char pCharLit = do void $ char '\'' c <- pStringChar (const False) void $ char '\'' pWhiteComment return c pStringChar :: (Char -> Bool) -> Parser Char pStringChar avoid = (char '\\' >> ((char 'n' >> return '\n') <|> (char 'r' >> return '\r') <|> (char 't' >> return '\t') <|> (char '0' >> return '\0') <|> (char 'x' >> pHexDigit >>= \a -> pHexDigit >>= \b -> return (chr $ 16 * a + b)))) <|> satisfy (not . avoid) where pHexDigit :: Parser Int pHexDigit = (subtract 48 . fromEnum <$> digit) <|> ((+ (10 - 97)) . ord <$> oneOf "abcdef") <|> ((+ (10 - 65)) . ord <$> oneOf "ABCDEF") pString :: Parser String pString = do void $ char '"' s <- many (pStringChar (== '"')) void $ char '"' return s pLStruct :: Parser Literal pLStruct = do symbol "{" ms <- sepBy (pName >>= \n -> symbol "=" >> pExpression >>= \e -> return (n,e)) (symbol ",") symbol "}" return $ LStruct ms pLCall :: Parser Literal pLCall = do n <- try $ pName <* symbol "(" al <- sepBy pExpression (symbol ",") symbol ")" return $ LCall n al pName :: Parser Name pName = do c0 <- satisfy (\c -> isAlpha c || c == '_') cr <- many $ satisfy (\c -> isAlpha c || isDigit c || c == '_') pWhiteComment return $ c0 : cr pInteger :: Parser Integer pInteger = bareint <* pWhiteComment where bareint = (try (string "0x") >> many1 (satisfy isHexDigit) >>= return . read . ("0x" ++)) <|> (try (string "0b") >> many1 (oneOf "01") >>= return . bin2int) <|> (many1 (satisfy isDigit) >>= return . read) bin2int :: String -> Integer bin2int s = go (reverse s) where go "" = 0 go ('0':s') = 2 * go s' go ('1':s') = 2 * go s' + 1 go (_:_) = undefined pIntegerInt :: Parser Int pIntegerInt = do i <- pInteger when (i > (fromIntegral (maxBound :: Int) :: Integer) || i < (fromIntegral (minBound :: Int) :: Integer)) $ unexpected $ "Integer literal " ++ show i ++ " does not fit in an Int" return $ fromIntegral i symbol :: String -> Parser () symbol "" = error "symbol \"\"" symbol s = try $ do void $ string s when (isAlpha (last s)) $ void $ notFollowedBy (satisfy isAlpha) when (isDigit (last s)) $ void $ notFollowedBy (satisfy isDigit) when (isOperatorChar (last s)) $ void $ notFollowedBy (satisfy isOperatorChar) pWhiteComment isOperatorChar :: Char -> Bool isOperatorChar = (`elem` "*/%^&|+-><=!") pWhiteComment :: Parser () pWhiteComment = void $ pWhite >> endBy pComment pWhite pWhite :: Parser () pWhite = void $ many (oneOf " \t\n") pComment :: Parser () pComment = pLineComment <|> pBlockComment pLineComment :: Parser () pLineComment = do void $ try $ string "//" void $ many (satisfy (/= '\n')) eof <|> void (char '\n') pBlockComment :: Parser () pBlockComment = do void $ try $ string "/*" void $ sepEndBy (manyTill anyToken (lookAhead $ try (string "/*") <|> try (string "*/"))) pBlockComment void $ string "*/"