From bc52411ae2ed26cab1d5086ae6df68f23ebbd052 Mon Sep 17 00:00:00 2001 From: Tom Smeding Date: Wed, 10 Jun 2020 19:59:03 +0200 Subject: Initial state I found the code in --- .gitignore | 1 + README.md | 4 ++ ast/CC/Source.hs | 48 +++++++++++++++++++ ast/CC/Typed.hs | 35 ++++++++++++++ compcomp.cabal | 47 +++++++++++++++++++ main/Main.hs | 33 ++++++++++++++ parser/CC/Parser.hs | 114 ++++++++++++++++++++++++++++++++++++++++++++++ typecheck/CC/Typecheck.hs | 83 +++++++++++++++++++++++++++++++++ utils/CC/IdSupply.hs | 29 ++++++++++++ utils/CC/Pretty.hs | 9 ++++ utils/CC/Types.hs | 44 ++++++++++++++++++ 11 files changed, 447 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 ast/CC/Source.hs create mode 100644 ast/CC/Typed.hs create mode 100644 compcomp.cabal create mode 100644 main/Main.hs create mode 100644 parser/CC/Parser.hs create mode 100644 typecheck/CC/Typecheck.hs create mode 100644 utils/CC/IdSupply.hs create mode 100644 utils/CC/Pretty.hs create mode 100644 utils/CC/Types.hs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2022b2b --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/dist-newstyle/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..4b79ccc --- /dev/null +++ b/README.md @@ -0,0 +1,4 @@ +# Composable Compiler + +A prototype compiler for a small, Haskell-like functional language, built up of +small components that form a full compiler when placed in a pipeline. diff --git a/ast/CC/Source.hs b/ast/CC/Source.hs new file mode 100644 index 0000000..80d8f01 --- /dev/null +++ b/ast/CC/Source.hs @@ -0,0 +1,48 @@ +module CC.Source(module CC.Source, module CC.Types) where + +import CC.Pretty +import CC.Types + + +data Program = Program [Decl] + deriving (Show, Read) + +data Decl = Def Def -- import? + deriving (Show, Read) + +data Def = Function (Maybe Type) + (Name, SourceRange) + [(Name, SourceRange)] + Expr + deriving (Show, Read) + +data Type = TFun Type Type + | TInt + deriving (Show, Read) + +data Expr = Call SourceRange Expr Expr + | Int SourceRange Int + | Var SourceRange Name + deriving (Show, Read) + +instance Pretty Type where + pretty = unparse + +class Unparse a where + -- Parentheses are required if precedence of unparsed element is + -- greater than the argument. + unparsePrec :: Int -> a -> String + + unparse :: a -> String + unparse = unparsePrec 0 + +instance Unparse Type where + unparsePrec _ TInt = "Int" + unparsePrec p (TFun a b) = + let s = unparsePrec 3 a ++ " -> " ++ unparsePrec 2 b + in if p > 2 then "(" ++ s ++ ")" else s + +instance HasRange Expr where + range (Call sr _ _) = sr + range (Int sr _) = sr + range (Var sr _) = sr diff --git a/ast/CC/Typed.hs b/ast/CC/Typed.hs new file mode 100644 index 0000000..b11067f --- /dev/null +++ b/ast/CC/Typed.hs @@ -0,0 +1,35 @@ +module CC.Typed( + module CC.Typed, + module CC.Types +) where + +import CC.Types + + +data ProgramT = ProgramT [DeclT] + deriving (Show, Read) + +data DeclT = DefT DefT -- import? + deriving (Show, Read) + +data DefT = FunctionT TypeT Name [Name] ExprT + deriving (Show, Read) + +data TypeT = TFunT TypeT TypeT + | TIntT + | TyVar Int + deriving (Show, Read) + +data ExprT = CallT TypeT ExprT ExprT + | IntT Int + | VarT Occ + deriving (Show, Read) + +-- | Occurrence of a variable +data Occ = Occ Name TypeT + deriving (Show, Read) + +exprType :: ExprT -> TypeT +exprType (CallT typ _ _) = typ +exprType (IntT _) = TIntT +exprType (VarT (Occ _ typ)) = typ diff --git a/compcomp.cabal b/compcomp.cabal new file mode 100644 index 0000000..20a3d37 --- /dev/null +++ b/compcomp.cabal @@ -0,0 +1,47 @@ +name: compcomp +version: 0.1.0 +cabal-version: >= 1.10 +build-type: Simple +license: MIT +author: Tom Smeding +maintainer: tom.smeding@gmail.com + +executable compcomp + hs-source-dirs: main + main-is: Main.hs + default-language: Haskell2010 + ghc-options: -Wall -O2 + build-depends: base >= 4 && < 5, cc-parser, cc-typecheck, cc-utils + other-modules: + +library cc-parser + hs-source-dirs: parser + default-language: Haskell2010 + ghc-options: -Wall -O2 + build-depends: base >= 4 && < 5, parsec, cc-ast, cc-utils + exposed-modules: CC.Parser + other-modules: + +library cc-typecheck + hs-source-dirs: typecheck + default-language: Haskell2010 + ghc-options: -Wall -O2 + build-depends: base >= 4 && < 5, containers, mtl, cc-ast, cc-utils + exposed-modules: CC.Typecheck + other-modules: + +library cc-ast + hs-source-dirs: ast + default-language: Haskell2010 + ghc-options: -Wall -O2 + build-depends: base >= 4 && < 5, cc-utils + exposed-modules: CC.Source, CC.Typed + other-modules: + +library cc-utils + hs-source-dirs: utils + default-language: Haskell2010 + ghc-options: -Wall -O2 + build-depends: base >= 4 && < 5 + exposed-modules: CC.Types, CC.Pretty + other-modules: diff --git a/main/Main.hs b/main/Main.hs new file mode 100644 index 0000000..58e475c --- /dev/null +++ b/main/Main.hs @@ -0,0 +1,33 @@ +module Main where + +import System.Exit + +import qualified CC.Parser +import qualified CC.Typecheck +import CC.Types + + +-- Put the passes in a type-level list to be able to run subsequences of +-- passes. + + +type Pass a b = Context -> a -> Either String b + +pass :: (Read a, Show b, Show e) => (Context -> a -> Either e b) -> Pass a b +pass f ctx a = either (Left . show) Right (f ctx a) + +passJoin :: (Read a, Show b, Read b, Show c) => Pass a b -> Pass b c -> Pass a c +passJoin f1 f2 ctx a = f1 ctx a >>= f2 ctx + +main :: IO () +main = do + let parse = pass CC.Parser.runPass + typecheck = pass CC.Typecheck.runPass + + let combined = parse `passJoin` typecheck + + source <- getContents + let context = Context "" + case combined context (read source) of + Right prog -> print prog + Left err -> die (show err) diff --git a/parser/CC/Parser.hs b/parser/CC/Parser.hs new file mode 100644 index 0000000..0088956 --- /dev/null +++ b/parser/CC/Parser.hs @@ -0,0 +1,114 @@ +module CC.Parser(runPass, parseProgram) where + +import Control.Monad +import Text.Parsec hiding (State, SourcePos, getPosition, token) +import qualified Text.Parsec + +import CC.Source + + +type Parser a = Parsec String State a +type State = Int -- base indentation level; hanging lines should be > this + +runPass :: Context -> RawString -> Either ParseError Program +runPass (Context path) (RawString src) = parseProgram path src + +parseProgram :: FilePath -> String -> Either ParseError Program +parseProgram fname src = runParser pProgram 0 fname src + +pProgram :: Parser Program +pProgram = do + prog <- Program <$> many pDecl + emptyLines + eof + return prog + +pDecl :: Parser Decl +pDecl = Def <$> pDef + +pDef :: Parser Def +pDef = do + func <- try $ do + emptyLines + putState 0 + name <- pName0 "declaration head name" + return name + mtyp <- optionMaybe $ do + symbol "::" + typ <- pType + whitespace >> void newline + emptyLines + func' <- fst <$> pName0 + guard (fst func == func') + return typ + args <- many pName + symbol "=" + expr <- pExpr + return (Function mtyp func args expr) + +pType :: Parser Type +pType = chainr1 pTypeAtom (symbol "->" >> return TFun) + +pTypeAtom :: Parser Type +pTypeAtom = (wordToken "Int" >> return TInt) <|> between (token "(") (token ")") pType + +pExpr :: Parser Expr +pExpr = lab "expression" $ do + atoms <- many1 pExprAtom + return (foldl1 (\a b -> Call (mergeRange (range a) (range b)) a b) atoms) + +pExprAtom :: Parser Expr +pExprAtom = + choice [ uncurry (flip Int) <$> pInt + , uncurry (flip Var) <$> pName + , between (token "(") (token ")") pExpr ] + +pInt :: Parser (Int, SourceRange) +pInt = try (whitespace >> pInt0) + where + pInt0 = do + p1 <- getPosition + num <- read <$> many1 digit + p2 <- getPosition + return (num, SourceRange p1 p2) + +pName0 :: Parser (Name, SourceRange) +pName0 = do + p1 <- getPosition + c <- pWordFirstChar + cs <- many pWordMidChar + p2 <- getPosition + notFollowedBy pWordMidChar + return (c : cs, SourceRange p1 p2) + +pWordFirstChar :: Parser Char +pWordFirstChar = letter <|> oneOf "_$#!" + +pWordMidChar :: Parser Char +pWordMidChar = alphaNum <|> oneOf "_$#!" + +pName :: Parser (Name, SourceRange) +pName = try (whitespace >> pName0) + +symbol :: String -> Parser () +symbol s = token s >> (eof <|> void space <|> void (oneOf "(){}[]")) + +wordToken :: String -> Parser () +wordToken s = token s >> notFollowedBy pWordMidChar + +token :: String -> Parser () +token s = try (whitespace >> void (string s)) + +emptyLines :: Parser () +emptyLines = (try (whitespace >> newline) >> emptyLines) <|> try (whitespace >> eof) <|> return () + +whitespace :: Parser () +whitespace = void (many (char ' ')) + +lab :: String -> Parser a -> Parser a +lab = flip label + +getPosition :: Parser SourcePos +getPosition = do + pos <- Text.Parsec.getPosition + return (SourcePos (sourceLine pos) (sourceColumn pos)) diff --git a/typecheck/CC/Typecheck.hs b/typecheck/CC/Typecheck.hs new file mode 100644 index 0000000..47f42e3 --- /dev/null +++ b/typecheck/CC/Typecheck.hs @@ -0,0 +1,83 @@ +module CC.Typecheck(runPass) where + +import Control.Monad.State.Strict +import Data.List (intersect) +import qualified Data.Map.Strict as Map + +import CC.Pretty +import CC.Source +import CC.Typed + + +data TypeError = TypeError SourceRange TypeT TypeT + deriving (Show) + +instance Pretty TypeError where + pretty (TypeError sr real expect) = + "Type error: Expression at " ++ pretty sr ++ " has type " ++ pretty real ++ + ", but should have type " ++ pretty expect + +type IdSupplyT m a = StateT Int m a + +genId :: Monad m => IdSupplyT m Int +genId = state (\idval -> (idval, idval + 1)) + +genTyVar :: Monad m => IdSupplyT m TypeT +genTyVar = TyVar <$> genId + +type TM a = IdSupplyT (Either TypeError) a + +runTM :: TM a -> Either TypeError a +runTM m = evalStateT m 1 + + +runPass :: Context -> Program -> Either TypeError ProgramT +runPass _ prog = runTM (typeCheck prog) + +typeCheck :: Program -> TM ProgramT +typeCheck (Program decls) = ProgramT <$> mapM typeCheckDL decls + +typeCheckDL :: Decl -> TM DeclT +typeCheckDL (Def def) = DefT <$> typeCheckD def + +typeCheckD :: Def -> TM DefT +typeCheckD (Function mt (fname, fnameR) args body) = do + (body', _) <- typeCheckE body + return (FunctionT (exprType body') fname (map fst args) body') + +typeCheckE :: Expr -> TM (ExprT, Mapping) +typeCheckE (Call sr func arg) = do + (func', m1) <- typeCheckE func + (arg', m2) <- typeCheckE arg + m <- combine m1 m2 + + let functype = exprType func' + argtype = exprType arg' + tvar <- genTyVar + apply <- unify (range func) functype (TFunT tvar argtype) + let restype = TFunT (apply tvar) (apply argtype) + func'' = down apply func' + arg'' = down apply arg' + + return (CallT restype func'' arg'') +typeCheckE (Int _ val) = return (IntT val, mempty) +typeCheckE (Var _ name) = VarT . Occ name <$> genTyVar + +-- For each variable, its inferred type and the position of its first +-- occurrence in a program fragment. +type Mapping = Map.Map Name (TypeT, SourceRange) + +combine :: Mapping -> Mapping -> TM Mapping +combine mp1 mp2 = do + let leftmap = Map.filterWithKey (\name _ -> not (Map.member name mp2)) mp1 + rightmap = Map.filterWithKey (\name _ -> not (Map.member name mp1)) mp2 + overlap = Map.keys mp1 `intersect` Map.keys mp2 + combine1 name (t1, sr1) (t2, sr2) + | t1 == t2 = Right (t1, sr1) + | otherwise = Left (TypeError sr2 t2 t1) + midpairs <- sequence [combine1 name (mp1 Map.! name) (mp2 Map.! name) + | name <- overlap] + return (Map.unions [leftmap, rightmap, Map.fromList midpairs]) + +unify :: SourceRange -> TypeT -> TypeT -> TM (TypeT -> TypeT) +unify = undefined diff --git a/utils/CC/IdSupply.hs b/utils/CC/IdSupply.hs new file mode 100644 index 0000000..234f6cc --- /dev/null +++ b/utils/CC/IdSupply.hs @@ -0,0 +1,29 @@ +module CC.IdSupply(IdSupply, runIdSupply, genId) where + +import Control.Monad.Trans + + +data IdSupply a = IdSupply (Int -> (Int, a)) + +instance Functor IdSupply where + fmap f (IdSupply act) = IdSupply (fmap f . act) + +instance Applicative IdSupply where + pure x = IdSupply (\idval -> (idval, x)) + IdSupply f <*> IdSupply x = + IdSupply (\idval -> let (idval', f') = f idval + in f' <$> x idval') + +instance Monad IdSupply where + IdSupply x >>= f = + IdSupply (\idval -> let (idval', x') = x idval + IdSupply res = f x' + in res idval') + +instance MonadTrans + +runIdSupply :: Int -> IdSupply a -> a +runIdSupply startid (IdSupply f) = snd (f startid) + +genId :: IdSupply Int +genId = IdSupply (\idval -> (idval + 1, idval)) diff --git a/utils/CC/Pretty.hs b/utils/CC/Pretty.hs new file mode 100644 index 0000000..0a41abe --- /dev/null +++ b/utils/CC/Pretty.hs @@ -0,0 +1,9 @@ +module CC.Pretty where + + +class Pretty a where + pretty :: a -> String + + +instance Pretty Int where + pretty = show diff --git a/utils/CC/Types.hs b/utils/CC/Types.hs new file mode 100644 index 0000000..dc1b0e5 --- /dev/null +++ b/utils/CC/Types.hs @@ -0,0 +1,44 @@ +module CC.Types where + +import CC.Pretty + + +-- | Names of variables in the program +type Name = String + +-- | Source metadata for compilation +data Context = Context FilePath + +-- | Position in a source file; `SourcePos line column`, both zero-based +data SourcePos = SourcePos Int Int + deriving (Show, Read, Eq, Ord) + +instance Pretty SourcePos where + pretty (SourcePos line col) = show (line + 1) ++ ":" ++ show (col + 1) + +-- | A range in the original source code (for diagnostics and debug +-- information); [from, to). +data SourceRange = SourceRange SourcePos SourcePos + deriving (Show, Read) + +instance Pretty SourceRange where + pretty (SourceRange from@(SourcePos fromLine fromCol) to@(SourcePos toLine toCol)) + | fromLine == toLine = + show (fromLine + 1) ++ ":" ++ show (fromCol + 1) ++ "-" ++ show (toCol + 1) + | otherwise = + show from ++ "-" ++ show to + +class HasRange a where + range :: a -> SourceRange + +mergeRange :: SourceRange -> SourceRange -> SourceRange +mergeRange (SourceRange p1 p2) (SourceRange q1 q2) = SourceRange (min p1 q1) (max p2 q2) + +-- | A newtype with no-op Read and Show instances +newtype RawString = RawString String + +instance Read RawString where + readsPrec _ s = [(RawString s, "")] + +instance Show RawString where + show (RawString s) = s -- cgit v1.2.3-54-g00ecf