aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Smeding <tom.smeding@gmail.com>2020-06-10 19:59:03 +0200
committerTom Smeding <tom.smeding@gmail.com>2020-06-10 19:59:13 +0200
commitbc52411ae2ed26cab1d5086ae6df68f23ebbd052 (patch)
tree365e7bab678bb46981befe5b2a1c0c967a9a9c57
Initial state I found the code in
-rw-r--r--.gitignore1
-rw-r--r--README.md4
-rw-r--r--ast/CC/Source.hs48
-rw-r--r--ast/CC/Typed.hs35
-rw-r--r--compcomp.cabal47
-rw-r--r--main/Main.hs33
-rw-r--r--parser/CC/Parser.hs114
-rw-r--r--typecheck/CC/Typecheck.hs83
-rw-r--r--utils/CC/IdSupply.hs29
-rw-r--r--utils/CC/Pretty.hs9
-rw-r--r--utils/CC/Types.hs44
11 files changed, 447 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2022b2b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/dist-newstyle/
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..4b79ccc
--- /dev/null
+++ b/README.md
@@ -0,0 +1,4 @@
+# Composable Compiler
+
+A prototype compiler for a small, Haskell-like functional language, built up of
+small components that form a full compiler when placed in a pipeline.
diff --git a/ast/CC/Source.hs b/ast/CC/Source.hs
new file mode 100644
index 0000000..80d8f01
--- /dev/null
+++ b/ast/CC/Source.hs
@@ -0,0 +1,48 @@
+module CC.Source(module CC.Source, module CC.Types) where
+
+import CC.Pretty
+import CC.Types
+
+
+data Program = Program [Decl]
+ deriving (Show, Read)
+
+data Decl = Def Def -- import?
+ deriving (Show, Read)
+
+data Def = Function (Maybe Type)
+ (Name, SourceRange)
+ [(Name, SourceRange)]
+ Expr
+ deriving (Show, Read)
+
+data Type = TFun Type Type
+ | TInt
+ deriving (Show, Read)
+
+data Expr = Call SourceRange Expr Expr
+ | Int SourceRange Int
+ | Var SourceRange Name
+ deriving (Show, Read)
+
+instance Pretty Type where
+ pretty = unparse
+
+class Unparse a where
+ -- Parentheses are required if precedence of unparsed element is
+ -- greater than the argument.
+ unparsePrec :: Int -> a -> String
+
+ unparse :: a -> String
+ unparse = unparsePrec 0
+
+instance Unparse Type where
+ unparsePrec _ TInt = "Int"
+ unparsePrec p (TFun a b) =
+ let s = unparsePrec 3 a ++ " -> " ++ unparsePrec 2 b
+ in if p > 2 then "(" ++ s ++ ")" else s
+
+instance HasRange Expr where
+ range (Call sr _ _) = sr
+ range (Int sr _) = sr
+ range (Var sr _) = sr
diff --git a/ast/CC/Typed.hs b/ast/CC/Typed.hs
new file mode 100644
index 0000000..b11067f
--- /dev/null
+++ b/ast/CC/Typed.hs
@@ -0,0 +1,35 @@
+module CC.Typed(
+ module CC.Typed,
+ module CC.Types
+) where
+
+import CC.Types
+
+
+data ProgramT = ProgramT [DeclT]
+ deriving (Show, Read)
+
+data DeclT = DefT DefT -- import?
+ deriving (Show, Read)
+
+data DefT = FunctionT TypeT Name [Name] ExprT
+ deriving (Show, Read)
+
+data TypeT = TFunT TypeT TypeT
+ | TIntT
+ | TyVar Int
+ deriving (Show, Read)
+
+data ExprT = CallT TypeT ExprT ExprT
+ | IntT Int
+ | VarT Occ
+ deriving (Show, Read)
+
+-- | Occurrence of a variable
+data Occ = Occ Name TypeT
+ deriving (Show, Read)
+
+exprType :: ExprT -> TypeT
+exprType (CallT typ _ _) = typ
+exprType (IntT _) = TIntT
+exprType (VarT (Occ _ typ)) = typ
diff --git a/compcomp.cabal b/compcomp.cabal
new file mode 100644
index 0000000..20a3d37
--- /dev/null
+++ b/compcomp.cabal
@@ -0,0 +1,47 @@
+name: compcomp
+version: 0.1.0
+cabal-version: >= 1.10
+build-type: Simple
+license: MIT
+author: Tom Smeding
+maintainer: tom.smeding@gmail.com
+
+executable compcomp
+ hs-source-dirs: main
+ main-is: Main.hs
+ default-language: Haskell2010
+ ghc-options: -Wall -O2
+ build-depends: base >= 4 && < 5, cc-parser, cc-typecheck, cc-utils
+ other-modules:
+
+library cc-parser
+ hs-source-dirs: parser
+ default-language: Haskell2010
+ ghc-options: -Wall -O2
+ build-depends: base >= 4 && < 5, parsec, cc-ast, cc-utils
+ exposed-modules: CC.Parser
+ other-modules:
+
+library cc-typecheck
+ hs-source-dirs: typecheck
+ default-language: Haskell2010
+ ghc-options: -Wall -O2
+ build-depends: base >= 4 && < 5, containers, mtl, cc-ast, cc-utils
+ exposed-modules: CC.Typecheck
+ other-modules:
+
+library cc-ast
+ hs-source-dirs: ast
+ default-language: Haskell2010
+ ghc-options: -Wall -O2
+ build-depends: base >= 4 && < 5, cc-utils
+ exposed-modules: CC.Source, CC.Typed
+ other-modules:
+
+library cc-utils
+ hs-source-dirs: utils
+ default-language: Haskell2010
+ ghc-options: -Wall -O2
+ build-depends: base >= 4 && < 5
+ exposed-modules: CC.Types, CC.Pretty
+ other-modules:
diff --git a/main/Main.hs b/main/Main.hs
new file mode 100644
index 0000000..58e475c
--- /dev/null
+++ b/main/Main.hs
@@ -0,0 +1,33 @@
+module Main where
+
+import System.Exit
+
+import qualified CC.Parser
+import qualified CC.Typecheck
+import CC.Types
+
+
+-- Put the passes in a type-level list to be able to run subsequences of
+-- passes.
+
+
+type Pass a b = Context -> a -> Either String b
+
+pass :: (Read a, Show b, Show e) => (Context -> a -> Either e b) -> Pass a b
+pass f ctx a = either (Left . show) Right (f ctx a)
+
+passJoin :: (Read a, Show b, Read b, Show c) => Pass a b -> Pass b c -> Pass a c
+passJoin f1 f2 ctx a = f1 ctx a >>= f2 ctx
+
+main :: IO ()
+main = do
+ let parse = pass CC.Parser.runPass
+ typecheck = pass CC.Typecheck.runPass
+
+ let combined = parse `passJoin` typecheck
+
+ source <- getContents
+ let context = Context "<stdin>"
+ case combined context (read source) of
+ Right prog -> print prog
+ Left err -> die (show err)
diff --git a/parser/CC/Parser.hs b/parser/CC/Parser.hs
new file mode 100644
index 0000000..0088956
--- /dev/null
+++ b/parser/CC/Parser.hs
@@ -0,0 +1,114 @@
+module CC.Parser(runPass, parseProgram) where
+
+import Control.Monad
+import Text.Parsec hiding (State, SourcePos, getPosition, token)
+import qualified Text.Parsec
+
+import CC.Source
+
+
+type Parser a = Parsec String State a
+type State = Int -- base indentation level; hanging lines should be > this
+
+runPass :: Context -> RawString -> Either ParseError Program
+runPass (Context path) (RawString src) = parseProgram path src
+
+parseProgram :: FilePath -> String -> Either ParseError Program
+parseProgram fname src = runParser pProgram 0 fname src
+
+pProgram :: Parser Program
+pProgram = do
+ prog <- Program <$> many pDecl
+ emptyLines
+ eof
+ return prog
+
+pDecl :: Parser Decl
+pDecl = Def <$> pDef
+
+pDef :: Parser Def
+pDef = do
+ func <- try $ do
+ emptyLines
+ putState 0
+ name <- pName0 <?> "declaration head name"
+ return name
+ mtyp <- optionMaybe $ do
+ symbol "::"
+ typ <- pType
+ whitespace >> void newline
+ emptyLines
+ func' <- fst <$> pName0
+ guard (fst func == func')
+ return typ
+ args <- many pName
+ symbol "="
+ expr <- pExpr
+ return (Function mtyp func args expr)
+
+pType :: Parser Type
+pType = chainr1 pTypeAtom (symbol "->" >> return TFun)
+
+pTypeAtom :: Parser Type
+pTypeAtom = (wordToken "Int" >> return TInt) <|> between (token "(") (token ")") pType
+
+pExpr :: Parser Expr
+pExpr = lab "expression" $ do
+ atoms <- many1 pExprAtom
+ return (foldl1 (\a b -> Call (mergeRange (range a) (range b)) a b) atoms)
+
+pExprAtom :: Parser Expr
+pExprAtom =
+ choice [ uncurry (flip Int) <$> pInt
+ , uncurry (flip Var) <$> pName
+ , between (token "(") (token ")") pExpr ]
+
+pInt :: Parser (Int, SourceRange)
+pInt = try (whitespace >> pInt0)
+ where
+ pInt0 = do
+ p1 <- getPosition
+ num <- read <$> many1 digit
+ p2 <- getPosition
+ return (num, SourceRange p1 p2)
+
+pName0 :: Parser (Name, SourceRange)
+pName0 = do
+ p1 <- getPosition
+ c <- pWordFirstChar
+ cs <- many pWordMidChar
+ p2 <- getPosition
+ notFollowedBy pWordMidChar
+ return (c : cs, SourceRange p1 p2)
+
+pWordFirstChar :: Parser Char
+pWordFirstChar = letter <|> oneOf "_$#!"
+
+pWordMidChar :: Parser Char
+pWordMidChar = alphaNum <|> oneOf "_$#!"
+
+pName :: Parser (Name, SourceRange)
+pName = try (whitespace >> pName0)
+
+symbol :: String -> Parser ()
+symbol s = token s >> (eof <|> void space <|> void (oneOf "(){}[]"))
+
+wordToken :: String -> Parser ()
+wordToken s = token s >> notFollowedBy pWordMidChar
+
+token :: String -> Parser ()
+token s = try (whitespace >> void (string s))
+
+emptyLines :: Parser ()
+emptyLines = (try (whitespace >> newline) >> emptyLines) <|> try (whitespace >> eof) <|> return ()
+
+whitespace :: Parser ()
+whitespace = void (many (char ' '))
+
+lab :: String -> Parser a -> Parser a
+lab = flip label
+
+getPosition :: Parser SourcePos
+getPosition = do
+ pos <- Text.Parsec.getPosition
+ return (SourcePos (sourceLine pos) (sourceColumn pos))
diff --git a/typecheck/CC/Typecheck.hs b/typecheck/CC/Typecheck.hs
new file mode 100644
index 0000000..47f42e3
--- /dev/null
+++ b/typecheck/CC/Typecheck.hs
@@ -0,0 +1,83 @@
+module CC.Typecheck(runPass) where
+
+import Control.Monad.State.Strict
+import Data.List (intersect)
+import qualified Data.Map.Strict as Map
+
+import CC.Pretty
+import CC.Source
+import CC.Typed
+
+
+data TypeError = TypeError SourceRange TypeT TypeT
+ deriving (Show)
+
+instance Pretty TypeError where
+ pretty (TypeError sr real expect) =
+ "Type error: Expression at " ++ pretty sr ++ " has type " ++ pretty real ++
+ ", but should have type " ++ pretty expect
+
+type IdSupplyT m a = StateT Int m a
+
+genId :: Monad m => IdSupplyT m Int
+genId = state (\idval -> (idval, idval + 1))
+
+genTyVar :: Monad m => IdSupplyT m TypeT
+genTyVar = TyVar <$> genId
+
+type TM a = IdSupplyT (Either TypeError) a
+
+runTM :: TM a -> Either TypeError a
+runTM m = evalStateT m 1
+
+
+runPass :: Context -> Program -> Either TypeError ProgramT
+runPass _ prog = runTM (typeCheck prog)
+
+typeCheck :: Program -> TM ProgramT
+typeCheck (Program decls) = ProgramT <$> mapM typeCheckDL decls
+
+typeCheckDL :: Decl -> TM DeclT
+typeCheckDL (Def def) = DefT <$> typeCheckD def
+
+typeCheckD :: Def -> TM DefT
+typeCheckD (Function mt (fname, fnameR) args body) = do
+ (body', _) <- typeCheckE body
+ return (FunctionT (exprType body') fname (map fst args) body')
+
+typeCheckE :: Expr -> TM (ExprT, Mapping)
+typeCheckE (Call sr func arg) = do
+ (func', m1) <- typeCheckE func
+ (arg', m2) <- typeCheckE arg
+ m <- combine m1 m2
+
+ let functype = exprType func'
+ argtype = exprType arg'
+ tvar <- genTyVar
+ apply <- unify (range func) functype (TFunT tvar argtype)
+ let restype = TFunT (apply tvar) (apply argtype)
+ func'' = down apply func'
+ arg'' = down apply arg'
+
+ return (CallT restype func'' arg'')
+typeCheckE (Int _ val) = return (IntT val, mempty)
+typeCheckE (Var _ name) = VarT . Occ name <$> genTyVar
+
+-- For each variable, its inferred type and the position of its first
+-- occurrence in a program fragment.
+type Mapping = Map.Map Name (TypeT, SourceRange)
+
+combine :: Mapping -> Mapping -> TM Mapping
+combine mp1 mp2 = do
+ let leftmap = Map.filterWithKey (\name _ -> not (Map.member name mp2)) mp1
+ rightmap = Map.filterWithKey (\name _ -> not (Map.member name mp1)) mp2
+ overlap = Map.keys mp1 `intersect` Map.keys mp2
+ combine1 name (t1, sr1) (t2, sr2)
+ | t1 == t2 = Right (t1, sr1)
+ | otherwise = Left (TypeError sr2 t2 t1)
+ midpairs <- sequence [combine1 name (mp1 Map.! name) (mp2 Map.! name)
+ | name <- overlap]
+ return (Map.unions [leftmap, rightmap, Map.fromList midpairs])
+
+unify :: SourceRange -> TypeT -> TypeT -> TM (TypeT -> TypeT)
+unify = undefined
diff --git a/utils/CC/IdSupply.hs b/utils/CC/IdSupply.hs
new file mode 100644
index 0000000..234f6cc
--- /dev/null
+++ b/utils/CC/IdSupply.hs
@@ -0,0 +1,29 @@
+module CC.IdSupply(IdSupply, runIdSupply, genId) where
+
+import Control.Monad.Trans
+
+
+data IdSupply a = IdSupply (Int -> (Int, a))
+
+instance Functor IdSupply where
+ fmap f (IdSupply act) = IdSupply (fmap f . act)
+
+instance Applicative IdSupply where
+ pure x = IdSupply (\idval -> (idval, x))
+ IdSupply f <*> IdSupply x =
+ IdSupply (\idval -> let (idval', f') = f idval
+ in f' <$> x idval')
+
+instance Monad IdSupply where
+ IdSupply x >>= f =
+ IdSupply (\idval -> let (idval', x') = x idval
+ IdSupply res = f x'
+ in res idval')
+
+instance MonadTrans
+
+runIdSupply :: Int -> IdSupply a -> a
+runIdSupply startid (IdSupply f) = snd (f startid)
+
+genId :: IdSupply Int
+genId = IdSupply (\idval -> (idval + 1, idval))
diff --git a/utils/CC/Pretty.hs b/utils/CC/Pretty.hs
new file mode 100644
index 0000000..0a41abe
--- /dev/null
+++ b/utils/CC/Pretty.hs
@@ -0,0 +1,9 @@
+module CC.Pretty where
+
+
+class Pretty a where
+ pretty :: a -> String
+
+
+instance Pretty Int where
+ pretty = show
diff --git a/utils/CC/Types.hs b/utils/CC/Types.hs
new file mode 100644
index 0000000..dc1b0e5
--- /dev/null
+++ b/utils/CC/Types.hs
@@ -0,0 +1,44 @@
+module CC.Types where
+
+import CC.Pretty
+
+
+-- | Names of variables in the program
+type Name = String
+
+-- | Source metadata for compilation
+data Context = Context FilePath
+
+-- | Position in a source file; `SourcePos line column`, both zero-based
+data SourcePos = SourcePos Int Int
+ deriving (Show, Read, Eq, Ord)
+
+instance Pretty SourcePos where
+ pretty (SourcePos line col) = show (line + 1) ++ ":" ++ show (col + 1)
+
+-- | A range in the original source code (for diagnostics and debug
+-- information); [from, to).
+data SourceRange = SourceRange SourcePos SourcePos
+ deriving (Show, Read)
+
+instance Pretty SourceRange where
+ pretty (SourceRange from@(SourcePos fromLine fromCol) to@(SourcePos toLine toCol))
+ | fromLine == toLine =
+ show (fromLine + 1) ++ ":" ++ show (fromCol + 1) ++ "-" ++ show (toCol + 1)
+ | otherwise =
+ show from ++ "-" ++ show to
+
+class HasRange a where
+ range :: a -> SourceRange
+
+mergeRange :: SourceRange -> SourceRange -> SourceRange
+mergeRange (SourceRange p1 p2) (SourceRange q1 q2) = SourceRange (min p1 q1) (max p2 q2)
+
+-- | A newtype with no-op Read and Show instances
+newtype RawString = RawString String
+
+instance Read RawString where
+ readsPrec _ s = [(RawString s, "")]
+
+instance Show RawString where
+ show (RawString s) = s