diff options
| -rw-r--r-- | .gitignore | 1 | ||||
| -rw-r--r-- | cbits/hook.c | 176 | ||||
| -rw-r--r-- | ghc-gc-hook.cabal | 33 | ||||
| -rw-r--r-- | src/GHC/GC_Hook.hs | 130 | ||||
| -rw-r--r-- | test/Main.hs | 14 | 
5 files changed, 354 insertions, 0 deletions
| diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c33954f --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +dist-newstyle/ diff --git a/cbits/hook.c b/cbits/hook.c new file mode 100644 index 0000000..e05ccd4 --- /dev/null +++ b/cbits/hook.c @@ -0,0 +1,176 @@ +#include "Rts.h" +#include <string.h> + +// needs C11 +#include <threads.h> + +static size_t min_sz(size_t a, size_t b) { +	return a < b ? a : b; +} + +extern RtsConfig rtsConfig; + +// A copy of GCDetails_ with known structure that can be depended on by the Haskell code. +struct ShadowDetails { +	// The generation number of this GC +	uint32_t gen; +	// Number of threads used in this GC +	uint32_t threads; +	// Number of bytes allocated since the previous GC +	uint64_t allocated_bytes; +	// Total amount of live data in the heap (incliudes large + compact data). +	// Updated after every GC. Data in uncollected generations (in minor GCs) +	// are considered live. +	uint64_t live_bytes; +	// Total amount of live data in large objects +	uint64_t large_objects_bytes; +	// Total amount of live data in compact regions +	uint64_t compact_bytes; +	// Total amount of slop (wasted memory) +	uint64_t slop_bytes; +	// Total amount of memory in use by the RTS +	uint64_t mem_in_use_bytes; +	// Total amount of data copied during this GC +	uint64_t copied_bytes; +	// In parallel GC, the max amount of data copied by any one thread +	uint64_t par_max_copied_bytes; +	// In parallel GC, the amount of balanced data copied by all threads +	uint64_t par_balanced_copied_bytes; +	// The time elapsed during synchronisation before GC +	// NOTE: nanoseconds! +	uint64_t sync_elapsed_ns; +	// The CPU time used during GC itself +	// NOTE: nanoseconds! +	uint64_t cpu_ns; +	// The time elapsed during GC itself +	// NOTE: nanoseconds! +	uint64_t elapsed_ns; + +	// Concurrent garbage collector + +	// The CPU time used during the post-mark pause phase of the concurrent +	// nonmoving GC. +	// NOTE: nanoseconds! +	uint64_t nonmoving_gc_sync_cpu_ns; +	// The time elapsed during the post-mark pause phase of the concurrent +	// nonmoving GC. +	// NOTE: nanoseconds! +	uint64_t nonmoving_gc_sync_elapsed_ns; +	// The CPU time used during the post-mark pause phase of the concurrent +	// nonmoving GC. +	// NOTE: nanoseconds! +	uint64_t nonmoving_gc_cpu_ns; +	// The time elapsed during the post-mark pause phase of the concurrent +	// nonmoving GC. +	// NOTE: nanoseconds! +	uint64_t nonmoving_gc_elapsed_ns; +}; + +static void shadow_copy(struct ShadowDetails *dst, const struct GCDetails_ *src) { +#define COPY(field) dst->field = src->field; +#define COPYTIME(field) dst->field = TimeToNS(src->field); +	COPY(gen); +	COPY(threads); +	COPY(allocated_bytes); +	COPY(live_bytes); +	COPY(large_objects_bytes); +	COPY(compact_bytes); +	COPY(slop_bytes); +	COPY(mem_in_use_bytes); +	COPY(copied_bytes); +	COPY(par_max_copied_bytes); +	COPY(par_balanced_copied_bytes); +	COPYTIME(sync_elapsed_ns); +	COPYTIME(cpu_ns); +	COPYTIME(elapsed_ns); + +	COPYTIME(nonmoving_gc_sync_cpu_ns); +	COPYTIME(nonmoving_gc_sync_elapsed_ns); +	COPYTIME(nonmoving_gc_cpu_ns); +	COPYTIME(nonmoving_gc_elapsed_ns); +#undef COPY +#undef COPYTIME +} + +// -------- +// GLOBAL VARIABLES +// -------- + +static void (*old_hook)(const struct GCDetails_ *details) = NULL; +static mtx_t detlog_mutex; +static size_t detlog_capacity = 0, detlog_length = 0; +static struct ShadowDetails *detlog = NULL; + +// -------- +// END OF GLOBAL VARIABLES +// -------- + +static void hook_callback(const struct GCDetails_ *details) { +	static bool alloc_failed = false; + +	if (alloc_failed) goto cleanup_no_mutex; + +	if (mtx_lock(&detlog_mutex) != thrd_success) { +		fprintf(stderr, "ghc-gc-hook: ERROR: Mutex lock failed\n"); +		alloc_failed = true;  // dumb proxy for "don't do anything anymore please" +		goto cleanup_no_mutex; +	} + +	// mutex is locked from here + +	if (detlog_length == detlog_capacity) { +		detlog_capacity = detlog_capacity == 0 ? 128 : 2 * detlog_capacity; +		detlog = realloc(detlog, detlog_capacity * sizeof(detlog[0])); +		if (detlog == NULL || detlog_capacity == 0) {  // also check for overflow here +			fprintf(stderr, "ghc-gc-hook: ERROR: Could not allocate memory for GC log hook\n"); +			alloc_failed = true; +			goto cleanup; +		} +	} + +	shadow_copy(&detlog[detlog_length], details); +	detlog_length++; + +cleanup: +	mtx_unlock(&detlog_mutex);  // ignore return value + +cleanup_no_mutex: +	if (old_hook) old_hook(details); +} + +// -------- +// EXPORTED FUNCTIONS +// -------- + +void copy_log_to_buffer(size_t space_available, char *buffer, size_t *unit_size, size_t *num_stored) { +	if (mtx_lock(&detlog_mutex) != thrd_success) { +		fprintf(stderr, "ghc-gc-hook: ERROR: Mutex lock failed\n"); +		*unit_size = 0; +		*num_stored = 0; +		return; +	} + +	const size_t sz = sizeof(detlog[0]); +	const size_t n = min_sz(space_available / sz, detlog_length); + +	// First copy over the fitting items +	memcpy(buffer, detlog, n * sz); +	*unit_size = sz; +	*num_stored = n; + +	// Then shift back the remaining items +	memmove(detlog, detlog + n, (detlog_length - n) * sizeof(detlog[0])); +	detlog_length -= n; + +	mtx_unlock(&detlog_mutex); +} + +void set_gchook() { +	if (mtx_init(&detlog_mutex, mtx_plain) != thrd_success) { +		fprintf(stderr, "ghc-gc-hook: ERROR: Mutex initialisation failed\n"); +		return; +	} + +	old_hook = rtsConfig.gcDoneHook; +	rtsConfig.gcDoneHook = hook_callback; +} diff --git a/ghc-gc-hook.cabal b/ghc-gc-hook.cabal new file mode 100644 index 0000000..6f21e56 --- /dev/null +++ b/ghc-gc-hook.cabal @@ -0,0 +1,33 @@ +cabal-version:       2.0 +name:                ghc-gc-hook +synopsis:            GHC garbage collection hook +version:             0.1.0.0 +license:             MIT +author:              Tom Smeding +maintainer:          tom@tomsmeding.com +build-type:          Simple + +library +  exposed-modules: +    GHC.GC_Hook +  c-sources: cbits/hook.c +  build-depends: +    base >= 4.13 && < 4.17 +  hs-source-dirs: src +  default-language: Haskell2010 +  cc-options: -Wall -O2 +  ghc-options: -Wall -O2 + +test-suite test +  type: exitcode-stdio-1.0 +  main-is: Main.hs +  hs-source-dirs: test +  build-depends: +    base >= 4.13 && < 4.17, +    ghc-gc-hook +  default-language: Haskell2010 +  ghc-options: -Wall -threaded + +source-repository head +  type: git +  location: https://git.tomsmeding.com/ghc-gc-hook diff --git a/src/GHC/GC_Hook.hs b/src/GHC/GC_Hook.hs new file mode 100644 index 0000000..ea1586c --- /dev/null +++ b/src/GHC/GC_Hook.hs @@ -0,0 +1,130 @@ +{-# LANGUAGE ForeignFunctionInterface #-} +{-# LANGUAGE TypeApplications #-} +module GHC.GC_Hook ( +  setGCHook, +  getGCLog, +) where + +import Control.Monad ((>=>)) +import Data.Word (Word32, Word64) +import Foreign.C.Types (CChar, CSize(..)) +import Foreign.Marshal.Alloc (alloca, allocaBytes) +import Foreign.Ptr (Ptr, castPtr, plusPtr) +import Foreign.Storable (peek) + +foreign import ccall "set_gchook" c_set_gchook +  :: IO () +foreign import ccall "copy_log_to_buffer" c_copy_log_to_buffer +  :: CSize -> Ptr CChar -> Ptr CSize -> Ptr CSize -> IO () + +data Details = Details +  { -- | The generation number of this GC +    detGen :: Word32 +  , -- | Number of threads used in this GC +    detThreads :: Word32 +  , -- | Number of bytes allocated since the previous GC +    detAllocatedBytes :: Word64 +  , -- | Total amount of live data in the heap (incliudes large + compact data). +    -- Updated after every GC. Data in uncollected generations (in minor GCs) +    -- are considered live. +    detLiveBytes :: Word64 +  , -- | Total amount of live data in large objects +    detLargeObjectsBytes :: Word64 +  , -- | Total amount of live data in compact regions +    detCompactBytes :: Word64 +  , -- | Total amount of slop (wasted memory) +    detSlopBytes :: Word64 +  , -- | Total amount of memory in use by the RTS +    detMemInUseBytes :: Word64 +  , -- | Total amount of data copied during this GC +    detCopiedBytes :: Word64 +  , -- | In parallel GC, the max amount of data copied by any one thread +    detParMaxCopiedBytes :: Word64 +  , -- | In parallel GC, the amount of balanced data copied by all threads +    detParBalancedCopiedBytes :: Word64 +  , -- | The time elapsed during synchronisation before GC +    -- NOTE: nanoseconds! +    detSyncElapsedNs :: Word64 +  , -- | The CPU time used during GC itself +    -- NOTE: nanoseconds! +    detCpuNs :: Word64 +  , -- | The time elapsed during GC itself +    -- NOTE: nanoseconds! +    detElapsedNs :: Word64 + +  , -- | Concurrent garbage collector. +    -- The CPU time used during the post-mark pause phase of the concurrent +    -- nonmoving GC. +    -- NOTE: nanoseconds! +    detNonmovingGcSyncCpuNs :: Word64 +  , -- | Concurrent garbage collector. +    -- The time elapsed during the post-mark pause phase of the concurrent +    -- nonmoving GC. +    -- NOTE: nanoseconds! +    detNonmovingGcSyncElapsedNs :: Word64 +  , -- | Concurrent garbage collector. +    -- The CPU time used during the post-mark pause phase of the concurrent +    -- nonmoving GC. +    -- NOTE: nanoseconds! +    detNonmovingGcCpuNs :: Word64 +  , -- | Concurrent garbage collector. +    -- The time elapsed during the post-mark pause phase of the concurrent +    -- nonmoving GC. +    -- NOTE: nanoseconds! +    detNonmovingGcElapsedNs :: Word64 +  } +  deriving (Show) + +zeroDetails :: Details +zeroDetails = Details 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +setGCHook :: IO () +setGCHook = do +  c_set_gchook + +getGCLog :: IO [Details] +getGCLog = +  let bufferCapacity = 2048 +  in allocaBytes bufferCapacity $ \pbuffer -> +     alloca $ \punitsize -> +     alloca $ \pnumstored -> do +       c_copy_log_to_buffer (fromIntegral @Int @CSize bufferCapacity) pbuffer punitsize pnumstored +       unitsize <- fromIntegral @CSize @Int <$> peek punitsize +       numstored <- fromIntegral @CSize @Int <$> peek pnumstored +       sequence [peekDetails unitsize (pbuffer `plusPtr` (i * unitsize)) +                | i <- [0 .. numstored - 1]] + +peekDetails :: Int -> Ptr a -> IO Details +peekDetails unitsize startptr = +  let getField :: Int -> (Int, Ptr a -> Details -> IO Details) +               -> Details -> IO Details +      getField offset (_, fun) = fun (startptr `plusPtr` offset) +  in if last offsets == unitsize +       then foldr (>=>) return (zipWith getField offsets fields) zeroDetails +       else error "hook.c not compatible with GC_Hook.hs, ShadowDetails mismatch" +  where +    fields :: [(Int, Ptr a -> Details -> IO Details)] +    fields = +      [(4, peekModify $ \d x -> d { detGen = x }) +      ,(4, peekModify $ \d x -> d { detThreads = x }) +      ,(8, peekModify $ \d x -> d { detAllocatedBytes = x }) +      ,(8, peekModify $ \d x -> d { detLiveBytes = x }) +      ,(8, peekModify $ \d x -> d { detLargeObjectsBytes = x }) +      ,(8, peekModify $ \d x -> d { detCompactBytes = x }) +      ,(8, peekModify $ \d x -> d { detSlopBytes = x }) +      ,(8, peekModify $ \d x -> d { detMemInUseBytes = x }) +      ,(8, peekModify $ \d x -> d { detCopiedBytes = x }) +      ,(8, peekModify $ \d x -> d { detParMaxCopiedBytes = x }) +      ,(8, peekModify $ \d x -> d { detParBalancedCopiedBytes = x }) +      ,(8, peekModify $ \d x -> d { detSyncElapsedNs = x }) +      ,(8, peekModify $ \d x -> d { detCpuNs = x }) +      ,(8, peekModify $ \d x -> d { detElapsedNs = x }) +      ,(8, peekModify $ \d x -> d { detNonmovingGcSyncCpuNs = x }) +      ,(8, peekModify $ \d x -> d { detNonmovingGcSyncElapsedNs = x }) +      ,(8, peekModify $ \d x -> d { detNonmovingGcCpuNs = x }) +      ,(8, peekModify $ \d x -> d { detNonmovingGcElapsedNs = x }) +      ] +      where peekModify g p d = peek (castPtr p) >>= \x -> return (g d x) + +    offsets :: [Int] +    offsets = scanl (+) 0 (map fst fields) diff --git a/test/Main.hs b/test/Main.hs new file mode 100644 index 0000000..c813e48 --- /dev/null +++ b/test/Main.hs @@ -0,0 +1,14 @@ +module Main where + +import Control.Monad (forM_) + +import GHC.GC_Hook + + +main :: IO () +main = do +  setGCHook +  forM_ [1..10] $ \i -> do +    let l = [i..10000] +    print (sum l + product l + length l) +  getGCLog >>= print | 
