From e6ca9f166c5af915946bb8ae5ed7e5a42f40b8bd Mon Sep 17 00:00:00 2001 From: Tom Smeding Date: Thu, 21 Apr 2022 00:28:51 +0200 Subject: Initial --- .gitignore | 1 + cbits/hook.c | 176 +++++++++++++++++++++++++++++++++++++++++++++++++++++ ghc-gc-hook.cabal | 33 ++++++++++ src/GHC/GC_Hook.hs | 130 +++++++++++++++++++++++++++++++++++++++ test/Main.hs | 14 +++++ 5 files changed, 354 insertions(+) create mode 100644 .gitignore create mode 100644 cbits/hook.c create mode 100644 ghc-gc-hook.cabal create mode 100644 src/GHC/GC_Hook.hs create mode 100644 test/Main.hs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c33954f --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +dist-newstyle/ diff --git a/cbits/hook.c b/cbits/hook.c new file mode 100644 index 0000000..e05ccd4 --- /dev/null +++ b/cbits/hook.c @@ -0,0 +1,176 @@ +#include "Rts.h" +#include + +// needs C11 +#include + +static size_t min_sz(size_t a, size_t b) { + return a < b ? a : b; +} + +extern RtsConfig rtsConfig; + +// A copy of GCDetails_ with known structure that can be depended on by the Haskell code. +struct ShadowDetails { + // The generation number of this GC + uint32_t gen; + // Number of threads used in this GC + uint32_t threads; + // Number of bytes allocated since the previous GC + uint64_t allocated_bytes; + // Total amount of live data in the heap (incliudes large + compact data). + // Updated after every GC. Data in uncollected generations (in minor GCs) + // are considered live. + uint64_t live_bytes; + // Total amount of live data in large objects + uint64_t large_objects_bytes; + // Total amount of live data in compact regions + uint64_t compact_bytes; + // Total amount of slop (wasted memory) + uint64_t slop_bytes; + // Total amount of memory in use by the RTS + uint64_t mem_in_use_bytes; + // Total amount of data copied during this GC + uint64_t copied_bytes; + // In parallel GC, the max amount of data copied by any one thread + uint64_t par_max_copied_bytes; + // In parallel GC, the amount of balanced data copied by all threads + uint64_t par_balanced_copied_bytes; + // The time elapsed during synchronisation before GC + // NOTE: nanoseconds! + uint64_t sync_elapsed_ns; + // The CPU time used during GC itself + // NOTE: nanoseconds! + uint64_t cpu_ns; + // The time elapsed during GC itself + // NOTE: nanoseconds! + uint64_t elapsed_ns; + + // Concurrent garbage collector + + // The CPU time used during the post-mark pause phase of the concurrent + // nonmoving GC. + // NOTE: nanoseconds! + uint64_t nonmoving_gc_sync_cpu_ns; + // The time elapsed during the post-mark pause phase of the concurrent + // nonmoving GC. + // NOTE: nanoseconds! + uint64_t nonmoving_gc_sync_elapsed_ns; + // The CPU time used during the post-mark pause phase of the concurrent + // nonmoving GC. + // NOTE: nanoseconds! + uint64_t nonmoving_gc_cpu_ns; + // The time elapsed during the post-mark pause phase of the concurrent + // nonmoving GC. + // NOTE: nanoseconds! + uint64_t nonmoving_gc_elapsed_ns; +}; + +static void shadow_copy(struct ShadowDetails *dst, const struct GCDetails_ *src) { +#define COPY(field) dst->field = src->field; +#define COPYTIME(field) dst->field = TimeToNS(src->field); + COPY(gen); + COPY(threads); + COPY(allocated_bytes); + COPY(live_bytes); + COPY(large_objects_bytes); + COPY(compact_bytes); + COPY(slop_bytes); + COPY(mem_in_use_bytes); + COPY(copied_bytes); + COPY(par_max_copied_bytes); + COPY(par_balanced_copied_bytes); + COPYTIME(sync_elapsed_ns); + COPYTIME(cpu_ns); + COPYTIME(elapsed_ns); + + COPYTIME(nonmoving_gc_sync_cpu_ns); + COPYTIME(nonmoving_gc_sync_elapsed_ns); + COPYTIME(nonmoving_gc_cpu_ns); + COPYTIME(nonmoving_gc_elapsed_ns); +#undef COPY +#undef COPYTIME +} + +// -------- +// GLOBAL VARIABLES +// -------- + +static void (*old_hook)(const struct GCDetails_ *details) = NULL; +static mtx_t detlog_mutex; +static size_t detlog_capacity = 0, detlog_length = 0; +static struct ShadowDetails *detlog = NULL; + +// -------- +// END OF GLOBAL VARIABLES +// -------- + +static void hook_callback(const struct GCDetails_ *details) { + static bool alloc_failed = false; + + if (alloc_failed) goto cleanup_no_mutex; + + if (mtx_lock(&detlog_mutex) != thrd_success) { + fprintf(stderr, "ghc-gc-hook: ERROR: Mutex lock failed\n"); + alloc_failed = true; // dumb proxy for "don't do anything anymore please" + goto cleanup_no_mutex; + } + + // mutex is locked from here + + if (detlog_length == detlog_capacity) { + detlog_capacity = detlog_capacity == 0 ? 128 : 2 * detlog_capacity; + detlog = realloc(detlog, detlog_capacity * sizeof(detlog[0])); + if (detlog == NULL || detlog_capacity == 0) { // also check for overflow here + fprintf(stderr, "ghc-gc-hook: ERROR: Could not allocate memory for GC log hook\n"); + alloc_failed = true; + goto cleanup; + } + } + + shadow_copy(&detlog[detlog_length], details); + detlog_length++; + +cleanup: + mtx_unlock(&detlog_mutex); // ignore return value + +cleanup_no_mutex: + if (old_hook) old_hook(details); +} + +// -------- +// EXPORTED FUNCTIONS +// -------- + +void copy_log_to_buffer(size_t space_available, char *buffer, size_t *unit_size, size_t *num_stored) { + if (mtx_lock(&detlog_mutex) != thrd_success) { + fprintf(stderr, "ghc-gc-hook: ERROR: Mutex lock failed\n"); + *unit_size = 0; + *num_stored = 0; + return; + } + + const size_t sz = sizeof(detlog[0]); + const size_t n = min_sz(space_available / sz, detlog_length); + + // First copy over the fitting items + memcpy(buffer, detlog, n * sz); + *unit_size = sz; + *num_stored = n; + + // Then shift back the remaining items + memmove(detlog, detlog + n, (detlog_length - n) * sizeof(detlog[0])); + detlog_length -= n; + + mtx_unlock(&detlog_mutex); +} + +void set_gchook() { + if (mtx_init(&detlog_mutex, mtx_plain) != thrd_success) { + fprintf(stderr, "ghc-gc-hook: ERROR: Mutex initialisation failed\n"); + return; + } + + old_hook = rtsConfig.gcDoneHook; + rtsConfig.gcDoneHook = hook_callback; +} diff --git a/ghc-gc-hook.cabal b/ghc-gc-hook.cabal new file mode 100644 index 0000000..6f21e56 --- /dev/null +++ b/ghc-gc-hook.cabal @@ -0,0 +1,33 @@ +cabal-version: 2.0 +name: ghc-gc-hook +synopsis: GHC garbage collection hook +version: 0.1.0.0 +license: MIT +author: Tom Smeding +maintainer: tom@tomsmeding.com +build-type: Simple + +library + exposed-modules: + GHC.GC_Hook + c-sources: cbits/hook.c + build-depends: + base >= 4.13 && < 4.17 + hs-source-dirs: src + default-language: Haskell2010 + cc-options: -Wall -O2 + ghc-options: -Wall -O2 + +test-suite test + type: exitcode-stdio-1.0 + main-is: Main.hs + hs-source-dirs: test + build-depends: + base >= 4.13 && < 4.17, + ghc-gc-hook + default-language: Haskell2010 + ghc-options: -Wall -threaded + +source-repository head + type: git + location: https://git.tomsmeding.com/ghc-gc-hook diff --git a/src/GHC/GC_Hook.hs b/src/GHC/GC_Hook.hs new file mode 100644 index 0000000..ea1586c --- /dev/null +++ b/src/GHC/GC_Hook.hs @@ -0,0 +1,130 @@ +{-# LANGUAGE ForeignFunctionInterface #-} +{-# LANGUAGE TypeApplications #-} +module GHC.GC_Hook ( + setGCHook, + getGCLog, +) where + +import Control.Monad ((>=>)) +import Data.Word (Word32, Word64) +import Foreign.C.Types (CChar, CSize(..)) +import Foreign.Marshal.Alloc (alloca, allocaBytes) +import Foreign.Ptr (Ptr, castPtr, plusPtr) +import Foreign.Storable (peek) + +foreign import ccall "set_gchook" c_set_gchook + :: IO () +foreign import ccall "copy_log_to_buffer" c_copy_log_to_buffer + :: CSize -> Ptr CChar -> Ptr CSize -> Ptr CSize -> IO () + +data Details = Details + { -- | The generation number of this GC + detGen :: Word32 + , -- | Number of threads used in this GC + detThreads :: Word32 + , -- | Number of bytes allocated since the previous GC + detAllocatedBytes :: Word64 + , -- | Total amount of live data in the heap (incliudes large + compact data). + -- Updated after every GC. Data in uncollected generations (in minor GCs) + -- are considered live. + detLiveBytes :: Word64 + , -- | Total amount of live data in large objects + detLargeObjectsBytes :: Word64 + , -- | Total amount of live data in compact regions + detCompactBytes :: Word64 + , -- | Total amount of slop (wasted memory) + detSlopBytes :: Word64 + , -- | Total amount of memory in use by the RTS + detMemInUseBytes :: Word64 + , -- | Total amount of data copied during this GC + detCopiedBytes :: Word64 + , -- | In parallel GC, the max amount of data copied by any one thread + detParMaxCopiedBytes :: Word64 + , -- | In parallel GC, the amount of balanced data copied by all threads + detParBalancedCopiedBytes :: Word64 + , -- | The time elapsed during synchronisation before GC + -- NOTE: nanoseconds! + detSyncElapsedNs :: Word64 + , -- | The CPU time used during GC itself + -- NOTE: nanoseconds! + detCpuNs :: Word64 + , -- | The time elapsed during GC itself + -- NOTE: nanoseconds! + detElapsedNs :: Word64 + + , -- | Concurrent garbage collector. + -- The CPU time used during the post-mark pause phase of the concurrent + -- nonmoving GC. + -- NOTE: nanoseconds! + detNonmovingGcSyncCpuNs :: Word64 + , -- | Concurrent garbage collector. + -- The time elapsed during the post-mark pause phase of the concurrent + -- nonmoving GC. + -- NOTE: nanoseconds! + detNonmovingGcSyncElapsedNs :: Word64 + , -- | Concurrent garbage collector. + -- The CPU time used during the post-mark pause phase of the concurrent + -- nonmoving GC. + -- NOTE: nanoseconds! + detNonmovingGcCpuNs :: Word64 + , -- | Concurrent garbage collector. + -- The time elapsed during the post-mark pause phase of the concurrent + -- nonmoving GC. + -- NOTE: nanoseconds! + detNonmovingGcElapsedNs :: Word64 + } + deriving (Show) + +zeroDetails :: Details +zeroDetails = Details 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +setGCHook :: IO () +setGCHook = do + c_set_gchook + +getGCLog :: IO [Details] +getGCLog = + let bufferCapacity = 2048 + in allocaBytes bufferCapacity $ \pbuffer -> + alloca $ \punitsize -> + alloca $ \pnumstored -> do + c_copy_log_to_buffer (fromIntegral @Int @CSize bufferCapacity) pbuffer punitsize pnumstored + unitsize <- fromIntegral @CSize @Int <$> peek punitsize + numstored <- fromIntegral @CSize @Int <$> peek pnumstored + sequence [peekDetails unitsize (pbuffer `plusPtr` (i * unitsize)) + | i <- [0 .. numstored - 1]] + +peekDetails :: Int -> Ptr a -> IO Details +peekDetails unitsize startptr = + let getField :: Int -> (Int, Ptr a -> Details -> IO Details) + -> Details -> IO Details + getField offset (_, fun) = fun (startptr `plusPtr` offset) + in if last offsets == unitsize + then foldr (>=>) return (zipWith getField offsets fields) zeroDetails + else error "hook.c not compatible with GC_Hook.hs, ShadowDetails mismatch" + where + fields :: [(Int, Ptr a -> Details -> IO Details)] + fields = + [(4, peekModify $ \d x -> d { detGen = x }) + ,(4, peekModify $ \d x -> d { detThreads = x }) + ,(8, peekModify $ \d x -> d { detAllocatedBytes = x }) + ,(8, peekModify $ \d x -> d { detLiveBytes = x }) + ,(8, peekModify $ \d x -> d { detLargeObjectsBytes = x }) + ,(8, peekModify $ \d x -> d { detCompactBytes = x }) + ,(8, peekModify $ \d x -> d { detSlopBytes = x }) + ,(8, peekModify $ \d x -> d { detMemInUseBytes = x }) + ,(8, peekModify $ \d x -> d { detCopiedBytes = x }) + ,(8, peekModify $ \d x -> d { detParMaxCopiedBytes = x }) + ,(8, peekModify $ \d x -> d { detParBalancedCopiedBytes = x }) + ,(8, peekModify $ \d x -> d { detSyncElapsedNs = x }) + ,(8, peekModify $ \d x -> d { detCpuNs = x }) + ,(8, peekModify $ \d x -> d { detElapsedNs = x }) + ,(8, peekModify $ \d x -> d { detNonmovingGcSyncCpuNs = x }) + ,(8, peekModify $ \d x -> d { detNonmovingGcSyncElapsedNs = x }) + ,(8, peekModify $ \d x -> d { detNonmovingGcCpuNs = x }) + ,(8, peekModify $ \d x -> d { detNonmovingGcElapsedNs = x }) + ] + where peekModify g p d = peek (castPtr p) >>= \x -> return (g d x) + + offsets :: [Int] + offsets = scanl (+) 0 (map fst fields) diff --git a/test/Main.hs b/test/Main.hs new file mode 100644 index 0000000..c813e48 --- /dev/null +++ b/test/Main.hs @@ -0,0 +1,14 @@ +module Main where + +import Control.Monad (forM_) + +import GHC.GC_Hook + + +main :: IO () +main = do + setGCHook + forM_ [1..10] $ \i -> do + let l = [i..10000] + print (sum l + product l + length l) + getGCLog >>= print -- cgit v1.2.3-54-g00ecf