summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Smeding <tom@tomsmeding.com>2022-04-21 00:28:51 +0200
committerTom Smeding <tom@tomsmeding.com>2022-04-21 00:28:51 +0200
commite6ca9f166c5af915946bb8ae5ed7e5a42f40b8bd (patch)
treef7f6af4edca512c2a6de0aed094aec5d5d10582f
Initial
-rw-r--r--.gitignore1
-rw-r--r--cbits/hook.c176
-rw-r--r--ghc-gc-hook.cabal33
-rw-r--r--src/GHC/GC_Hook.hs130
-rw-r--r--test/Main.hs14
5 files changed, 354 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c33954f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+dist-newstyle/
diff --git a/cbits/hook.c b/cbits/hook.c
new file mode 100644
index 0000000..e05ccd4
--- /dev/null
+++ b/cbits/hook.c
@@ -0,0 +1,176 @@
+#include "Rts.h"
+#include <string.h>
+
+// needs C11
+#include <threads.h>
+
+static size_t min_sz(size_t a, size_t b) {
+ return a < b ? a : b;
+}
+
+extern RtsConfig rtsConfig;
+
+// A copy of GCDetails_ with known structure that can be depended on by the Haskell code.
+struct ShadowDetails {
+ // The generation number of this GC
+ uint32_t gen;
+ // Number of threads used in this GC
+ uint32_t threads;
+ // Number of bytes allocated since the previous GC
+ uint64_t allocated_bytes;
+ // Total amount of live data in the heap (incliudes large + compact data).
+ // Updated after every GC. Data in uncollected generations (in minor GCs)
+ // are considered live.
+ uint64_t live_bytes;
+ // Total amount of live data in large objects
+ uint64_t large_objects_bytes;
+ // Total amount of live data in compact regions
+ uint64_t compact_bytes;
+ // Total amount of slop (wasted memory)
+ uint64_t slop_bytes;
+ // Total amount of memory in use by the RTS
+ uint64_t mem_in_use_bytes;
+ // Total amount of data copied during this GC
+ uint64_t copied_bytes;
+ // In parallel GC, the max amount of data copied by any one thread
+ uint64_t par_max_copied_bytes;
+ // In parallel GC, the amount of balanced data copied by all threads
+ uint64_t par_balanced_copied_bytes;
+ // The time elapsed during synchronisation before GC
+ // NOTE: nanoseconds!
+ uint64_t sync_elapsed_ns;
+ // The CPU time used during GC itself
+ // NOTE: nanoseconds!
+ uint64_t cpu_ns;
+ // The time elapsed during GC itself
+ // NOTE: nanoseconds!
+ uint64_t elapsed_ns;
+
+ // Concurrent garbage collector
+
+ // The CPU time used during the post-mark pause phase of the concurrent
+ // nonmoving GC.
+ // NOTE: nanoseconds!
+ uint64_t nonmoving_gc_sync_cpu_ns;
+ // The time elapsed during the post-mark pause phase of the concurrent
+ // nonmoving GC.
+ // NOTE: nanoseconds!
+ uint64_t nonmoving_gc_sync_elapsed_ns;
+ // The CPU time used during the post-mark pause phase of the concurrent
+ // nonmoving GC.
+ // NOTE: nanoseconds!
+ uint64_t nonmoving_gc_cpu_ns;
+ // The time elapsed during the post-mark pause phase of the concurrent
+ // nonmoving GC.
+ // NOTE: nanoseconds!
+ uint64_t nonmoving_gc_elapsed_ns;
+};
+
+static void shadow_copy(struct ShadowDetails *dst, const struct GCDetails_ *src) {
+#define COPY(field) dst->field = src->field;
+#define COPYTIME(field) dst->field = TimeToNS(src->field);
+ COPY(gen);
+ COPY(threads);
+ COPY(allocated_bytes);
+ COPY(live_bytes);
+ COPY(large_objects_bytes);
+ COPY(compact_bytes);
+ COPY(slop_bytes);
+ COPY(mem_in_use_bytes);
+ COPY(copied_bytes);
+ COPY(par_max_copied_bytes);
+ COPY(par_balanced_copied_bytes);
+ COPYTIME(sync_elapsed_ns);
+ COPYTIME(cpu_ns);
+ COPYTIME(elapsed_ns);
+
+ COPYTIME(nonmoving_gc_sync_cpu_ns);
+ COPYTIME(nonmoving_gc_sync_elapsed_ns);
+ COPYTIME(nonmoving_gc_cpu_ns);
+ COPYTIME(nonmoving_gc_elapsed_ns);
+#undef COPY
+#undef COPYTIME
+}
+
+// --------
+// GLOBAL VARIABLES
+// --------
+
+static void (*old_hook)(const struct GCDetails_ *details) = NULL;
+static mtx_t detlog_mutex;
+static size_t detlog_capacity = 0, detlog_length = 0;
+static struct ShadowDetails *detlog = NULL;
+
+// --------
+// END OF GLOBAL VARIABLES
+// --------
+
+static void hook_callback(const struct GCDetails_ *details) {
+ static bool alloc_failed = false;
+
+ if (alloc_failed) goto cleanup_no_mutex;
+
+ if (mtx_lock(&detlog_mutex) != thrd_success) {
+ fprintf(stderr, "ghc-gc-hook: ERROR: Mutex lock failed\n");
+ alloc_failed = true; // dumb proxy for "don't do anything anymore please"
+ goto cleanup_no_mutex;
+ }
+
+ // mutex is locked from here
+
+ if (detlog_length == detlog_capacity) {
+ detlog_capacity = detlog_capacity == 0 ? 128 : 2 * detlog_capacity;
+ detlog = realloc(detlog, detlog_capacity * sizeof(detlog[0]));
+ if (detlog == NULL || detlog_capacity == 0) { // also check for overflow here
+ fprintf(stderr, "ghc-gc-hook: ERROR: Could not allocate memory for GC log hook\n");
+ alloc_failed = true;
+ goto cleanup;
+ }
+ }
+
+ shadow_copy(&detlog[detlog_length], details);
+ detlog_length++;
+
+cleanup:
+ mtx_unlock(&detlog_mutex); // ignore return value
+
+cleanup_no_mutex:
+ if (old_hook) old_hook(details);
+}
+
+// --------
+// EXPORTED FUNCTIONS
+// --------
+
+void copy_log_to_buffer(size_t space_available, char *buffer, size_t *unit_size, size_t *num_stored) {
+ if (mtx_lock(&detlog_mutex) != thrd_success) {
+ fprintf(stderr, "ghc-gc-hook: ERROR: Mutex lock failed\n");
+ *unit_size = 0;
+ *num_stored = 0;
+ return;
+ }
+
+ const size_t sz = sizeof(detlog[0]);
+ const size_t n = min_sz(space_available / sz, detlog_length);
+
+ // First copy over the fitting items
+ memcpy(buffer, detlog, n * sz);
+ *unit_size = sz;
+ *num_stored = n;
+
+ // Then shift back the remaining items
+ memmove(detlog, detlog + n, (detlog_length - n) * sizeof(detlog[0]));
+ detlog_length -= n;
+
+ mtx_unlock(&detlog_mutex);
+}
+
+void set_gchook() {
+ if (mtx_init(&detlog_mutex, mtx_plain) != thrd_success) {
+ fprintf(stderr, "ghc-gc-hook: ERROR: Mutex initialisation failed\n");
+ return;
+ }
+
+ old_hook = rtsConfig.gcDoneHook;
+ rtsConfig.gcDoneHook = hook_callback;
+}
diff --git a/ghc-gc-hook.cabal b/ghc-gc-hook.cabal
new file mode 100644
index 0000000..6f21e56
--- /dev/null
+++ b/ghc-gc-hook.cabal
@@ -0,0 +1,33 @@
+cabal-version: 2.0
+name: ghc-gc-hook
+synopsis: GHC garbage collection hook
+version: 0.1.0.0
+license: MIT
+author: Tom Smeding
+maintainer: tom@tomsmeding.com
+build-type: Simple
+
+library
+ exposed-modules:
+ GHC.GC_Hook
+ c-sources: cbits/hook.c
+ build-depends:
+ base >= 4.13 && < 4.17
+ hs-source-dirs: src
+ default-language: Haskell2010
+ cc-options: -Wall -O2
+ ghc-options: -Wall -O2
+
+test-suite test
+ type: exitcode-stdio-1.0
+ main-is: Main.hs
+ hs-source-dirs: test
+ build-depends:
+ base >= 4.13 && < 4.17,
+ ghc-gc-hook
+ default-language: Haskell2010
+ ghc-options: -Wall -threaded
+
+source-repository head
+ type: git
+ location: https://git.tomsmeding.com/ghc-gc-hook
diff --git a/src/GHC/GC_Hook.hs b/src/GHC/GC_Hook.hs
new file mode 100644
index 0000000..ea1586c
--- /dev/null
+++ b/src/GHC/GC_Hook.hs
@@ -0,0 +1,130 @@
+{-# LANGUAGE ForeignFunctionInterface #-}
+{-# LANGUAGE TypeApplications #-}
+module GHC.GC_Hook (
+ setGCHook,
+ getGCLog,
+) where
+
+import Control.Monad ((>=>))
+import Data.Word (Word32, Word64)
+import Foreign.C.Types (CChar, CSize(..))
+import Foreign.Marshal.Alloc (alloca, allocaBytes)
+import Foreign.Ptr (Ptr, castPtr, plusPtr)
+import Foreign.Storable (peek)
+
+foreign import ccall "set_gchook" c_set_gchook
+ :: IO ()
+foreign import ccall "copy_log_to_buffer" c_copy_log_to_buffer
+ :: CSize -> Ptr CChar -> Ptr CSize -> Ptr CSize -> IO ()
+
+data Details = Details
+ { -- | The generation number of this GC
+ detGen :: Word32
+ , -- | Number of threads used in this GC
+ detThreads :: Word32
+ , -- | Number of bytes allocated since the previous GC
+ detAllocatedBytes :: Word64
+ , -- | Total amount of live data in the heap (incliudes large + compact data).
+ -- Updated after every GC. Data in uncollected generations (in minor GCs)
+ -- are considered live.
+ detLiveBytes :: Word64
+ , -- | Total amount of live data in large objects
+ detLargeObjectsBytes :: Word64
+ , -- | Total amount of live data in compact regions
+ detCompactBytes :: Word64
+ , -- | Total amount of slop (wasted memory)
+ detSlopBytes :: Word64
+ , -- | Total amount of memory in use by the RTS
+ detMemInUseBytes :: Word64
+ , -- | Total amount of data copied during this GC
+ detCopiedBytes :: Word64
+ , -- | In parallel GC, the max amount of data copied by any one thread
+ detParMaxCopiedBytes :: Word64
+ , -- | In parallel GC, the amount of balanced data copied by all threads
+ detParBalancedCopiedBytes :: Word64
+ , -- | The time elapsed during synchronisation before GC
+ -- NOTE: nanoseconds!
+ detSyncElapsedNs :: Word64
+ , -- | The CPU time used during GC itself
+ -- NOTE: nanoseconds!
+ detCpuNs :: Word64
+ , -- | The time elapsed during GC itself
+ -- NOTE: nanoseconds!
+ detElapsedNs :: Word64
+
+ , -- | Concurrent garbage collector.
+ -- The CPU time used during the post-mark pause phase of the concurrent
+ -- nonmoving GC.
+ -- NOTE: nanoseconds!
+ detNonmovingGcSyncCpuNs :: Word64
+ , -- | Concurrent garbage collector.
+ -- The time elapsed during the post-mark pause phase of the concurrent
+ -- nonmoving GC.
+ -- NOTE: nanoseconds!
+ detNonmovingGcSyncElapsedNs :: Word64
+ , -- | Concurrent garbage collector.
+ -- The CPU time used during the post-mark pause phase of the concurrent
+ -- nonmoving GC.
+ -- NOTE: nanoseconds!
+ detNonmovingGcCpuNs :: Word64
+ , -- | Concurrent garbage collector.
+ -- The time elapsed during the post-mark pause phase of the concurrent
+ -- nonmoving GC.
+ -- NOTE: nanoseconds!
+ detNonmovingGcElapsedNs :: Word64
+ }
+ deriving (Show)
+
+zeroDetails :: Details
+zeroDetails = Details 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+setGCHook :: IO ()
+setGCHook = do
+ c_set_gchook
+
+getGCLog :: IO [Details]
+getGCLog =
+ let bufferCapacity = 2048
+ in allocaBytes bufferCapacity $ \pbuffer ->
+ alloca $ \punitsize ->
+ alloca $ \pnumstored -> do
+ c_copy_log_to_buffer (fromIntegral @Int @CSize bufferCapacity) pbuffer punitsize pnumstored
+ unitsize <- fromIntegral @CSize @Int <$> peek punitsize
+ numstored <- fromIntegral @CSize @Int <$> peek pnumstored
+ sequence [peekDetails unitsize (pbuffer `plusPtr` (i * unitsize))
+ | i <- [0 .. numstored - 1]]
+
+peekDetails :: Int -> Ptr a -> IO Details
+peekDetails unitsize startptr =
+ let getField :: Int -> (Int, Ptr a -> Details -> IO Details)
+ -> Details -> IO Details
+ getField offset (_, fun) = fun (startptr `plusPtr` offset)
+ in if last offsets == unitsize
+ then foldr (>=>) return (zipWith getField offsets fields) zeroDetails
+ else error "hook.c not compatible with GC_Hook.hs, ShadowDetails mismatch"
+ where
+ fields :: [(Int, Ptr a -> Details -> IO Details)]
+ fields =
+ [(4, peekModify $ \d x -> d { detGen = x })
+ ,(4, peekModify $ \d x -> d { detThreads = x })
+ ,(8, peekModify $ \d x -> d { detAllocatedBytes = x })
+ ,(8, peekModify $ \d x -> d { detLiveBytes = x })
+ ,(8, peekModify $ \d x -> d { detLargeObjectsBytes = x })
+ ,(8, peekModify $ \d x -> d { detCompactBytes = x })
+ ,(8, peekModify $ \d x -> d { detSlopBytes = x })
+ ,(8, peekModify $ \d x -> d { detMemInUseBytes = x })
+ ,(8, peekModify $ \d x -> d { detCopiedBytes = x })
+ ,(8, peekModify $ \d x -> d { detParMaxCopiedBytes = x })
+ ,(8, peekModify $ \d x -> d { detParBalancedCopiedBytes = x })
+ ,(8, peekModify $ \d x -> d { detSyncElapsedNs = x })
+ ,(8, peekModify $ \d x -> d { detCpuNs = x })
+ ,(8, peekModify $ \d x -> d { detElapsedNs = x })
+ ,(8, peekModify $ \d x -> d { detNonmovingGcSyncCpuNs = x })
+ ,(8, peekModify $ \d x -> d { detNonmovingGcSyncElapsedNs = x })
+ ,(8, peekModify $ \d x -> d { detNonmovingGcCpuNs = x })
+ ,(8, peekModify $ \d x -> d { detNonmovingGcElapsedNs = x })
+ ]
+ where peekModify g p d = peek (castPtr p) >>= \x -> return (g d x)
+
+ offsets :: [Int]
+ offsets = scanl (+) 0 (map fst fields)
diff --git a/test/Main.hs b/test/Main.hs
new file mode 100644
index 0000000..c813e48
--- /dev/null
+++ b/test/Main.hs
@@ -0,0 +1,14 @@
+module Main where
+
+import Control.Monad (forM_)
+
+import GHC.GC_Hook
+
+
+main :: IO ()
+main = do
+ setGCHook
+ forM_ [1..10] $ \i -> do
+ let l = [i..10000]
+ print (sum l + product l + length l)
+ getGCLog >>= print