From 6276ed3c7bcd20c8b860e1275386ecd068671bcc Mon Sep 17 00:00:00 2001 From: Tom Smeding Date: Fri, 14 Mar 2025 21:57:56 +0100 Subject: Optimise reductions and dotprod with more vectorisation Turns out that if you don't supply -ffast-math, the C compiler will faithfully reproduce your linear reduction order, which is rather disastrous for parallelisation with vector units. This changes the summation order, so numerical results might differ slightly. To wit: the test suite needed adjustment. --- test/Tests/C.hs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'test/Tests/C.hs') diff --git a/test/Tests/C.hs b/test/Tests/C.hs index bc8e0de..a0f103d 100644 --- a/test/Tests/C.hs +++ b/test/Tests/C.hs @@ -35,6 +35,10 @@ import Gen import Util +-- | Appropriate for simple different summation orders +fineTol :: Double +fineTol = 1e-8 + prop_sum_nonempty :: Property prop_sum_nonempty = property $ genRank $ \outrank@(SNat @n) -> do -- Test nonempty _results_. The first dimension of the input is allowed to be 0, because then OR.rerank doesn't fail yet. @@ -46,7 +50,7 @@ prop_sum_nonempty = property $ genRank $ \outrank@(SNat @n) -> do genStorables (Range.singleton (product sh)) (\w -> fromIntegral w / fromIntegral (maxBound :: Word64)) let rarr = rfromOrthotope inrank arr - rtoOrthotope (rsumOuter1 rarr) === orSumOuter1 outrank arr + almostEq fineTol (rtoOrthotope (rsumOuter1 rarr)) (orSumOuter1 outrank arr) prop_sum_empty :: Property prop_sum_empty = property $ genRank $ \outrankm1@(SNat @nm1) -> do @@ -74,7 +78,7 @@ prop_sum_lasteq1 = property $ genRank $ \outrank@(SNat @n) -> do genStorables (Range.singleton (product insh)) (\w -> fromIntegral w / fromIntegral (maxBound :: Word64)) let rarr = rfromOrthotope inrank arr - rtoOrthotope (rsumOuter1 rarr) === orSumOuter1 outrank arr + almostEq fineTol (rtoOrthotope (rsumOuter1 rarr)) (orSumOuter1 outrank arr) prop_sum_replicated :: Bool -> Property prop_sum_replicated doTranspose = property $ -- cgit v1.2.3-70-g09d2