From 6276ed3c7bcd20c8b860e1275386ecd068671bcc Mon Sep 17 00:00:00 2001
From: Tom Smeding <tom@tomsmeding.com>
Date: Fri, 14 Mar 2025 21:57:56 +0100
Subject: Optimise reductions and dotprod with more vectorisation

Turns out that if you don't supply -ffast-math, the C compiler will
faithfully reproduce your linear reduction order, which is rather
disastrous for parallelisation with vector units.

This changes the summation order, so numerical results might differ
slightly. To wit: the test suite needed adjustment.
---
 test/Tests/C.hs | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'test/Tests/C.hs')

diff --git a/test/Tests/C.hs b/test/Tests/C.hs
index bc8e0de..a0f103d 100644
--- a/test/Tests/C.hs
+++ b/test/Tests/C.hs
@@ -35,6 +35,10 @@ import Gen
 import Util
 
 
+-- | Tolerance for comparing results that differ only in summation order
+fineTol :: Double
+fineTol = 1e-8
+
 prop_sum_nonempty :: Property
 prop_sum_nonempty = property $ genRank $ \outrank@(SNat @n) -> do
   -- Test nonempty _results_. The first dimension of the input is allowed to be 0, because then OR.rerank doesn't fail yet.
@@ -46,7 +50,7 @@ prop_sum_nonempty = property $ genRank $ \outrank@(SNat @n) -> do
            genStorables (Range.singleton (product sh))
                         (\w -> fromIntegral w / fromIntegral (maxBound :: Word64))
   let rarr = rfromOrthotope inrank arr
-  rtoOrthotope (rsumOuter1 rarr) === orSumOuter1 outrank arr
+  almostEq fineTol (rtoOrthotope (rsumOuter1 rarr)) (orSumOuter1 outrank arr)
 
 prop_sum_empty :: Property
 prop_sum_empty = property $ genRank $ \outrankm1@(SNat @nm1) -> do
@@ -74,7 +78,7 @@ prop_sum_lasteq1 = property $ genRank $ \outrank@(SNat @n) -> do
            genStorables (Range.singleton (product insh))
                         (\w -> fromIntegral w / fromIntegral (maxBound :: Word64))
   let rarr = rfromOrthotope inrank arr
-  rtoOrthotope (rsumOuter1 rarr) === orSumOuter1 outrank arr
+  almostEq fineTol (rtoOrthotope (rsumOuter1 rarr)) (orSumOuter1 outrank arr)
 
 prop_sum_replicated :: Bool -> Property
 prop_sum_replicated doTranspose = property $
-- 
cgit v1.2.3-70-g09d2