diff options
author | Tom Smeding <tom@tomsmeding.com> | 2024-06-12 22:07:25 +0200 |
---|---|---|
committer | Tom Smeding <tom@tomsmeding.com> | 2024-06-12 22:07:25 +0200 |
commit | a088130c3e722d3c589be388a98daab28a73b23f (patch) | |
tree | 7e84255479e5df4cbd830ff939cdbd7cd9899e21 /cbits | |
parent | 39e84802d630ba2ce7e1d51641e39982d6091511 (diff) |
Only use intel SIMD on intel platforms
Diffstat (limited to 'cbits')
-rw-r--r-- | cbits/arith.c | 13 |
1 files changed, 13 insertions, 0 deletions
diff --git a/cbits/arith.c b/cbits/arith.c
index 73bf99f..fb993c8 100644
--- a/cbits/arith.c
+++ b/cbits/arith.c
@@ -1,10 +1,18 @@
+// Architecture detection
+#if defined(__x86_64__) || defined(_M_X64)
+#define OX_ARCH_INTEL
+#endif
+
 #include <stdio.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <stdbool.h>
 #include <string.h>
 #include <math.h>
+
+#ifdef OX_ARCH_INTEL
 #include <emmintrin.h>
+#endif
 
 // These are the wrapper macros used in arith_lists.h. Preset them to empty to
 // avoid having to touch macros unrelated to the particular operation set below.
@@ -218,6 +226,7 @@ static double log1pexp_double(double x) { LOG1PEXP_IMPL(x); }
 // The 'double' version here is about 2x as fast as gcc's own vectorisation.
 DOTPROD_OP(i32)
 DOTPROD_OP(i64)
+#ifdef OX_ARCH_INTEL
 float oxarop_dotprod_float(i64 length, const float *arr1, const float *arr2) {
   __m128 accum = _mm_setzero_ps();
   i64 i;
@@ -240,6 +249,10 @@ double oxarop_dotprod_double(i64 length, const double *arr1, const double *arr2)
   if (i < length) tot += arr1[i] * arr2[i];
   return tot;
 }
+#else
+DOTPROD_OP(float)
+DOTPROD_OP(double)
+#endif
 
 /*****************************************************************************