aboutsummaryrefslogtreecommitdiff
path: root/cbits/arith.c
blob: 3c5d8460c3688663ee6bd08e87c5af0559e1c3e6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#include <stdint.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <math.h>

typedef int32_t i32;
typedef int64_t i64;

#define COMM_OP(name, op, typ) \
  void oxarop_ ## name ## _ ## typ ## _sv(i64 n, typ *out, typ x, typ *y) { \
    for (i64 i = 0; i < n; i++) out[i] = x op y[i]; \
  } \
  void oxarop_ ## name ## _ ## typ ## _vv(i64 n, typ *out, const typ *x, const typ *y) { \
    for (i64 i = 0; i < n; i++) out[i] = x[i] op y[i]; \
  }

#define NONCOMM_OP(name, op, typ) \
  COMM_OP(name, op, typ) \
  void oxarop_ ## name ## _ ## typ ## _vs(i64 n, typ *out, typ *x, typ y) { \
    for (i64 i = 0; i < n; i++) out[i] = x[i] op y; \
  }

#define UNARY_OP(name, op, typ) \
  void oxarop_ ## name ## _ ## typ(i64 n, typ *out, typ *x) { \
    for (i64 i = 0; i < n; i++) out[i] = op(x[i]); \
  }

#define GEN_ABS(x) \
  _Generic((x), \
      int: abs, \
      long: labs, \
      long long: llabs, \
      float: fabsf, \
      double: fabs)(x)

// This does not result in multiple loads with GCC 13.
#define GEN_SIGNUM(x) ((x) < 0 ? -1 : (x) > 0 ? 1 : 0)

// Walk a orthotope-style strided array, except for the inner dimension. The
// body is run for every "inner vector".
#define TARRAY_WALK_NOINNER(again_label_name, rank, shape, strides, body) \
  do { \
    i64 idx[(rank) - 1]; \
    memset(idx, 0, ((rank) - 1) * sizeof(idx[0])); \
    i64 arrlinidx = 0; \
    i64 outlinidx = 0; \
  again_label_name: \
    { \
      body \
    } \
    for (i64 dim = (rank) - 2; dim >= 0; dim--) { \
      if (++idx[dim] < (shape)[dim]) { \
        arrlinidx += (strides)[dim]; \
        outlinidx++; \
        goto again_label_name; \
      } \
      arrlinidx -= (idx[dim] - 1) * (strides)[dim]; \
      idx[dim] = 0; \
    } \
  } while (false)

// preconditions:
// - all strides are >0
// - shape is everywhere >0
// - rank is >= 1
// - out has capacity for (shape[0] * ... * shape[rank - 2]) elements
// Reduces along the innermost dimension.
// 'out' will be filled densely in linearisation order.
#define REDUCE1_OP(name, op, typ) \
  void oxarop_ ## name ## _ ## typ(i64 rank, const i64 *shape, const i64 *strides, typ *out, const typ *arr) { \
    if (strides[rank - 1] == 1) { \
      TARRAY_WALK_NOINNER(again1, rank, shape, strides, { \
        typ accum = arr[arrlinidx]; \
        for (i64 i = 1; i < shape[rank - 1]; i++) { \
          accum = accum op arr[arrlinidx + i]; \
        } \
        out[outlinidx] = accum; \
      }); \
    } else { \
      TARRAY_WALK_NOINNER(again2, rank, shape, strides, { \
        typ accum = arr[arrlinidx]; \
        for (i64 i = 1; i < shape[rank - 1]; i++) { \
          accum = accum op arr[arrlinidx + strides[rank - 1] * i]; \
        } \
        out[outlinidx] = accum; \
      }); \
    } \
  }

#define NUM_TYPES_LOOP_XLIST \
  X(i32) X(i64) X(double) X(float)

#define X(typ) \
  COMM_OP(add, +, typ) \
  NONCOMM_OP(sub, -, typ) \
  COMM_OP(mul, *, typ) \
  UNARY_OP(neg, -, typ) \
  UNARY_OP(abs, GEN_ABS, typ) \
  UNARY_OP(signum, GEN_SIGNUM, typ) \
  REDUCE1_OP(sum1, +, typ) \
  REDUCE1_OP(product1, *, typ)
NUM_TYPES_LOOP_XLIST
#undef X