summaryrefslogtreecommitdiff
path: root/compute.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'compute.cpp')
-rw-r--r--compute.cpp191
1 files changed, 191 insertions, 0 deletions
diff --git a/compute.cpp b/compute.cpp
new file mode 100644
index 0000000..d3f5e42
--- /dev/null
+++ b/compute.cpp
@@ -0,0 +1,191 @@
+#include "compute.h"
+#include <complex>
+#include <random>
+#include <thread>
+#include <memory>
+#include <cstdlib>
+#include <cassert>
+#include <immintrin.h>
+
+struct Pointgen {
+ Pointgen(std::mt19937::result_type seed) : randgen{seed} {}
+ double operator()() {
+ return dist(randgen);
+ }
+
+private:
+ std::mt19937 randgen;
+ std::uniform_real_distribution<double> dist{-1.5, 1.5};
+};
+
+std::ostream& operator<<(std::ostream &os, __m256d x) {
+ union {
+ __m256d vec;
+ double arr[4];
+ } u;
+ u.vec = x;
+ return os << '<' << u.arr[0] << ',' << u.arr[1] << ',' << u.arr[2] << ',' << u.arr[3] << '>';
+}
+
+std::ostream& operator<<(std::ostream &os, __m256i x) {
+ union {
+ __m256i vec;
+ int64_t arr[4];
+ } u;
+ u.vec = x;
+ return os << '<' << u.arr[0] << ',' << u.arr[1] << ',' << u.arr[2] << ',' << u.arr[3] << '>';
+}
+
+union int64_4vec {
+ __m256i i;
+ int64_t a[4];
+};
+
+static HistImage accumulate_brot_simple(const Options &options, int64_t numorbits_override, std::mt19937::result_type seed) {
+ HistImage img{(size_t)options.imgwidth.value, (size_t)options.imgheight.value};
+
+ Pointgen gen{seed};
+
+ // trace[0] is never set; considered equal to c
+ std::vector<std::pair<double, double>> trace(options.maxiter.value + 1);
+
+ for (int64_t orbiti = 0; orbiti < numorbits_override; orbiti++) {
+ const double cx = gen(), cy = gen();
+ // std::cout << "cx=" << cx << " cy=" << cy << std::endl;
+
+ int iteri = 0;
+ {
+ double a = cx, b = cy;
+ if (a * a + b * b >= 4) continue;
+ while (iteri < options.maxiter.value) {
+ const double newb = 2 * a * b + cy;
+ a = a * a - b * b + cx;
+ b = newb;
+ iteri++;
+ trace[iteri] = std::make_pair(a, b);
+
+ // std::cout << "a=" << a << " b=" << b << std::endl;
+ if (a * a + b * b >= 4) goto escape;
+ }
+ continue; // did not escape, so in set, so ignore
+ }
+
+ escape:
+ if (options.orbit_burnin.value == 0) trace[0] = std::make_pair(cx, cy);
+ // std::cout << "iteri=" << iteri << std::endl;
+ for (int i = options.orbit_burnin.value; i <= iteri; i++) {
+ const double zx = trace[i].first, zy = trace[i].second;
+ const int imgx = (zx - options.xmin.value) / options.cplxwidth.value * (options.imgwidth.value - 1);
+ const int imgy = (zy - options.ymin.value) / options.cplxheight.value * (options.imgheight.value - 1);
+ // std::cerr << " zx=" << zx << " zy=" << zy << " img=(" << imgx << ',' << imgy << ")" << std::endl;
+ if (0 <= imgx && imgx < options.imgwidth.value &&
+ 0 <= imgy && imgy < options.imgheight.value) {
+ img.data[options.imgwidth.value * imgy + imgx]++;
+ }
+ }
+ }
+
+ return img;
+}
+
+static HistImage accumulate_brot_vectorised(const Options &options, int64_t numorbits_override, std::mt19937::result_type seed) {
+ HistImage img{(size_t)options.imgwidth.value, (size_t)options.imgheight.value};
+
+ Pointgen gen{seed};
+
+ using F4 = __m256d;
+ using I4 = __m256i;
+
+ // trace[0] is never set; considered equal to c
+ std::vector<std::pair<F4, F4>> trace(options.maxiter.value + 1);
+
+ for (int64_t orbiti = 0; orbiti < numorbits_override; orbiti += 4) {
+ const F4 cx = _mm256_set_pd(gen(), gen(), gen(), gen());
+ const F4 cy = _mm256_set_pd(gen(), gen(), gen(), gen());
+ // std::cout << "cx=" << cx << " cy=" << cy << std::endl;
+
+ I4 escaped_mask = _mm256_setzero_si256();
+ I4 escaped_at = _mm256_setzero_si256();
+
+ int iteri = 0;
+ {
+ F4 a = cx, b = cy;
+ if (_mm256_movemask_pd(a * a + b * b < 4) == 0) continue;
+ while (iteri < options.maxiter.value) {
+ const F4 newb = 2 * a * b + cy;
+ a = a * a - b * b + cx;
+ b = newb;
+ iteri++;
+ trace[iteri] = std::make_pair(a, b);
+
+ const I4 remaining = a * a + b * b < 4;
+ const I4 escaped = ~remaining;
+ escaped_at |= _mm256_set1_epi64x(iteri) & ~escaped_mask & escaped;
+ escaped_mask |= escaped;
+ // std::cout << "a=" << a << " b=" << b << " rem=" << remaining << " esc=" << escaped << " at=" << escaped_at << " mask=" << escaped_mask << std::endl;
+ if (_mm256_movemask_epi8(remaining) == 0) goto escape;
+ // a = _mm256_castsi256_pd(_mm256_castpd_si256(a) & remaining);
+ // b = _mm256_castsi256_pd(_mm256_castpd_si256(b) & remaining);
+ }
+ }
+ if (_mm256_movemask_epi8(escaped_mask) == 0) continue; // if none escaped, nothing to do
+
+ escape:
+ if (options.orbit_burnin.value == 0) trace[0] = std::make_pair(cx, cy);
+ union int64_4vec escaped_at_un;
+ _mm256_store_si256(&escaped_at_un.i, escaped_at);
+ for (int tracei = 0; tracei < 4; tracei++) {
+ const int limit = escaped_at_un.a[tracei];
+ // std::cout << "tracei=" << tracei << " limit=" << limit << std::endl;
+ for (int i = options.orbit_burnin.value; i <= limit; i++) {
+ const double zx = ((const double*)&trace[i].first)[tracei];
+ const double zy = ((const double*)&trace[i].second)[tracei];
+ const int imgx = (zx - options.xmin.value) / options.cplxwidth.value * (options.imgwidth.value - 1);
+ const int imgy = (zy - options.ymin.value) / options.cplxheight.value * (options.imgheight.value - 1);
+ // std::cerr << " zx=" << zx << " zy=" << zy << " img=(" << imgx << ',' << imgy << ")" << std::endl;
+ if (0 <= imgx && imgx < options.imgwidth.value &&
+ 0 <= imgy && imgy < options.imgheight.value) {
+ img.data[options.imgwidth.value * imgy + imgx]++;
+ }
+ }
+ }
+ }
+
+ return img;
+}
+
+static HistImage accumulate_brot(const Options &options, int64_t numorbits_override, std::mt19937::result_type seed) {
+ if (options.algorithm.value == "simple") return accumulate_brot_simple(options, numorbits_override, seed);
+ if (options.algorithm.value == "vectorised") return accumulate_brot_vectorised(options, numorbits_override, seed);
+ std::cerr << "Invalid algorithm '" << options.algorithm.value << "'" << std::endl;
+ exit(1);
+}
+
+HistImage compute_brot(const Options &options) {
+ std::vector<std::unique_ptr<HistImage>> images(options.num_threads.value);
+
+ std::mt19937 randgen(std::random_device{}());
+
+ std::vector<std::thread> ths(options.num_threads.value);
+ for (size_t i = 0; i < (size_t)options.num_threads.value; i++) {
+ std::mt19937::result_type seed = randgen();
+ ths[i] = std::thread{[&options, &images, i, seed]() {
+ images[i] = std::make_unique<HistImage>(
+ accumulate_brot(options, options.numorbits.value / options.num_threads.value, seed));
+ }};
+ }
+
+ for (std::thread &th : ths) th.join();
+
+ HistImage first = std::move(*images[0]);
+ images[0].reset();
+
+ for (int i = 1; i < options.num_threads.value; i++) {
+ first.add(*images[i]);
+ images[i].reset();
+ }
+
+ return first;
+}
+
+// vim: set sw=4 ts=4 noet: