#include "compute.h" #include #include #include #include #include #include #include struct Pointgen { Pointgen(std::mt19937::result_type seed) : randgen{seed} {} double operator()() { return dist(randgen); } private: std::mt19937 randgen; std::uniform_real_distribution dist{-1.5, 1.5}; }; std::ostream& operator<<(std::ostream &os, __m256d x) { union { __m256d vec; double arr[4]; } u; u.vec = x; return os << '<' << u.arr[0] << ',' << u.arr[1] << ',' << u.arr[2] << ',' << u.arr[3] << '>'; } std::ostream& operator<<(std::ostream &os, __m256i x) { union { __m256i vec; int64_t arr[4]; } u; u.vec = x; return os << '<' << u.arr[0] << ',' << u.arr[1] << ',' << u.arr[2] << ',' << u.arr[3] << '>'; } union int64_4vec { __m256i i; int64_t a[4]; }; static HistImage accumulate_brot_simple(const Options &options, int64_t numorbits_override, std::mt19937::result_type seed) { HistImage img{(size_t)options.imgwidth.value, (size_t)options.imgheight.value}; Pointgen gen{seed}; // trace[0] is never set; considered equal to c std::vector> trace(options.maxiter.value + 1); for (int64_t orbiti = 0; orbiti < numorbits_override; orbiti++) { const double cx = gen(), cy = gen(); // std::cout << "cx=" << cx << " cy=" << cy << std::endl; int iteri = 0; { double a = cx, b = cy; if (a * a + b * b >= 4) continue; while (iteri < options.maxiter.value) { const double newb = 2 * a * b + cy; a = a * a - b * b + cx; b = newb; iteri++; trace[iteri] = std::make_pair(a, b); // std::cout << "a=" << a << " b=" << b << std::endl; if (a * a + b * b >= 4) goto escape; } continue; // did not escape, so in set, so ignore } escape: if (options.orbit_burnin.value == 0) trace[0] = std::make_pair(cx, cy); // std::cout << "iteri=" << iteri << std::endl; for (int i = options.orbit_burnin.value; i <= iteri; i++) { const double zx = trace[i].first, zy = trace[i].second; const int imgx = (zx - options.xmin.value) / options.cplxwidth.value * (options.imgwidth.value - 1); const int imgy = (zy - options.ymin.value) / options.cplxheight.value * (options.imgheight.value - 1); // std::cerr << " zx=" << zx << " zy=" << zy << " img=(" << imgx << ',' << imgy << ")" << std::endl; if (0 <= imgx && imgx < options.imgwidth.value && 0 <= imgy && imgy < options.imgheight.value) { img.data[options.imgwidth.value * imgy + imgx]++; } } } return img; } static HistImage accumulate_brot_vectorised(const Options &options, int64_t numorbits_override, std::mt19937::result_type seed) { HistImage img{(size_t)options.imgwidth.value, (size_t)options.imgheight.value}; Pointgen gen{seed}; using F4 = __m256d; using I4 = __m256i; // trace[0] is never set; considered equal to c std::vector> trace(options.maxiter.value + 1); for (int64_t orbiti = 0; orbiti < numorbits_override; orbiti += 4) { const F4 cx = _mm256_set_pd(gen(), gen(), gen(), gen()); const F4 cy = _mm256_set_pd(gen(), gen(), gen(), gen()); // std::cout << "cx=" << cx << " cy=" << cy << std::endl; I4 escaped_mask = _mm256_setzero_si256(); I4 escaped_at = _mm256_setzero_si256(); int iteri = 0; { F4 a = cx, b = cy; if (_mm256_movemask_pd(a * a + b * b < 4) == 0) continue; while (iteri < options.maxiter.value) { const F4 newb = 2 * a * b + cy; a = a * a - b * b + cx; b = newb; iteri++; trace[iteri] = std::make_pair(a, b); const I4 remaining = a * a + b * b < 4; const I4 escaped = ~remaining; escaped_at |= _mm256_set1_epi64x(iteri) & ~escaped_mask & escaped; escaped_mask |= escaped; // std::cout << "a=" << a << " b=" << b << " rem=" << remaining << " esc=" << escaped << " at=" << escaped_at << " mask=" << escaped_mask << std::endl; if (_mm256_movemask_epi8(remaining) == 0) goto escape; // a = _mm256_castsi256_pd(_mm256_castpd_si256(a) & remaining); // b = _mm256_castsi256_pd(_mm256_castpd_si256(b) & remaining); } } if (_mm256_movemask_epi8(escaped_mask) == 0) continue; // if none escaped, nothing to do escape: if (options.orbit_burnin.value == 0) trace[0] = std::make_pair(cx, cy); union int64_4vec escaped_at_un; _mm256_store_si256(&escaped_at_un.i, escaped_at); for (int tracei = 0; tracei < 4; tracei++) { const int limit = escaped_at_un.a[tracei]; // std::cout << "tracei=" << tracei << " limit=" << limit << std::endl; for (int i = options.orbit_burnin.value; i <= limit; i++) { const double zx = ((const double*)&trace[i].first)[tracei]; const double zy = ((const double*)&trace[i].second)[tracei]; const int imgx = (zx - options.xmin.value) / options.cplxwidth.value * (options.imgwidth.value - 1); const int imgy = (zy - options.ymin.value) / options.cplxheight.value * (options.imgheight.value - 1); // std::cerr << " zx=" << zx << " zy=" << zy << " img=(" << imgx << ',' << imgy << ")" << std::endl; if (0 <= imgx && imgx < options.imgwidth.value && 0 <= imgy && imgy < options.imgheight.value) { img.data[options.imgwidth.value * imgy + imgx]++; } } } } return img; } static HistImage accumulate_brot(const Options &options, int64_t numorbits_override, std::mt19937::result_type seed) { if (options.algorithm.value == "simple") return accumulate_brot_simple(options, numorbits_override, seed); if (options.algorithm.value == "vectorised") return accumulate_brot_vectorised(options, numorbits_override, seed); std::cerr << "Invalid algorithm '" << options.algorithm.value << "'" << std::endl; exit(1); } HistImage compute_brot(const Options &options) { std::vector> images(options.num_threads.value); std::mt19937 randgen(std::random_device{}()); std::vector ths(options.num_threads.value); for (size_t i = 0; i < (size_t)options.num_threads.value; i++) { std::mt19937::result_type seed = randgen(); ths[i] = std::thread{[&options, &images, i, seed]() { images[i] = std::make_unique( accumulate_brot(options, options.numorbits.value / options.num_threads.value, seed)); }}; } for (std::thread &th : ths) th.join(); HistImage first = std::move(*images[0]); images[0].reset(); for (int i = 1; i < options.num_threads.value; i++) { first.add(*images[i]); images[i].reset(); } return first; } // vim: set sw=4 ts=4 noet: