diff options
author | Tom Smeding <tom@tomsmeding.com> | 2025-09-03 23:31:40 +0200 |
---|---|---|
committer | Tom Smeding <tom@tomsmeding.com> | 2025-09-03 23:37:53 +0200 |
commit | 7e60437d3c064bca402d486be967c43bf4326067 (patch) | |
tree | 8b4307b90c942861176d9c705cd760b9fe7b6c6b /compute.cpp |
This includes old code too, perhaps from 2021-05-05, judging from the
birth timestamp on the directory. The old code included option parsing
and equation solving for fancy mutually-dependent options, but no actual
rendering (imagine that).
Diffstat (limited to 'compute.cpp')
-rw-r--r-- | compute.cpp | 191 |
1 files changed, 191 insertions, 0 deletions
diff --git a/compute.cpp b/compute.cpp new file mode 100644 index 0000000..d3f5e42 --- /dev/null +++ b/compute.cpp @@ -0,0 +1,191 @@ +#include "compute.h" +#include <complex> +#include <random> +#include <thread> +#include <memory> +#include <cstdlib> +#include <cassert> +#include <immintrin.h> + +struct Pointgen { + Pointgen(std::mt19937::result_type seed) : randgen{seed} {} + double operator()() { + return dist(randgen); + } + +private: + std::mt19937 randgen; + std::uniform_real_distribution<double> dist{-1.5, 1.5}; +}; + +std::ostream& operator<<(std::ostream &os, __m256d x) { + union { + __m256d vec; + double arr[4]; + } u; + u.vec = x; + return os << '<' << u.arr[0] << ',' << u.arr[1] << ',' << u.arr[2] << ',' << u.arr[3] << '>'; +} + +std::ostream& operator<<(std::ostream &os, __m256i x) { + union { + __m256i vec; + int64_t arr[4]; + } u; + u.vec = x; + return os << '<' << u.arr[0] << ',' << u.arr[1] << ',' << u.arr[2] << ',' << u.arr[3] << '>'; +} + +union int64_4vec { + __m256i i; + int64_t a[4]; +}; + +static HistImage accumulate_brot_simple(const Options &options, int64_t numorbits_override, std::mt19937::result_type seed) { + HistImage img{(size_t)options.imgwidth.value, (size_t)options.imgheight.value}; + + Pointgen gen{seed}; + + // trace[0] is never set; considered equal to c + std::vector<std::pair<double, double>> trace(options.maxiter.value + 1); + + for (int64_t orbiti = 0; orbiti < numorbits_override; orbiti++) { + const double cx = gen(), cy = gen(); + // std::cout << "cx=" << cx << " cy=" << cy << std::endl; + + int iteri = 0; + { + double a = cx, b = cy; + if (a * a + b * b >= 4) continue; + while (iteri < options.maxiter.value) { + const double newb = 2 * a * b + cy; + a = a * a - b * b + cx; + b = newb; + iteri++; + trace[iteri] = std::make_pair(a, b); + + // std::cout << "a=" << a << " b=" << b << std::endl; + if (a * a + b * b >= 4) goto escape; + } + continue; // did not escape, so in set, so ignore + } + + escape: + if (options.orbit_burnin.value == 0) trace[0] = std::make_pair(cx, cy); + // std::cout << "iteri=" << iteri << std::endl; + for (int i = options.orbit_burnin.value; i <= iteri; i++) { + const double zx = trace[i].first, zy = trace[i].second; + const int imgx = (zx - options.xmin.value) / options.cplxwidth.value * (options.imgwidth.value - 1); + const int imgy = (zy - options.ymin.value) / options.cplxheight.value * (options.imgheight.value - 1); + // std::cerr << " zx=" << zx << " zy=" << zy << " img=(" << imgx << ',' << imgy << ")" << std::endl; + if (0 <= imgx && imgx < options.imgwidth.value && + 0 <= imgy && imgy < options.imgheight.value) { + img.data[options.imgwidth.value * imgy + imgx]++; + } + } + } + + return img; +} + +static HistImage accumulate_brot_vectorised(const Options &options, int64_t numorbits_override, std::mt19937::result_type seed) { + HistImage img{(size_t)options.imgwidth.value, (size_t)options.imgheight.value}; + + Pointgen gen{seed}; + + using F4 = __m256d; + using I4 = __m256i; + + // trace[0] is never set; considered equal to c + std::vector<std::pair<F4, F4>> trace(options.maxiter.value + 1); + + for (int64_t orbiti = 0; orbiti < numorbits_override; orbiti += 4) { + const F4 cx = _mm256_set_pd(gen(), gen(), gen(), gen()); + const F4 cy = _mm256_set_pd(gen(), gen(), gen(), gen()); + // std::cout << "cx=" << cx << " cy=" << cy << std::endl; + + I4 escaped_mask = _mm256_setzero_si256(); + I4 escaped_at = _mm256_setzero_si256(); + + int iteri = 0; + { + F4 a = cx, b = cy; + if (_mm256_movemask_pd(a * a + b * b < 4) == 0) continue; + while (iteri < options.maxiter.value) { + const F4 newb = 2 * a * b + cy; + a = a * a - b * b + cx; + b = newb; + iteri++; + trace[iteri] = std::make_pair(a, b); + + const I4 remaining = a * a + b * b < 4; + const I4 escaped = ~remaining; + escaped_at |= _mm256_set1_epi64x(iteri) & ~escaped_mask & escaped; + escaped_mask |= escaped; + // std::cout << "a=" << a << " b=" << b << " rem=" << remaining << " esc=" << escaped << " at=" << escaped_at << " mask=" << escaped_mask << std::endl; + if (_mm256_movemask_epi8(remaining) == 0) goto escape; + // a = _mm256_castsi256_pd(_mm256_castpd_si256(a) & remaining); + // b = _mm256_castsi256_pd(_mm256_castpd_si256(b) & remaining); + } + } + if (_mm256_movemask_epi8(escaped_mask) == 0) continue; // if none escaped, nothing to do + + escape: + if (options.orbit_burnin.value == 0) trace[0] = std::make_pair(cx, cy); + union int64_4vec escaped_at_un; + _mm256_store_si256(&escaped_at_un.i, escaped_at); + for (int tracei = 0; tracei < 4; tracei++) { + const int limit = escaped_at_un.a[tracei]; + // std::cout << "tracei=" << tracei << " limit=" << limit << std::endl; + for (int i = options.orbit_burnin.value; i <= limit; i++) { + const double zx = ((const double*)&trace[i].first)[tracei]; + const double zy = ((const double*)&trace[i].second)[tracei]; + const int imgx = (zx - options.xmin.value) / options.cplxwidth.value * (options.imgwidth.value - 1); + const int imgy = (zy - options.ymin.value) / options.cplxheight.value * (options.imgheight.value - 1); + // std::cerr << " zx=" << zx << " zy=" << zy << " img=(" << imgx << ',' << imgy << ")" << std::endl; + if (0 <= imgx && imgx < options.imgwidth.value && + 0 <= imgy && imgy < options.imgheight.value) { + img.data[options.imgwidth.value * imgy + imgx]++; + } + } + } + } + + return img; +} + +static HistImage accumulate_brot(const Options &options, int64_t numorbits_override, std::mt19937::result_type seed) { + if (options.algorithm.value == "simple") return accumulate_brot_simple(options, numorbits_override, seed); + if (options.algorithm.value == "vectorised") return accumulate_brot_vectorised(options, numorbits_override, seed); + std::cerr << "Invalid algorithm '" << options.algorithm.value << "'" << std::endl; + exit(1); +} + +HistImage compute_brot(const Options &options) { + std::vector<std::unique_ptr<HistImage>> images(options.num_threads.value); + + std::mt19937 randgen(std::random_device{}()); + + std::vector<std::thread> ths(options.num_threads.value); + for (size_t i = 0; i < (size_t)options.num_threads.value; i++) { + std::mt19937::result_type seed = randgen(); + ths[i] = std::thread{[&options, &images, i, seed]() { + images[i] = std::make_unique<HistImage>( + accumulate_brot(options, options.numorbits.value / options.num_threads.value, seed)); + }}; + } + + for (std::thread &th : ths) th.join(); + + HistImage first = std::move(*images[0]); + images[0].reset(); + + for (int i = 1; i < options.num_threads.value; i++) { + first.add(*images[i]); + images[i].reset(); + } + + return first; +} + +// vim: set sw=4 ts=4 noet: |