diff options
Diffstat (limited to 'aberth')
-rw-r--r-- | aberth/Makefile | 6 | ||||
-rw-r--r-- | aberth/compute_host.cpp | 58 | ||||
-rw-r--r-- | aberth/compute_host.h | 9 | ||||
-rw-r--r-- | aberth/defs.h | 2 | ||||
-rw-r--r-- | aberth/host_aberth.cpp | 96 | ||||
-rw-r--r-- | aberth/host_aberth.h | 12 | ||||
-rw-r--r-- | aberth/kernel.h | 1 | ||||
-rw-r--r-- | aberth/main.cpp | 224 | ||||
-rw-r--r-- | aberth/poly.cpp (renamed from aberth/util.cpp) | 3 | ||||
-rw-r--r-- | aberth/poly.h | 42 | ||||
-rw-r--r-- | aberth/polygen.cpp | 43 | ||||
-rw-r--r-- | aberth/polygen.h | 25 | ||||
-rw-r--r-- | aberth/util.h | 2 |
13 files changed, 297 insertions, 226 deletions
diff --git a/aberth/Makefile b/aberth/Makefile index 99f0e16..cd59309 100644 --- a/aberth/Makefile +++ b/aberth/Makefile @@ -1,6 +1,6 @@ CXX = g++ -CXXFLAGS = -Wall -Wextra -std=c++17 -fwrapv $(OPTFLAGS) -OPTFLAGS = -O3 -ffast-math -march=native -mtune=native +CXXFLAGS = -Wall -Wextra -std=c++17 $(OPTFLAGS) +OPTFLAGS = -O3 -ffast-math -march=native -mtune=native -flto LDFLAGS = -pthread ifeq ($(shell uname),Darwin) LDFLAGS += -framework OpenCL @@ -24,7 +24,7 @@ remake: clean aberth: $(OBJECTS) aberth_kernel.o ../lodepng.o - $(CXX) -o $@ $^ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) aberth_kernel.h: aberth_kernel.fut futhark opencl --library $^ diff --git a/aberth/compute_host.cpp b/aberth/compute_host.cpp new file mode 100644 index 0000000..38bf43d --- /dev/null +++ b/aberth/compute_host.cpp @@ -0,0 +1,58 @@ +#include <thread> +#include <mutex> +#include <tuple> +#include <cassert> +#include "compute_host.h" +#include "host_aberth.h" +#include "polygen.h" +#include "util.h" + +using namespace std; + + +vector<int> computeHost(int W, int H, Com bottomLeft, Com topRight) { + constexpr const int numThreads = 4; + static_assert(ispow2(numThreads)); + + vector<int> counts(W * H); + mutex countsMutex; + + vector<PolyGen::Job> jobs = PolyGen::genJobs(numThreads); + assert(jobs.size() == numThreads); + + vector<thread> threads(jobs.size()); + for (int i = 0; i < (int)jobs.size(); i++) { + threads[i] = thread([W, H, &counts, &countsMutex, job = jobs[i], bottomLeft, topRight]() { + auto calcIndex = [](double value, double left, double right, int steps) -> int { + return (value - left) / (right - left) * (steps - 1) + 0.5; + }; + auto calcPos = [W, H, bottomLeft, topRight, &calcIndex](Com z) -> pair<int, int> { + return make_pair( + calcIndex(z.real(), bottomLeft.real(), topRight.real(), W), + calcIndex(z.imag(), bottomLeft.imag(), topRight.imag(), H) + ); + }; + + vector<int> localCounts(W * H); + + Poly poly = job.init; + for (int i = 0; i < job.numItems; i++) { + for (Com z : aberth(poly)) { + int x, y; + tie(x, y) = calcPos(z); + if (0 <= x && x < W && 0 <= y && y < H) { + localCounts[W * y + x]++; + } + } + PolyGen::next(poly); + } + + lock_guard<mutex> guard(countsMutex); + for (int i = 0; i < W * H; i++) counts[i] += localCounts[i]; + }); + } + + for (thread &th : threads) th.join(); + + return counts; +} diff --git a/aberth/compute_host.h b/aberth/compute_host.h new file mode 100644 index 0000000..7a2ad36 --- /dev/null +++ b/aberth/compute_host.h @@ -0,0 +1,9 @@ +#pragma once + +#include <vector> +#include "defs.h" + +using namespace std; + + +vector<int> computeHost(int W, int H, Com bottomLeft, Com topRight); diff --git a/aberth/defs.h b/aberth/defs.h index 40eacf0..41d1f89 100644 --- a/aberth/defs.h +++ b/aberth/defs.h @@ -9,5 +9,3 @@ using namespace std; constexpr const int N = 18; using Com = complex<double>; -using Poly = array<int, N + 1>; -using AApprox = array<Com, N>; diff --git a/aberth/host_aberth.cpp b/aberth/host_aberth.cpp new file mode 100644 index 0000000..c567f30 --- /dev/null +++ b/aberth/host_aberth.cpp @@ -0,0 +1,96 @@ +#include <random> +#include <cmath> +#include "host_aberth.h" + +using namespace std; + + +struct AberthState { + // boundPoly is 's' in the stop condition formulated at p.189-190 of + // https://link.springer.com/article/10.1007%2FBF02207694 + + const Poly &poly; + Poly deriv; + Poly boundPoly; + AApprox approx; + double radius; + + AberthState(const Poly &poly); + void regenerate(); + bool step(); + void iterate(); +}; + +static thread_local minstd_rand randgenerator = minstd_rand(random_device()()); + +AberthState::AberthState(const Poly &poly) + : poly(poly), deriv(derivative(poly)), radius(maxRootNorm(poly)) { + + regenerate(); + for (int i = 0; i <= N; i++) { + boundPoly[i] = abs(poly[i]) * (4 * i + 1); + } +} + +void AberthState::regenerate() { + auto genCoord = [this]() { + return uniform_real_distribution<double>(-radius, radius)(randgenerator); + }; + + for (int i = 0; i < N; i++) { + approx[i] = Com(genCoord(), genCoord()); + } +} + +// Lagrange-style step where the new elements are computed in parallel from the previous values +bool AberthState::step() { + array<Com, N * N> pairs; + for (int i = 0; i < N - 1; i++) { + for (int j = i + 1; j < N; j++) { + pairs[N * i + j] = 1.0 / (approx[i] - approx[j]); + } + } + + bool allConverged = true; + + AApprox newapprox; + AApprox offsets; + for (int i = 0; i < N; i++) { + Com pval = eval(poly, approx[i]); + Com derivval = eval(deriv, poly.size() - 1, approx[i]); + Com quo = pval / derivval; + Com sum = 0; + for (int j = 0; j < i; j++) sum -= pairs[N * j + i]; + for (int j = i + 1; j < N; j++) sum += pairs[N * i + j]; + offsets[i] = quo / (1.0 - quo * sum); + + newapprox[i] = approx[i] - offsets[i]; + + double sval = eval(boundPoly, abs(newapprox[i])); + if (abs(pval) > 1e-5 * sval) allConverged = false; + } + + approx = newapprox; + + return allConverged; +} + +void AberthState::iterate() { + int tries = 1, stepIdx = 1; + + while (!step()) { + stepIdx++; + + if (stepIdx > tries * 100) { + regenerate(); + stepIdx = 0; + tries++; + } + } +} + +AApprox aberth(const Poly &poly) { + AberthState state(poly); + state.iterate(); + return state.approx; +} diff --git a/aberth/host_aberth.h b/aberth/host_aberth.h new file mode 100644 index 0000000..566a84e --- /dev/null +++ b/aberth/host_aberth.h @@ -0,0 +1,12 @@ +#pragma once + +#include <array> +#include "poly.h" + +using namespace std; + + +using AApprox = array<Com, N>; + + +AApprox aberth(const Poly &poly); diff --git a/aberth/kernel.h b/aberth/kernel.h index bd8ea32..3ab2772 100644 --- a/aberth/kernel.h +++ b/aberth/kernel.h @@ -1,6 +1,7 @@ #pragma once #include <vector> +#include <type_traits> #include "defs.h" extern "C" { diff --git a/aberth/main.cpp b/aberth/main.cpp index 1cdea3a..152640f 100644 --- a/aberth/main.cpp +++ b/aberth/main.cpp @@ -1,232 +1,20 @@ #include <iostream> #include <fstream> -#include <sstream> -#include <vector> -#include <array> #include <string> -#include <complex> -#include <random> -#include <utility> -#include <tuple> +#include <vector> #include <algorithm> -#include <thread> -#include <mutex> -#include <type_traits> +#include <tuple> #include <cstdint> +#include <cstring> #include <cassert> #include "../lodepng.h" +#include "compute_host.h" #include "defs.h" #include "kernel.h" -#include "util.h" using namespace std; -template <typename T> -static T eval(const Poly &p, int nterms, T pt) { - T value = p[nterms - 1]; - for (int i = nterms - 2; i >= 0; i--) { - value = pt * value + (double)p[i]; - } - return value; -} - -template <typename T> -static T eval(const Poly &p, T pt) { - return eval(p, p.size(), pt); -} - -static Poly derivative(const Poly &p) { - Poly res; - for (int i = res.size() - 2; i >= 0; i--) { - res[i] = (i+1) * p[i+1]; - } - return res; -} - -static double maxRootNorm(const Poly &poly) { - // Cauchy's bound: https://en.wikipedia.org/wiki/Geometrical_properties_of_polynomial_roots#Lagrange's_and_Cauchy's_bounds - - double value = 0; - double last = (double)poly.back(); - for (int i = 0; i < (int)poly.size() - 1; i++) { - value = max(value, abs(poly[i] / last)); - } - return 1 + value; -} - -static thread_local minstd_rand randgenerator = minstd_rand(random_device()()); - -struct AberthState { - const Poly &poly; - Poly deriv; - Poly boundPoly; - AApprox approx; - double radius; - - void regenerate() { - auto genCoord = [this]() { - return uniform_real_distribution<double>(-radius, radius)(randgenerator); - }; - for (int i = 0; i < N; i++) { - approx[i] = Com(genCoord(), genCoord()); - } - } - - // boundPoly is 's' in the stop condition formulated at p.189-190 of - // https://link.springer.com/article/10.1007%2FBF02207694 - - AberthState(const Poly &poly) - : poly(poly), deriv(derivative(poly)), radius(maxRootNorm(poly)) { - - regenerate(); - for (int i = 0; i <= N; i++) { - boundPoly[i] = abs(poly[i]) * (4 * i + 1); - } - } - - // Lagrange-style step where the new elements are computed in parallel from the previous values - bool step() { - array<Com, N * N> pairs; - for (int i = 0; i < N - 1; i++) { - for (int j = i + 1; j < N; j++) { - pairs[N * i + j] = 1.0 / (approx[i] - approx[j]); - } - } - - bool allConverged = true; - - AApprox newapprox; - AApprox offsets; - for (int i = 0; i < N; i++) { - Com pval = eval(poly, approx[i]); - Com derivval = eval(deriv, poly.size() - 1, approx[i]); - Com quo = pval / derivval; - Com sum = 0; - for (int j = 0; j < i; j++) sum -= pairs[N * j + i]; - for (int j = i + 1; j < N; j++) sum += pairs[N * i + j]; - offsets[i] = quo / (1.0 - quo * sum); - - // approx[i] -= offsets[i]; - newapprox[i] = approx[i] - offsets[i]; - - double sval = eval(boundPoly, abs(newapprox[i])); - if (abs(pval) > 1e-5 * sval) allConverged = false; - } - - approx = newapprox; - - return allConverged; - } - - void iterate() { - int tries = 1, stepIdx = 1; - while (!step()) { - stepIdx++; - - if (stepIdx > tries * 100) { - regenerate(); - stepIdx = 0; - tries++; - } - } - } -}; - -static AApprox aberth(const Poly &poly) { - AberthState state(poly); - state.iterate(); - return state.approx; -} - -// Returns whether we just looped around -static bool nextDerbyshire(Poly &poly) { - for (int i = 1; i < (int)poly.size(); i++) { - if (poly[i] == -1) { - poly[i] = 1; - return false; - } - poly[i] = -1; - } - return true; -} - -static Poly derbyshireAtIndex(int index) { - Poly poly; - poly[0] = 1; - for (int i = 1; i <= N; i++) { - poly[i] = index & 1 ? 1 : -1; - index >>= 1; - } - assert(index == 0); - return poly; -} - -struct Job { - Poly init; - int numItems; -}; - -static vector<Job> derbyshireJobs(int targetJobs) { - int njobs = min(1 << N, ceil2(targetJobs)); - int jobsize = (1 << N) / njobs; - - vector<Job> jobs(njobs); - for (int i = 0; i < njobs; i++) { - jobs[i].init = derbyshireAtIndex(i * jobsize); - jobs[i].numItems = jobsize; - } - - return jobs; -} - -static vector<int> computeCounts(int W, int H, Com bottomLeft, Com topRight) { - constexpr const int numThreads = 4; - static_assert(ispow2(numThreads)); - - vector<int> counts(W * H); - mutex countsMutex; - - vector<Job> jobs = derbyshireJobs(numThreads); - assert(jobs.size() == numThreads); - - vector<thread> threads(jobs.size()); - for (int i = 0; i < (int)jobs.size(); i++) { - threads[i] = thread([W, H, &counts, &countsMutex, job = jobs[i], bottomLeft, topRight]() { - auto calcIndex = [](double value, double left, double right, int steps) -> int { - return (value - left) / (right - left) * (steps - 1) + 0.5; - }; - auto calcPos = [W, H, bottomLeft, topRight, &calcIndex](Com z) -> pair<int, int> { - return make_pair( - calcIndex(z.real(), bottomLeft.real(), topRight.real(), W), - calcIndex(z.imag(), bottomLeft.imag(), topRight.imag(), H) - ); - }; - - vector<int> localCounts(W * H); - - Poly poly = job.init; - for (int i = 0; i < job.numItems; i++) { - for (Com z : aberth(poly)) { - int x, y; - tie(x, y) = calcPos(z); - if (0 <= x && x < W && 0 <= y && y < H) { - localCounts[W * y + x]++; - } - } - nextDerbyshire(poly); - } - - lock_guard<mutex> guard(countsMutex); - for (int i = 0; i < W * H; i++) counts[i] += localCounts[i]; - }); - } - - for (thread &th : threads) th.join(); - - return counts; -} - static void writeCounts(int W, int H, const vector<int> &counts, const char *fname) { ofstream f(fname); f << W << ' ' << H << '\n'; @@ -289,12 +77,12 @@ int main(int argc, char **argv) { const Com bottomLeft = Com(-1.5, -1.5); const Com topRight = Com(1.5, 1.5); - // counts = computeCounts(W, H, bottomLeft, topRight); + // counts = computeHost(W, H, bottomLeft, topRight); Kernel().run_chunked(counts, W, H, bottomLeft, topRight, 42, 1 << 14); // Kernel().run_all(counts, W, H, bottomLeft, topRight, 42); writeCounts(W, H, counts, "out.txt"); - } else if (argc == 2) { + } else if (argc == 2 && strcmp(argv[1], "-h") != 0) { tie(W, H, counts) = readCounts(argv[1]); } else { cerr << "Usage: " << argv[0] << " -- compute and draw" << endl; diff --git a/aberth/util.cpp b/aberth/poly.cpp index b8f6edc..cf19816 100644 --- a/aberth/util.cpp +++ b/aberth/poly.cpp @@ -1,4 +1,5 @@ -#include "util.h" +#include <sstream> +#include "poly.h" ostream& operator<<(ostream &os, const Poly &p) { diff --git a/aberth/poly.h b/aberth/poly.h new file mode 100644 index 0000000..82f04a2 --- /dev/null +++ b/aberth/poly.h @@ -0,0 +1,42 @@ +#pragma once + +#include "defs.h" + + +using Poly = array<int, N + 1>; + + +template <typename T> +T eval(const Poly &p, int nterms, T pt) { + T value = p[nterms - 1]; + for (int i = nterms - 2; i >= 0; i--) { + value = pt * value + (double)p[i]; + } + return value; +} + +template <typename T> +T eval(const Poly &p, T pt) { + return eval(p, p.size(), pt); +} + +inline Poly derivative(const Poly &p) { + Poly res; + for (int i = res.size() - 2; i >= 0; i--) { + res[i] = (i+1) * p[i+1]; + } + return res; +} + +inline double maxRootNorm(const Poly &poly) { + // Cauchy's bound: https://en.wikipedia.org/wiki/Geometrical_properties_of_polynomial_roots#Lagrange's_and_Cauchy's_bounds + + double value = 0; + double last = (double)poly.back(); + for (int i = 0; i < (int)poly.size() - 1; i++) { + value = max(value, abs(poly[i] / last)); + } + return 1 + value; +} + +ostream& operator<<(ostream &os, const Poly &p); diff --git a/aberth/polygen.cpp b/aberth/polygen.cpp new file mode 100644 index 0000000..6df89db --- /dev/null +++ b/aberth/polygen.cpp @@ -0,0 +1,43 @@ +#include "polygen.h" +#include "util.h" + +using namespace std; + + +namespace PolyGen::Derbyshire { + // Returns whether we just looped around + bool next(Poly &poly) { + for (int i = 1; i < (int)poly.size(); i++) { + if (poly[i] == -1) { + poly[i] = 1; + return false; + } + poly[i] = -1; + } + return true; + } + + Poly atIndex(int index) { + Poly poly; + poly[0] = 1; + for (int i = 1; i <= N; i++) { + poly[i] = index & 1 ? 1 : -1; + index >>= 1; + } + assert(index == 0); + return poly; + } + + vector<Job> genJobs(int targetJobs) { + int njobs = min(1 << N, ceil2(targetJobs)); + int jobsize = (1 << N) / njobs; + + vector<Job> jobs(njobs); + for (int i = 0; i < njobs; i++) { + jobs[i].init = atIndex(i * jobsize); + jobs[i].numItems = jobsize; + } + + return jobs; + } +} diff --git a/aberth/polygen.h b/aberth/polygen.h new file mode 100644 index 0000000..1381444 --- /dev/null +++ b/aberth/polygen.h @@ -0,0 +1,25 @@ +#pragma once + +#include <vector> +#include "poly.h" + +using namespace std; + + +namespace PolyGen { + struct Job { + Poly init; + int numItems; + }; + + namespace Derbyshire { + // Returns whether we just looped around + bool next(Poly &poly); + + Poly atIndex(int index); + + vector<Job> genJobs(int targetJobs); + } + + using namespace Derbyshire; +} diff --git a/aberth/util.h b/aberth/util.h index c21a7bb..a276e2a 100644 --- a/aberth/util.h +++ b/aberth/util.h @@ -27,5 +27,3 @@ constexpr static T ceil2(T value) { if (value2 == 0) return value << 1; } } - -ostream& operator<<(ostream &os, const Poly &p); |