aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authortomsmeding <tom.smeding@gmail.com>2019-04-20 21:15:46 +0200
committertomsmeding <tom.smeding@gmail.com>2019-04-20 21:16:10 +0200
commit284ce56948aae60134a1073c6499c7fb97ffb326 (patch)
tree174a34d94072f4ee819a89101ef6ff182e5bcf4b
parentccade8e6ed96fb48b329d22aaa4c8d0826b3c8d1 (diff)
More reorganisation
-rw-r--r--aberth/Makefile6
-rw-r--r--aberth/compute_host.cpp58
-rw-r--r--aberth/compute_host.h9
-rw-r--r--aberth/defs.h2
-rw-r--r--aberth/host_aberth.cpp96
-rw-r--r--aberth/host_aberth.h12
-rw-r--r--aberth/kernel.h1
-rw-r--r--aberth/main.cpp224
-rw-r--r--aberth/poly.cpp (renamed from aberth/util.cpp)3
-rw-r--r--aberth/poly.h42
-rw-r--r--aberth/polygen.cpp43
-rw-r--r--aberth/polygen.h25
-rw-r--r--aberth/util.h2
13 files changed, 297 insertions, 226 deletions
diff --git a/aberth/Makefile b/aberth/Makefile
index 99f0e16..cd59309 100644
--- a/aberth/Makefile
+++ b/aberth/Makefile
@@ -1,6 +1,6 @@
CXX = g++
-CXXFLAGS = -Wall -Wextra -std=c++17 -fwrapv $(OPTFLAGS)
-OPTFLAGS = -O3 -ffast-math -march=native -mtune=native
+CXXFLAGS = -Wall -Wextra -std=c++17 $(OPTFLAGS)
+OPTFLAGS = -O3 -ffast-math -march=native -mtune=native -flto
LDFLAGS = -pthread
ifeq ($(shell uname),Darwin)
LDFLAGS += -framework OpenCL
@@ -24,7 +24,7 @@ remake: clean
aberth: $(OBJECTS) aberth_kernel.o ../lodepng.o
- $(CXX) -o $@ $^ $(LDFLAGS)
+ $(CXX) $(CXXFLAGS) -o $@ $^ $(LDFLAGS)
aberth_kernel.h: aberth_kernel.fut
futhark opencl --library $^
diff --git a/aberth/compute_host.cpp b/aberth/compute_host.cpp
new file mode 100644
index 0000000..38bf43d
--- /dev/null
+++ b/aberth/compute_host.cpp
@@ -0,0 +1,58 @@
+#include <thread>
+#include <mutex>
+#include <tuple>
+#include <cassert>
+#include "compute_host.h"
+#include "host_aberth.h"
+#include "polygen.h"
+#include "util.h"
+
+using namespace std;
+
+
+vector<int> computeHost(int W, int H, Com bottomLeft, Com topRight) {
+ constexpr const int numThreads = 4;
+ static_assert(ispow2(numThreads));
+
+ vector<int> counts(W * H);
+ mutex countsMutex;
+
+ vector<PolyGen::Job> jobs = PolyGen::genJobs(numThreads);
+ assert(jobs.size() == numThreads);
+
+ vector<thread> threads(jobs.size());
+ for (int i = 0; i < (int)jobs.size(); i++) {
+ threads[i] = thread([W, H, &counts, &countsMutex, job = jobs[i], bottomLeft, topRight]() {
+ auto calcIndex = [](double value, double left, double right, int steps) -> int {
+ return (value - left) / (right - left) * (steps - 1) + 0.5;
+ };
+ auto calcPos = [W, H, bottomLeft, topRight, &calcIndex](Com z) -> pair<int, int> {
+ return make_pair(
+ calcIndex(z.real(), bottomLeft.real(), topRight.real(), W),
+ calcIndex(z.imag(), bottomLeft.imag(), topRight.imag(), H)
+ );
+ };
+
+ vector<int> localCounts(W * H);
+
+ Poly poly = job.init;
+ for (int i = 0; i < job.numItems; i++) {
+ for (Com z : aberth(poly)) {
+ int x, y;
+ tie(x, y) = calcPos(z);
+ if (0 <= x && x < W && 0 <= y && y < H) {
+ localCounts[W * y + x]++;
+ }
+ }
+ PolyGen::next(poly);
+ }
+
+ lock_guard<mutex> guard(countsMutex);
+ for (int i = 0; i < W * H; i++) counts[i] += localCounts[i];
+ });
+ }
+
+ for (thread &th : threads) th.join();
+
+ return counts;
+}
diff --git a/aberth/compute_host.h b/aberth/compute_host.h
new file mode 100644
index 0000000..7a2ad36
--- /dev/null
+++ b/aberth/compute_host.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <vector>
+#include "defs.h"
+
+using namespace std;
+
+
+vector<int> computeHost(int W, int H, Com bottomLeft, Com topRight);
diff --git a/aberth/defs.h b/aberth/defs.h
index 40eacf0..41d1f89 100644
--- a/aberth/defs.h
+++ b/aberth/defs.h
@@ -9,5 +9,3 @@ using namespace std;
constexpr const int N = 18;
using Com = complex<double>;
-using Poly = array<int, N + 1>;
-using AApprox = array<Com, N>;
diff --git a/aberth/host_aberth.cpp b/aberth/host_aberth.cpp
new file mode 100644
index 0000000..c567f30
--- /dev/null
+++ b/aberth/host_aberth.cpp
@@ -0,0 +1,96 @@
+#include <random>
+#include <cmath>
+#include "host_aberth.h"
+
+using namespace std;
+
+
+struct AberthState {
+ // boundPoly is 's' in the stop condition formulated at p.189-190 of
+ // https://link.springer.com/article/10.1007%2FBF02207694
+
+ const Poly &poly;
+ Poly deriv;
+ Poly boundPoly;
+ AApprox approx;
+ double radius;
+
+ AberthState(const Poly &poly);
+ void regenerate();
+ bool step();
+ void iterate();
+};
+
+static thread_local minstd_rand randgenerator = minstd_rand(random_device()());
+
+AberthState::AberthState(const Poly &poly)
+ : poly(poly), deriv(derivative(poly)), radius(maxRootNorm(poly)) {
+
+ regenerate();
+ for (int i = 0; i <= N; i++) {
+ boundPoly[i] = abs(poly[i]) * (4 * i + 1);
+ }
+}
+
+void AberthState::regenerate() {
+ auto genCoord = [this]() {
+ return uniform_real_distribution<double>(-radius, radius)(randgenerator);
+ };
+
+ for (int i = 0; i < N; i++) {
+ approx[i] = Com(genCoord(), genCoord());
+ }
+}
+
+// Lagrange-style step where the new elements are computed in parallel from the previous values
+bool AberthState::step() {
+ array<Com, N * N> pairs;
+ for (int i = 0; i < N - 1; i++) {
+ for (int j = i + 1; j < N; j++) {
+ pairs[N * i + j] = 1.0 / (approx[i] - approx[j]);
+ }
+ }
+
+ bool allConverged = true;
+
+ AApprox newapprox;
+ AApprox offsets;
+ for (int i = 0; i < N; i++) {
+ Com pval = eval(poly, approx[i]);
+ Com derivval = eval(deriv, poly.size() - 1, approx[i]);
+ Com quo = pval / derivval;
+ Com sum = 0;
+ for (int j = 0; j < i; j++) sum -= pairs[N * j + i];
+ for (int j = i + 1; j < N; j++) sum += pairs[N * i + j];
+ offsets[i] = quo / (1.0 - quo * sum);
+
+ newapprox[i] = approx[i] - offsets[i];
+
+ double sval = eval(boundPoly, abs(newapprox[i]));
+ if (abs(pval) > 1e-5 * sval) allConverged = false;
+ }
+
+ approx = newapprox;
+
+ return allConverged;
+}
+
+void AberthState::iterate() {
+ int tries = 1, stepIdx = 1;
+
+ while (!step()) {
+ stepIdx++;
+
+ if (stepIdx > tries * 100) {
+ regenerate();
+ stepIdx = 0;
+ tries++;
+ }
+ }
+}
+
+AApprox aberth(const Poly &poly) {
+ AberthState state(poly);
+ state.iterate();
+ return state.approx;
+}
diff --git a/aberth/host_aberth.h b/aberth/host_aberth.h
new file mode 100644
index 0000000..566a84e
--- /dev/null
+++ b/aberth/host_aberth.h
@@ -0,0 +1,12 @@
+#pragma once
+
+#include <array>
+#include "poly.h"
+
+using namespace std;
+
+
+using AApprox = array<Com, N>;
+
+
+AApprox aberth(const Poly &poly);
diff --git a/aberth/kernel.h b/aberth/kernel.h
index bd8ea32..3ab2772 100644
--- a/aberth/kernel.h
+++ b/aberth/kernel.h
@@ -1,6 +1,7 @@
#pragma once
#include <vector>
+#include <type_traits>
#include "defs.h"
extern "C" {
diff --git a/aberth/main.cpp b/aberth/main.cpp
index 1cdea3a..152640f 100644
--- a/aberth/main.cpp
+++ b/aberth/main.cpp
@@ -1,232 +1,20 @@
#include <iostream>
#include <fstream>
-#include <sstream>
-#include <vector>
-#include <array>
#include <string>
-#include <complex>
-#include <random>
-#include <utility>
-#include <tuple>
+#include <vector>
#include <algorithm>
-#include <thread>
-#include <mutex>
-#include <type_traits>
+#include <tuple>
#include <cstdint>
+#include <cstring>
#include <cassert>
#include "../lodepng.h"
+#include "compute_host.h"
#include "defs.h"
#include "kernel.h"
-#include "util.h"
using namespace std;
-template <typename T>
-static T eval(const Poly &p, int nterms, T pt) {
- T value = p[nterms - 1];
- for (int i = nterms - 2; i >= 0; i--) {
- value = pt * value + (double)p[i];
- }
- return value;
-}
-
-template <typename T>
-static T eval(const Poly &p, T pt) {
- return eval(p, p.size(), pt);
-}
-
-static Poly derivative(const Poly &p) {
- Poly res;
- for (int i = res.size() - 2; i >= 0; i--) {
- res[i] = (i+1) * p[i+1];
- }
- return res;
-}
-
-static double maxRootNorm(const Poly &poly) {
- // Cauchy's bound: https://en.wikipedia.org/wiki/Geometrical_properties_of_polynomial_roots#Lagrange's_and_Cauchy's_bounds
-
- double value = 0;
- double last = (double)poly.back();
- for (int i = 0; i < (int)poly.size() - 1; i++) {
- value = max(value, abs(poly[i] / last));
- }
- return 1 + value;
-}
-
-static thread_local minstd_rand randgenerator = minstd_rand(random_device()());
-
-struct AberthState {
- const Poly &poly;
- Poly deriv;
- Poly boundPoly;
- AApprox approx;
- double radius;
-
- void regenerate() {
- auto genCoord = [this]() {
- return uniform_real_distribution<double>(-radius, radius)(randgenerator);
- };
- for (int i = 0; i < N; i++) {
- approx[i] = Com(genCoord(), genCoord());
- }
- }
-
- // boundPoly is 's' in the stop condition formulated at p.189-190 of
- // https://link.springer.com/article/10.1007%2FBF02207694
-
- AberthState(const Poly &poly)
- : poly(poly), deriv(derivative(poly)), radius(maxRootNorm(poly)) {
-
- regenerate();
- for (int i = 0; i <= N; i++) {
- boundPoly[i] = abs(poly[i]) * (4 * i + 1);
- }
- }
-
- // Lagrange-style step where the new elements are computed in parallel from the previous values
- bool step() {
- array<Com, N * N> pairs;
- for (int i = 0; i < N - 1; i++) {
- for (int j = i + 1; j < N; j++) {
- pairs[N * i + j] = 1.0 / (approx[i] - approx[j]);
- }
- }
-
- bool allConverged = true;
-
- AApprox newapprox;
- AApprox offsets;
- for (int i = 0; i < N; i++) {
- Com pval = eval(poly, approx[i]);
- Com derivval = eval(deriv, poly.size() - 1, approx[i]);
- Com quo = pval / derivval;
- Com sum = 0;
- for (int j = 0; j < i; j++) sum -= pairs[N * j + i];
- for (int j = i + 1; j < N; j++) sum += pairs[N * i + j];
- offsets[i] = quo / (1.0 - quo * sum);
-
- // approx[i] -= offsets[i];
- newapprox[i] = approx[i] - offsets[i];
-
- double sval = eval(boundPoly, abs(newapprox[i]));
- if (abs(pval) > 1e-5 * sval) allConverged = false;
- }
-
- approx = newapprox;
-
- return allConverged;
- }
-
- void iterate() {
- int tries = 1, stepIdx = 1;
- while (!step()) {
- stepIdx++;
-
- if (stepIdx > tries * 100) {
- regenerate();
- stepIdx = 0;
- tries++;
- }
- }
- }
-};
-
-static AApprox aberth(const Poly &poly) {
- AberthState state(poly);
- state.iterate();
- return state.approx;
-}
-
-// Returns whether we just looped around
-static bool nextDerbyshire(Poly &poly) {
- for (int i = 1; i < (int)poly.size(); i++) {
- if (poly[i] == -1) {
- poly[i] = 1;
- return false;
- }
- poly[i] = -1;
- }
- return true;
-}
-
-static Poly derbyshireAtIndex(int index) {
- Poly poly;
- poly[0] = 1;
- for (int i = 1; i <= N; i++) {
- poly[i] = index & 1 ? 1 : -1;
- index >>= 1;
- }
- assert(index == 0);
- return poly;
-}
-
-struct Job {
- Poly init;
- int numItems;
-};
-
-static vector<Job> derbyshireJobs(int targetJobs) {
- int njobs = min(1 << N, ceil2(targetJobs));
- int jobsize = (1 << N) / njobs;
-
- vector<Job> jobs(njobs);
- for (int i = 0; i < njobs; i++) {
- jobs[i].init = derbyshireAtIndex(i * jobsize);
- jobs[i].numItems = jobsize;
- }
-
- return jobs;
-}
-
-static vector<int> computeCounts(int W, int H, Com bottomLeft, Com topRight) {
- constexpr const int numThreads = 4;
- static_assert(ispow2(numThreads));
-
- vector<int> counts(W * H);
- mutex countsMutex;
-
- vector<Job> jobs = derbyshireJobs(numThreads);
- assert(jobs.size() == numThreads);
-
- vector<thread> threads(jobs.size());
- for (int i = 0; i < (int)jobs.size(); i++) {
- threads[i] = thread([W, H, &counts, &countsMutex, job = jobs[i], bottomLeft, topRight]() {
- auto calcIndex = [](double value, double left, double right, int steps) -> int {
- return (value - left) / (right - left) * (steps - 1) + 0.5;
- };
- auto calcPos = [W, H, bottomLeft, topRight, &calcIndex](Com z) -> pair<int, int> {
- return make_pair(
- calcIndex(z.real(), bottomLeft.real(), topRight.real(), W),
- calcIndex(z.imag(), bottomLeft.imag(), topRight.imag(), H)
- );
- };
-
- vector<int> localCounts(W * H);
-
- Poly poly = job.init;
- for (int i = 0; i < job.numItems; i++) {
- for (Com z : aberth(poly)) {
- int x, y;
- tie(x, y) = calcPos(z);
- if (0 <= x && x < W && 0 <= y && y < H) {
- localCounts[W * y + x]++;
- }
- }
- nextDerbyshire(poly);
- }
-
- lock_guard<mutex> guard(countsMutex);
- for (int i = 0; i < W * H; i++) counts[i] += localCounts[i];
- });
- }
-
- for (thread &th : threads) th.join();
-
- return counts;
-}
-
static void writeCounts(int W, int H, const vector<int> &counts, const char *fname) {
ofstream f(fname);
f << W << ' ' << H << '\n';
@@ -289,12 +77,12 @@ int main(int argc, char **argv) {
const Com bottomLeft = Com(-1.5, -1.5);
const Com topRight = Com(1.5, 1.5);
- // counts = computeCounts(W, H, bottomLeft, topRight);
+ // counts = computeHost(W, H, bottomLeft, topRight);
Kernel().run_chunked(counts, W, H, bottomLeft, topRight, 42, 1 << 14);
// Kernel().run_all(counts, W, H, bottomLeft, topRight, 42);
writeCounts(W, H, counts, "out.txt");
- } else if (argc == 2) {
+ } else if (argc == 2 && strcmp(argv[1], "-h") != 0) {
tie(W, H, counts) = readCounts(argv[1]);
} else {
cerr << "Usage: " << argv[0] << " -- compute and draw" << endl;
diff --git a/aberth/util.cpp b/aberth/poly.cpp
index b8f6edc..cf19816 100644
--- a/aberth/util.cpp
+++ b/aberth/poly.cpp
@@ -1,4 +1,5 @@
-#include "util.h"
+#include <sstream>
+#include "poly.h"
ostream& operator<<(ostream &os, const Poly &p) {
diff --git a/aberth/poly.h b/aberth/poly.h
new file mode 100644
index 0000000..82f04a2
--- /dev/null
+++ b/aberth/poly.h
@@ -0,0 +1,42 @@
+#pragma once
+
+#include "defs.h"
+
+
+using Poly = array<int, N + 1>;
+
+
+template <typename T>
+T eval(const Poly &p, int nterms, T pt) {
+ T value = p[nterms - 1];
+ for (int i = nterms - 2; i >= 0; i--) {
+ value = pt * value + (double)p[i];
+ }
+ return value;
+}
+
+template <typename T>
+T eval(const Poly &p, T pt) {
+ return eval(p, p.size(), pt);
+}
+
+inline Poly derivative(const Poly &p) {
+ Poly res;
+ for (int i = res.size() - 2; i >= 0; i--) {
+ res[i] = (i+1) * p[i+1];
+ }
+ return res;
+}
+
+inline double maxRootNorm(const Poly &poly) {
+ // Cauchy's bound: https://en.wikipedia.org/wiki/Geometrical_properties_of_polynomial_roots#Lagrange's_and_Cauchy's_bounds
+
+ double value = 0;
+ double last = (double)poly.back();
+ for (int i = 0; i < (int)poly.size() - 1; i++) {
+ value = max(value, abs(poly[i] / last));
+ }
+ return 1 + value;
+}
+
+ostream& operator<<(ostream &os, const Poly &p);
diff --git a/aberth/polygen.cpp b/aberth/polygen.cpp
new file mode 100644
index 0000000..6df89db
--- /dev/null
+++ b/aberth/polygen.cpp
@@ -0,0 +1,43 @@
+#include "polygen.h"
+#include "util.h"
+
+using namespace std;
+
+
+namespace PolyGen::Derbyshire {
+ // Returns whether we just looped around
+ bool next(Poly &poly) {
+ for (int i = 1; i < (int)poly.size(); i++) {
+ if (poly[i] == -1) {
+ poly[i] = 1;
+ return false;
+ }
+ poly[i] = -1;
+ }
+ return true;
+ }
+
+ Poly atIndex(int index) {
+ Poly poly;
+ poly[0] = 1;
+ for (int i = 1; i <= N; i++) {
+ poly[i] = index & 1 ? 1 : -1;
+ index >>= 1;
+ }
+ assert(index == 0);
+ return poly;
+ }
+
+ vector<Job> genJobs(int targetJobs) {
+ int njobs = min(1 << N, ceil2(targetJobs));
+ int jobsize = (1 << N) / njobs;
+
+ vector<Job> jobs(njobs);
+ for (int i = 0; i < njobs; i++) {
+ jobs[i].init = atIndex(i * jobsize);
+ jobs[i].numItems = jobsize;
+ }
+
+ return jobs;
+ }
+}
diff --git a/aberth/polygen.h b/aberth/polygen.h
new file mode 100644
index 0000000..1381444
--- /dev/null
+++ b/aberth/polygen.h
@@ -0,0 +1,25 @@
+#pragma once
+
+#include <vector>
+#include "poly.h"
+
+using namespace std;
+
+
+namespace PolyGen {
+ struct Job {
+ Poly init;
+ int numItems;
+ };
+
+ namespace Derbyshire {
+ // Returns whether we just looped around
+ bool next(Poly &poly);
+
+ Poly atIndex(int index);
+
+ vector<Job> genJobs(int targetJobs);
+ }
+
+ using namespace Derbyshire;
+}
diff --git a/aberth/util.h b/aberth/util.h
index c21a7bb..a276e2a 100644
--- a/aberth/util.h
+++ b/aberth/util.h
@@ -27,5 +27,3 @@ constexpr static T ceil2(T value) {
if (value2 == 0) return value << 1;
}
}
-
-ostream& operator<<(ostream &os, const Poly &p);