aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authortomsmeding <tom.smeding@gmail.com>2019-04-19 23:32:06 +0200
committertomsmeding <tom.smeding@gmail.com>2019-04-19 23:34:01 +0200
commitbe2bc936957d4fcbc2001ca48909bb2ce8d3f5c7 (patch)
tree9b2402ff15cc074e9c7be6de14affc57c735f1bf
parentbc72c77a8a3d6a82cd234d40308dd8c47aa0e742 (diff)
Working Futhark! 🎉
That was actually easier than expected, and (surprise!) it runs ORDERS OF MAGNITUDE faster than my C++ implementation... C++ takes ~1min on my i5-4278U CPU @ 2.60GHz using all 4 virtual cores; the Futhark takes ~11sec on 1 core. Not sure whether my own code is so terribly bad or whether Futhark is cool, but this is cool nevertheless.
-rw-r--r--aberth/.gitignore2
-rw-r--r--aberth/Makefile23
-rw-r--r--aberth/aberth.cpp62
-rw-r--r--aberth/aberth_kernel.fut45
4 files changed, 117 insertions, 15 deletions
diff --git a/aberth/.gitignore b/aberth/.gitignore
index 66c7e62..db0dbae 100644
--- a/aberth/.gitignore
+++ b/aberth/.gitignore
@@ -1,2 +1,4 @@
aberth
lib/
+aberth_kernel.c
+aberth_kernel.h
diff --git a/aberth/Makefile b/aberth/Makefile
index c5a67cf..f610c01 100644
--- a/aberth/Makefile
+++ b/aberth/Makefile
@@ -1,20 +1,31 @@
CXX = g++
-CXXFLAGS = -Wall -Wextra -std=c++17 -O3 -fwrapv -ffast-math -march=native -mtune=native
-LDFLAGS = -pthread
+CXXFLAGS = -Wall -Wextra -std=c++17 -fwrapv $(OPTFLAGS)
+OPTFLAGS = -O3 -ffast-math -march=native -mtune=native
+LDFLAGS = -pthread -framework OpenCL
+
+TARGETS = aberth
+
+OBJECTS = $(patsubst %.cpp,%.o,$(wildcard *.cpp))
.PHONY: all clean remake
-all: aberth
+all: $(TARGETS)
clean:
- rm -f aberth *.o
+ rm -f $(TARGETS) *.o aberth_kernel.h aberth_kernel.c
remake: clean
$(MAKE) all
-aberth: $(patsubst %.cpp,%.o,$(wildcard *.cpp)) ../lodepng.o
+aberth: $(OBJECTS) aberth_kernel.o ../lodepng.o
$(CXX) -o $@ $^ $(LDFLAGS)
-%.o: %.cpp $(wildcard *.h)
+aberth_kernel.h: aberth_kernel.fut
+ futhark c --library $^
+
+aberth_kernel.o: aberth_kernel.h
+ gcc -c $(OPTFLAGS) -o $@ aberth_kernel.c
+
+$(OBJECTS): %.o: %.cpp $(wildcard *.h) aberth_kernel.h
$(CXX) $(CXXFLAGS) -c -o $@ $<
diff --git a/aberth/aberth.cpp b/aberth/aberth.cpp
index 2adf207..d002ba4 100644
--- a/aberth/aberth.cpp
+++ b/aberth/aberth.cpp
@@ -16,6 +16,10 @@
#include <cassert>
#include "../lodepng.h"
+extern "C" {
+#include "aberth_kernel.h"
+}
+
using namespace std;
@@ -132,7 +136,7 @@ struct AberthState {
}
}
- // Gauss-Seidel-style step where the updated values are already used in the current iteration
+ // Lagrange-style step where the new elements are computed in parallel from the previous values
bool step() {
array<Com, N * N> pairs;
for (int i = 0; i < N - 1; i++) {
@@ -143,6 +147,7 @@ struct AberthState {
bool allConverged = true;
+ AApprox newapprox;
AApprox offsets;
for (int i = 0; i < N; i++) {
Com pval = eval(poly, approx[i]);
@@ -153,12 +158,15 @@ struct AberthState {
for (int j = i + 1; j < N; j++) sum += pairs[N * i + j];
offsets[i] = quo / (1.0 - quo * sum);
- approx[i] -= offsets[i];
+ // approx[i] -= offsets[i];
+ newapprox[i] = approx[i] - offsets[i];
- double sval = eval(boundPoly, abs(approx[i]));
+ double sval = eval(boundPoly, abs(newapprox[i]));
if (abs(pval) > 1e-9 * sval) allConverged = false;
}
+ approx = newapprox;
+
return allConverged;
}
@@ -332,6 +340,48 @@ static vector<uint8_t> drawImage(int W, int H, const vector<int> &counts, int ma
return image;
}
+static vector<int32_t> invoke_kernel(
+ int32_t width, int32_t height,
+ double left, double top, double right, double bottom,
+ int32_t seed) {
+
+ futhark_context_config *config = futhark_context_config_new();
+ // futhark_context_config_select_device_interactively(config);
+
+ futhark_context *ctx = futhark_context_new(config);
+
+ auto check_ret = [ctx](int ret) {
+ if (ret != 0) {
+ char *str = futhark_context_get_error(ctx);
+ cerr << str << endl;
+ free(str);
+ exit(1);
+ }
+ };
+
+ futhark_i32_1d *dest_arr;
+
+ check_ret(futhark_entry_main_all(
+ ctx, &dest_arr,
+ width, height,
+ left, top, right, bottom,
+ seed));
+
+ check_ret(futhark_context_sync(ctx));
+
+ // Shouldn't free _this_ pointer, apparently
+ int64_t *shape = futhark_shape_i32_1d(ctx, dest_arr);
+ assert(shape[0] == width * height);
+
+ vector<int32_t> buffer(width * height);
+ check_ret(futhark_values_i32_1d(ctx, dest_arr, buffer.data()));
+ check_ret(futhark_free_i32_1d(ctx, dest_arr));
+
+ futhark_context_config_free(config);
+
+ return buffer;
+}
+
int main(int argc, char **argv) {
srandomdev();
@@ -339,7 +389,11 @@ int main(int argc, char **argv) {
vector<int> counts;
if (argc <= 1) {
- tie(W, H, counts) = computeCounts();
+ // tie(W, H, counts) = computeCounts();
+ W = H = 900;
+ Com bottomLeft = Com(-1.5, -1.5);
+ Com topRight = Com(1.5, 1.5);
+ counts = invoke_kernel(W, H, bottomLeft.real(), topRight.imag(), topRight.real(), bottomLeft.imag(), 42);
writeCounts(W, H, counts, "out.txt");
} else if (argc == 2) {
tie(W, H, counts) = readCounts(argv[1]);
diff --git a/aberth/aberth_kernel.fut b/aberth/aberth_kernel.fut
index a52d8b6..815c254 100644
--- a/aberth/aberth_kernel.fut
+++ b/aberth/aberth_kernel.fut
@@ -29,7 +29,7 @@ let evaln_d (p: poly) (nterms: i32) (pt: f64): f64 =
let eval_d (p: poly) (pt: f64): f64 = evaln_d p (length p) pt
let derivative (p: poly): *poly =
- map (\(i, v) -> f64.i32 i * v) (zip (1..<PolyN) (take (PolyN - 1) p)) ++ [0]
+ map (\(i, v) -> f64.i32 i * v) (zip (1..<PolyN) (drop 1 p)) ++ [0]
-- Cauchy's bound: https://en.wikipedia.org/wiki/Geometrical_properties_of_polynomial_roots#Lagrange's_and_Cauchy's_bounds
let max_root_norm (p: poly): f64 =
@@ -37,8 +37,8 @@ let max_root_norm (p: poly): f64 =
module aberth = {
type approx = [N]complex
- -- bound is 's' in the stop condition formulated at p.189-190 of
- -- https://link.springer.com/article/10.1007%2FBF02207694
+ -- bound is 's' in the stop condition formulated at p.189-190 of
+ -- https://link.springer.com/article/10.1007%2FBF02207694
type context = {p: poly, deriv: poly, bound: poly, radius: f64}
let gen_coord (r: f64) (rng: *rand_engine.rng): *(rand_engine.rng, f64) =
@@ -82,7 +82,7 @@ module aberth = {
quos sums
let approx = map2 (c64.-) approx offsets
let svals = map (eval_d ctx.bound <-< c64.mag) approx
- let conditions = map2 (\p s -> c64.mag p > 1e-9 * s) pvals svals
+ let conditions = map2 (\p s -> c64.mag p <= 1e-9 * s) pvals svals
let all_converged = all id conditions
in (all_converged, approx)
@@ -123,4 +123,39 @@ let derbyshire_at_index (index: i32): *poly =
in tabulate PolyN (\i -> f64.i32 (i32.get_bit i bitfield * 2 - 1))
-entry main: i32 = 42
+let point_index
+ (width: i32) (height: i32)
+ (top_left: complex) (bottom_right: complex)
+ (pt: complex)
+ : i32 =
+ let x = (c64.re pt - c64.re top_left) / (c64.re bottom_right - c64.re top_left)
+ let y = (c64.im pt - c64.im top_left) / (c64.im bottom_right - c64.im top_left)
+ let xi = t64 (x * r64 (width - 1))
+ let yi = t64 (y * r64 (height - 1))
+ in width * yi + xi
+
+entry main_job
+ (start_index: i32) (num_polys: i32)
+ (width: i32) (height: i32)
+ (left: f64) (top: f64) (right: f64) (bottom: f64)
+ (seed: i32)
+ : []i32 =
+ -- Unnecessary to give each polynomial a different seed
+ let rng = rand_engine.rng_from_seed [seed]
+ let top_left = c64.mk left top
+ let bottom_right = c64.mk right bottom
+ let indices = flatten
+ (map (\idx ->
+ let p = derbyshire_at_index idx
+ let (_, pts) = aberth.aberth p rng
+ let indices = map (point_index width height top_left bottom_right) pts
+ in indices)
+ (start_index ..< start_index + num_polys))
+ in reduce_by_index (replicate (width * height) 0) (+) 0 indices (replicate (length indices) 1)
+
+entry main_all
+ (width: i32) (height: i32)
+ (left: f64) (top: f64) (right: f64) (bottom: f64)
+ (seed: i32)
+ : []i32 =
+ main_job 0 (1 << N) width height left top right bottom seed