diff options
author | tomsmeding <tom.smeding@gmail.com> | 2019-04-19 23:32:06 +0200 |
---|---|---|
committer | tomsmeding <tom.smeding@gmail.com> | 2019-04-19 23:34:01 +0200 |
commit | be2bc936957d4fcbc2001ca48909bb2ce8d3f5c7 (patch) | |
tree | 9b2402ff15cc074e9c7be6de14affc57c735f1bf | |
parent | bc72c77a8a3d6a82cd234d40308dd8c47aa0e742 (diff) |
Working Futhark! 🎉
That was actually easier than expected, and (surprise!) it runs ORDERS
OF MAGNITUDE faster than my C++ implementation...
C++ takes ~1min on my i5-4278U CPU @ 2.60GHz using all 4 virtual cores;
the Futhark takes ~11sec on 1 core. Not sure whether my own code is so
terribly bad or whether Futhark is cool, but this is cool nevertheless.
-rw-r--r-- | aberth/.gitignore | 2 | ||||
-rw-r--r-- | aberth/Makefile | 23 | ||||
-rw-r--r-- | aberth/aberth.cpp | 62 | ||||
-rw-r--r-- | aberth/aberth_kernel.fut | 45 |
4 files changed, 117 insertions, 15 deletions
diff --git a/aberth/.gitignore b/aberth/.gitignore index 66c7e62..db0dbae 100644 --- a/aberth/.gitignore +++ b/aberth/.gitignore @@ -1,2 +1,4 @@ aberth lib/ +aberth_kernel.c +aberth_kernel.h diff --git a/aberth/Makefile b/aberth/Makefile index c5a67cf..f610c01 100644 --- a/aberth/Makefile +++ b/aberth/Makefile @@ -1,20 +1,31 @@ CXX = g++ -CXXFLAGS = -Wall -Wextra -std=c++17 -O3 -fwrapv -ffast-math -march=native -mtune=native -LDFLAGS = -pthread +CXXFLAGS = -Wall -Wextra -std=c++17 -fwrapv $(OPTFLAGS) +OPTFLAGS = -O3 -ffast-math -march=native -mtune=native +LDFLAGS = -pthread -framework OpenCL + +TARGETS = aberth + +OBJECTS = $(patsubst %.cpp,%.o,$(wildcard *.cpp)) .PHONY: all clean remake -all: aberth +all: $(TARGETS) clean: - rm -f aberth *.o + rm -f $(TARGETS) *.o aberth_kernel.h aberth_kernel.c remake: clean $(MAKE) all -aberth: $(patsubst %.cpp,%.o,$(wildcard *.cpp)) ../lodepng.o +aberth: $(OBJECTS) aberth_kernel.o ../lodepng.o $(CXX) -o $@ $^ $(LDFLAGS) -%.o: %.cpp $(wildcard *.h) +aberth_kernel.h: aberth_kernel.fut + futhark c --library $^ + +aberth_kernel.o: aberth_kernel.h + gcc -c $(OPTFLAGS) -o $@ aberth_kernel.c + +$(OBJECTS): %.o: %.cpp $(wildcard *.h) aberth_kernel.h $(CXX) $(CXXFLAGS) -c -o $@ $< diff --git a/aberth/aberth.cpp b/aberth/aberth.cpp index 2adf207..d002ba4 100644 --- a/aberth/aberth.cpp +++ b/aberth/aberth.cpp @@ -16,6 +16,10 @@ #include <cassert> #include "../lodepng.h" +extern "C" { +#include "aberth_kernel.h" +} + using namespace std; @@ -132,7 +136,7 @@ struct AberthState { } } - // Gauss-Seidel-style step where the updated values are already used in the current iteration + // Lagrange-style step where the new elements are computed in parallel from the previous values bool step() { array<Com, N * N> pairs; for (int i = 0; i < N - 1; i++) { @@ -143,6 +147,7 @@ struct AberthState { bool allConverged = true; + AApprox newapprox; AApprox offsets; for (int i = 0; i < N; i++) { Com pval = eval(poly, approx[i]); @@ -153,12 +158,15 @@ struct AberthState { for (int j = i + 1; j < N; j++) sum += pairs[N * i + j]; offsets[i] = quo / (1.0 - quo * sum); - approx[i] -= offsets[i]; + // approx[i] -= offsets[i]; + newapprox[i] = approx[i] - offsets[i]; - double sval = eval(boundPoly, abs(approx[i])); + double sval = eval(boundPoly, abs(newapprox[i])); if (abs(pval) > 1e-9 * sval) allConverged = false; } + approx = newapprox; + return allConverged; } @@ -332,6 +340,48 @@ static vector<uint8_t> drawImage(int W, int H, const vector<int> &counts, int ma return image; } +static vector<int32_t> invoke_kernel( + int32_t width, int32_t height, + double left, double top, double right, double bottom, + int32_t seed) { + + futhark_context_config *config = futhark_context_config_new(); + // futhark_context_config_select_device_interactively(config); + + futhark_context *ctx = futhark_context_new(config); + + auto check_ret = [ctx](int ret) { + if (ret != 0) { + char *str = futhark_context_get_error(ctx); + cerr << str << endl; + free(str); + exit(1); + } + }; + + futhark_i32_1d *dest_arr; + + check_ret(futhark_entry_main_all( + ctx, &dest_arr, + width, height, + left, top, right, bottom, + seed)); + + check_ret(futhark_context_sync(ctx)); + + // Shouldn't free _this_ pointer, apparently + int64_t *shape = futhark_shape_i32_1d(ctx, dest_arr); + assert(shape[0] == width * height); + + vector<int32_t> buffer(width * height); + check_ret(futhark_values_i32_1d(ctx, dest_arr, buffer.data())); + check_ret(futhark_free_i32_1d(ctx, dest_arr)); + + futhark_context_config_free(config); + + return buffer; +} + int main(int argc, char **argv) { srandomdev(); @@ -339,7 +389,11 @@ int main(int argc, char **argv) { vector<int> counts; if (argc <= 1) { - tie(W, H, counts) = computeCounts(); + // tie(W, H, counts) = computeCounts(); + W = H = 900; + Com bottomLeft = Com(-1.5, -1.5); + Com topRight = Com(1.5, 1.5); + counts = invoke_kernel(W, H, bottomLeft.real(), topRight.imag(), topRight.real(), bottomLeft.imag(), 42); writeCounts(W, H, counts, "out.txt"); } else if (argc == 2) { tie(W, H, counts) = readCounts(argv[1]); diff --git a/aberth/aberth_kernel.fut b/aberth/aberth_kernel.fut index a52d8b6..815c254 100644 --- a/aberth/aberth_kernel.fut +++ b/aberth/aberth_kernel.fut @@ -29,7 +29,7 @@ let evaln_d (p: poly) (nterms: i32) (pt: f64): f64 = let eval_d (p: poly) (pt: f64): f64 = evaln_d p (length p) pt let derivative (p: poly): *poly = - map (\(i, v) -> f64.i32 i * v) (zip (1..<PolyN) (take (PolyN - 1) p)) ++ [0] + map (\(i, v) -> f64.i32 i * v) (zip (1..<PolyN) (drop 1 p)) ++ [0] -- Cauchy's bound: https://en.wikipedia.org/wiki/Geometrical_properties_of_polynomial_roots#Lagrange's_and_Cauchy's_bounds let max_root_norm (p: poly): f64 = @@ -37,8 +37,8 @@ let max_root_norm (p: poly): f64 = module aberth = { type approx = [N]complex - -- bound is 's' in the stop condition formulated at p.189-190 of - -- https://link.springer.com/article/10.1007%2FBF02207694 + -- bound is 's' in the stop condition formulated at p.189-190 of + -- https://link.springer.com/article/10.1007%2FBF02207694 type context = {p: poly, deriv: poly, bound: poly, radius: f64} let gen_coord (r: f64) (rng: *rand_engine.rng): *(rand_engine.rng, f64) = @@ -82,7 +82,7 @@ module aberth = { quos sums let approx = map2 (c64.-) approx offsets let svals = map (eval_d ctx.bound <-< c64.mag) approx - let conditions = map2 (\p s -> c64.mag p > 1e-9 * s) pvals svals + let conditions = map2 (\p s -> c64.mag p <= 1e-9 * s) pvals svals let all_converged = all id conditions in (all_converged, approx) @@ -123,4 +123,39 @@ let derbyshire_at_index (index: i32): *poly = in tabulate PolyN (\i -> f64.i32 (i32.get_bit i bitfield * 2 - 1)) -entry main: i32 = 42 +let point_index + (width: i32) (height: i32) + (top_left: complex) (bottom_right: complex) + (pt: complex) + : i32 = + let x = (c64.re pt - c64.re top_left) / (c64.re bottom_right - c64.re top_left) + let y = (c64.im pt - c64.im top_left) / (c64.im bottom_right - c64.im top_left) + let xi = t64 (x * r64 (width - 1)) + let yi = t64 (y * r64 (height - 1)) + in width * yi + xi + +entry main_job + (start_index: i32) (num_polys: i32) + (width: i32) (height: i32) + (left: f64) (top: f64) (right: f64) (bottom: f64) + (seed: i32) + : []i32 = + -- Unnecessary to give each polynomial a different seed + let rng = rand_engine.rng_from_seed [seed] + let top_left = c64.mk left top + let bottom_right = c64.mk right bottom + let indices = flatten + (map (\idx -> + let p = derbyshire_at_index idx + let (_, pts) = aberth.aberth p rng + let indices = map (point_index width height top_left bottom_right) pts + in indices) + (start_index ..< start_index + num_polys)) + in reduce_by_index (replicate (width * height) 0) (+) 0 indices (replicate (length indices) 1) + +entry main_all + (width: i32) (height: i32) + (left: f64) (top: f64) (right: f64) (bottom: f64) + (seed: i32) + : []i32 = + main_job 0 (1 << N) width height left top right bottom seed |