diff options
| author | tomsmeding <tom.smeding@gmail.com> | 2019-04-20 21:15:46 +0200 | 
|---|---|---|
| committer | tomsmeding <tom.smeding@gmail.com> | 2019-04-20 21:16:10 +0200 | 
| commit | 284ce56948aae60134a1073c6499c7fb97ffb326 (patch) | |
| tree | 174a34d94072f4ee819a89101ef6ff182e5bcf4b /aberth | |
| parent | ccade8e6ed96fb48b329d22aaa4c8d0826b3c8d1 (diff) | |
More reorganisation
Diffstat (limited to 'aberth')
| -rw-r--r-- | aberth/Makefile | 6 | ||||
| -rw-r--r-- | aberth/compute_host.cpp | 58 | ||||
| -rw-r--r-- | aberth/compute_host.h | 9 | ||||
| -rw-r--r-- | aberth/defs.h | 2 | ||||
| -rw-r--r-- | aberth/host_aberth.cpp | 96 | ||||
| -rw-r--r-- | aberth/host_aberth.h | 12 | ||||
| -rw-r--r-- | aberth/kernel.h | 1 | ||||
| -rw-r--r-- | aberth/main.cpp | 224 | ||||
| -rw-r--r-- | aberth/poly.cpp (renamed from aberth/util.cpp) | 3 | ||||
| -rw-r--r-- | aberth/poly.h | 42 | ||||
| -rw-r--r-- | aberth/polygen.cpp | 43 | ||||
| -rw-r--r-- | aberth/polygen.h | 25 | ||||
| -rw-r--r-- | aberth/util.h | 2 | 
13 files changed, 297 insertions, 226 deletions
| diff --git a/aberth/Makefile b/aberth/Makefile index 99f0e16..cd59309 100644 --- a/aberth/Makefile +++ b/aberth/Makefile @@ -1,6 +1,6 @@  CXX = g++ -CXXFLAGS = -Wall -Wextra -std=c++17 -fwrapv $(OPTFLAGS) -OPTFLAGS = -O3 -ffast-math -march=native -mtune=native +CXXFLAGS = -Wall -Wextra -std=c++17 $(OPTFLAGS) +OPTFLAGS = -O3 -ffast-math -march=native -mtune=native -flto  LDFLAGS = -pthread  ifeq ($(shell uname),Darwin)  	LDFLAGS += -framework OpenCL @@ -24,7 +24,7 @@ remake: clean  aberth: $(OBJECTS) aberth_kernel.o ../lodepng.o -	$(CXX) -o $@ $^ $(LDFLAGS) +	$(CXX) $(CXXFLAGS) -o $@ $^ $(LDFLAGS)  aberth_kernel.h: aberth_kernel.fut  	futhark opencl --library $^ diff --git a/aberth/compute_host.cpp b/aberth/compute_host.cpp new file mode 100644 index 0000000..38bf43d --- /dev/null +++ b/aberth/compute_host.cpp @@ -0,0 +1,58 @@ +#include <thread> +#include <mutex> +#include <tuple> +#include <cassert> +#include "compute_host.h" +#include "host_aberth.h" +#include "polygen.h" +#include "util.h" + +using namespace std; + + +vector<int> computeHost(int W, int H, Com bottomLeft, Com topRight) { +	constexpr const int numThreads = 4; +	static_assert(ispow2(numThreads)); + +	vector<int> counts(W * H); +	mutex countsMutex; + +	vector<PolyGen::Job> jobs = PolyGen::genJobs(numThreads); +	assert(jobs.size() == numThreads); + +	vector<thread> threads(jobs.size()); +	for (int i = 0; i < (int)jobs.size(); i++) { +		threads[i] = thread([W, H, &counts, &countsMutex, job = jobs[i], bottomLeft, topRight]() { +			auto calcIndex = [](double value, double left, double right, int steps) -> int { +				return (value - left) / (right - left) * (steps - 1) + 0.5; +			}; +			auto calcPos = [W, H, bottomLeft, topRight, &calcIndex](Com z) -> pair<int, int> { +				return make_pair( +					calcIndex(z.real(), bottomLeft.real(), topRight.real(), W), +					calcIndex(z.imag(), bottomLeft.imag(), topRight.imag(), H) +				); +			}; + +			vector<int> localCounts(W * H); + +			Poly poly = job.init; +			for (int i = 0; i < job.numItems; i++) { +				for (Com z : aberth(poly)) { +					int x, y; +					tie(x, y) = calcPos(z); +					if (0 <= x && x < W && 0 <= y && y < H) { +						localCounts[W * y + x]++; +					} +				} +				PolyGen::next(poly); +			} + +			lock_guard<mutex> guard(countsMutex); +			for (int i = 0; i < W * H; i++) counts[i] += localCounts[i]; +		}); +	} + +	for (thread &th : threads) th.join(); + +	return counts; +} diff --git a/aberth/compute_host.h b/aberth/compute_host.h new file mode 100644 index 0000000..7a2ad36 --- /dev/null +++ b/aberth/compute_host.h @@ -0,0 +1,9 @@ +#pragma once + +#include <vector> +#include "defs.h" + +using namespace std; + + +vector<int> computeHost(int W, int H, Com bottomLeft, Com topRight); diff --git a/aberth/defs.h b/aberth/defs.h index 40eacf0..41d1f89 100644 --- a/aberth/defs.h +++ b/aberth/defs.h @@ -9,5 +9,3 @@ using namespace std;  constexpr const int N = 18;  using Com = complex<double>; -using Poly = array<int, N + 1>; -using AApprox = array<Com, N>; diff --git a/aberth/host_aberth.cpp b/aberth/host_aberth.cpp new file mode 100644 index 0000000..c567f30 --- /dev/null +++ b/aberth/host_aberth.cpp @@ -0,0 +1,96 @@ +#include <random> +#include <cmath> +#include "host_aberth.h" + +using namespace std; + + +struct AberthState { +	// boundPoly is 's' in the stop condition formulated at p.189-190 of +	// https://link.springer.com/article/10.1007%2FBF02207694 + +	const Poly &poly; +	Poly deriv; +	Poly boundPoly; +	AApprox approx; +	double radius; + +	AberthState(const Poly &poly); +	void regenerate(); +	bool step(); +	void iterate(); +}; + +static thread_local minstd_rand randgenerator = minstd_rand(random_device()()); + +AberthState::AberthState(const Poly &poly) +        : poly(poly), deriv(derivative(poly)), radius(maxRootNorm(poly)) { + +    regenerate(); +    for (int i = 0; i <= N; i++) { +        boundPoly[i] = abs(poly[i]) * (4 * i + 1); +    } +} + +void AberthState::regenerate() { +    auto genCoord = [this]() { +        return uniform_real_distribution<double>(-radius, radius)(randgenerator); +    }; + +    for (int i = 0; i < N; i++) { +        approx[i] = Com(genCoord(), genCoord()); +    } +} + +// Lagrange-style step where the new elements are computed in parallel from the previous values +bool AberthState::step() { +    array<Com, N * N> pairs; +    for (int i = 0; i < N - 1; i++) { +        for (int j = i + 1; j < N; j++) { +            pairs[N * i + j] = 1.0 / (approx[i] - approx[j]); +        } +    } + +    bool allConverged = true; + +    AApprox newapprox; +    AApprox offsets; +    for (int i = 0; i < N; i++) { +        Com pval = eval(poly, approx[i]); +        Com derivval = eval(deriv, poly.size() - 1, approx[i]); +        Com quo = pval / derivval; +        Com sum = 0; +        for (int j = 0; j < i; j++) sum -= pairs[N * j + i]; +        for (int j = i + 1; j < N; j++) sum += pairs[N * i + j]; +        offsets[i] = quo / (1.0 - quo * sum); + +        newapprox[i] = approx[i] - offsets[i]; + +        double sval = eval(boundPoly, abs(newapprox[i])); +        if (abs(pval) > 1e-5 * sval) allConverged = false; +    } + +    approx = newapprox; + +    return allConverged; +} + +void AberthState::iterate() { +    int tries = 1, stepIdx = 1; + +    while (!step()) { +        stepIdx++; + +        if (stepIdx > tries * 100) { +            regenerate(); +            stepIdx = 0; +            tries++; +        } +    } +} + +AApprox aberth(const Poly &poly) { +	AberthState state(poly); +	state.iterate(); +	return state.approx; +} diff --git a/aberth/host_aberth.h b/aberth/host_aberth.h new file mode 100644 index 0000000..566a84e --- /dev/null +++ b/aberth/host_aberth.h @@ -0,0 +1,12 @@ +#pragma once + +#include <array> +#include "poly.h" + +using namespace std; + + +using AApprox = array<Com, N>; + + +AApprox aberth(const Poly &poly); diff --git a/aberth/kernel.h b/aberth/kernel.h index bd8ea32..3ab2772 100644 --- a/aberth/kernel.h +++ b/aberth/kernel.h @@ -1,6 +1,7 @@  #pragma once  #include <vector> +#include <type_traits>  #include "defs.h"  extern "C" { diff --git a/aberth/main.cpp b/aberth/main.cpp index 1cdea3a..152640f 100644 --- a/aberth/main.cpp +++ b/aberth/main.cpp @@ -1,232 +1,20 @@  #include <iostream>  #include <fstream> -#include <sstream> -#include <vector> -#include <array>  #include <string> -#include <complex> -#include <random> -#include <utility> -#include <tuple> +#include <vector>  #include <algorithm> -#include <thread> -#include <mutex> -#include <type_traits> +#include <tuple>  #include <cstdint> +#include <cstring>  #include <cassert>  #include "../lodepng.h" +#include "compute_host.h"  #include "defs.h"  #include "kernel.h" -#include "util.h"  using namespace std; -template <typename T> -static T eval(const Poly &p, int nterms, T pt) { -	T value = p[nterms - 1]; -	for (int i = nterms - 2; i >= 0; i--) { -		value = pt * value + (double)p[i]; -	} -	return value; -} - -template <typename T> -static T eval(const Poly &p, T pt) { -	return eval(p, p.size(), pt); -} - -static Poly derivative(const Poly &p) { -	Poly res; -	for (int i = res.size() - 2; i >= 0; i--) { -		res[i] = (i+1) * p[i+1]; -	} -	return res; -} - -static double maxRootNorm(const Poly &poly) { -	// Cauchy's bound: https://en.wikipedia.org/wiki/Geometrical_properties_of_polynomial_roots#Lagrange's_and_Cauchy's_bounds - -	double value = 0; -	double last = (double)poly.back(); -	for (int i = 0; i < (int)poly.size() - 1; i++) { -		value = max(value, abs(poly[i] / last)); -	} -	return 1 + value; -} - -static thread_local minstd_rand randgenerator = minstd_rand(random_device()()); - -struct AberthState { -	const Poly &poly; -	Poly deriv; -	Poly boundPoly; -	AApprox approx; -	double radius; - -	void regenerate() { -		auto genCoord = [this]() { -			return uniform_real_distribution<double>(-radius, radius)(randgenerator); -		}; -		for (int i = 0; i < N; i++) { -			approx[i] = Com(genCoord(), genCoord()); -		} -	} - -	// boundPoly is 's' in the stop condition formulated at p.189-190 of -	// https://link.springer.com/article/10.1007%2FBF02207694 - -	AberthState(const Poly &poly) -			: poly(poly), deriv(derivative(poly)), radius(maxRootNorm(poly)) { - -		regenerate(); -		for (int i = 0; i <= N; i++) { -			boundPoly[i] = abs(poly[i]) * (4 * i + 1); -		} -	} - -	// Lagrange-style step where the new elements are computed in parallel from the previous values -	bool step() { -		array<Com, N * N> pairs; -		for (int i = 0; i < N - 1; i++) { -			for (int j = i + 1; j < N; j++) { -				pairs[N * i + j] = 1.0 / (approx[i] - approx[j]); -			} -		} - -		bool allConverged = true; - -		AApprox newapprox; -		AApprox offsets; -		for (int i = 0; i < N; i++) { -			Com pval = eval(poly, approx[i]); -			Com derivval = eval(deriv, poly.size() - 1, approx[i]); -			Com quo = pval / derivval; -			Com sum = 0; -			for (int j = 0; j < i; j++) sum -= pairs[N * j + i]; -			for (int j = i + 1; j < N; j++) sum += pairs[N * i + j]; -			offsets[i] = quo / (1.0 - quo * sum); - -			// approx[i] -= offsets[i]; -			newapprox[i] = approx[i] - offsets[i]; - -			double sval = eval(boundPoly, abs(newapprox[i])); -			if (abs(pval) > 1e-5 * sval) allConverged = false; -		} - -		approx = newapprox; - -		return allConverged; -	} - -	void iterate() { -		int tries = 1, stepIdx = 1; -		while (!step()) { -			stepIdx++; - -			if (stepIdx > tries * 100) { -				regenerate(); -				stepIdx = 0; -				tries++; -			} -		} -	} -}; - -static AApprox aberth(const Poly &poly) { -	AberthState state(poly); -	state.iterate(); -	return state.approx; -} - -// Returns whether we just looped around -static bool nextDerbyshire(Poly &poly) { -	for (int i = 1; i < (int)poly.size(); i++) { -		if (poly[i] == -1) { -			poly[i] = 1; -			return false; -		} -		poly[i] = -1; -	} -	return true; -} - -static Poly derbyshireAtIndex(int index) { -	Poly poly; -	poly[0] = 1; -	for (int i = 1; i <= N; i++) { -		poly[i] = index & 1 ? 1 : -1; -		index >>= 1; -	} -	assert(index == 0); -	return poly; -} - -struct Job { -	Poly init; -	int numItems; -}; - -static vector<Job> derbyshireJobs(int targetJobs) { -	int njobs = min(1 << N, ceil2(targetJobs)); -	int jobsize = (1 << N) / njobs; - -	vector<Job> jobs(njobs); -	for (int i = 0; i < njobs; i++) { -		jobs[i].init = derbyshireAtIndex(i * jobsize); -		jobs[i].numItems = jobsize; -	} - -	return jobs; -} - -static vector<int> computeCounts(int W, int H, Com bottomLeft, Com topRight) { -	constexpr const int numThreads = 4; -	static_assert(ispow2(numThreads)); - -	vector<int> counts(W * H); -	mutex countsMutex; - -	vector<Job> jobs = derbyshireJobs(numThreads); -	assert(jobs.size() == numThreads); - -	vector<thread> threads(jobs.size()); -	for (int i = 0; i < (int)jobs.size(); i++) { -		threads[i] = thread([W, H, &counts, &countsMutex, job = jobs[i], bottomLeft, topRight]() { -			auto calcIndex = [](double value, double left, double right, int steps) -> int { -				return (value - left) / (right - left) * (steps - 1) + 0.5; -			}; -			auto calcPos = [W, H, bottomLeft, topRight, &calcIndex](Com z) -> pair<int, int> { -				return make_pair( -					calcIndex(z.real(), bottomLeft.real(), topRight.real(), W), -					calcIndex(z.imag(), bottomLeft.imag(), topRight.imag(), H) -				); -			}; - -			vector<int> localCounts(W * H); - -			Poly poly = job.init; -			for (int i = 0; i < job.numItems; i++) { -				for (Com z : aberth(poly)) { -					int x, y; -					tie(x, y) = calcPos(z); -					if (0 <= x && x < W && 0 <= y && y < H) { -						localCounts[W * y + x]++; -					} -				} -				nextDerbyshire(poly); -			} - -			lock_guard<mutex> guard(countsMutex); -			for (int i = 0; i < W * H; i++) counts[i] += localCounts[i]; -		}); -	} - -	for (thread &th : threads) th.join(); - -	return counts; -} -  static void writeCounts(int W, int H, const vector<int> &counts, const char *fname) {  	ofstream f(fname);  	f << W << ' ' << H << '\n'; @@ -289,12 +77,12 @@ int main(int argc, char **argv) {  		const Com bottomLeft = Com(-1.5, -1.5);  		const Com topRight = Com(1.5, 1.5); -		// counts = computeCounts(W, H, bottomLeft, topRight); +		// counts = computeHost(W, H, bottomLeft, topRight);  		Kernel().run_chunked(counts, W, H, bottomLeft, topRight, 42, 1 << 14);  		// Kernel().run_all(counts, W, H, bottomLeft, topRight, 42);  		writeCounts(W, H, counts, "out.txt"); -	} else if (argc == 2) { +	} else if (argc == 2 && strcmp(argv[1], "-h") != 0) {  		tie(W, H, counts) = readCounts(argv[1]);  	} else {  		cerr << "Usage: " << argv[0] << "            -- compute and draw" << endl; diff --git a/aberth/util.cpp b/aberth/poly.cpp index b8f6edc..cf19816 100644 --- a/aberth/util.cpp +++ b/aberth/poly.cpp @@ -1,4 +1,5 @@ -#include "util.h" +#include <sstream> +#include "poly.h"  ostream& operator<<(ostream &os, const Poly &p) { diff --git a/aberth/poly.h b/aberth/poly.h new file mode 100644 index 0000000..82f04a2 --- /dev/null +++ b/aberth/poly.h @@ -0,0 +1,42 @@ +#pragma once + +#include "defs.h" + + +using Poly = array<int, N + 1>; + + +template <typename T> +T eval(const Poly &p, int nterms, T pt) { +	T value = p[nterms - 1]; +	for (int i = nterms - 2; i >= 0; i--) { +		value = pt * value + (double)p[i]; +	} +	return value; +} + +template <typename T> +T eval(const Poly &p, T pt) { +	return eval(p, p.size(), pt); +} + +inline Poly derivative(const Poly &p) { +	Poly res; +	for (int i = res.size() - 2; i >= 0; i--) { +		res[i] = (i+1) * p[i+1]; +	} +	return res; +} + +inline double maxRootNorm(const Poly &poly) { +	// Cauchy's bound: https://en.wikipedia.org/wiki/Geometrical_properties_of_polynomial_roots#Lagrange's_and_Cauchy's_bounds + +	double value = 0; +	double last = (double)poly.back(); +	for (int i = 0; i < (int)poly.size() - 1; i++) { +		value = max(value, abs(poly[i] / last)); +	} +	return 1 + value; +} + +ostream& operator<<(ostream &os, const Poly &p); diff --git a/aberth/polygen.cpp b/aberth/polygen.cpp new file mode 100644 index 0000000..6df89db --- /dev/null +++ b/aberth/polygen.cpp @@ -0,0 +1,43 @@ +#include "polygen.h" +#include "util.h" + +using namespace std; + + +namespace PolyGen::Derbyshire { +    // Returns whether we just looped around +    bool next(Poly &poly) { +        for (int i = 1; i < (int)poly.size(); i++) { +            if (poly[i] == -1) { +                poly[i] = 1; +                return false; +            } +            poly[i] = -1; +        } +        return true; +    } + +    Poly atIndex(int index) { +        Poly poly; +        poly[0] = 1; +        for (int i = 1; i <= N; i++) { +            poly[i] = index & 1 ? 1 : -1; +            index >>= 1; +        } +        assert(index == 0); +        return poly; +    } + +    vector<Job> genJobs(int targetJobs) { +        int njobs = min(1 << N, ceil2(targetJobs)); +        int jobsize = (1 << N) / njobs; + +        vector<Job> jobs(njobs); +        for (int i = 0; i < njobs; i++) { +            jobs[i].init = atIndex(i * jobsize); +            jobs[i].numItems = jobsize; +        } + +        return jobs; +    } +} diff --git a/aberth/polygen.h b/aberth/polygen.h new file mode 100644 index 0000000..1381444 --- /dev/null +++ b/aberth/polygen.h @@ -0,0 +1,25 @@ +#pragma once + +#include <vector> +#include "poly.h" + +using namespace std; + + +namespace PolyGen { +    struct Job { +        Poly init; +        int numItems; +    }; + +    namespace Derbyshire { +        // Returns whether we just looped around +        bool next(Poly &poly); + +        Poly atIndex(int index); + +        vector<Job> genJobs(int targetJobs); +    } + +    using namespace Derbyshire; +} diff --git a/aberth/util.h b/aberth/util.h index c21a7bb..a276e2a 100644 --- a/aberth/util.h +++ b/aberth/util.h @@ -27,5 +27,3 @@ constexpr static T ceil2(T value) {  		if (value2 == 0) return value << 1;  	}  } - -ostream& operator<<(ostream &os, const Poly &p); | 
