aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authortomsmeding <tom.smeding@gmail.com>2019-04-20 10:27:13 +0200
committertomsmeding <tom.smeding@gmail.com>2019-04-20 10:27:13 +0200
commitd705388634c4df25f800e7cf05e88609bfbc2068 (patch)
treef8dfd4a83426a5e18a272f949407f64985e444f6
parent3a7e1c80cb28a5d63da056f4889eb207140c0e67 (diff)
Switch to f32 for intel iris (which doesn't work)
-rw-r--r--aberth/aberth_kernel.fut70
1 files changed, 35 insertions, 35 deletions
diff --git a/aberth/aberth_kernel.fut b/aberth/aberth_kernel.fut
index 58b8838..f09db56 100644
--- a/aberth/aberth_kernel.fut
+++ b/aberth/aberth_kernel.fut
@@ -1,53 +1,53 @@
import "lib/github.com/diku-dk/complex/complex"
import "lib/github.com/diku-dk/cpprandom/random"
-module uniform_real = uniform_real_distribution f64 minstd_rand
module rand_engine = minstd_rand
+module uniform_real = uniform_real_distribution f32 rand_engine
-module c64 = mk_complex f64
-type complex = c64.complex
+module cplx = mk_complex f32
+type complex = cplx.complex
-let N = 20i32
+let N = 18i32
let PolyN = N + 1
-type poly = [PolyN]f64
+type poly = [PolyN]f32
-- First element of pair steps fastest
let iota2 (n: i32) (m: i32): [](i32, i32) =
flatten (map (\y -> map (\x -> (x, y)) (iota n)) (iota m))
let evaln_c (p: poly) (nterms: i32) (pt: complex): complex =
- foldr (\coef accum -> c64.mk_re coef c64.+ pt c64.* accum)
- (c64.mk_re p[nterms-1]) (take (nterms - 1) p)
+ foldr (\coef accum -> cplx.mk_re coef cplx.+ pt cplx.* accum)
+ (cplx.mk_re p[nterms-1]) (take (nterms - 1) p)
let eval_c (p: poly) (pt: complex): complex = evaln_c p (length p) pt
-let evaln_d (p: poly) (nterms: i32) (pt: f64): f64 =
+let evaln_d (p: poly) (nterms: i32) (pt: f32): f32 =
foldr (\coef accum -> coef + pt * accum)
p[nterms-1] (take (nterms - 1) p)
-let eval_d (p: poly) (pt: f64): f64 = evaln_d p (length p) pt
+let eval_d (p: poly) (pt: f32): f32 = evaln_d p (length p) pt
let derivative (p: poly): *poly =
- map (\(i, v) -> f64.i32 i * v) (zip (1..<PolyN) (drop 1 p)) ++ [0]
+ map (\(i, v) -> f32.i32 i * v) (zip (1..<PolyN) (drop 1 p)) ++ [0]
-- Cauchy's bound: https://en.wikipedia.org/wiki/Geometrical_properties_of_polynomial_roots#Lagrange's_and_Cauchy's_bounds
-let max_root_norm (p: poly): f64 =
- 1 + f64.maximum (map (\coef -> f64.abs (coef / p[PolyN-1])) p)
+let max_root_norm (p: poly): f32 =
+ 1 + f32.maximum (map (\coef -> f32.abs (coef / p[PolyN-1])) p)
module aberth = {
type approx = [N]complex
-- bound is 's' in the stop condition formulated at p.189-190 of
-- https://link.springer.com/article/10.1007%2FBF02207694
- type context = {p: poly, deriv: poly, bound: poly, radius: f64}
+ type context = {p: poly, deriv: poly, bound: poly, radius: f32}
- let gen_coord (r: f64) (rng: *rand_engine.rng): *(rand_engine.rng, f64) =
+ let gen_coord (r: f32) (rng: *rand_engine.rng): *(rand_engine.rng, f32) =
uniform_real.rand (-r, r) rng
- let gen_coord_c (r: f64) (rng: rand_engine.rng): (rand_engine.rng, complex) =
+ let gen_coord_c (r: f32) (rng: rand_engine.rng): (rand_engine.rng, complex) =
let (rng, x) = gen_coord r rng
let (rng, y) = gen_coord r rng
- in (rng, c64.mk x y)
+ in (rng, cplx.mk x y)
let generate (ctx: context) (rng: *rand_engine.rng): *(rand_engine.rng, approx) =
let rngs = rand_engine.split_rng N rng
@@ -56,7 +56,7 @@ module aberth = {
in (rng, approx)
let compute_bound_poly (p: poly): *poly =
- map2 (\coef i -> f64.abs coef * f64.i32 (4 * i + 1)) p (0..<PolyN)
+ map2 (\coef i -> f32.abs coef * f32.i32 (4 * i + 1)) p (0..<PolyN)
let initialise (p: *poly): *context =
let deriv = derivative p
@@ -69,20 +69,20 @@ module aberth = {
let step (ctx: context) (approx: *approx): *(bool, approx) =
let pvals = map (eval_c ctx.p) approx
let derivvals = map (evaln_c ctx.deriv (PolyN - 1)) approx
- let quos = map2 (c64./) pvals derivvals
+ let quos = map2 (cplx./) pvals derivvals
let sums = map (\i ->
- reduce_comm (c64.+) (c64.mk_re 0.0)
+ reduce_comm (cplx.+) (cplx.mk_re 0.0)
(map (\j ->
- if i == j then c64.mk_re 0.0
- else c64.mk_re 1.0 c64./
- (approx[i] c64.- approx[j]))
+ if i == j then cplx.mk_re 0.0
+ else cplx.mk_re 1.0 cplx./
+ (approx[i] cplx.- approx[j]))
(0..<N)))
(0..<N)
- let offsets = map2 (\quo sum -> quo c64./ (c64.mk_re 1.0 c64.- quo c64.* sum))
+ let offsets = map2 (\quo sum -> quo cplx./ (cplx.mk_re 1.0 cplx.- quo cplx.* sum))
quos sums
- let approx = map2 (c64.-) approx offsets
- let svals = map (eval_d ctx.bound <-< c64.mag) approx
- let conditions = map2 (\p s -> c64.mag p <= 1e-9 * s) pvals svals
+ let approx = map2 (cplx.-) approx offsets
+ let svals = map (eval_d ctx.bound <-< cplx.mag) approx
+ let conditions = map2 (\p s -> cplx.mag p <= 1e-9 * s) pvals svals
let all_converged = all id conditions
in (all_converged, approx)
@@ -120,11 +120,11 @@ let next_derbyshire (p: *poly): *(bool, poly) =
let derbyshire_at_index (index: i32): *poly =
let bitfield = (index << 1) + 1
- in tabulate PolyN (\i -> f64.i32 (i32.get_bit i bitfield * 2 - 1))
+ in tabulate PolyN (\i -> f32.i32 (i32.get_bit i bitfield * 2 - 1))
-let calc_index (value: f64) (left: f64) (right: f64) (steps: i32): i32 =
- t64 ((value - left) / (right - left) * (r64 steps - 1) + 0.5)
+let calc_index (value: f32) (left: f32) (right: f32) (steps: i32): i32 =
+ i32.f32 ((value - left) / (right - left) * (f32.i32 steps - 1) + 0.5)
let point_index
(width: i32) (height: i32)
@@ -133,8 +133,8 @@ let point_index
: i32 =
-- Range for 'yi' is reversed because image coordinates go down in the y
-- direction, while complex coordinates go up in the y direction
- let xi = calc_index (c64.re pt) (c64.re bottom_left) (c64.re top_right) width
- let yi = calc_index (c64.im pt) (c64.im top_right) (c64.im bottom_left) height
+ let xi = calc_index (cplx.re pt) (cplx.re bottom_left) (cplx.re top_right) width
+ let yi = calc_index (cplx.im pt) (cplx.im top_right) (cplx.im bottom_left) height
in if 0 <= xi && xi < width && 0 <= yi && yi < height
then width * yi + xi
else -1
@@ -142,13 +142,13 @@ let point_index
entry main_job
(start_index: i32) (num_polys: i32)
(width: i32) (height: i32)
- (left: f64) (top: f64) (right: f64) (bottom: f64)
+ (left: f32) (top: f32) (right: f32) (bottom: f32)
(seed: i32)
: []i32 =
-- Unnecessary to give each polynomial a different seed
let rng = rand_engine.rng_from_seed [seed]
- let bottom_left = c64.mk left bottom
- let top_right = c64.mk right top
+ let bottom_left = cplx.mk left bottom
+ let top_right = cplx.mk right top
let indices = flatten
(map (\idx ->
let p = derbyshire_at_index idx
@@ -163,7 +163,7 @@ entry main_job
entry main_all
(width: i32) (height: i32)
- (left: f64) (top: f64) (right: f64) (bottom: f64)
+ (left: f32) (top: f32) (right: f32) (bottom: f32)
(seed: i32)
: []i32 =
main_job 0 (1 << N) width height left top right bottom seed