From d705388634c4df25f800e7cf05e88609bfbc2068 Mon Sep 17 00:00:00 2001 From: tomsmeding Date: Sat, 20 Apr 2019 10:27:13 +0200 Subject: Switch to f32 for intel iris (which doesn't work) --- aberth/aberth_kernel.fut | 70 ++++++++++++++++++++++++------------------------ 1 file changed, 35 insertions(+), 35 deletions(-) (limited to 'aberth') diff --git a/aberth/aberth_kernel.fut b/aberth/aberth_kernel.fut index 58b8838..f09db56 100644 --- a/aberth/aberth_kernel.fut +++ b/aberth/aberth_kernel.fut @@ -1,53 +1,53 @@ import "lib/github.com/diku-dk/complex/complex" import "lib/github.com/diku-dk/cpprandom/random" -module uniform_real = uniform_real_distribution f64 minstd_rand module rand_engine = minstd_rand +module uniform_real = uniform_real_distribution f32 rand_engine -module c64 = mk_complex f64 -type complex = c64.complex +module cplx = mk_complex f32 +type complex = cplx.complex -let N = 20i32 +let N = 18i32 let PolyN = N + 1 -type poly = [PolyN]f64 +type poly = [PolyN]f32 -- First element of pair steps fastest let iota2 (n: i32) (m: i32): [](i32, i32) = flatten (map (\y -> map (\x -> (x, y)) (iota n)) (iota m)) let evaln_c (p: poly) (nterms: i32) (pt: complex): complex = - foldr (\coef accum -> c64.mk_re coef c64.+ pt c64.* accum) - (c64.mk_re p[nterms-1]) (take (nterms - 1) p) + foldr (\coef accum -> cplx.mk_re coef cplx.+ pt cplx.* accum) + (cplx.mk_re p[nterms-1]) (take (nterms - 1) p) let eval_c (p: poly) (pt: complex): complex = evaln_c p (length p) pt -let evaln_d (p: poly) (nterms: i32) (pt: f64): f64 = +let evaln_d (p: poly) (nterms: i32) (pt: f32): f32 = foldr (\coef accum -> coef + pt * accum) p[nterms-1] (take (nterms - 1) p) -let eval_d (p: poly) (pt: f64): f64 = evaln_d p (length p) pt +let eval_d (p: poly) (pt: f32): f32 = evaln_d p (length p) pt let derivative (p: poly): *poly = - map (\(i, v) -> f64.i32 i * v) (zip (1.. f32.i32 i * v) (zip (1.. f64.abs (coef / p[PolyN-1])) p) +let max_root_norm (p: poly): f32 = + 1 + f32.maximum (map (\coef -> f32.abs (coef / p[PolyN-1])) p) module aberth = { type approx = [N]complex -- bound is 's' in the stop condition formulated at p.189-190 of -- https://link.springer.com/article/10.1007%2FBF02207694 - type context = {p: poly, deriv: poly, bound: poly, radius: f64} + type context = {p: poly, deriv: poly, bound: poly, radius: f32} - let gen_coord (r: f64) (rng: *rand_engine.rng): *(rand_engine.rng, f64) = + let gen_coord (r: f32) (rng: *rand_engine.rng): *(rand_engine.rng, f32) = uniform_real.rand (-r, r) rng - let gen_coord_c (r: f64) (rng: rand_engine.rng): (rand_engine.rng, complex) = + let gen_coord_c (r: f32) (rng: rand_engine.rng): (rand_engine.rng, complex) = let (rng, x) = gen_coord r rng let (rng, y) = gen_coord r rng - in (rng, c64.mk x y) + in (rng, cplx.mk x y) let generate (ctx: context) (rng: *rand_engine.rng): *(rand_engine.rng, approx) = let rngs = rand_engine.split_rng N rng @@ -56,7 +56,7 @@ module aberth = { in (rng, approx) let compute_bound_poly (p: poly): *poly = - map2 (\coef i -> f64.abs coef * f64.i32 (4 * i + 1)) p (0.. f32.abs coef * f32.i32 (4 * i + 1)) p (0.. - reduce_comm (c64.+) (c64.mk_re 0.0) + reduce_comm (cplx.+) (cplx.mk_re 0.0) (map (\j -> - if i == j then c64.mk_re 0.0 - else c64.mk_re 1.0 c64./ - (approx[i] c64.- approx[j])) + if i == j then cplx.mk_re 0.0 + else cplx.mk_re 1.0 cplx./ + (approx[i] cplx.- approx[j])) (0.. quo c64./ (c64.mk_re 1.0 c64.- quo c64.* sum)) + let offsets = map2 (\quo sum -> quo cplx./ (cplx.mk_re 1.0 cplx.- quo cplx.* sum)) quos sums - let approx = map2 (c64.-) approx offsets - let svals = map (eval_d ctx.bound <-< c64.mag) approx - let conditions = map2 (\p s -> c64.mag p <= 1e-9 * s) pvals svals + let approx = map2 (cplx.-) approx offsets + let svals = map (eval_d ctx.bound <-< cplx.mag) approx + let conditions = map2 (\p s -> cplx.mag p <= 1e-9 * s) pvals svals let all_converged = all id conditions in (all_converged, approx) @@ -120,11 +120,11 @@ let next_derbyshire (p: *poly): *(bool, poly) = let derbyshire_at_index (index: i32): *poly = let bitfield = (index << 1) + 1 - in tabulate PolyN (\i -> f64.i32 (i32.get_bit i bitfield * 2 - 1)) + in tabulate PolyN (\i -> f32.i32 (i32.get_bit i bitfield * 2 - 1)) -let calc_index (value: f64) (left: f64) (right: f64) (steps: i32): i32 = - t64 ((value - left) / (right - left) * (r64 steps - 1) + 0.5) +let calc_index (value: f32) (left: f32) (right: f32) (steps: i32): i32 = + i32.f32 ((value - left) / (right - left) * (f32.i32 steps - 1) + 0.5) let point_index (width: i32) (height: i32) @@ -133,8 +133,8 @@ let point_index : i32 = -- Range for 'yi' is reversed because image coordinates go down in the y -- direction, while complex coordinates go up in the y direction - let xi = calc_index (c64.re pt) (c64.re bottom_left) (c64.re top_right) width - let yi = calc_index (c64.im pt) (c64.im top_right) (c64.im bottom_left) height + let xi = calc_index (cplx.re pt) (cplx.re bottom_left) (cplx.re top_right) width + let yi = calc_index (cplx.im pt) (cplx.im top_right) (cplx.im bottom_left) height in if 0 <= xi && xi < width && 0 <= yi && yi < height then width * yi + xi else -1 @@ -142,13 +142,13 @@ let point_index entry main_job (start_index: i32) (num_polys: i32) (width: i32) (height: i32) - (left: f64) (top: f64) (right: f64) (bottom: f64) + (left: f32) (top: f32) (right: f32) (bottom: f32) (seed: i32) : []i32 = -- Unnecessary to give each polynomial a different seed let rng = rand_engine.rng_from_seed [seed] - let bottom_left = c64.mk left bottom - let top_right = c64.mk right top + let bottom_left = cplx.mk left bottom + let top_right = cplx.mk right top let indices = flatten (map (\idx -> let p = derbyshire_at_index idx @@ -163,7 +163,7 @@ entry main_job entry main_all (width: i32) (height: i32) - (left: f64) (top: f64) (right: f64) (bottom: f64) + (left: f32) (top: f32) (right: f32) (bottom: f32) (seed: i32) : []i32 = main_job 0 (1 << N) width height left top right bottom seed -- cgit v1.2.3-70-g09d2