diff options
author | tomsmeding <tom.smeding@gmail.com> | 2019-04-20 10:27:13 +0200 |
---|---|---|
committer | tomsmeding <tom.smeding@gmail.com> | 2019-04-20 10:27:13 +0200 |
commit | d705388634c4df25f800e7cf05e88609bfbc2068 (patch) | |
tree | f8dfd4a83426a5e18a272f949407f64985e444f6 | |
parent | 3a7e1c80cb28a5d63da056f4889eb207140c0e67 (diff) |
Switch to f32 for intel iris (which doesn't work)
-rw-r--r-- | aberth/aberth_kernel.fut | 70 |
1 files changed, 35 insertions, 35 deletions
diff --git a/aberth/aberth_kernel.fut b/aberth/aberth_kernel.fut index 58b8838..f09db56 100644 --- a/aberth/aberth_kernel.fut +++ b/aberth/aberth_kernel.fut @@ -1,53 +1,53 @@ import "lib/github.com/diku-dk/complex/complex" import "lib/github.com/diku-dk/cpprandom/random" -module uniform_real = uniform_real_distribution f64 minstd_rand module rand_engine = minstd_rand +module uniform_real = uniform_real_distribution f32 rand_engine -module c64 = mk_complex f64 -type complex = c64.complex +module cplx = mk_complex f32 +type complex = cplx.complex -let N = 20i32 +let N = 18i32 let PolyN = N + 1 -type poly = [PolyN]f64 +type poly = [PolyN]f32 -- First element of pair steps fastest let iota2 (n: i32) (m: i32): [](i32, i32) = flatten (map (\y -> map (\x -> (x, y)) (iota n)) (iota m)) let evaln_c (p: poly) (nterms: i32) (pt: complex): complex = - foldr (\coef accum -> c64.mk_re coef c64.+ pt c64.* accum) - (c64.mk_re p[nterms-1]) (take (nterms - 1) p) + foldr (\coef accum -> cplx.mk_re coef cplx.+ pt cplx.* accum) + (cplx.mk_re p[nterms-1]) (take (nterms - 1) p) let eval_c (p: poly) (pt: complex): complex = evaln_c p (length p) pt -let evaln_d (p: poly) (nterms: i32) (pt: f64): f64 = +let evaln_d (p: poly) (nterms: i32) (pt: f32): f32 = foldr (\coef accum -> coef + pt * accum) p[nterms-1] (take (nterms - 1) p) -let eval_d (p: poly) (pt: f64): f64 = evaln_d p (length p) pt +let eval_d (p: poly) (pt: f32): f32 = evaln_d p (length p) pt let derivative (p: poly): *poly = - map (\(i, v) -> f64.i32 i * v) (zip (1..<PolyN) (drop 1 p)) ++ [0] + map (\(i, v) -> f32.i32 i * v) (zip (1..<PolyN) (drop 1 p)) ++ [0] -- Cauchy's bound: https://en.wikipedia.org/wiki/Geometrical_properties_of_polynomial_roots#Lagrange's_and_Cauchy's_bounds -let max_root_norm (p: poly): f64 = - 1 + f64.maximum (map (\coef -> f64.abs (coef / p[PolyN-1])) p) +let max_root_norm (p: poly): f32 = + 1 + f32.maximum (map (\coef -> f32.abs (coef / p[PolyN-1])) p) module aberth = { type approx = [N]complex -- bound is 's' in the stop condition formulated at p.189-190 of -- https://link.springer.com/article/10.1007%2FBF02207694 - type context = {p: poly, deriv: poly, bound: poly, radius: f64} + type context = {p: poly, deriv: poly, bound: poly, radius: f32} - let gen_coord (r: f64) (rng: *rand_engine.rng): *(rand_engine.rng, f64) = + let gen_coord (r: f32) (rng: *rand_engine.rng): *(rand_engine.rng, f32) = uniform_real.rand (-r, r) rng - let gen_coord_c (r: f64) (rng: rand_engine.rng): (rand_engine.rng, complex) = + let gen_coord_c (r: f32) (rng: rand_engine.rng): (rand_engine.rng, complex) = let (rng, x) = gen_coord r rng let (rng, y) = gen_coord r rng - in (rng, c64.mk x y) + in (rng, cplx.mk x y) let generate (ctx: context) (rng: *rand_engine.rng): *(rand_engine.rng, approx) = let rngs = rand_engine.split_rng N rng @@ -56,7 +56,7 @@ module aberth = { in (rng, approx) let compute_bound_poly (p: poly): *poly = - map2 (\coef i -> f64.abs coef * f64.i32 (4 * i + 1)) p (0..<PolyN) + map2 (\coef i -> f32.abs coef * f32.i32 (4 * i + 1)) p (0..<PolyN) let initialise (p: *poly): *context = let deriv = derivative p @@ -69,20 +69,20 @@ module aberth = { let step (ctx: context) (approx: *approx): *(bool, approx) = let pvals = map (eval_c ctx.p) approx let derivvals = map (evaln_c ctx.deriv (PolyN - 1)) approx - let quos = map2 (c64./) pvals derivvals + let quos = map2 (cplx./) pvals derivvals let sums = map (\i -> - reduce_comm (c64.+) (c64.mk_re 0.0) + reduce_comm (cplx.+) (cplx.mk_re 0.0) (map (\j -> - if i == j then c64.mk_re 0.0 - else c64.mk_re 1.0 c64./ - (approx[i] c64.- approx[j])) + if i == j then cplx.mk_re 0.0 + else cplx.mk_re 1.0 cplx./ + (approx[i] cplx.- approx[j])) (0..<N))) (0..<N) - let offsets = map2 (\quo sum -> quo c64./ (c64.mk_re 1.0 c64.- quo c64.* sum)) + let offsets = map2 (\quo sum -> quo cplx./ (cplx.mk_re 1.0 cplx.- quo cplx.* sum)) quos sums - let approx = map2 (c64.-) approx offsets - let svals = map (eval_d ctx.bound <-< c64.mag) approx - let conditions = map2 (\p s -> c64.mag p <= 1e-9 * s) pvals svals + let approx = map2 (cplx.-) approx offsets + let svals = map (eval_d ctx.bound <-< cplx.mag) approx + let conditions = map2 (\p s -> cplx.mag p <= 1e-9 * s) pvals svals let all_converged = all id conditions in (all_converged, approx) @@ -120,11 +120,11 @@ let next_derbyshire (p: *poly): *(bool, poly) = let derbyshire_at_index (index: i32): *poly = let bitfield = (index << 1) + 1 - in tabulate PolyN (\i -> f64.i32 (i32.get_bit i bitfield * 2 - 1)) + in tabulate PolyN (\i -> f32.i32 (i32.get_bit i bitfield * 2 - 1)) -let calc_index (value: f64) (left: f64) (right: f64) (steps: i32): i32 = - t64 ((value - left) / (right - left) * (r64 steps - 1) + 0.5) +let calc_index (value: f32) (left: f32) (right: f32) (steps: i32): i32 = + i32.f32 ((value - left) / (right - left) * (f32.i32 steps - 1) + 0.5) let point_index (width: i32) (height: i32) @@ -133,8 +133,8 @@ let point_index : i32 = -- Range for 'yi' is reversed because image coordinates go down in the y -- direction, while complex coordinates go up in the y direction - let xi = calc_index (c64.re pt) (c64.re bottom_left) (c64.re top_right) width - let yi = calc_index (c64.im pt) (c64.im top_right) (c64.im bottom_left) height + let xi = calc_index (cplx.re pt) (cplx.re bottom_left) (cplx.re top_right) width + let yi = calc_index (cplx.im pt) (cplx.im top_right) (cplx.im bottom_left) height in if 0 <= xi && xi < width && 0 <= yi && yi < height then width * yi + xi else -1 @@ -142,13 +142,13 @@ let point_index entry main_job (start_index: i32) (num_polys: i32) (width: i32) (height: i32) - (left: f64) (top: f64) (right: f64) (bottom: f64) + (left: f32) (top: f32) (right: f32) (bottom: f32) (seed: i32) : []i32 = -- Unnecessary to give each polynomial a different seed let rng = rand_engine.rng_from_seed [seed] - let bottom_left = c64.mk left bottom - let top_right = c64.mk right top + let bottom_left = cplx.mk left bottom + let top_right = cplx.mk right top let indices = flatten (map (\idx -> let p = derbyshire_at_index idx @@ -163,7 +163,7 @@ entry main_job entry main_all (width: i32) (height: i32) - (left: f64) (top: f64) (right: f64) (bottom: f64) + (left: f32) (top: f32) (right: f32) (bottom: f32) (seed: i32) : []i32 = main_job 0 (1 << N) width height left top right bottom seed |