From d705388634c4df25f800e7cf05e88609bfbc2068 Mon Sep 17 00:00:00 2001
From: tomsmeding <tom.smeding@gmail.com>
Date: Sat, 20 Apr 2019 10:27:13 +0200
Subject: Switch to f32 for intel iris (which doesn't work)

---
 aberth/aberth_kernel.fut | 70 ++++++++++++++++++++++++------------------------
 1 file changed, 35 insertions(+), 35 deletions(-)

(limited to 'aberth')

diff --git a/aberth/aberth_kernel.fut b/aberth/aberth_kernel.fut
index 58b8838..f09db56 100644
--- a/aberth/aberth_kernel.fut
+++ b/aberth/aberth_kernel.fut
@@ -1,53 +1,53 @@
 import "lib/github.com/diku-dk/complex/complex"
 import "lib/github.com/diku-dk/cpprandom/random"
 
-module uniform_real = uniform_real_distribution f64 minstd_rand
 module rand_engine = minstd_rand
+module uniform_real = uniform_real_distribution f32 rand_engine
 
-module c64 = mk_complex f64
-type complex = c64.complex
+module cplx = mk_complex f32
+type complex = cplx.complex
 
-let N = 20i32
+let N = 18i32
 let PolyN = N + 1
 
-type poly = [PolyN]f64
+type poly = [PolyN]f32
 
 -- First element of pair steps fastest
 let iota2 (n: i32) (m: i32): [](i32, i32) =
     flatten (map (\y -> map (\x -> (x, y)) (iota n)) (iota m))
 
 let evaln_c (p: poly) (nterms: i32) (pt: complex): complex =
-    foldr (\coef accum -> c64.mk_re coef c64.+ pt c64.* accum)
-          (c64.mk_re p[nterms-1]) (take (nterms - 1) p)
+    foldr (\coef accum -> cplx.mk_re coef cplx.+ pt cplx.* accum)
+          (cplx.mk_re p[nterms-1]) (take (nterms - 1) p)
 
 let eval_c (p: poly) (pt: complex): complex = evaln_c p (length p) pt
 
-let evaln_d (p: poly) (nterms: i32) (pt: f64): f64 =
+let evaln_d (p: poly) (nterms: i32) (pt: f32): f32 =
     foldr (\coef accum -> coef + pt * accum)
           p[nterms-1] (take (nterms - 1) p)
 
-let eval_d (p: poly) (pt: f64): f64 = evaln_d p (length p) pt
+let eval_d (p: poly) (pt: f32): f32 = evaln_d p (length p) pt
 
 let derivative (p: poly): *poly = 
-    map (\(i, v) -> f64.i32 i * v) (zip (1..<PolyN) (drop 1 p)) ++ [0]
+    map (\(i, v) -> f32.i32 i * v) (zip (1..<PolyN) (drop 1 p)) ++ [0]
 
 -- Cauchy's bound: https://en.wikipedia.org/wiki/Geometrical_properties_of_polynomial_roots#Lagrange's_and_Cauchy's_bounds
-let max_root_norm (p: poly): f64 =
-    1 + f64.maximum (map (\coef -> f64.abs (coef / p[PolyN-1])) p)
+let max_root_norm (p: poly): f32 =
+    1 + f32.maximum (map (\coef -> f32.abs (coef / p[PolyN-1])) p)
 
 module aberth = {
     type approx = [N]complex
     -- bound is 's' in the stop condition formulated at p.189-190 of
     -- https://link.springer.com/article/10.1007%2FBF02207694
-    type context = {p: poly, deriv: poly, bound: poly, radius: f64}
+    type context = {p: poly, deriv: poly, bound: poly, radius: f32}
 
-    let gen_coord (r: f64) (rng: *rand_engine.rng): *(rand_engine.rng, f64) =
+    let gen_coord (r: f32) (rng: *rand_engine.rng): *(rand_engine.rng, f32) =
         uniform_real.rand (-r, r) rng
 
-    let gen_coord_c (r: f64) (rng: rand_engine.rng): (rand_engine.rng, complex) =
+    let gen_coord_c (r: f32) (rng: rand_engine.rng): (rand_engine.rng, complex) =
         let (rng, x) = gen_coord r rng
         let (rng, y) = gen_coord r rng
-        in (rng, c64.mk x y)
+        in (rng, cplx.mk x y)
 
     let generate (ctx: context) (rng: *rand_engine.rng): *(rand_engine.rng, approx) =
         let rngs = rand_engine.split_rng N rng
@@ -56,7 +56,7 @@ module aberth = {
         in (rng, approx)
 
     let compute_bound_poly (p: poly): *poly =
-        map2 (\coef i -> f64.abs coef * f64.i32 (4 * i + 1)) p (0..<PolyN)
+        map2 (\coef i -> f32.abs coef * f32.i32 (4 * i + 1)) p (0..<PolyN)
 
     let initialise (p: *poly): *context =
         let deriv = derivative p
@@ -69,20 +69,20 @@ module aberth = {
     let step (ctx: context) (approx: *approx): *(bool, approx) =
         let pvals = map (eval_c ctx.p) approx
         let derivvals = map (evaln_c ctx.deriv (PolyN - 1)) approx
-        let quos = map2 (c64./) pvals derivvals
+        let quos = map2 (cplx./) pvals derivvals
         let sums = map (\i ->
-                            reduce_comm (c64.+) (c64.mk_re 0.0)
+                            reduce_comm (cplx.+) (cplx.mk_re 0.0)
                                 (map (\j ->
-                                        if i == j then c64.mk_re 0.0
-                                        else c64.mk_re 1.0 c64./
-                                                (approx[i] c64.- approx[j]))
+                                        if i == j then cplx.mk_re 0.0
+                                        else cplx.mk_re 1.0 cplx./
+                                                (approx[i] cplx.- approx[j]))
                                      (0..<N)))
                        (0..<N)
-        let offsets = map2 (\quo sum -> quo c64./ (c64.mk_re 1.0 c64.- quo c64.* sum))
+        let offsets = map2 (\quo sum -> quo cplx./ (cplx.mk_re 1.0 cplx.- quo cplx.* sum))
                            quos sums
-        let approx = map2 (c64.-) approx offsets
-        let svals = map (eval_d ctx.bound <-< c64.mag) approx
-        let conditions = map2 (\p s -> c64.mag p <= 1e-9 * s) pvals svals
+        let approx = map2 (cplx.-) approx offsets
+        let svals = map (eval_d ctx.bound <-< cplx.mag) approx
+        let conditions = map2 (\p s -> cplx.mag p <= 1e-9 * s) pvals svals
         let all_converged = all id conditions
         in (all_converged, approx)
 
@@ -120,11 +120,11 @@ let next_derbyshire (p: *poly): *(bool, poly) =
 
 let derbyshire_at_index (index: i32): *poly =
     let bitfield = (index << 1) + 1
-    in tabulate PolyN (\i -> f64.i32 (i32.get_bit i bitfield * 2 - 1))
+    in tabulate PolyN (\i -> f32.i32 (i32.get_bit i bitfield * 2 - 1))
 
 
-let calc_index (value: f64) (left: f64) (right: f64) (steps: i32): i32 =
-    t64 ((value - left) / (right - left) * (r64 steps - 1) + 0.5)
+let calc_index (value: f32) (left: f32) (right: f32) (steps: i32): i32 =
+    i32.f32 ((value - left) / (right - left) * (f32.i32 steps - 1) + 0.5)
 
 let point_index
         (width: i32) (height: i32)
@@ -133,8 +133,8 @@ let point_index
         : i32 =
     -- Range for 'yi' is reversed because image coordinates go down in the y
     -- direction, while complex coordinates go up in the y direction
-    let xi = calc_index (c64.re pt) (c64.re bottom_left) (c64.re top_right) width
-    let yi = calc_index (c64.im pt) (c64.im top_right) (c64.im bottom_left) height
+    let xi = calc_index (cplx.re pt) (cplx.re bottom_left) (cplx.re top_right) width
+    let yi = calc_index (cplx.im pt) (cplx.im top_right) (cplx.im bottom_left) height
     in if 0 <= xi && xi < width && 0 <= yi && yi < height
         then width * yi + xi
         else -1
@@ -142,13 +142,13 @@ let point_index
 entry main_job
         (start_index: i32) (num_polys: i32)
         (width: i32) (height: i32)
-        (left: f64) (top: f64) (right: f64) (bottom: f64)
+        (left: f32) (top: f32) (right: f32) (bottom: f32)
         (seed: i32)
         : []i32 =
     -- Unnecessary to give each polynomial a different seed
     let rng = rand_engine.rng_from_seed [seed]
-    let bottom_left = c64.mk left bottom
-    let top_right = c64.mk right top
+    let bottom_left = cplx.mk left bottom
+    let top_right = cplx.mk right top
     let indices = flatten
             (map (\idx ->
                     let p = derbyshire_at_index idx
@@ -163,7 +163,7 @@ entry main_job
 
 entry main_all
         (width: i32) (height: i32)
-        (left: f64) (top: f64) (right: f64) (bottom: f64)
+        (left: f32) (top: f32) (right: f32) (bottom: f32)
         (seed: i32)
         : []i32 =
     main_job 0 (1 << N) width height left top right bottom seed
-- 
cgit v1.2.3-70-g09d2