aberth/aberth_kernel.fut


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164

import "lib/github.com/diku-dk/complex/complex"
import "lib/github.com/diku-dk/cpprandom/random"

module uniform_real = uniform_real_distribution f64 minstd_rand
module rand_engine = minstd_rand

module c64 = mk_complex f64
type complex = c64.complex

let N = 18i32
let PolyN = 19i32

type poly = [PolyN]f64

-- First element of pair steps fastest
let iota2 (n: i32) (m: i32): [](i32, i32) =
    flatten (map (\y -> map (\x -> (x, y)) (iota n)) (iota m))

let evaln_c (p: poly) (nterms: i32) (pt: complex): complex =
    foldr (\coef accum -> c64.mk_re coef c64.+ pt c64.* accum)
          (c64.mk_re p[nterms-1]) (take (nterms - 1) p)

let eval_c (p: poly) (pt: complex): complex = evaln_c p (length p) pt

let evaln_d (p: poly) (nterms: i32) (pt: f64): f64 =
    foldr (\coef accum -> coef + pt * accum)
          p[nterms-1] (take (nterms - 1) p)

let eval_d (p: poly) (pt: f64): f64 = evaln_d p (length p) pt

let derivative (p: poly): *poly = 
    map (\(i, v) -> f64.i32 i * v) (zip (1..<PolyN) (drop 1 p)) ++ [0]

-- Cauchy's bound: https://en.wikipedia.org/wiki/Geometrical_properties_of_polynomial_roots#Lagrange's_and_Cauchy's_bounds
let max_root_norm (p: poly): f64 =
    1 + f64.maximum (map (\coef -> f64.abs (coef / p[PolyN-1])) p)

module aberth = {
    type approx = [N]complex
    -- bound is 's' in the stop condition formulated at p.189-190 of
    -- https://link.springer.com/article/10.1007%2FBF02207694
    type context = {p: poly, deriv: poly, bound: poly, radius: f64}

    let gen_coord (r: f64) (rng: *rand_engine.rng): *(rand_engine.rng, f64) =
        uniform_real.rand (-r, r) rng

    let gen_coord_c (r: f64) (rng: rand_engine.rng): (rand_engine.rng, complex) =
        let (rng, x) = gen_coord r rng
        let (rng, y) = gen_coord r rng
        in (rng, c64.mk x y)

    let generate (ctx: context) (rng: *rand_engine.rng): *(rand_engine.rng, approx) =
        let rngs = rand_engine.split_rng N rng
        let (rngs, approx) = unzip (map (gen_coord_c ctx.radius) rngs)
        let rng = rand_engine.join_rng rngs
        in (rng, approx)

    let compute_bound_poly (p: poly): *poly =
        map2 (\coef i -> f64.abs coef * f64.i32 (4 * i + 1)) p (0..<PolyN)

    let initialise (p: *poly): *context =
        let deriv = derivative p
        let bound = compute_bound_poly p
        let radius = max_root_norm p
        in {p, deriv, bound, radius}

    -- Lagrange-style step where the new elements are computed in parallel from
    -- the previous values
    let step (ctx: context) (approx: *approx): *(bool, approx) =
        let pvals = map (eval_c ctx.p) approx
        let derivvals = map (evaln_c ctx.deriv (PolyN - 1)) approx
        let quos = map2 (c64./) pvals derivvals
        let sums = map (\i ->
                            reduce_comm (c64.+) (c64.mk_re 0.0)
                                (map (\j ->
                                        if i == j then c64.mk_re 0.0
                                        else c64.mk_re 1.0 c64./
                                                (approx[i] c64.- approx[j]))
                                     (0..<N)))
                       (0..<N)
        let offsets = map2 (\quo sum -> quo c64./ (c64.mk_re 1.0 c64.- quo c64.* sum))
                           quos sums
        let approx = map2 (c64.-) approx offsets
        let svals = map (eval_d ctx.bound <-< c64.mag) approx
        let conditions = map2 (\p s -> c64.mag p <= 1e-9 * s) pvals svals
        let all_converged = all id conditions
        in (all_converged, approx)

    let iterate (ctx: context) (rng: *rand_engine.rng): (rand_engine.rng, *approx) =
        let (rng, approx) = generate ctx rng
        let (init_conv, approx) = step ctx approx
        let (rng, _, _, _, approx) =
            loop (rng, conv, tries, step_idx, approx) =
                    (rng, init_conv, 1, 1: i32, approx)
            while !conv
            do if step_idx + 1 > tries * 100
               then let (rng, approx) = generate ctx rng
                    let (conv, approx) = step ctx approx
                    in (rng, conv, tries + 1, 0, approx)
               else let (conv, approx) = step ctx approx
                    in (rng, conv, tries, step_idx + 1, approx)
        in (rng, approx)

    let aberth (p: *poly) (rng: *rand_engine.rng): *(rand_engine.rng, approx) =
        iterate (initialise p) rng
}

-- Set the constant coefficient to 1; nextDerbyshire will never change it
let init_derbyshire: poly =
    [1] ++ replicate (PolyN - 1) (-1)

let next_derbyshire (p: *poly): *(bool, poly) =
    let (_, p, looped) =
        loop (i, p, cont) = (0, p, true)
        while cont && i < length p
        do if p[i] == -1
           then (i,     p with [i] =  1, false)
           else (i + 1, p with [i] = -1, true)
    in (looped, p)

let derbyshire_at_index (index: i32): *poly =
    let bitfield = (index << 1) + 1
    in tabulate PolyN (\i -> f64.i32 (i32.get_bit i bitfield * 2 - 1))


let calc_index (value: f64) (left: f64) (right: f64) (steps: i32): i32 =
    t64 ((value - left) / (right - left) * (r64 steps - 1) + 0.5)

let point_index
        (width: i32) (height: i32)
        (top_left: complex) (bottom_right: complex)
        (pt: complex)
        : i32 =
    let xi = calc_index (c64.re pt) (c64.re top_left) (c64.re bottom_right) width
    let yi = calc_index (c64.im pt) (c64.im bottom_right) (c64.im top_left) height
    in if 0 <= xi && xi < width && 0 <= yi && yi < height
        then width * yi + xi
        else -1

entry main_job
        (start_index: i32) (num_polys: i32)
        (width: i32) (height: i32)
        (left: f64) (top: f64) (right: f64) (bottom: f64)
        (seed: i32)
        : []i32 =
    -- Unnecessary to give each polynomial a different seed
    let rng = rand_engine.rng_from_seed [seed]
    let top_left = c64.mk left top
    let bottom_right = c64.mk right bottom
    let indices = filter (\i -> i != -1) (flatten
            (map (\idx ->
                    let p = derbyshire_at_index idx
                    let (_, pts) = aberth.aberth p rng
                    let indices = map (point_index width height top_left bottom_right) pts
                    in indices)
                 (start_index ..< start_index + num_polys)))
    in reduce_by_index (replicate (width * height) 0) (+) 0 indices (replicate (length indices) 1)

entry main_all
        (width: i32) (height: i32)
        (left: f64) (top: f64) (right: f64) (bottom: f64)
        (seed: i32)
        : []i32 =
    main_job 0 (1 << N) width height left top right bottom seed