1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
|
import "lib/github.com/diku-dk/complex/complex"
import "lib/github.com/diku-dk/cpprandom/random"
module rand_engine = minstd_rand
module uniform_real = uniform_real_distribution f32 rand_engine
module cplx = mk_complex f32
type complex = cplx.complex
let N = 22i32
let PolyN = N + 1
type poly = [PolyN]f32
-- First element of pair steps fastest
let iota2 (n: i32) (m: i32): [](i32, i32) =
flatten (map (\y -> map (\x -> (x, y)) (iota n)) (iota m))
let evaln_c (p: poly) (nterms: i32) (pt: complex): complex =
foldr (\coef accum -> cplx.mk_re coef cplx.+ pt cplx.* accum)
(cplx.mk_re p[nterms-1]) (take (nterms - 1) p)
let eval_c (p: poly) (pt: complex): complex = evaln_c p (length p) pt
let evaln_d (p: poly) (nterms: i32) (pt: f32): f32 =
foldr (\coef accum -> coef + pt * accum)
p[nterms-1] (take (nterms - 1) p)
let eval_d (p: poly) (pt: f32): f32 = evaln_d p (length p) pt
let derivative (p: poly): *poly =
map (\(i, v) -> f32.i32 i * v) (zip (1..<PolyN) (tail p)) ++ [0]
-- Cauchy's bound: https://en.wikipedia.org/wiki/Geometrical_properties_of_polynomial_roots#Lagrange's_and_Cauchy's_bounds
let max_root_norm (p: poly): f32 =
1 + f32.maximum (map (\coef -> f32.abs (coef / p[PolyN-1])) (init p))
module aberth = {
type approx = [N]complex
-- bound is 's' in the stop condition formulated at p.189-190 of
-- https://link.springer.com/article/10.1007%2FBF02207694
type context = {p: poly, deriv: poly, bound: poly, radius: f32}
let gen_coord (r: f32) (rng: *rand_engine.rng): *(rand_engine.rng, f32) =
uniform_real.rand (-r, r) rng
let gen_coord_c (r: f32) (rng: rand_engine.rng): (rand_engine.rng, complex) =
let (rng, x) = gen_coord r rng
let (rng, y) = gen_coord r rng
in (rng, cplx.mk x y)
let generate (ctx: context) (rng: *rand_engine.rng): *(rand_engine.rng, approx) =
let rngs = rand_engine.split_rng N rng
let (rngs, approx) = unzip (map (gen_coord_c ctx.radius) rngs)
let rng = rand_engine.join_rng rngs
in (rng, approx)
let compute_bound_poly (p: poly): *poly =
map2 (\coef i -> f32.abs coef * f32.i32 (4 * i + 1)) p (0..<PolyN)
let initialise (p: *poly): *context =
let deriv = derivative p
let bound = compute_bound_poly p
let radius = max_root_norm p
in {p, deriv, bound, radius}
-- Jacobi-style step where the new elements are computed in parallel from
-- the previous values
let step (ctx: context) (approx: *approx): *(bool, approx) =
let pvals = map (eval_c ctx.p) approx
let derivvals = map (evaln_c ctx.deriv (PolyN - 1)) approx
let quos = map2 (cplx./) pvals derivvals
let sums = map (\i ->
reduce_comm (cplx.+) (cplx.mk_re 0.0)
(map (\j ->
if i == j then cplx.mk_re 0.0
else cplx.mk_re 1.0 cplx./
(approx[i] cplx.- approx[j]))
(0..<N)))
(0..<N)
let offsets = map2 (\quo sum -> quo cplx./ (cplx.mk_re 1.0 cplx.- quo cplx.* sum))
quos sums
let approx = map2 (cplx.-) approx offsets
let svals = map (eval_d ctx.bound <-< cplx.mag) approx
let conditions = map2 (\p s -> cplx.mag p <= 1e-5 * s) pvals svals
let all_converged = all id conditions
in (all_converged, approx)
let iterate (ctx: context) (rng: *rand_engine.rng): (rand_engine.rng, *approx) =
let (rng, approx) = generate ctx rng
let (init_conv, approx) = step ctx approx
let (rng, _, _, _, approx) =
loop (rng, conv, tries, step_idx, approx) =
(rng, init_conv, 1, 1: i32, approx)
while !conv
do if step_idx + 1 > tries * 100
then let (rng, approx) = generate ctx rng
let (conv, approx) = step ctx approx
in (rng, conv, tries + 1, 0, approx)
else let (conv, approx) = step ctx approx
in (rng, conv, tries, step_idx + 1, approx)
in (rng, approx)
let aberth (p: *poly) (rng: *rand_engine.rng): *(rand_engine.rng, approx) =
iterate (initialise p) rng
}
-- Set the constant coefficient to 1; nextDerbyshire will never change it
let init_derbyshire: poly =
[1] ++ replicate (PolyN - 1) (-1)
let next_derbyshire (p: *poly): *(bool, poly) =
let (_, p, looped) =
loop (i, p, cont) = (0, p, true)
while cont && i < length p
do if p[i] == -1
then (i, p with [i] = 1, false)
else (i + 1, p with [i] = -1, true)
in (looped, p)
let derbyshire_at_index (index: i32): *poly =
let bitfield = (index << 1) + 1
in tabulate PolyN (\i -> f32.i32 (i32.get_bit i bitfield * 2 - 1))
let calc_index (value: f32) (left: f32) (right: f32) (steps: i32): i32 =
i32.f32 ((value - left) / (right - left) * (f32.i32 steps - 1) + 0.5)
let point_index
(width: i32) (height: i32)
(bottom_left: complex) (top_right: complex)
(pt: complex)
: i32 =
-- Range for 'yi' is reversed because image coordinates go down in the y
-- direction, while complex coordinates go up in the y direction
let xi = calc_index (cplx.re pt) (cplx.re bottom_left) (cplx.re top_right) width
let yi = calc_index (cplx.im pt) (cplx.im top_right) (cplx.im bottom_left) height
in if 0 <= xi && xi < width && 0 <= yi && yi < height
then width * yi + xi
else -1
entry main_job
(start_index: i32) (num_polys: i32)
(width: i32) (height: i32)
(left: f32) (top: f32) (right: f32) (bottom: f32)
(seed: i32)
: []i32 =
-- Unnecessary to give each polynomial a different seed
let rng = rand_engine.rng_from_seed [seed]
let bottom_left = cplx.mk left bottom
let top_right = cplx.mk right top
let indices = flatten
(map (\idx ->
let p = derbyshire_at_index idx
let (_, pts) = aberth.aberth p rng
in map (point_index width height bottom_left top_right) pts)
(start_index ..< start_index + num_polys))
let filtered = filter (\i -> i != -1) indices
in reduce_by_index (replicate (width * height) 0)
(+) 0
filtered
(replicate (length filtered) 1)
entry main_all
(width: i32) (height: i32)
(left: f32) (top: f32) (right: f32) (bottom: f32)
(seed: i32)
: []i32 =
main_job 0 (1 << N) width height left top right bottom seed
entry get_N: i32 = N
|