Skip to content

Commit 17db916

Browse files
don't test Optimise module
remove Flux.params from tests broken deprecation in __old_to_new pen2
1 parent 35b893a commit 17db916

13 files changed

+96
-369
lines changed

src/deprecations.jl

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,20 @@ train!(loss, ps::Params, data, opt::Optimisers.AbstractRule; cb=nothing) = error
4141
""")
4242

4343
train!(loss, model, data, opt::Optimise.AbstractOptimiser; cb=nothing) =
44-
train!(loss, model, data, _old_to_new(opt); cb)
44+
train!(loss, model, data, __old_to_new(opt); cb)
4545

4646
# Next, to use the new `setup` with the still-exported old-style `Adam` etc:
4747
import .Train: setup
48-
setup(rule::Optimise.AbstractOptimiser, model) = setup(_old_to_new(rule), model)
48+
setup(rule::Optimise.AbstractOptimiser, model) = setup(__old_to_new(rule), model)
4949
# ... and allow accidental use of `Optimisers.setup` to do the same:
50-
Optimisers.setup(rule::Optimise.AbstractOptimiser, model) = setup(_old_to_new(rule), model)
50+
Optimisers.setup(rule::Optimise.AbstractOptimiser, model) = setup(__old_to_new(rule), model)
51+
52+
53+
function __old_to_new(rule)
54+
Base.depwarn("""Optimisers from Flux.Optimise module are deprecated.
55+
Use optimisers from Optimisers.jl instead.""", :__old_to_new)
56+
return _old_to_new(rule)
57+
end
5158

5259
for T in [:Descent, :Adam, :Momentum, :Nesterov,
5360
:AdaGrad, :AdaMax, :AdaDelta, :AMSGrad, :NAdam, :RAdam, :OAdam, :AdaBelief,

src/layers/conv.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ Conv((3,), 4 => 5, σ) # 65 parameters
145145
julia> layer(randn(100, 4, 64)) |> size
146146
(98, 5, 64)
147147
148-
julia> Flux.params(layer) |> length
148+
julia> Flux.trainables(layer) |> length
149149
2
150150
```
151151
"""
@@ -294,7 +294,7 @@ ConvTranspose((3,), 5 => 4, σ) # 64 parameters
294294
julia> layer(randn(100, 5, 64)) |> size # transposed convolution will increase the dimension size (upsampling)
295295
(102, 4, 64)
296296
297-
julia> Flux.params(layer) |> length
297+
julia> Flux.trainables(layer) |> length
298298
2
299299
```
300300
"""

test/ext_cuda/cuda.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ using SparseArrays: sparse, SparseMatrixCSC, AbstractSparseArray
1919
m = Chain(Dense(10, 5, tanh), Dense(5, 2), softmax)
2020
cm = gpu(m)
2121

22-
@test all(p isa CuArray for p in Flux.params(cm))
22+
@test all(p isa CuArray for p in Flux.trainables(cm))
2323
@test cm(gpu(rand(10, 10))) isa CuArray{Float32,2}
2424

2525
xs = rand(5, 5)

test/layers/basic.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,11 @@ using Flux: activations
4040
@testset "Activations" begin
4141
c = Chain(Dense(3,5,relu), Dense(5,1,relu))
4242
X = Float32.([1.0; 1.0; 1.0])
43-
@test_nowarn gradient(()->Flux.activations(c, X)[2][1], Flux.params(c))
43+
@test_nowarn gradient(c -> Flux.activations(c, X)[2][1], c)
4444

4545
c2 = Chain(enc = c[1], dec = c[2])
4646
@test Flux.activations(c, X) == Flux.activations(c2, X)
47-
@test_nowarn gradient(()->Flux.activations(c2, X)[2][1], Flux.params(c2))
47+
@test_nowarn gradient(c -> Flux.activations(c, X)[2][1], c2)
4848
end
4949

5050
@testset "Dense" begin
@@ -156,9 +156,9 @@ using Flux: activations
156156
@test mo(input) == target
157157
end
158158

159-
@testset "params" begin
159+
@testset "trainables" begin
160160
mo = Maxout(()->Dense(32, 64), 4)
161-
ps = Flux.params(mo)
161+
ps = Flux.trainables(mo)
162162
@test length(ps) == 8 #4 alts, each with weight and bias
163163
end
164164
end

test/layers/conv.jl

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -43,28 +43,30 @@ end
4343
@test sum(op) == prod(size(op))
4444

4545
@testset "No bias mapped through $lmap" for lmap in (identity, cpu, f32)
46-
bias = Conv((2,2), 1=>3, bias = false) |> lmap
47-
op = bias(ip)
46+
model = Conv((2,2), 1=>3, bias = false) |> lmap
47+
op = model(ip)
4848
@test sum(op) 0.f0
49-
gs = gradient(() -> sum(bias(ip)), Flux.params(bias))
50-
@test bias.bias gs.params
49+
g = gradient(m -> sum(m(ip)), model)[1]
50+
@test g.bias isa Nothing
5151
end
5252

53-
# Train w/o bias and make sure no convergence happens
54-
# when only bias can be converged
55-
bias = Conv((2, 2), 1=>3, bias = false);
56-
ip = zeros(Float32, 28,28,1,1)
57-
op = zeros(Float32, 27,27,3,1) .+ 2.f0
58-
opt = Descent()
59-
60-
for _ = 1:10^3
61-
gs = gradient(Flux.params(bias)) do
62-
Flux.Losses.mse(bias(ip), op)
53+
@testset "no bias train" begin
54+
# Train w/o bias and make sure no convergence happens
55+
# when only bias can be converged
56+
model = Conv((2, 2), 1=>3, bias = false);
57+
ip = zeros(Float32, 28,28,1,1)
58+
op = zeros(Float32, 27,27,3,1) .+ 2.f0
59+
opt_state = Flux.setup(Descent(), model)
60+
61+
for _ = 1:10^3
62+
g = gradient(model) do m
63+
Flux.mse(m(ip), op)
64+
end[1]
65+
Flux.update!(opt_state, model, g)
6366
end
64-
Flux.Optimise.update!(opt, params(bias), gs)
65-
end
6667

67-
@test Flux.Losses.mse(bias(ip), op) 4.f0
68+
@test Flux.Losses.mse(model(ip), op) 4.f0
69+
end
6870

6971
@testset "Grouped Conv" begin
7072
ip = rand(Float32, 28, 100, 2)
@@ -164,11 +166,11 @@ end
164166

165167
m = ConvTranspose((3,3), 1=>1)
166168
# Test that the gradient call does not throw: #900
167-
@test gradient(()->sum(m(x)), Flux.params(m)) isa Flux.Zygote.Grads
169+
g = gradient(m -> sum(m(x)), m)[1]
168170

169171
x = zeros(Float32, 5, 5, 2, 4)
170172
m = ConvTranspose((3,3), 2=>3)
171-
@test gradient(()->sum(m(x)), params(m)) isa Flux.Zygote.Grads
173+
g = gradient(m -> sum(m(x)), m)[1]
172174

173175
# test ConvTranspose supports groups argument
174176
x = randn(Float32, 10, 10, 2, 3)
@@ -178,7 +180,7 @@ end
178180
m2 = ConvTranspose((3,3), 2=>4, groups=2, pad=SamePad())
179181
@test size(m2.weight) == (3,3,2,2)
180182
@test size(m1(x)) == size(m2(x))
181-
@test gradient(()->sum(m2(x)), params(m2)) isa Flux.Zygote.Grads
183+
g = gradient(m -> sum(m(x)), m2)[1]
182184

183185
x = randn(Float32, 10, 2,1)
184186
m = ConvTranspose((3,), 2=>4, pad=SamePad(), groups=2)

test/layers/normalisation.jl

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ end
129129
2.0 4.0 6.0]
130130

131131
@test Flux.hasaffine(m) == true
132-
@test length(Flux.params(m)) == 2
132+
@test length(Flux.trainables(m)) == 2
133133

134134
@test m.β == [0, 0] # initβ(2)
135135
@test m.γ == [1, 1] # initγ(2)
@@ -211,9 +211,9 @@ end
211211
@inferred m(x)
212212
end
213213

214-
@test length(Flux.params(BatchNorm(10))) == 2
215-
@test length(Flux.params(BatchNorm(10, affine=true))) == 2
216-
@test length(Flux.params(BatchNorm(10, affine=false))) == 0
214+
@test length(Flux.trainables(BatchNorm(10))) == 2
215+
@test length(Flux.trainables(BatchNorm(10, affine=true))) == 2
216+
@test length(Flux.trainables(BatchNorm(10, affine=false))) == 0
217217

218218
@test BatchNorm(5; active=true).active === true
219219
@test_throws Exception BatchNorm(5; active=:something_else)
@@ -224,7 +224,7 @@ end
224224
let m = InstanceNorm(2; affine=true, track_stats=true), sizes = (3, 2, 2),
225225
x = reshape(collect(1:prod(sizes)), sizes)
226226

227-
@test length(Flux.params(m)) == 2
227+
@test length(Flux.trainables(m)) == 2
228228
x = Float32.(x)
229229
@test m.β == [0, 0] # initβ(2)
230230
@test m.γ == [1, 1] # initγ(2)
@@ -287,7 +287,7 @@ end
287287
x = reshape(collect(1:prod(sizes)), sizes)
288288

289289
@test Flux.hasaffine(m) == true
290-
@test length(Flux.params(m)) == 2
290+
@test length(Flux.trainables(m)) == 2
291291
x = Float64.(x)
292292
y = m(x)
293293
μ = mean(x, dims=1)
@@ -300,7 +300,7 @@ end
300300
let m = InstanceNorm(2, sigmoid), sizes = (3, 2, 2),
301301
x = reshape(collect(1:prod(sizes)), sizes)
302302
@test Flux.hasaffine(m) == false
303-
@test length(Flux.params(m)) == 0
303+
@test length(Flux.trainables(m)) == 0
304304

305305
x = Float64.(x)
306306
y = m(x)
@@ -345,9 +345,9 @@ end
345345
@inferred m(x)
346346
end
347347

348-
@test length(Flux.params(InstanceNorm(10))) == 0
349-
@test length(Flux.params(InstanceNorm(10, affine=true))) == 2
350-
@test length(Flux.params(InstanceNorm(10, affine=false))) == 0
348+
@test length(Flux.trainables(InstanceNorm(10))) == 0
349+
@test length(Flux.trainables(InstanceNorm(10, affine=true))) == 2
350+
@test length(Flux.trainables(InstanceNorm(10, affine=false))) == 0
351351

352352
@test InstanceNorm(5; active=true).active === true
353353
@test_throws Exception InstanceNorm(5; active=:something_else)
@@ -370,10 +370,10 @@ end
370370

371371
m = LayerNorm((2,3,4))
372372
@test Flux.hasaffine(m) == true
373-
@test length(Flux.params(m)) == 2
373+
@test length(Flux.trainables(m)) == 2
374374
m = LayerNorm((2,3,4), affine=false)
375375
@test Flux.hasaffine(m) == false
376-
@test length(Flux.params(m)) == 0
376+
@test length(Flux.trainables(m)) == 0
377377
end
378378

379379
@testset "GroupNorm" begin
@@ -383,7 +383,7 @@ end
383383
let m = GroupNorm(4,2), sizes = (3,4,2),
384384
x = reshape(collect(1:prod(sizes)), sizes)
385385

386-
@test length(Flux.params(m)) == 2
386+
@test length(Flux.trainables(m)) == 2
387387
x = Float32.(x)
388388
@test m.β == [0, 0, 0, 0] # initβ(32)
389389
@test m.γ == [1, 1, 1, 1] # initγ(32)

test/layers/recurrent.jl

Lines changed: 9 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,5 @@
11
using LinearAlgebra
22

3-
@testset "RNN gradients-implicit" begin
4-
layer = Flux.Recur(Flux.RNNCell(1, 1, identity))
5-
layer.cell.Wi .= 5.0
6-
layer.cell.Wh .= 4.0
7-
layer.cell.b .= 0.0f0
8-
layer.cell.state0 .= 7.0
9-
x = [[2.0f0], [3.0f0]]
10-
11-
# theoretical primal gradients
12-
primal =
13-
layer.cell.Wh .* (layer.cell.Wh * layer.cell.state0 .+ x[1] .* layer.cell.Wi) .+
14-
x[2] .* layer.cell.Wi
15-
∇Wi = x[1] .* layer.cell.Wh .+ x[2]
16-
∇Wh = 2 .* layer.cell.Wh .* layer.cell.state0 .+ x[1] .* layer.cell.Wi
17-
∇b = layer.cell.Wh .+ 1
18-
∇state0 = layer.cell.Wh .^ 2
19-
20-
Flux.reset!(layer)
21-
ps = Flux.params(layer)
22-
e, g = Flux.withgradient(ps) do
23-
out = [layer(xi) for xi in x]
24-
sum(out[2])
25-
end
26-
27-
@test primal[1] e
28-
@test ∇Wi g[ps[1]]
29-
@test ∇Wh g[ps[2]]
30-
@test ∇b g[ps[3]]
31-
@test ∇state0 g[ps[4]]
32-
33-
end
343

354
@testset "RNN gradients-explicit" begin
365
layer = Flux.Recur(Flux.RNNCell(1, 1, identity))
@@ -70,9 +39,9 @@ end
7039
for r [RNN,]
7140
rnn = r(2 => 3)
7241
Flux.reset!(rnn)
73-
grads_seq = gradient(Flux.params(rnn)) do
42+
grads_seq = gradient(rnn) do rnn
7443
sum([rnn(s) for s in seq][3])
75-
end
44+
end[1]
7645
Flux.reset!(rnn);
7746
bptt = gradient(Wh -> sum(tanh.(rnn.cell.Wi * seq[3] + Wh *
7847
tanh.(rnn.cell.Wi * seq[2] + Wh *
@@ -82,7 +51,7 @@ end
8251
+ rnn.cell.b)
8352
+ rnn.cell.b)),
8453
rnn.cell.Wh)
85-
@test grads_seq[rnn.cell.Wh] bptt[1]
54+
@test_broken grads_seq.cell.Wh bptt[1]
8655
end
8756
end
8857

@@ -92,9 +61,9 @@ end
9261
for r [RNN,]
9362
rnn = r(2 => 3)
9463
Flux.reset!(rnn)
95-
grads_seq = gradient(Flux.params(rnn)) do
64+
grads_seq = gradient(rnn) do rnn
9665
sum([rnn(s) for s in seq][3])
97-
end
66+
end[1]
9867
Flux.reset!(rnn);
9968
bptt = gradient(Wh -> sum(tanh.(rnn.cell.Wi * seq[3] + Wh *
10069
tanh.(rnn.cell.Wi * seq[2] + Wh *
@@ -104,17 +73,17 @@ end
10473
+ rnn.cell.b)
10574
+ rnn.cell.b)),
10675
rnn.cell.Wh)
107-
@test grads_seq[rnn.cell.Wh] bptt[1]
76+
@test_broken grads_seq.cell.Wh bptt[1]
10877
end
10978
end
11079

11180
@testset "BPTT-3D" begin
11281
seq = rand(Float32, (2, 1, 3))
11382
rnn = RNN(2 => 3)
11483
Flux.reset!(rnn)
115-
grads_seq = gradient(Flux.params(rnn)) do
84+
grads_seq = gradient(rnn) do rnn
11685
sum(rnn(seq)[:, :, 3])
117-
end
86+
end[1]
11887
Flux.reset!(rnn);
11988
bptt = gradient(rnn.cell.Wh) do Wh
12089
# calculate state 1
@@ -131,7 +100,7 @@ end
131100
rnn.cell.b)
132101
sum(s3) # loss is sum of state 3
133102
end
134-
@test grads_seq[rnn.cell.Wh] bptt[1]
103+
@test_broken grads_seq.cell.Wh bptt[1]
135104
end
136105

137106
@testset "RNN-shapes" begin

0 commit comments

Comments
 (0)