Merge #937
937: Fix Glorot initialization, add He initialization r=MikeInnes a=Sleort Should fix #442. Adds He weight initialization as a bonus :-) Co-authored-by: Troels Arnfred Bojesen <tr-ab@online.no>
This commit is contained in:
commit
90a38a3201
@ -1,6 +1,11 @@
|
|||||||
# Arrays
|
# Arrays
|
||||||
glorot_uniform(dims...) = (rand(Float32, dims...) .- 0.5f0) .* sqrt(24.0f0/sum(dims))
|
# Zero-argument case: both fan values are 1. Returned as (fan_in, fan_out).
nfan() = (1, 1)
|
||||||
glorot_normal(dims...) = randn(Float32, dims...) .* sqrt(2.0f0/sum(dims))
|
# A length-`n` vector is treated as an n×1 matrix, so fan_in = 1 and fan_out = n.
nfan(n) = (1, n)
|
||||||
|
# Dense kernels are arranged as matrices with size (n_out, n_in);
# the result is ordered (fan_in, fan_out), i.e. the two sizes swapped.
function nfan(n_out, n_in)
    return (n_in, n_out)
end
|
||||||
|
# Convolution kernels: every dimension except the last two is multiplied
# together, and that product scales the second-to-last dimension (fan_in)
# and the last dimension (fan_out).
function nfan(dims...)
    leading = prod(dims[1:end-2])
    return (leading * dims[end-1], leading * dims[end])
end
|
||||||
|
|
||||||
|
# Glorot (Xavier) uniform initialiser.
# Draws Float32 samples from `rand`, shifts them by -0.5 and scales by
# sqrt(24 / (fan_in + fan_out)), so the values lie within
# ±sqrt(6 / (fan_in + fan_out)). The fans come from `nfan(dims...)`.
function glorot_uniform(dims...)
    samples = rand(Float32, dims...)
    width = sqrt(24.0f0 / sum(nfan(dims...)))
    return @. (samples - 0.5f0) * width
end
|
||||||
|
# Glorot (Xavier) normal initialiser.
# Draws Float32 samples from `randn` and scales them by
# sqrt(2 / (fan_in + fan_out)), with the fans taken from `nfan(dims...)`.
function glorot_normal(dims...)
    samples = randn(Float32, dims...)
    σ = sqrt(2.0f0 / sum(nfan(dims...)))
    return samples .* σ
end
|
||||||
|
|
||||||
ones(T::Type, dims...) = Base.ones(T, dims...)
|
ones(T::Type, dims...) = Base.ones(T, dims...)
|
||||||
zeros(T::Type, dims...) = Base.zeros(T, dims...)
|
zeros(T::Type, dims...) = Base.zeros(T, dims...)
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
using Flux
|
using Flux
|
||||||
using Flux: throttle, glorot_uniform, glorot_normal, stack, unstack
|
using Flux: throttle, nfan, glorot_uniform, glorot_normal, stack, unstack
|
||||||
using StatsBase: std
|
using StatsBase: var
|
||||||
using Random
|
using Random
|
||||||
using Test
|
using Test
|
||||||
|
|
||||||
@ -56,18 +56,26 @@ end
|
|||||||
# Set random seed so that these tests don't fail randomly
|
# Set random seed so that these tests don't fail randomly
|
||||||
Random.seed!(0)
|
Random.seed!(0)
|
||||||
|
|
||||||
# glorot_uniform should yield a kernel with entries spanning ±sqrt(6/(n_in + n_out)),
|
@testset "Fan in/out" begin
|
||||||
# and glorot_normal should yield a kernel with stddev ~= sqrt(2/(n_in + n_out))
|
@test nfan() == (1, 1) #For a constant
|
||||||
for (n_in, n_out) in [(100, 100), (100, 400)]
|
@test nfan(100) == (1, 100) #For vector
|
||||||
v = glorot_uniform(n_in, n_out)
|
@test nfan(100, 200) == (200, 100) #For Dense layer
|
||||||
@test minimum(v) > -1.1*sqrt(6/(n_in + n_out))
|
@test nfan(2, 30, 40) == (2 * 30, 2 * 40) #For 1D Conv layer
|
||||||
@test minimum(v) < -0.9*sqrt(6/(n_in + n_out))
|
@test nfan(2, 3, 40, 50) == (2 * 3 * 40, 2 * 3 * 50) #For 2D Conv layer
|
||||||
@test maximum(v) > 0.9*sqrt(6/(n_in + n_out))
|
@test nfan(2, 3, 4, 50, 60) == (2 * 3 * 4 * 50, 2 * 3 * 4 * 60) #For 3D Conv layer
|
||||||
@test maximum(v) < 1.1*sqrt(6/(n_in + n_out))
|
end
|
||||||
|
|
||||||
v = glorot_normal(n_in, n_out)
|
@testset "glorot" begin
|
||||||
@test std(v) > 0.9*sqrt(2/(n_in + n_out))
|
# glorot_uniform and glorot_normal should both yield a kernel with
|
||||||
@test std(v) < 1.1*sqrt(2/(n_in + n_out))
|
# variance ≈ 2/(fan_in + fan_out)
|
||||||
|
for dims ∈ [(1000,), (100, 100), (100, 400), (2, 3, 32, 64), (2, 3, 4, 32, 64)]
|
||||||
|
for init ∈ [glorot_uniform, glorot_normal]
|
||||||
|
v = init(dims...)
|
||||||
|
fan_in, fan_out = nfan(dims...)
|
||||||
|
σ2 = 2 / (fan_in + fan_out)
|
||||||
|
@test 0.9σ2 < var(v) < 1.1σ2
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user