Updated tests of normalisation layers.

thebhatman 2019-06-11 20:05:07 +05:30
parent a782524a0e
commit 94a2d1987d


@@ -27,286 +27,254 @@ trainmode(f, x...) = forward(f, x...)[1]
  @test count(a->a == 0, y) == 0
end

@testset "BatchNorm" begin
  let m = BatchNorm(2), x = [1.0 3.0 5.0;
                             2.0 4.0 6.0]

    @test m.β == [0, 0]  # initβ(2)
    @test m.γ == [1, 1]  # initγ(2)
    # initial m.σ is 1
    # initial m.μ is 0
    y = trainmode(m, x)
    @test y ≈ [-1.22474 0 1.22474; -1.22474 0 1.22474]

    # julia> x
    # 2×3 Array{Float64,2}:
    #  1.0  3.0  5.0
    #  2.0  4.0  6.0
    #
    # μ of batch will be
    #  (1. + 3. + 5.) / 3 = 3
    #  (2. + 4. + 6.) / 3 = 4
    #
    # ∴ update rule with momentum:
    #  .1 * 3 + 0 = .3
    #  .1 * 4 + 0 = .4
    @test m.μ ≈ reshape([0.3, 0.4], 2, 1)

    # julia> .1 .* var(x, dims = 2, corrected = false) .* (3 / 2) .+ .9 .* [1., 1.]
    # 2×1 Array{Float64,2}:
    #  1.3
    #  1.3
    @test m.σ² ≈ .1 .* var(x, dims = 2, corrected = false) .* (3 / 2) .+ .9 .* [1., 1.]

    x = m(x)
    @test isapprox(x[1], (1 .- 0.3) / sqrt(1.3), atol = 1.0e-5)
  end
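
  # The update traced above can be reproduced outside the layer; a minimal
  # sketch, assuming Flux's default momentum of 0.1 and freshly initialised
  # running statistics (μ = 0, σ² = 1); names are local to the sketch:
  #
  #   using Statistics
  #   x = [1.0 3.0 5.0;
  #        2.0 4.0 6.0]
  #   momentum = 0.1
  #   # running mean: (1 - momentum) * μ_old + momentum * μ_batch
  #   μ = (1 - momentum) .* zeros(2) .+ momentum .* vec(mean(x, dims = 2))
  #   @assert μ ≈ [0.3, 0.4]
  #   # running variance: uncorrected batch variance rescaled by n / (n - 1)
  #   n = size(x, 2)
  #   σ² = (1 - momentum) .* ones(2) .+
  #        momentum .* vec(var(x, dims = 2, corrected = false)) .* (n / (n - 1))
  #   @assert σ² ≈ [1.3, 1.3]
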
  # with activation function
  let m = BatchNorm(2, sigmoid), x = param([1.0 3.0 5.0;
                                            2.0 4.0 6.0])
    y = trainmode(m, x)
    y = m(x)
    @test isapprox(y, data(sigmoid.((x .- m.μ) ./ sqrt.(m.σ² .+ m.ϵ))), atol = 1.0e-7)
  end

  let m = BatchNorm(2), x = param(reshape(1:6, 3, 2, 1))
    y = reshape(permutedims(x, [2, 1, 3]), 2, :)
    y = permutedims(reshape(m(y), 2, 3, 1), [2, 1, 3])
    @test m(x) == y
  end

  let m = BatchNorm(2), x = param(reshape(1:12, 2, 3, 2, 1))
    y = reshape(permutedims(x, [3, 1, 2, 4]), 2, :)
    y = permutedims(reshape(m(y), 2, 2, 3, 1), [2, 3, 1, 4])
    @test m(x) == y
  end

  let m = BatchNorm(2), x = param(reshape(1:24, 2, 2, 3, 2, 1))
    y = reshape(permutedims(x, [4, 1, 2, 3, 5]), 2, :)
    y = permutedims(reshape(m(y), 2, 2, 2, 3, 1), [2, 3, 4, 1, 5])
    @test m(x) == y
  end

  let m = BatchNorm(32), x = randn(Float32, 416, 416, 32, 1);
    m(x)
    @test (@allocated m(x)) < 100_000_000
  end
end
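
# The three reshape/permutedims tests above all exercise the same invariant:
# BatchNorm reduces over every dimension except the channel dimension
# (second-to-last), so flattening the other dims into one axis and
# normalising each channel row gives the same numbers. A minimal sketch of
# that reasoning for the 3-d case, independent of Flux:
#
#   using Statistics
#   x = reshape(collect(1.0:6.0), 3, 2, 1)             # (W, C, N), C = 2
#   μ = mean(x, dims = (1, 3))
#   σ = std(x, dims = (1, 3), corrected = false)
#   direct = (x .- μ) ./ σ
#   rows = reshape(permutedims(x, (2, 1, 3)), 2, :)    # channels × rest
#   rows = (rows .- mean(rows, dims = 2)) ./ std(rows, dims = 2, corrected = false)
#   roundtrip = permutedims(reshape(rows, 2, 3, 1), (2, 1, 3))
#   @assert direct ≈ roundtrip
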
@testset "InstanceNorm" begin
  # helper functions
  expand_inst = (x, as) -> reshape(repeat(x, outer = [1, as[length(as)]]), as...)
  # begin tests
  let m = InstanceNorm(2), sizes = (3, 2, 2),
      x = reshape(collect(1:prod(sizes)), sizes)
    x = Float64.(x)
    @test m.β == [0, 0]  # initβ(2)
    @test m.γ == [1, 1]  # initγ(2)
    y = trainmode(m, x)

    # julia> x
    # [:, :, 1] =
    #  1.0  4.0
    #  2.0  5.0
    #  3.0  6.0
    #
    # [:, :, 2] =
    #  7.0  10.0
    #  8.0  11.0
    #  9.0  12.0
    #
    # μ will be
    #  (1. + 2. + 3.) / 3 = 2.
    #  (4. + 5. + 6.) / 3 = 5.
    #
    #  (7. + 8. + 9.) / 3 = 8.
    #  (10. + 11. + 12.) / 3 = 11.
    #
    # ∴ update rule with momentum:
    #  (1. - .1) * 0 + .1 * (2. + 8.) / 2 = .5
    #  (1. - .1) * 0 + .1 * (5. + 11.) / 2 = .8
    @test m.μ ≈ [0.5, 0.8]
    # momentum * var * num_items / (num_items - 1) + (1 - momentum) * sigma_sq
    # julia> reshape(mean(.1 .* var(x, dims = 1, corrected = false) .* (3 / 2), dims = 3), :) .+ .9 .* 1.
    # 2-element Array{Float64,1}:
    #  1.
    #  1.
    @test m.σ² ≈ reshape(mean(.1 .* var(x, dims = 1, corrected = false) .* (3 / 2), dims = 3), :) .+ .9 .* 1.

    x = m(x)
    @test isapprox(x[1], (1 - 0.5) / sqrt(1. + 1f-5), atol = 1.0e-5)
  end
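
  # The running-mean arithmetic above is the per-instance channel means
  # averaged over the batch; a minimal sketch, assuming the default
  # momentum of 0.1 and a zero-initialised running mean:
  #
  #   using Statistics
  #   sizes = (3, 2, 2)                    # (W, C, N)
  #   x = Float64.(reshape(1:prod(sizes), sizes))
  #   μ_inst = mean(x, dims = 1)           # per-instance, per-channel means
  #   μ = 0.9 .* zeros(2) .+ 0.1 .* vec(mean(μ_inst, dims = 3))
  #   @assert μ ≈ [0.5, 0.8]
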
  # with activation function
  let m = InstanceNorm(2, sigmoid), sizes = (3, 2, 2),
      x = reshape(collect(1:prod(sizes)), sizes)
    x = Float64.(x)
    affine_shape = collect(sizes)
    affine_shape[1] = 1

    y = trainmode(m, x)
    y = m(x)
    @test isapprox(y, data(sigmoid.((x .- expand_inst(m.μ, affine_shape)) ./ sqrt.(expand_inst(m.σ², affine_shape) .+ m.ϵ))), atol = 1.0e-7)
  end
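
  # expand_inst above only tiles a per-channel statistic out to a shape that
  # broadcasts against the (W, C, N) input; a small usage sketch with the
  # same affine_shape = [1, 2, 2] as in the test:
  #
  #   expand_inst = (x, as) -> reshape(repeat(x, outer = [1, as[length(as)]]), as...)
  #   stat = [0.5, 0.8]                    # one value per channel
  #   tiled = expand_inst(stat, [1, 2, 2])
  #   @assert size(tiled) == (1, 2, 2)
  #   @assert tiled[1, :, 1] == stat == tiled[1, :, 2]
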
  let m = InstanceNorm(2), sizes = (2, 4, 1, 2, 3),
      x = reshape(collect(1:prod(sizes)), sizes)
    y = reshape(permutedims(x, [3, 1, 2, 4, 5]), :, 2, 3)
    y = reshape(m(y), sizes...)
    @test m(x) == y
  end

  # check that μ, σ², and the output are the correct size for higher rank tensors
  let m = InstanceNorm(2), sizes = (5, 5, 3, 4, 2, 6),
      x = reshape(collect(1:prod(sizes)), sizes)
    y = m(x)
    @test size(m.μ) == (sizes[end - 1], )
    @test size(m.σ²) == (sizes[end - 1], )
    @test size(y) == sizes
  end

  # show that instance norm is equal to batch norm when channel and batch dims are squashed
  let m_inorm = InstanceNorm(2), m_bnorm = BatchNorm(12), sizes = (5, 5, 3, 4, 2, 6),
      x = reshape(collect(1:prod(sizes)), sizes)
    @test m_inorm(x) == reshape(m_bnorm(reshape(x, (sizes[1:end - 2]..., :, 1))), sizes)
  end
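
  # Why the squashed comparison above holds: reshaping (…, C, N) to
  # (…, C * N, 1) turns every (channel, instance) pair into its own
  # "channel", so BatchNorm's per-channel statistics coincide with
  # InstanceNorm's per-instance ones. A sketch of that for the means:
  #
  #   using Statistics
  #   sizes = (5, 5, 3, 4, 2, 6)
  #   x = Float64.(reshape(1:prod(sizes), sizes))
  #   μ_in = mean(x, dims = (1, 2, 3, 4))              # InstanceNorm stats
  #   xs = reshape(x, (sizes[1:end - 2]..., :, 1))
  #   μ_bn = mean(xs, dims = (1, 2, 3, 4, 6))          # BatchNorm stats
  #   @assert vec(μ_in) ≈ vec(μ_bn)
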
  let m = InstanceNorm(32), x = randn(Float32, 416, 416, 32, 1);
    m(x)
    @test (@allocated m(x)) < 100_000_000
  end
end

@testset "GroupNorm" begin
  # begin tests
  squeeze(x) = dropdims(x, dims = tuple(findall(size(x) .== 1)...)) # To remove all singular dimensions

  let m = GroupNorm(4,2), sizes = (3,4,2),
      x = param(reshape(collect(1:prod(sizes)), sizes))
    x = Float64.(x)
    @test m.β == [0, 0, 0, 0]  # initβ(4)
    @test m.γ == [1, 1, 1, 1]  # initγ(4)

    y = trainmode(m, x)

    # julia> x
    # [:, :, 1] =
    #  1.0  4.0  7.0  10.0
    #  2.0  5.0  8.0  11.0
    #  3.0  6.0  9.0  12.0
    #
    # [:, :, 2] =
    #  13.0  16.0  19.0  22.0
    #  14.0  17.0  20.0  23.0
    #  15.0  18.0  21.0  24.0
    #
    # μ will be
    #  (1. + 2. + 3. + 4. + 5. + 6.) / 6 = 3.5
    #  (7. + 8. + 9. + 10. + 11. + 12.) / 6 = 9.5
    #
    #  (13. + 14. + 15. + 16. + 17. + 18.) / 6 = 15.5
    #  (19. + 20. + 21. + 22. + 23. + 24.) / 6 = 21.5
    #
    # μ =
    #  3.5  15.5
    #  9.5  21.5
    #
    # ∴ update rule with momentum:
    #  (1. - .1) * 0 + .1 * (3.5 + 15.5) / 2 = 0.95
    #  (1. - .1) * 0 + .1 * (9.5 + 21.5) / 2 = 1.55
    @test m.μ ≈ [0.95, 1.55]

    # julia> mean(var(reshape(x,3,2,2,2), dims = (1,2)) .* .1, dims = 2) .+ .9 * 1.
    # 2-element Array{Float64,1}:
    #  1.25
    #  1.25
    @test m.σ² ≈ mean(squeeze(var(reshape(x,3,2,2,2), dims = (1,2))) .* .1, dims = 2) .+ .9 * 1.

    x = m(x)
    println(x[1])
    @test isapprox(x[1], (1 - 0.95) / sqrt(1.25 + 1f-5), atol = 1.0e-5)
  end
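
  # The per-group means above come from splitting the 4 channels into 2
  # groups of 2; a minimal sketch of the same momentum update (assuming the
  # default momentum of 0.1 and a zero-initialised running mean):
  #
  #   using Statistics
  #   sizes = (3, 4, 2)
  #   x = Float64.(reshape(1:prod(sizes), sizes))
  #   xg = reshape(x, 3, 2, 2, 2)          # (W, channels-per-group, G, N)
  #   μ_grp = mean(xg, dims = (1, 2))      # [3.5 15.5; 9.5 21.5] across N
  #   μ = 0.9 .* zeros(2) .+ 0.1 .* vec(mean(μ_grp, dims = 4))
  #   @assert μ ≈ [0.95, 1.55]
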
  # with activation function
  let m = GroupNorm(4,2, sigmoid), sizes = (3, 4, 2),
      x = param(reshape(collect(1:prod(sizes)), sizes))
    x = Float64.(x)
    μ_affine_shape = ones(Int, length(sizes) + 1)
    μ_affine_shape[end-1] = 2 # Number of groups

    affine_shape = ones(Int, length(sizes) + 1)
    affine_shape[end-2] = 2 # Channels per group
    affine_shape[end-1] = 2 # Number of groups
    affine_shape[1] = sizes[1]
    affine_shape[end] = sizes[end]

    og_shape = size(x)

    y = trainmode(m, x)
    y = m(x)
    x_ = reshape(x, affine_shape...)
    out = reshape(data(sigmoid.((x_ .- reshape(m.μ, μ_affine_shape...)) ./ sqrt.(reshape(m.σ², μ_affine_shape...) .+ m.ϵ))), og_shape)
    @test isapprox(y, out, atol = 1.0e-7)
  end
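
  # The two shape vectors above just build broadcast-compatible layouts:
  # μ_affine_shape lifts the per-group statistics to the grouped rank, and
  # affine_shape regroups the input itself. A shape-only sketch:
  #
  #   sizes = (3, 4, 2)                    # 4 channels = 2 groups of 2
  #   μ_affine_shape = [1, 1, 2, 1]        # as constructed in the test
  #   affine_shape = [3, 2, 2, 2]          # (W, chs-per-group, G, N)
  #   x_ = reshape(rand(sizes...), affine_shape...)
  #   @assert size(x_ .- ones(μ_affine_shape...)) == (3, 2, 2, 2)
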
  let m = GroupNorm(2,2), sizes = (2, 4, 1, 2, 3),
      x = param(reshape(collect(1:prod(sizes)), sizes))
    y = reshape(permutedims(x, [3, 1, 2, 4, 5]), :, 2, 3)
    y = reshape(m(y), sizes...)
    @test m(x) == y
  end

  # check that μ, σ², and the output are the correct size for higher rank tensors
  let m = GroupNorm(4,2), sizes = (5, 5, 3, 4, 4, 6),
      x = param(reshape(collect(1:prod(sizes)), sizes))
    y = m(x)
    @test size(m.μ) == (m.G, 1)
    @test size(m.σ²) == (m.G, 1)
    @test size(y) == sizes
  end

  # show that group norm is the same as instance norm when the group size is the same as the number of channels
  let IN = InstanceNorm(4), GN = GroupNorm(4,4), sizes = (2,2,3,4,5),
      x = param(reshape(collect(1:prod(sizes)), sizes))
    @test IN(x) ≈ GN(x)
  end

  # show that group norm is the same as batch norm for a group of size 1 and batch of size 1
  let BN = BatchNorm(4), GN = GroupNorm(4,4), sizes = (2,2,3,4,1),
      x = param(reshape(collect(1:prod(sizes)), sizes))
    @test BN(x) ≈ GN(x)
  end
end
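
# The last two equivalences generalise: with one channel per group,
# GroupNorm's per-group statistics collapse to InstanceNorm's per-channel
# ones. A minimal sketch of that for the means:
#
#   using Statistics
#   x = Float64.(reshape(1:48, 2, 2, 4, 3))     # (H, W, C, N), C = 4
#   μ_inst = mean(x, dims = (1, 2))             # InstanceNorm statistics
#   xg = reshape(x, 2, 2, 1, 4, 3)              # 1 channel per group, G = 4
#   μ_grp = mean(xg, dims = (1, 2, 3))          # GroupNorm statistics
#   @assert vec(μ_inst) ≈ vec(μ_grp)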