Merge #957
957: Add some gradient checking tests on GPUs r=dhairyagandhi96 a=dhairyagandhi96

Good to add generic tests for tracking gradients through the various layers on the GPU.

Co-authored-by: Dhairya Gandhi <dhairya@juliacopmuting.com>
Co-authored-by: Dhairya Gandhi <dhairya@juliacomputing.com>
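As a minimal sketch of the pattern being tested (an illustration, not part of the diff; the `Dense` layer and the shapes are arbitrary choices):

using Flux  # provides `gpu`, `gradient`, `params`, and the layers

m = gpu(Dense(3, 2))                  # move a layer's parameters to the GPU
x = gpu(rand(Float32, 3, 4))          # move the input data to the GPU
gs = gradient(() -> sum(m(x)), Flux.params(m))  # gradients w.r.t. the implicit parameters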
commit 472e1fbf5e
@@ -69,6 +69,7 @@ if CuArrays.has_cudnn()
   @info "Testing Flux/CUDNN"
   include("cudnn.jl")
   include("curnn.jl")
+  include("layers.jl")
 else
   @warn "CUDNN unavailable, not testing GPU DNN support"
 end
@@ -0,0 +1,98 @@
# Test layers and data/model movement on and off the GPU.
# Add tests for layers and their gradients on the GPU.
# Most of the forward passes should be fine when applied to
# bitstype objects, but this gives higher coverage for our use cases.
# Check that computing the gradients does not throw.

# Generic movement tests
@testset "Basic GPU Movement" begin
  @test gradient(x -> sum(gpu(x)), rand(3,3)) isa Tuple
  @test gradient(x -> sum(cpu(x)), gpu(rand(3,3))) isa Tuple
end

# TODO: These layers fall back to scalar indexing on the GPU.
# `AlphaDropout` throws a compilation error on GPUs;
# the rest are scalar-indexing issues.
const BROKEN_LAYERS = [DepthwiseConv,
                       AlphaDropout,
                       InstanceNorm,
                       GroupNorm]
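Here "scalar indexing" means reading or writing a GPU array one element at a time from the CPU, which is extremely slow; CuArrays can be told to treat it as an error. A small illustration (not part of the diff), assuming CuArrays' `allowscalar`:

using CuArrays

CuArrays.allowscalar(false)   # turn silent scalar fallbacks into errors
x = cu(rand(Float32, 3, 3))
x[1, 1]                       # now throws, exposing scalar-indexing code paths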
function gradtest(name::String, layers::Vector, xs = nothing, args...)
  isnothing(xs) && error("Missing input to test the layers against.")
  @testset "$name GPU grad tests" begin
    for layer in layers
      @testset "$layer GPU grad test" begin
        l = gpu(layer(args...))
        xs = gpu(xs)
        if any(x -> isa(l, x), BROKEN_LAYERS)
          ps = Flux.params(l)
          @test_broken gradient(() -> sum(l(xs)), ps) isa Flux.Zygote.Grads
        else
          ps = Flux.params(l)
          gs = gradient(() -> sum(l(xs)), ps)
          @test gs isa Flux.Zygote.Grads

          # Pooling layers have no parameters, so only check the
          # parameter gradients when `ps` is non-empty.
          if !isempty(ps)
            @test gs[first(ps)] isa Flux.CuArrays.CuArray
          end
        end
      end
    end
  end
end

# Repeated input `r` for the Conv and CrossCor family of tests below.
r = rand(Float32, 28, 28, 1, 1)
conv_layers = [Conv, ConvTranspose, CrossCor, DepthwiseConv]
gradtest("Conv", conv_layers, r, (2,2), 1=>3)

pooling_layers = [MaxPool, MeanPool]
gradtest("Pooling", pooling_layers, r, (2,2))

dropout_layers = [Dropout, AlphaDropout]
gradtest("Dropout", dropout_layers, r, 0.5f0)

norm_layers = [LayerNorm, BatchNorm]
gradtest("Normalising", norm_layers, rand(Float32, 28,28,3,1), 1)

instancenorm = [InstanceNorm]
gradtest("InstanceNorm", instancenorm, r, 1)

groupnorm = [GroupNorm]
gradtest("GroupNorm", groupnorm, rand(Float32, 28,28,3,1), 3, 1)

const stateless_layers = [Flux.mse,
                          Flux.crossentropy,
                          Flux.logitcrossentropy,
                          Flux.normalise]

const stateless_layers_broadcasted = [Flux.binarycrossentropy,
                                      Flux.logitbinarycrossentropy]

function stateless_gradtest(f, args...)
  @test gradient((args...) -> sum(f(args...)), args...)[1] isa CuArray
end

function stateless_gradtest_broadcasted(f, args...)
  # These losses are defined on scalars, so broadcast them over the arrays.
  @test gradient((args...) -> sum(f.(args...)), args...)[1] isa CuArray
end
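The broadcast variant exists because `binarycrossentropy` and `logitbinarycrossentropy` are defined on scalars, unlike the array-level losses above. A hand-written illustration (not part of the diff):

x, y = gpu(rand(Float32, 3, 3)), gpu(rand(Float32, 3, 3))
Flux.mse(x, y)                  # array-level loss, returns a scalar directly
Flux.binarycrossentropy.(x, y)  # scalar loss broadcast elementwise, returns an array (hence the `sum`)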
@testset "Stateless GPU grad tests" begin
|
||||
x = gpu(rand(3,3))
|
||||
y = gpu(rand(3,3))
|
||||
|
||||
for layer in stateless_layers
|
||||
if layer == Flux.normalise
|
||||
stateless_gradtest(layer, x)
|
||||
else
|
||||
stateless_gradtest(layer, x, y)
|
||||
end
|
||||
end
|
||||
|
||||
for layer in stateless_layers_broadcasted
|
||||
stateless_gradtest_broadcasted(layer, x, y)
|
||||
end
|
||||
end
|