Merge #957
957: Add some gradient checking tests on GPUs r=dhairyagandhi96 a=dhairyagandhi96

Good to add generic tests for tracking gradients through the various layers on the GPU.

Co-authored-by: Dhairya Gandhi <dhairya@juliacopmuting.com>
Co-authored-by: Dhairya Gandhi <dhairya@juliacomputing.com>
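As a minimal sketch of the pattern being tested (an illustration, not part of the diff; the `Dense` layer and the shapes are arbitrary choices):

using Flux  # provides `gpu`, `gradient`, `params`, and the layers

m = gpu(Dense(3, 2))                  # move a layer's parameters to the GPU
x = gpu(rand(Float32, 3, 4))          # move the input data to the GPU
gs = gradient(() -> sum(m(x)), Flux.params(m))  # gradients w.r.t. the implicit parameters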
commit 472e1fbf5e
@@ -69,6 +69,7 @@ if CuArrays.has_cudnn()
   @info "Testing Flux/CUDNN"
   include("cudnn.jl")
   include("curnn.jl")
+  include("layers.jl")
 else
   @warn "CUDNN unavailable, not testing GPU DNN support"
 end
@@ -0,0 +1,98 @@
# Test layers and data/model movement on and off the GPU.
# Add tests for layers and their gradients on the GPU.
# Most of the forward passes should be fine when applied to
# bitstype objects, but this gives higher coverage for our use cases.
# Check that computing the gradients does not throw.

# Generic movement tests
@testset "Basic GPU Movement" begin
  @test gradient(x -> sum(gpu(x)), rand(3,3)) isa Tuple
  @test gradient(x -> sum(cpu(x)), gpu(rand(3,3))) isa Tuple
end

# TODO: These layers fall back to scalar indexing on the GPU.
# `AlphaDropout` throws a compilation error on GPUs;
# the rest are scalar-indexing issues.
const BROKEN_LAYERS = [DepthwiseConv,
                       AlphaDropout,
                       InstanceNorm,
                       GroupNorm]
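Here "scalar indexing" means reading or writing a GPU array one element at a time from the CPU, which is extremely slow; CuArrays can be told to treat it as an error. A small illustration (not part of the diff), assuming CuArrays' `allowscalar`:

using CuArrays

CuArrays.allowscalar(false)   # turn silent scalar fallbacks into errors
x = cu(rand(Float32, 3, 3))
x[1, 1]                       # now throws, exposing scalar-indexing code paths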
function gradtest(name::String, layers::Vector, xs = nothing, args...)
  isnothing(xs) && error("Missing input to test the layers against.")
  @testset "$name GPU grad tests" begin
    for layer in layers
      @testset "$layer GPU grad test" begin
        l = gpu(layer(args...))
        xs = gpu(xs)
        if any(x -> isa(l, x), BROKEN_LAYERS)
          ps = Flux.params(l)
          @test_broken gradient(() -> sum(l(xs)), ps) isa Flux.Zygote.Grads
        else
          ps = Flux.params(l)
          gs = gradient(() -> sum(l(xs)), ps)
          @test gs isa Flux.Zygote.Grads

          # Pooling layers have no parameters, so only check the
          # parameter gradients when `ps` is non-empty.
          if !isempty(ps)
            @test gs[first(ps)] isa Flux.CuArrays.CuArray
          end
        end
      end
    end
  end
end

# Repeated input `r` for the Conv and CrossCor family of tests below.
r = rand(Float32, 28, 28, 1, 1)
conv_layers = [Conv, ConvTranspose, CrossCor, DepthwiseConv]
gradtest("Conv", conv_layers, r, (2,2), 1=>3)

pooling_layers = [MaxPool, MeanPool]
gradtest("Pooling", pooling_layers, r, (2,2))

dropout_layers = [Dropout, AlphaDropout]
gradtest("Dropout", dropout_layers, r, 0.5f0)

norm_layers = [LayerNorm, BatchNorm]
gradtest("Normalising", norm_layers, rand(Float32, 28,28,3,1), 1)

instancenorm = [InstanceNorm]
gradtest("InstanceNorm", instancenorm, r, 1)

groupnorm = [GroupNorm]
gradtest("GroupNorm", groupnorm, rand(Float32, 28,28,3,1), 3, 1)

const stateless_layers = [Flux.mse,
                          Flux.crossentropy,
                          Flux.logitcrossentropy,
                          Flux.normalise]

const stateless_layers_broadcasted = [Flux.binarycrossentropy,
                                      Flux.logitbinarycrossentropy]

function stateless_gradtest(f, args...)
  @test gradient((args...) -> sum(f(args...)), args...)[1] isa CuArray
end

function stateless_gradtest_broadcasted(f, args...)
  # These losses are defined on scalars, so broadcast them over the arrays.
  @test gradient((args...) -> sum(f.(args...)), args...)[1] isa CuArray
end
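The broadcast variant exists because `binarycrossentropy` and `logitbinarycrossentropy` are defined on scalars, unlike the array-level losses above. A hand-written illustration (not part of the diff):

x, y = gpu(rand(Float32, 3, 3)), gpu(rand(Float32, 3, 3))
Flux.mse(x, y)                  # array-level loss, returns a scalar directly
Flux.binarycrossentropy.(x, y)  # scalar loss broadcast elementwise, returns an array (hence the `sum`)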
@testset "Stateless GPU grad tests" begin
|
||||
x = gpu(rand(3,3))
|
||||
y = gpu(rand(3,3))
|
||||
|
||||
for layer in stateless_layers
|
||||
if layer == Flux.normalise
|
||||
stateless_gradtest(layer, x)
|
||||
else
|
||||
stateless_gradtest(layer, x, y)
|
||||
end
|
||||
end
|
||||
|
||||
for layer in stateless_layers_broadcasted
|
||||
stateless_gradtest_broadcasted(layer, x, y)
|
||||
end
|
||||
end
|