# Flux.jl/test/cuda/layers.jl

# Test layers and data/model movements on and off the GPU,
# and check that taking gradients on the GPU does not throw.
# Most of the forward passes should be fine when applied to
# bitstype objects, but this gives higher coverage for our use-cases.

# generic movement tests
@testset "Basic GPU Movement" begin
  @test gradient(x -> sum(gpu(x)), rand(3,3)) isa Tuple
  @test gradient(x -> sum(cpu(x)), gpu(rand(3,3))) isa Tuple
end
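
# A minimal round-trip sketch (an addition, not part of the original suite):
# moving an array onto the GPU and back should reproduce it exactly, since
# `gpu`/`cpu` only transfer the data for a Float32 input.
@testset "CPU/GPU round trip (sketch)" begin
  x = rand(Float32, 3, 3)
  @test cpu(gpu(x)) == x
end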
# TODO: These layers are currently broken on the GPU:
# `AlphaDropout` throws a compilation error, while the
# rest hit scalar-indexing issues.
const BROKEN_LAYERS = [DepthwiseConv,
                       AlphaDropout,
                       InstanceNorm,
                       GroupNorm]
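
# A hedged sketch (an addition, not part of the original suite) of what
# "scalar indexing" means here: with scalar indexing disallowed, element-wise
# `getindex` on a `CuArray` throws instead of silently falling back to a slow
# CPU loop, which is how the layers above fail. This assumes
# `CuArrays.allowscalar` is available and that the disallowed path raises an
# `ErrorException`, as in the CuArrays versions these tests target.
@testset "Scalar indexing sketch" begin
  CuArrays.allowscalar(false)
  @test_throws ErrorException gpu(rand(3,3))[1, 1]
  CuArrays.allowscalar(true)  # restore the default for the rest of the suite
end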
function gradtest(name::String, layers::Vector, xs = nothing, args...)
  isnothing(xs) && error("Missing input to test the layers against.")
  @testset "$name GPU grad tests" begin
    for layer in layers
      @testset "$layer GPU grad test" begin
        l = gpu(layer(args...))
        xs = gpu(xs)
        ps = Flux.params(l)
        if any(x -> isa(l, x), BROKEN_LAYERS)
          @test_broken gradient(() -> sum(l(xs)), ps) isa Flux.Zygote.Grads
        else
          # Compute the gradients once and reuse them for both checks
          gs = gradient(() -> sum(l(xs)), ps)
          @test gs isa Flux.Zygote.Grads
          # Pooling layers have no trainable parameters, so only check
          # the parameter gradients when `ps` is non-empty
          if !isempty(ps)
            @test gs[first(ps)] isa Flux.CuArrays.CuArray
          end
        end
      end
    end
  end
end
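
# Usage note (added): `gradtest` is invoked below as
# gradtest(name, layers, input, layer_args...), and each constructor in
# `layers` is called as `layer(layer_args...)` before being moved to the GPU.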
# Shared input; the sizes repeat those used in the Conv and CrossCor tests
r = rand(Float32, 28, 28, 1, 1)

conv_layers = [Conv, ConvTranspose, CrossCor, DepthwiseConv]
gradtest("Conv", conv_layers, r, (2,2), 1=>3)

pooling_layers = [MaxPool, MeanPool]
gradtest("Pooling", pooling_layers, r, (2,2))

dropout_layers = [Dropout, AlphaDropout]
gradtest("Dropout", dropout_layers, r, 0.5f0)

norm_layers = [LayerNorm, BatchNorm]
gradtest("Normalising", norm_layers, rand(Float32, 28,28,3,1), 1)

instancenorm = [InstanceNorm]
gradtest("InstanceNorm", instancenorm, r, 1)

groupnorm = [GroupNorm]
gradtest("GroupNorm", groupnorm, rand(Float32, 28,28,3,1), 3, 1)
const stateless_layers = [Flux.mse,
                          Flux.crossentropy,
                          Flux.logitcrossentropy,
                          Flux.normalise]

const stateless_layers_broadcasted = [Flux.binarycrossentropy,
                                      Flux.logitbinarycrossentropy]

function stateless_gradtest(f, args...)
  @test gradient((args...) -> sum(f(args...)), args...)[1] isa CuArray
end

function stateless_gradtest_broadcasted(f, args...)
  @test gradient((args...) -> sum(f.(args...)), args...)[1] isa CuArray
end
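
# Usage note (added): `stateless_gradtest` suits functions like `Flux.mse`
# that accept whole arrays, while `stateless_gradtest_broadcasted` covers
# scalar functions such as `Flux.binarycrossentropy`, which must be broadcast
# over arrays, e.g.:
#
#   stateless_gradtest(Flux.mse, x, y)
#   stateless_gradtest_broadcasted(Flux.binarycrossentropy, x, y)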
@testset "Stateless GPU grad tests" begin
2019-12-05 12:44:04 +00:00
x = gpu(rand(3,3))
y = gpu(rand(3,3))
for layer in stateless_layers
if layer == Flux.normalise
stateless_gradtest(layer, x)
else
stateless_gradtest(layer, x, y)
end
end
for layer in stateless_layers_broadcasted
stateless_gradtest_broadcasted(layer, x, y)
end
2019-12-05 12:42:23 +00:00
end