Convert OneHot CuArrays to dense CuArrays before passing to CUDNN methods

2018-02-19 17:32:15 -08:00 · 2018-02-19 17:32:15 -08:00 · e6f556411a
commit e6f556411a
parent 4035745f6e
3 changed files with 38 additions and 16 deletions
--- a/src/cuda/cudnn.jl
+++ b/src/cuda/cudnn.jl
@ -325,6 +325,10 @@ function (m::CuLSTM{T})(h::NTuple{2,CuParam{T}}, x::CuParam{T}) where T <: Union
  return (result[2], result[3]), result[1]
 end

+(m::CuRNN{T})(h::CuParam{T}, x) where T <: Union{Float32,Float64} = m(h, CuArray{T}(x))
+(m::CuGRU{T})(h::CuParam{T}, x) where T <: Union{Float32,Float64} = m(h, CuArray{T}(x))
+(m::CuLSTM{T})(h::NTuple{2,CuParam{T}}, x) where T <: Union{Float32,Float64} = m(h, CuArray{T}(x))
+
 function accum_transpose!(dst::CuArray, src::CuArray)
  function kernel(dst, src)
    I = @cuindex dst
--- a/src/onehot.jl
+++ b/src/onehot.jl
@ -36,6 +36,7 @@ adapt(T, xs::OneHotMatrix) = OneHotMatrix(xs.height, adapt(T, xs.data))
  import CuArrays: CuArray, cudaconvert
  Base.Broadcast._containertype(::Type{<:OneHotMatrix{<:CuArray}}) = CuArray
  cudaconvert(x::OneHotMatrix{<:CuArray}) = OneHotMatrix(x.height, cudaconvert(x.data))
+  (::Type{<:CuArray{T}})(x::OneHotMatrix{<:CuArray}) where {T} = broadcast(y -> T(y), x)
 end

 function onehot(l, labels)
--- a/test/cuda/cudnn.jl
+++ b/test/cuda/cudnn.jl
@ -4,28 +4,45 @@ info("Testing Flux/CUDNN")

@testset "RNN" begin
  @testset for R in [RNN, GRU, LSTM]
-    x = param(rand(10,5))
-    cux = cu(x)
    rnn = R(10, 5)
    curnn = mapleaves(cu, rnn)
-    y = (rnn(x); rnn(x))
-    cuy = (curnn(cux); curnn(cux))
+    @testset for batch_size in (1, 5)
+      Flux.reset!(rnn)
+      Flux.reset!(curnn)
+      x = batch_size == 1 ?
+        param(rand(10)) :
+        param(rand(10,batch_size))
+      cux = cu(x)
+      y = (rnn(x); rnn(x))
+      cuy = (curnn(cux); curnn(cux))

-    @test y.data ≈ collect(cuy.data)
-    @test haskey(Flux.CUDA.descs, curnn.cell)
+      @test y.data ≈ collect(cuy.data)
+      @test haskey(Flux.CUDA.descs, curnn.cell)

-    Δ = randn(size(y))
+      Δ = randn(size(y))

-    Flux.back!(y, Δ)
-    Flux.back!(cuy, cu(Δ))
+      Flux.back!(y, Δ)
+      Flux.back!(cuy, cu(Δ))

-    @test x.grad ≈ collect(cux.grad)
-    @test rnn.cell.Wi.grad ≈ collect(curnn.cell.Wi.grad)
-    @test rnn.cell.Wh.grad ≈ collect(curnn.cell.Wh.grad)
-    @test rnn.cell.b.grad ≈ collect(curnn.cell.b.grad)
-    @test rnn.cell.h.grad ≈ collect(curnn.cell.h.grad)
-    if isdefined(rnn.cell, :c)
-      @test rnn.cell.c.grad ≈ collect(curnn.cell.c.grad)
+      @test x.grad ≈ collect(cux.grad)
+      @test rnn.cell.Wi.grad ≈ collect(curnn.cell.Wi.grad)
+      @test rnn.cell.Wh.grad ≈ collect(curnn.cell.Wh.grad)
+      @test rnn.cell.b.grad ≈ collect(curnn.cell.b.grad)
+      @test rnn.cell.h.grad ≈ collect(curnn.cell.h.grad)
+      if isdefined(rnn.cell, :c)
+        @test rnn.cell.c.grad ≈ collect(curnn.cell.c.grad)
+      end
+
+      Flux.reset!(rnn)
+      Flux.reset!(curnn)
+      ohx = batch_size == 1 ?
+        Flux.onehot(rand(1:10), 1:10) :
+        Flux.onehotbatch(rand(1:10, batch_size), 1:10)
+      cuohx = cu(ohx)
+      y = (rnn(ohx); rnn(ohx))
+      cuy = (curnn(cuohx); curnn(cuohx))
+
+      @test y.data ≈ collect(cuy.data)
    end
  end
 end