1.0 fix for conv transpose

Tejan Karmali 2018-09-08 15:44:06 -04:00
parent a32c8a2e60
commit a71ee386d0
4 changed files with 63 additions and 7 deletions

src/Flux.jl

@@ -6,7 +6,7 @@ using Base: tail
 using MacroTools, Juno, Requires, Reexport, Statistics, Random
 using MacroTools: @forward
 
-export Chain, Dense, RNN, LSTM, GRU, Conv, MaxPool, MeanPool,
+export Chain, Dense, RNN, LSTM, GRU, Conv, ConvTranspose, MaxPool, MeanPool,
   DepthwiseConv, Dropout, LayerNorm, BatchNorm,
   params, mapleaves, cpu, gpu

src/layers/conv.jl

@@ -1,4 +1,4 @@
-using NNlib: conv, depthwiseconv
+using NNlib: conv, ∇conv_data, depthwiseconv
 @generated sub2(::Val{N}) where N = :(Val($(N-2)))
@@ -51,6 +51,7 @@ function Base.show(io::IO, l::Conv)
   print(io, ")")
 end
 (a::Conv{<:Any,<:Any,W})(x::AbstractArray{T}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} =
   invoke(a, Tuple{AbstractArray}, x)
@@ -77,6 +78,7 @@ struct DepthwiseConv{N,F,A,V}
   bias::V
   stride::NTuple{N,Int}
   pad::NTuple{N,Int}
+  dilation::NTuple{N,Int}
 end
 DepthwiseConv(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
@@ -108,6 +110,46 @@ function Base.show(io::IO, l::DepthwiseConv)
   print(io, ")")
 end
"""
ConvTranspose(size, in=>out)
ConvTranspose(size, in=>out, relu)
Standard convolutional transpose layer. `size` should be a tuple like `(2, 2)`.
`in` and `out` specify the number of input and output channels respectively.
Data should be stored in WHCN order. In other words, a 100×100 RGB image would
be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array.
Takes the keyword arguments `pad`, `stride` and `dilation`.
"""
+struct ConvTranspose{N,F,A,V}
+  σ::F
+  weight::A
+  bias::V
+  stride::NTuple{N,Int}
+  pad::NTuple{N,Int}
+  dilation::NTuple{N,Int}
+end
+
+ConvTranspose(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
+              stride = 1, pad = 0, dilation = 1) where {T,N} =
+  ConvTranspose(σ, w, b, expand.(sub2(Val(N)), (stride, pad, dilation))...)
+
+ConvTranspose(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; init = initn,
+              stride = 1, pad = 0, dilation = 1) where N =
+  ConvTranspose(param(init(k..., reverse(ch)...)), param(zeros(ch[2])), σ,
+                stride = stride, pad = pad, dilation = dilation)
+
+@treelike ConvTranspose
+
+function (c::ConvTranspose)(x)
+  # ndims(x) == ndims(c.weight)-1 && return squeezebatch(c(reshape(x, size(x)..., 1)))
+  σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1)
+  σ.(∇conv_data(x, c.weight, stride = c.stride, pad = c.pad, dilation = c.dilation) .+ b)
+end
+function Base.show(io::IO, l::ConvTranspose)
+  print(io, "ConvTranspose(", size(l.weight)[1:ndims(l.weight)-2])
+  print(io, ", ", size(l.weight, ndims(l.weight)), "=>", size(l.weight, ndims(l.weight)-1))
+  l.σ == identity || print(io, ", ", l.σ)
+  print(io, ")")
+end
"""
MaxPool(k)
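
For orientation, here is a minimal usage sketch of the new layer (not part of the commit). Output sizes assume the usual transposed-convolution relation (W - 1)·stride - 2·pad + k per spatial dimension:

# hypothetical smoke test for ConvTranspose (illustration only)
using Flux

ct = ConvTranspose((2, 2), 3=>8, relu)  # 2×2 kernel, 3 channels in, 8 out;
                                        # scalar stride/pad/dilation keywords are
                                        # expanded to per-dimension tuples
x = rand(10, 10, 3, 16)                 # WHCN: 10×10 images, 3 channels, batch of 16
y = ct(x)
size(y)                                 # (11, 11, 8, 16): (10-1)*1 - 2*0 + 2 = 11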

src/tracker/array.jl

@@ -356,7 +356,7 @@ x::TrackedVector * y::TrackedVector = track(*, x, y)
 # NNlib
 using NNlib
-import NNlib: softmax, ∇softmax, logsoftmax, ∇logsoftmax, conv, depthwiseconv, maxpool, meanpool
+import NNlib: softmax, ∇softmax, logsoftmax, ∇logsoftmax, conv, ∇conv_data, depthwiseconv, maxpool, meanpool
 softmax(xs::TrackedArray) = track(softmax, xs)
@@ -386,6 +386,16 @@ conv(x::TrackedArray, w::AbstractArray; kw...) = track(conv, x, w; kw...)
     (NNlib.∇conv_data(data.((Δ, x, w))...; kw...),
      NNlib.∇conv_filter(data.((Δ, x, w))...; kw...)))
+
+∇conv_data(x::TrackedArray, w::TrackedArray; kw...) = track(∇conv_data, x, w; kw...)
+∇conv_data(x::AbstractArray, w::TrackedArray; kw...) = track(∇conv_data, x, w; kw...)
+∇conv_data(x::TrackedArray, w::AbstractArray; kw...) = track(∇conv_data, x, w; kw...)
+
+@grad ∇conv_data(x, w; kw...) =
+  ∇conv_data(data(x), data(w); kw...),
+    Δ -> nobacksies(:conv,
+      (NNlib.conv(data.((x, Δ, w))...; kw...),
+       NNlib.∇conv_filter(data.((x, Δ, w))...; kw...)))
 
 maxpool(x::TrackedArray, k; kw...) = track(maxpool, x, k; kw...)
 
 @grad function maxpool(x, k; kw...)
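
The new rule mirrors the existing conv gradient: the pullback reuses NNlib.conv for the data gradient and NNlib.∇conv_filter for the weight gradient. A hypothetical smoke test (not part of the commit; it assumes the two-argument NNlib.∇conv_data form used by the forward passes above):

# illustration only: exercising the tracked ∇conv_data defined above
using Flux, NNlib
using Flux.Tracker: back!, grad

x = Flux.param(rand(10, 10, 3, 1))   # plays the role of conv's output sensitivity
w = Flux.param(randn(2, 2, 4, 3))    # (k..., out, in), as ConvTranspose stores it
y = NNlib.∇conv_data(x, w)           # transposed convolution, size (11, 11, 4, 1)
back!(sum(y))                        # invokes the @grad rule above
size(grad(x)) == size(x)             # data gradient, computed via conv
size(grad(w)) == size(w)             # weight gradient, via ∇conv_filter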

test/tracker.jl

@@ -1,7 +1,7 @@
 using Flux
 using Flux.Tracker, Test, NNlib
 using Flux.Tracker: TrackedReal, gradcheck, grad, checkpoint
-using NNlib: conv, depthwiseconv
+using NNlib: conv, ∇conv_data, depthwiseconv
 using Printf: @sprintf
 using LinearAlgebra: diagm, dot, LowerTriangular, norm
 using Statistics: mean, std
@@ -182,12 +182,16 @@ end
     2y + x
   end
 
-@test gradtest(conv, rand(10, 3, 2), randn(Float64,2, 3, 2))
-@test gradtest(conv, rand(10, 10, 3, 2), randn(Float64,2, 2, 3, 2))
-@test gradtest(conv, rand(10, 10, 10, 3, 2), randn(Float64,2, 2, 2, 3, 2))
+@test gradtest(conv, rand(10, 3, 2), randn(Float64, 2, 3, 2))
+@test gradtest(conv, rand(10, 10, 3, 2), randn(Float64, 2, 2, 3, 2))
+@test gradtest(conv, rand(10, 10, 10, 3, 2), randn(Float64, 2, 2, 2, 3, 2))
 @test gradtest(depthwiseconv, rand(10,10,3,2), randn(2, 2, 2, 3))
+@test gradtest(∇conv_data, rand(10, 3, 2), randn(Float64, 2, 2, 3))
+@test gradtest(∇conv_data, rand(10, 10, 3, 2), randn(Float64, 2, 2, 2, 3))
+@test gradtest(∇conv_data, rand(10, 10, 10, 3, 2), randn(Float64, 2, 2, 2, 2, 3))
 @test gradtest(x -> maxpool(x, (2,2)), rand(10, 10, 3, 2))
 @test gradtest(x -> maxpool(x, (2,2,2)), rand(10, 10, 10, 3, 2))
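
For reference, gradtest is a helper defined earlier in test/tracker.jl (outside this diff). Paraphrased, it compares Tracker's gradient against gradcheck's finite-difference estimate, roughly along these lines:

# paraphrased sketch, not the verbatim definition
gradtest(f, xs::AbstractArray...) = gradcheck((xs...) -> sum(sin.(f(xs...))), xs...)

The broadcast of sin reduces the output to a scalar loss whose gradient is nondegenerate at random test points, so the new ∇conv_data rule is checked in 1-D, 2-D, and 3-D above.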