Merge #901
901: Add option for "Same" padding to conv and pooling layers r=dhairyagandhi96 a=DrChainsaw

Fixes #813

This adds the possibility to set `pad=SamePad()` to automatically calculate the amount of padding to apply so that outputsize == inputsize (assuming stride == 1).

Comments on the API are more than welcome. I considered the following options:

* Call the type just `Same` and export it, but I was afraid of causing name collisions due to a too generic name.
* Call the type `Same` and not export it.
* Dispatch on the type instead of an instance (so that one can write `pad=Same` instead of `pad=Same()`).
* Supply a method instead of a type, giving a similar API as above.

Happy to change to any of the above or to anything else.

I don't think that same padding is common for pooling layers, but I added it for the sake of consistency. It is a separate commit so it can easily be removed if not wanted.

Co-authored-by: DrChainsaw <Christian.kyril.skarby@gmail.com>
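A minimal usage sketch of the `pad=SamePad()` option described above (illustrative only, not part of the PR's diff; it assumes a Flux version that includes this change):

```julia
using Flux

# 5×5 single-channel input, batch size 1
x = rand(Float32, 5, 5, 1, 1)

# With pad = SamePad() and stride 1, the spatial output size equals the input size.
c = Conv((3, 3), 1 => 4, relu; pad = SamePad())
size(c(x))   # (5, 5, 4, 1)

# For stride > 1 the spatial size becomes ceil(inputsize / stride).
c2 = Conv((3, 3), 1 => 4; pad = SamePad(), stride = 2)
size(c2(x))  # (3, 3, 4, 1)

# Pooling layers accept the same option.
p = MaxPool((2, 2); pad = SamePad())
size(p(x))   # (3, 3, 1, 1)
```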
Commit 9237cdaf5b

NEWS.md:
```diff
@@ -1,3 +1,6 @@
+# v0.10.5
+* Add option for [same padding](https://github.com/FluxML/Flux.jl/pull/901) to conv and pooling layers by setting `pad=SamePad()`.
+
 # v0.10.0
 * The default AD engine has switched from [Tracker to Zygote.jl](https://github.com/FluxML/Flux.jl/pull/669)
   - The dependency on Tracker.jl has been removed.
```
src/Flux.jl:
```diff
@@ -10,7 +10,7 @@ using Zygote: Params, @adjoint, gradient, pullback, @nograd

 export gradient

-export Chain, Dense, Maxout, RNN, LSTM, GRU, Conv, CrossCor, ConvTranspose,
+export Chain, Dense, Maxout, RNN, LSTM, GRU, SamePad, Conv, CrossCor, ConvTranspose,
        GlobalMaxPool, GlobalMeanPool, MaxPool, MeanPool, flatten,
        DepthwiseConv, Dropout, AlphaDropout, LayerNorm, BatchNorm, InstanceNorm, GroupNorm,
        SkipConnection, params, fmap, cpu, gpu, f32, f64, testmode!, trainmode!
```
src/layers/conv.jl:
```diff
@@ -7,6 +7,28 @@ _convtransoutdims(isize, ksize, ssize, dsize, pad) = (isize .- 1).*ssize .+ 1 .+
 expand(N, i::Tuple) = i
 expand(N, i::Integer) = ntuple(_ -> i, N)

+"""
+    SamePad
+
+Padding for convolutional layers will be calculated so that outputshape == inputshape when stride = 1.
+
+For stride > 1 the output shape depends on the type of convolution layer.
+"""
+struct SamePad end
+
+calc_padding(pad, k::NTuple{N,T}, dilation, stride) where {T,N} = expand(Val(2*N), pad)
+function calc_padding(::SamePad, k::NTuple{N,T}, dilation, stride) where {N,T}
+  # Ref: "A guide to convolution arithmetic for deep learning" https://arxiv.org/pdf/1603.07285
+
+  # Effective kernel size, including dilation
+  k_eff = @. k + (k - 1) * (dilation - 1)
+  # How much total padding needs to be applied?
+  pad_amt = @. k_eff - 1
+  # In case amount of padding is odd we need to apply different amounts to each side.
+  return Tuple(mapfoldl(i -> [ceil(Int, i/2), floor(Int, i/2)], vcat, pad_amt))
+end
+
 """
     Conv(size, in => out, σ = identity; init = glorot_uniform,
          stride = 1, pad = 0, dilation = 1)
```
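To make the padding arithmetic in `calc_padding(::SamePad, ...)` above concrete, here is a small standalone sketch of the same computation (`same_pad` is a made-up name for illustration, not a function from the diff):

```julia
# Standalone sketch of the "same" padding computation (illustrative only).
# Returns two entries per spatial dimension, one for each side of that dimension;
# when the total padding is odd, the extra pixel goes on the first side.
function same_pad(k::NTuple{N,Int}, dilation::NTuple{N,Int}) where N
    k_eff = k .+ (k .- 1) .* (dilation .- 1)   # effective kernel size with dilation
    pad_amt = k_eff .- 1                       # total padding per dimension
    Tuple(mapfoldl(i -> [ceil(Int, i / 2), floor(Int, i / 2)], vcat, pad_amt))
end

same_pad((3, 3), (1, 1))  # (1, 1, 1, 1): symmetric padding for an odd kernel
same_pad((4,), (1,))      # (2, 1): an even kernel needs one more pixel on one side
same_pad((3,), (2,))      # (2, 2): dilation 2 makes a 3-tap kernel effectively 5 wide
```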
```diff
@@ -18,6 +40,8 @@ Data should be stored in WHCN order (width, height, # channels, batch size).
 In other words, a 100×100 RGB image would be a `100×100×3×1` array,
 and a batch of 50 would be a `100×100×3×50` array.

+Use `pad=SamePad()` to apply padding so that outputsize == inputsize / stride.
+
 # Examples

 Apply a `Conv` layer to a 1-channel input using a 2×2 window size, giving us a
```
```diff
@@ -41,8 +65,8 @@ end
 function Conv(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
               stride = 1, pad = 0, dilation = 1) where {T,N}
   stride = expand(Val(N-2), stride)
-  pad = expand(Val(2*(N-2)), pad)
   dilation = expand(Val(N-2), dilation)
+  pad = calc_padding(pad, size(w)[1:N-2], dilation, stride)
   return Conv(σ, w, b, stride, pad, dilation)
 end
```
```diff
@@ -99,6 +123,8 @@ Standard convolutional transpose layer. `size` should be a tuple like `(2, 2)`.
 Data should be stored in WHCN order (width, height, # channels, batch size).
 In other words, a 100×100 RGB image would be a `100×100×3×1` array,
 and a batch of 50 would be a `100×100×3×50` array.

+Use `pad=SamePad()` to apply padding so that outputsize == stride * inputsize - stride + 1.
+
 """
 struct ConvTranspose{N,M,F,A,V}
   σ::F
```
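As a quick check of the size relation quoted in the `ConvTranspose` docstring above (illustrative numbers, not from the diff):

```julia
# ConvTranspose with pad = SamePad(): outputsize == stride * inputsize - stride + 1
inputsize, stride = 5, 2
outputsize = stride * inputsize - stride + 1   # 2*5 - 2 + 1 = 9
```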
```diff
@@ -112,8 +138,8 @@ end
 function ConvTranspose(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
                        stride = 1, pad = 0, dilation = 1) where {T,N}
   stride = expand(Val(N-2), stride)
-  pad = expand(Val(2*(N-2)), pad)
   dilation = expand(Val(N-2), dilation)
+  pad = calc_padding(pad, size(w)[1:N-2], dilation, stride)
   return ConvTranspose(σ, w, b, stride, pad, dilation)
 end
```
```diff
@@ -174,6 +200,8 @@ Note that `out` must be an integer multiple of `in`.
 Data should be stored in WHCN order (width, height, # channels, batch size).
 In other words, a 100×100 RGB image would be a `100×100×3×1` array,
 and a batch of 50 would be a `100×100×3×50` array.

+Use `pad=SamePad()` to apply padding so that outputsize == inputsize / stride.
+
 """
 struct DepthwiseConv{N,M,F,A,V}
   σ::F
```
```diff
@@ -187,8 +215,8 @@ end
 function DepthwiseConv(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
                        stride = 1, pad = 0, dilation = 1) where {T,N}
   stride = expand(Val(N-2), stride)
-  pad = expand(Val(2*(N-2)), pad)
   dilation = expand(Val(N-2), dilation)
+  pad = calc_padding(pad, size(w)[1:N-2], dilation, stride)
   return DepthwiseConv(σ, w, b, stride, pad, dilation)
 end
```
```diff
@@ -240,6 +268,8 @@ Data should be stored in WHCN order (width, height, # channels, batch size).
 In other words, a 100×100 RGB image would be a `100×100×3×1` array,
 and a batch of 50 would be a `100×100×3×50` array.

+Use `pad=SamePad()` to apply padding so that outputsize == inputsize / stride.
+
 # Examples

 Apply a `CrossCor` layer to a 1-channel input using a 2×2 window size, giving us a
```
```diff
@@ -263,8 +293,8 @@ end
 function CrossCor(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
                   stride = 1, pad = 0, dilation = 1) where {T,N}
   stride = expand(Val(N-2), stride)
-  pad = expand(Val(2*(N-2)), pad)
   dilation = expand(Val(N-2), dilation)
+  pad = calc_padding(pad, size(w)[1:N-2], dilation, stride)
   return CrossCor(σ, w, b, stride, pad, dilation)
 end
```
```diff
@@ -358,6 +388,9 @@ end
     MaxPool(k; pad = 0, stride = k)

 Max pooling layer. `k` is the size of the window for each dimension of the input.

+Use `pad=SamePad()` to apply padding so that outputsize == inputsize / stride.
+
 """
 struct MaxPool{N,M}
   k::NTuple{N,Int}
```
```diff
@@ -367,8 +400,7 @@ end

 function MaxPool(k::NTuple{N,Integer}; pad = 0, stride = k) where N
   stride = expand(Val(N), stride)
-  pad = expand(Val(2*N), pad)
-
+  pad = calc_padding(pad, k, 1, stride)
   return MaxPool(k, pad, stride)
 end
```
```diff
@@ -387,6 +419,8 @@ outdims(l::MaxPool{N}, isize) where N = output_size(PoolDims(_paddims(isize, (l.
     MeanPool(k; pad = 0, stride = k)

 Mean pooling layer. `k` is the size of the window for each dimension of the input.

+Use `pad=SamePad()` to apply padding so that outputsize == inputsize / stride.
+
 """
 struct MeanPool{N,M}
   k::NTuple{N,Int}
```
```diff
@@ -396,7 +430,7 @@ end

 function MeanPool(k::NTuple{N,Integer}; pad = 0, stride = k) where N
   stride = expand(Val(N), stride)
-  pad = expand(Val(2*N), pad)
+  pad = calc_padding(pad, k, 1, stride)
   return MeanPool(k, pad, stride)
 end
```
test/layers/conv.jl:
```diff
@@ -162,4 +162,28 @@ end
     @test Flux.outdims(m, (5, 5)) == (4, 4)
     m = MeanPool((2, 2); stride = 2, pad = 3)
     @test Flux.outdims(m, (5, 5)) == (5, 5)
   end
 end
+
+@testset "$ltype SamePad kernelsize $k" for ltype in (Conv, ConvTranspose, DepthwiseConv, CrossCor), k in ( (1,), (2,), (3,), (4,5), (6,7,8))
+  data = ones(Float32, (k .+ 3)..., 1,1)
+  l = ltype(k, 1=>1, pad=SamePad())
+  @test size(l(data)) == size(data)
+
+  l = ltype(k, 1=>1, pad=SamePad(), dilation = k .÷ 2)
+  @test size(l(data)) == size(data)
+
+  stride = 3
+  l = ltype(k, 1=>1, pad=SamePad(), stride = stride)
+  if ltype == ConvTranspose
+    @test size(l(data))[1:end-2] == stride .* size(data)[1:end-2] .- stride .+ 1
+  else
+    @test size(l(data))[1:end-2] == ceil.(Int, size(data)[1:end-2] ./ stride)
+  end
+end
+
+@testset "$ltype SamePad windowsize $k" for ltype in (MeanPool, MaxPool), k in ( (1,), (2,), (3,), (4,5), (6,7,8))
+  data = ones(Float32, (k .+ 3)..., 1,1)
+
+  l = ltype(k, pad=SamePad())
+  @test size(l(data))[1:end-2] == ceil.(Int, size(data)[1:end-2] ./ k)
+end
```