901: Add option for "Same" padding to conv and pooling layers r=dhairyagandhi96 a=DrChainsaw

Fixes #813 

This adds the option to set `pad=SamePad()` to automatically calculate the amount of padding to apply so that outputsize == inputsize (assuming stride == 1).

Comments on API more than welcome. I considered the following options:

* Call the type just Same and export it, but I was afraid such a generic name would cause collisions
* Call the type Same and not export it
* Dispatch on type instead of instance (so that one can type pad=Same instead of pad=Same())
* Supply a method instead of a type, giving a similar API to the one above.

Happy to change to any of the above or to anything else.

I don't think same padding is common for pooling layers, but I added it for the sake of consistency. It is in a separate commit so it can easily be removed if not wanted.
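To illustrate the intended usage, here is a minimal sketch (the input shape and layer sizes are arbitrary examples, not part of this PR). When the total padding is odd, slightly more is applied to one side, e.g. a 4-wide kernel needs 3 columns of padding, split as 2 and 1:

```julia
using Flux

x = rand(Float32, 28, 28, 3, 1)                    # WHCN input: 28×28, 3 channels, batch of 1

# stride == 1: spatial output size matches the input
c = Conv((3, 3), 3 => 8, relu, pad = SamePad())
size(c(x))                                         # (28, 28, 8, 1)

# stride > 1: spatial dims become ceil(inputsize / stride)
c2 = Conv((3, 3), 3 => 8, pad = SamePad(), stride = 2)
size(c2(x))                                        # (14, 14, 8, 1)

# pooling layers accept it too (stride defaults to the window size)
p = MaxPool((2, 2), pad = SamePad())
size(p(x))                                         # (14, 14, 3, 1)
```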

Co-authored-by: DrChainsaw <Christian.kyril.skarby@gmail.com>
bors[bot] committed 2020-04-25 04:39:18 +00:00 via GitHub, commit 9237cdaf5b
4 changed files with 70 additions and 9 deletions


@@ -1,3 +1,6 @@
+# v0.10.5
+* Add option for [same padding](https://github.com/FluxML/Flux.jl/pull/901) to conv and pooling layers by setting `pad=SamePad()`.
 # v0.10.0
 * The default AD engine has switched from [Tracker to Zygote.jl](https://github.com/FluxML/Flux.jl/pull/669)
   - The dependency on Tracker.jl has been removed.


@@ -10,7 +10,7 @@ using Zygote: Params, @adjoint, gradient, pullback, @nograd
 export gradient
-export Chain, Dense, Maxout, RNN, LSTM, GRU, Conv, CrossCor, ConvTranspose,
+export Chain, Dense, Maxout, RNN, LSTM, GRU, SamePad, Conv, CrossCor, ConvTranspose,
        GlobalMaxPool, GlobalMeanPool, MaxPool, MeanPool, flatten,
        DepthwiseConv, Dropout, AlphaDropout, LayerNorm, BatchNorm, InstanceNorm, GroupNorm,
        SkipConnection, params, fmap, cpu, gpu, f32, f64, testmode!, trainmode!


@@ -7,6 +7,28 @@ _convtransoutdims(isize, ksize, ssize, dsize, pad) = (isize .- 1).*ssize .+ 1 .+
 expand(N, i::Tuple) = i
 expand(N, i::Integer) = ntuple(_ -> i, N)
+
+"""
+    SamePad
+
+Padding for convolutional layers will be calculated so that outputshape == inputshape when stride = 1.
+
+For stride > 1 the output shape depends on the type of convolution layer.
+"""
+struct SamePad end
+
+calc_padding(pad, k::NTuple{N,T}, dilation, stride) where {T,N}= expand(Val(2*N), pad)
+function calc_padding(::SamePad, k::NTuple{N,T}, dilation, stride) where {N,T}
+  #Ref: "A guide to convolution arithmetic for deep learning" https://arxiv.org/pdf/1603.07285
+
+  # Effective kernel size, including dilation
+  k_eff = @. k + (k - 1) * (dilation - 1)
+  # How much total padding needs to be applied?
+  pad_amt = @. k_eff - 1
+  # In case amount of padding is odd we need to apply different amounts to each side.
+  return Tuple(mapfoldl(i -> [ceil(Int, i/2), floor(Int, i/2)], vcat, pad_amt))
+end
+
 """
     Conv(size, in => out, σ = identity; init = glorot_uniform,
          stride = 1, pad = 0, dilation = 1)
@@ -18,6 +40,8 @@ Data should be stored in WHCN order (width, height, # channels, batch size).
 In other words, a 100×100 RGB image would be a `100×100×3×1` array,
 and a batch of 50 would be a `100×100×3×50` array.
+
+Use `pad=SamePad()` to apply padding so that outputsize == inputsize / stride.
 # Examples
 Apply a `Conv` layer to a 1-channel input using a 2×2 window size, giving us a
@@ -41,8 +65,8 @@ end
 function Conv(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
               stride = 1, pad = 0, dilation = 1) where {T,N}
   stride = expand(Val(N-2), stride)
-  pad = expand(Val(2*(N-2)), pad)
   dilation = expand(Val(N-2), dilation)
+  pad = calc_padding(pad, size(w)[1:N-2], dilation, stride)
   return Conv(σ, w, b, stride, pad, dilation)
 end
@@ -99,6 +123,8 @@ Standard convolutional transpose layer. `size` should be a tuple like `(2, 2)`.
 Data should be stored in WHCN order (width, height, # channels, batch size).
 In other words, a 100×100 RGB image would be a `100×100×3×1` array,
 and a batch of 50 would be a `100×100×3×50` array.
+
+Use `pad=SamePad()` to apply padding so that outputsize == stride * inputsize - stride + 1.
 """
 struct ConvTranspose{N,M,F,A,V}
   σ::F
@@ -112,8 +138,8 @@ end
 function ConvTranspose(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
                        stride = 1, pad = 0, dilation = 1) where {T,N}
   stride = expand(Val(N-2), stride)
-  pad = expand(Val(2*(N-2)), pad)
   dilation = expand(Val(N-2), dilation)
+  pad = calc_padding(pad, size(w)[1:N-2], dilation, stride)
   return ConvTranspose(σ, w, b, stride, pad, dilation)
 end
@@ -174,6 +200,8 @@ Note that `out` must be an integer multiple of `in`.
 Data should be stored in WHCN order (width, height, # channels, batch size).
 In other words, a 100×100 RGB image would be a `100×100×3×1` array,
 and a batch of 50 would be a `100×100×3×50` array.
+
+Use `pad=SamePad()` to apply padding so that outputsize == inputsize / stride.
 """
 struct DepthwiseConv{N,M,F,A,V}
   σ::F
@@ -187,8 +215,8 @@ end
 function DepthwiseConv(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
                        stride = 1, pad = 0, dilation = 1) where {T,N}
   stride = expand(Val(N-2), stride)
-  pad = expand(Val(2*(N-2)), pad)
   dilation = expand(Val(N-2), dilation)
+  pad = calc_padding(pad, size(w)[1:N-2], dilation, stride)
   return DepthwiseConv(σ, w, b, stride, pad, dilation)
 end
@@ -240,6 +268,8 @@ Data should be stored in WHCN order (width, height, # channels, batch size).
 In other words, a 100×100 RGB image would be a `100×100×3×1` array,
 and a batch of 50 would be a `100×100×3×50` array.
+
+Use `pad=SamePad()` to apply padding so that outputsize == inputsize / stride.
 # Examples
 Apply a `CrossCor` layer to a 1-channel input using a 2×2 window size, giving us a
@@ -263,8 +293,8 @@ end
 function CrossCor(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
                   stride = 1, pad = 0, dilation = 1) where {T,N}
   stride = expand(Val(N-2), stride)
-  pad = expand(Val(2*(N-2)), pad)
   dilation = expand(Val(N-2), dilation)
+  pad = calc_padding(pad, size(w)[1:N-2], dilation, stride)
   return CrossCor(σ, w, b, stride, pad, dilation)
 end
@@ -358,6 +388,9 @@ end
     MaxPool(k; pad = 0, stride = k)
 Max pooling layer. `k` is the size of the window for each dimension of the input.
+
+Use `pad=SamePad()` to apply padding so that outputsize == inputsize / stride.
 """
 struct MaxPool{N,M}
   k::NTuple{N,Int}
@@ -367,8 +400,7 @@ end
 function MaxPool(k::NTuple{N,Integer}; pad = 0, stride = k) where N
   stride = expand(Val(N), stride)
-  pad = expand(Val(2*N), pad)
+  pad = calc_padding(pad, k, 1, stride)
   return MaxPool(k, pad, stride)
 end
@@ -387,6 +419,8 @@ outdims(l::MaxPool{N}, isize) where N = output_size(PoolDims(_paddims(isize, (l.
     MeanPool(k; pad = 0, stride = k)
 Mean pooling layer. `k` is the size of the window for each dimension of the input.
+
+Use `pad=SamePad()` to apply padding so that outputsize == inputsize / stride.
 """
 struct MeanPool{N,M}
   k::NTuple{N,Int}
@@ -396,7 +430,7 @@ end
 function MeanPool(k::NTuple{N,Integer}; pad = 0, stride = k) where N
   stride = expand(Val(N), stride)
-  pad = expand(Val(2*N), pad)
+  pad = calc_padding(pad, k, 1, stride)
   return MeanPool(k, pad, stride)
 end


@@ -163,3 +163,27 @@ end
   m = MeanPool((2, 2); stride = 2, pad = 3)
   @test Flux.outdims(m, (5, 5)) == (5, 5)
 end
+
+@testset "$ltype SamePad kernelsize $k" for ltype in (Conv, ConvTranspose, DepthwiseConv, CrossCor), k in ( (1,), (2,), (3,), (4,5), (6,7,8))
+  data = ones(Float32, (k .+ 3)..., 1,1)
+  l = ltype(k, 1=>1, pad=SamePad())
+  @test size(l(data)) == size(data)
+
+  l = ltype(k, 1=>1, pad=SamePad(), dilation = k .÷ 2 .+ 1)
+  @test size(l(data)) == size(data)
+
+  stride = 3
+  l = ltype(k, 1=>1, pad=SamePad(), stride = stride)
+  if ltype == ConvTranspose
+    @test size(l(data))[1:end-2] == stride .* size(data)[1:end-2] .- stride .+ 1
+  else
+    @test size(l(data))[1:end-2] == ceil.(Int, size(data)[1:end-2] ./ stride)
+  end
+end
+
+@testset "$ltype SamePad windowsize $k" for ltype in (MeanPool, MaxPool), k in ( (1,), (2,), (3,), (4,5), (6,7,8))
+  data = ones(Float32, (k .+ 3)..., 1,1)
+  l = ltype(k, pad=SamePad())
+  @test size(l(data))[1:end-2] == ceil.(Int, size(data)[1:end-2] ./ k)
+end