Merge #901
901: Add option for "Same" padding to conv and pooling layers r=dhairyagandhi96 a=DrChainsaw

Fixes #813

This adds the possibility to set `pad=SamePad()` to automatically calculate the amount of padding to apply so that outputsize == inputsize (assuming stride == 1).

Comments on the API are more than welcome. I considered the following options:

* Call the type just `Same` and export it, but I was afraid of causing name collisions due to a too generic name.
* Call the type `Same` and not export it.
* Dispatch on the type instead of an instance (so that one can write `pad=Same` instead of `pad=Same()`).
* Supply a method instead of a type, giving a similar API as above.

Happy to change to any of the above or to anything else.

I don't think that same padding is common for pooling layers, but I added it for the sake of consistency. It is a separate commit so it can easily be removed if not wanted.

Co-authored-by: DrChainsaw <Christian.kyril.skarby@gmail.com>
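A minimal usage sketch of the `pad=SamePad()` option described above (illustrative only, not part of the PR's diff; it assumes a Flux version that includes this change):

```julia
using Flux

# 5×5 single-channel input, batch size 1
x = rand(Float32, 5, 5, 1, 1)

# With pad = SamePad() and stride 1, the spatial output size equals the input size.
c = Conv((3, 3), 1 => 4, relu; pad = SamePad())
size(c(x))   # (5, 5, 4, 1)

# For stride > 1 the spatial size becomes ceil(inputsize / stride).
c2 = Conv((3, 3), 1 => 4; pad = SamePad(), stride = 2)
size(c2(x))  # (3, 3, 4, 1)

# Pooling layers accept the same option.
p = MaxPool((2, 2); pad = SamePad())
size(p(x))   # (3, 3, 1, 1)
```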
Commit 9237cdaf5b

NEWS.md:
```diff
@@ -1,3 +1,6 @@
+# v0.10.5
+* Add option for [same padding](https://github.com/FluxML/Flux.jl/pull/901) to conv and pooling layers by setting `pad=SamePad()`.
+
 # v0.10.0
 * The default AD engine has switched from [Tracker to Zygote.jl](https://github.com/FluxML/Flux.jl/pull/669)
   - The dependency on Tracker.jl has been removed.
```
src/Flux.jl:
```diff
@@ -10,7 +10,7 @@ using Zygote: Params, @adjoint, gradient, pullback, @nograd

 export gradient

-export Chain, Dense, Maxout, RNN, LSTM, GRU, Conv, CrossCor, ConvTranspose,
+export Chain, Dense, Maxout, RNN, LSTM, GRU, SamePad, Conv, CrossCor, ConvTranspose,
        GlobalMaxPool, GlobalMeanPool, MaxPool, MeanPool, flatten,
        DepthwiseConv, Dropout, AlphaDropout, LayerNorm, BatchNorm, InstanceNorm, GroupNorm,
        SkipConnection, params, fmap, cpu, gpu, f32, f64, testmode!, trainmode!
```
src/layers/conv.jl:
```diff
@@ -7,6 +7,28 @@ _convtransoutdims(isize, ksize, ssize, dsize, pad) = (isize .- 1).*ssize .+ 1 .+
 expand(N, i::Tuple) = i
 expand(N, i::Integer) = ntuple(_ -> i, N)

+"""
+    SamePad
+
+Padding for convolutional layers will be calculated so that outputshape == inputshape when stride = 1.
+
+For stride > 1 the output shape depends on the type of convolution layer.
+"""
+struct SamePad end
+
+calc_padding(pad, k::NTuple{N,T}, dilation, stride) where {T,N} = expand(Val(2*N), pad)
+function calc_padding(::SamePad, k::NTuple{N,T}, dilation, stride) where {N,T}
+  # Ref: "A guide to convolution arithmetic for deep learning" https://arxiv.org/pdf/1603.07285
+
+  # Effective kernel size, including dilation
+  k_eff = @. k + (k - 1) * (dilation - 1)
+  # How much total padding needs to be applied?
+  pad_amt = @. k_eff - 1
+  # In case amount of padding is odd we need to apply different amounts to each side.
+  return Tuple(mapfoldl(i -> [ceil(Int, i/2), floor(Int, i/2)], vcat, pad_amt))
+end
+
 """
     Conv(size, in => out, σ = identity; init = glorot_uniform,
          stride = 1, pad = 0, dilation = 1)
```
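To make the padding arithmetic in `calc_padding(::SamePad, ...)` above concrete, here is a small standalone sketch of the same computation (`same_pad` is a made-up name for illustration, not a function from the diff):

```julia
# Standalone sketch of the "same" padding computation (illustrative only).
# Returns two entries per spatial dimension, one for each side of that dimension;
# when the total padding is odd, the extra pixel goes on the first side.
function same_pad(k::NTuple{N,Int}, dilation::NTuple{N,Int}) where N
    k_eff = k .+ (k .- 1) .* (dilation .- 1)   # effective kernel size with dilation
    pad_amt = k_eff .- 1                       # total padding per dimension
    Tuple(mapfoldl(i -> [ceil(Int, i / 2), floor(Int, i / 2)], vcat, pad_amt))
end

same_pad((3, 3), (1, 1))  # (1, 1, 1, 1): symmetric padding for an odd kernel
same_pad((4,), (1,))      # (2, 1): an even kernel needs one more pixel on one side
same_pad((3,), (2,))      # (2, 2): dilation 2 makes a 3-tap kernel effectively 5 wide
```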
```diff
@@ -18,6 +40,8 @@ Data should be stored in WHCN order (width, height, # channels, batch size).
 In other words, a 100×100 RGB image would be a `100×100×3×1` array,
 and a batch of 50 would be a `100×100×3×50` array.

+Use `pad=SamePad()` to apply padding so that outputsize == inputsize / stride.
+
 # Examples

 Apply a `Conv` layer to a 1-channel input using a 2×2 window size, giving us a
```
```diff
@@ -41,8 +65,8 @@ end
 function Conv(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
               stride = 1, pad = 0, dilation = 1) where {T,N}
   stride = expand(Val(N-2), stride)
-  pad = expand(Val(2*(N-2)), pad)
   dilation = expand(Val(N-2), dilation)
+  pad = calc_padding(pad, size(w)[1:N-2], dilation, stride)
   return Conv(σ, w, b, stride, pad, dilation)
 end
```
```diff
@@ -99,6 +123,8 @@ Standard convolutional transpose layer. `size` should be a tuple like `(2, 2)`.
 Data should be stored in WHCN order (width, height, # channels, batch size).
 In other words, a 100×100 RGB image would be a `100×100×3×1` array,
 and a batch of 50 would be a `100×100×3×50` array.

+Use `pad=SamePad()` to apply padding so that outputsize == stride * inputsize - stride + 1.
+
 """
 struct ConvTranspose{N,M,F,A,V}
   σ::F
```
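As a quick check of the size relation quoted in the `ConvTranspose` docstring above (illustrative numbers, not from the diff):

```julia
# ConvTranspose with pad = SamePad(): outputsize == stride * inputsize - stride + 1
inputsize, stride = 5, 2
outputsize = stride * inputsize - stride + 1   # 2*5 - 2 + 1 = 9
```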
```diff
@@ -112,8 +138,8 @@ end
 function ConvTranspose(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
                        stride = 1, pad = 0, dilation = 1) where {T,N}
   stride = expand(Val(N-2), stride)
-  pad = expand(Val(2*(N-2)), pad)
   dilation = expand(Val(N-2), dilation)
+  pad = calc_padding(pad, size(w)[1:N-2], dilation, stride)
   return ConvTranspose(σ, w, b, stride, pad, dilation)
 end
```
```diff
@@ -174,6 +200,8 @@ Note that `out` must be an integer multiple of `in`.
 Data should be stored in WHCN order (width, height, # channels, batch size).
 In other words, a 100×100 RGB image would be a `100×100×3×1` array,
 and a batch of 50 would be a `100×100×3×50` array.

+Use `pad=SamePad()` to apply padding so that outputsize == inputsize / stride.
+
 """
 struct DepthwiseConv{N,M,F,A,V}
   σ::F
```
```diff
@@ -187,8 +215,8 @@ end
 function DepthwiseConv(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
                        stride = 1, pad = 0, dilation = 1) where {T,N}
   stride = expand(Val(N-2), stride)
-  pad = expand(Val(2*(N-2)), pad)
   dilation = expand(Val(N-2), dilation)
+  pad = calc_padding(pad, size(w)[1:N-2], dilation, stride)
   return DepthwiseConv(σ, w, b, stride, pad, dilation)
 end
```
```diff
@@ -240,6 +268,8 @@ Data should be stored in WHCN order (width, height, # channels, batch size).
 In other words, a 100×100 RGB image would be a `100×100×3×1` array,
 and a batch of 50 would be a `100×100×3×50` array.

+Use `pad=SamePad()` to apply padding so that outputsize == inputsize / stride.
+
 # Examples

 Apply a `CrossCor` layer to a 1-channel input using a 2×2 window size, giving us a
```
```diff
@@ -263,8 +293,8 @@ end
 function CrossCor(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
                   stride = 1, pad = 0, dilation = 1) where {T,N}
   stride = expand(Val(N-2), stride)
-  pad = expand(Val(2*(N-2)), pad)
   dilation = expand(Val(N-2), dilation)
+  pad = calc_padding(pad, size(w)[1:N-2], dilation, stride)
   return CrossCor(σ, w, b, stride, pad, dilation)
 end
```
```diff
@@ -358,6 +388,9 @@ end
     MaxPool(k; pad = 0, stride = k)

 Max pooling layer. `k` is the size of the window for each dimension of the input.

+Use `pad=SamePad()` to apply padding so that outputsize == inputsize / stride.
+
 """
 struct MaxPool{N,M}
   k::NTuple{N,Int}
```
```diff
@@ -367,8 +400,7 @@ end

 function MaxPool(k::NTuple{N,Integer}; pad = 0, stride = k) where N
   stride = expand(Val(N), stride)
-  pad = expand(Val(2*N), pad)
-
+  pad = calc_padding(pad, k, 1, stride)
   return MaxPool(k, pad, stride)
 end
```
```diff
@@ -387,6 +419,8 @@ outdims(l::MaxPool{N}, isize) where N = output_size(PoolDims(_paddims(isize, (l.
     MeanPool(k; pad = 0, stride = k)

 Mean pooling layer. `k` is the size of the window for each dimension of the input.

+Use `pad=SamePad()` to apply padding so that outputsize == inputsize / stride.
+
 """
 struct MeanPool{N,M}
   k::NTuple{N,Int}
```
```diff
@@ -396,7 +430,7 @@ end

 function MeanPool(k::NTuple{N,Integer}; pad = 0, stride = k) where N
   stride = expand(Val(N), stride)
-  pad = expand(Val(2*N), pad)
+  pad = calc_padding(pad, k, 1, stride)
   return MeanPool(k, pad, stride)
 end
```
test/layers/conv.jl:
```diff
@@ -162,4 +162,28 @@ end
     @test Flux.outdims(m, (5, 5)) == (4, 4)
     m = MeanPool((2, 2); stride = 2, pad = 3)
     @test Flux.outdims(m, (5, 5)) == (5, 5)
   end
 end
+
+@testset "$ltype SamePad kernelsize $k" for ltype in (Conv, ConvTranspose, DepthwiseConv, CrossCor), k in ( (1,), (2,), (3,), (4,5), (6,7,8))
+  data = ones(Float32, (k .+ 3)..., 1,1)
+  l = ltype(k, 1=>1, pad=SamePad())
+  @test size(l(data)) == size(data)
+
+  l = ltype(k, 1=>1, pad=SamePad(), dilation = k .÷ 2)
+  @test size(l(data)) == size(data)
+
+  stride = 3
+  l = ltype(k, 1=>1, pad=SamePad(), stride = stride)
+  if ltype == ConvTranspose
+    @test size(l(data))[1:end-2] == stride .* size(data)[1:end-2] .- stride .+ 1
+  else
+    @test size(l(data))[1:end-2] == ceil.(Int, size(data)[1:end-2] ./ stride)
+  end
+end
+
+@testset "$ltype SamePad windowsize $k" for ltype in (MeanPool, MaxPool), k in ( (1,), (2,), (3,), (4,5), (6,7,8))
+  data = ones(Float32, (k .+ 3)..., 1,1)
+
+  l = ltype(k, pad=SamePad())
+  @test size(l(data))[1:end-2] == ceil.(Int, size(data)[1:end-2] ./ k)
+end
```